Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-07 09:34:22 +00:00)
Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
README.md

````diff
@@ -12,6 +12,7 @@
 🔐 **API Key Authentication**: Separate keys for management vs inference access
 📊 **Instance Monitoring**: Health checks, auto-restart, log management
 ⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
 💾 **State Persistence**: Ensure instances remain intact across server restarts
 
 
@@ -116,6 +117,10 @@ instances:
   default_auto_restart: true # Auto-restart new instances by default
   default_max_restarts: 3 # Max restarts for new instances
   default_restart_delay: 5 # Restart delay (seconds) for new instances
+  default_on_demand_start: true # Default on-demand start setting
+  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
+  timeout_check_interval: 5 # Idle instance timeout check in minutes
 
 auth:
   require_inference_auth: true # Require auth for inference endpoints
@@ -183,6 +188,8 @@ instances:
   default_auto_restart: true # Default auto-restart setting
   default_max_restarts: 3 # Default maximum restart attempts
   default_restart_delay: 5 # Default restart delay in seconds
+  default_on_demand_start: true # Default on-demand start setting
+  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
   timeout_check_interval: 5 # Default instance timeout check interval in minutes
 ```
 
@@ -197,8 +204,11 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
+- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
 - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
 
 #### Authentication Configuration
 
 ```yaml
````
```diff
@@ -67,6 +67,12 @@ type InstancesConfig struct {
 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
 
+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+
 	// Interval for checking instance timeouts (in minutes)
 	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }
@@ -111,7 +117,9 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
-			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
+			DefaultOnDemandStart: true,
+			OnDemandStartTimeout: 120, // 2 minutes
+			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth: true,
@@ -221,6 +229,16 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
 	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
 		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
 			cfg.Instances.TimeoutCheckInterval = minutes
```
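For reference, a minimal, self-contained sketch of the precedence the hunks above imply (built-in default, then environment override) for the two new settings. The field names and `LLAMACTL_*` variable names come from the diff; the standalone `main` wrapper and trimmed-down struct are illustrative only, not the actual llamactl package.

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// Illustrative subset of the instances configuration; only the two
// on-demand fields from the diff above are reproduced here.
type instancesConfig struct {
	DefaultOnDemandStart bool
	OnDemandStartTimeout int
}

func main() {
	// Built-in defaults, matching the values set in LoadConfig above.
	cfg := instancesConfig{
		DefaultOnDemandStart: true,
		OnDemandStartTimeout: 120, // 2 minutes
	}

	// Environment variables override the defaults, mirroring loadEnvVars:
	// unparsable values are ignored and the previous value is kept.
	if v := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); v != "" {
		if b, err := strconv.ParseBool(v); err == nil {
			cfg.DefaultOnDemandStart = b
		}
	}
	if v := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); v != "" {
		if seconds, err := strconv.Atoi(v); err == nil {
			cfg.OnDemandStartTimeout = seconds
		}
	}

	fmt.Printf("on-demand start: %v, timeout: %ds\n",
		cfg.DefaultOnDemandStart, cfg.OnDemandStartTimeout)
}
```

In llamactl itself, values from the YAML config file loaded by `LoadConfig` presumably sit between these two steps.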
```diff
@@ -34,7 +34,9 @@ type CreateInstanceOptions struct {
 	AutoRestart  *bool `json:"auto_restart,omitempty"`
 	MaxRestarts  *int  `json:"max_restarts,omitempty"`
 	RestartDelay *int  `json:"restart_delay,omitempty"`
-	// Timeout
+	// On demand start
+	OnDemandStart *bool `json:"on_demand_start,omitempty"`
+	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"`
 	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
@@ -46,10 +48,11 @@ type CreateInstanceOptions struct {
 func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	// First, unmarshal into a temporary struct without the embedded type
 	type tempCreateOptions struct {
 		AutoRestart  *bool `json:"auto_restart,omitempty"`
 		MaxRestarts  *int  `json:"max_restarts,omitempty"`
 		RestartDelay *int  `json:"restart_delay,omitempty"`
-		IdleTimeout  *int  `json:"idle_timeout,omitempty"`
+		OnDemandStart *bool `json:"on_demand_start,omitempty"`
+		IdleTimeout   *int  `json:"idle_timeout,omitempty"`
 	}
 
 	var temp tempCreateOptions
@@ -61,6 +64,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
+	c.OnDemandStart = temp.OnDemandStart
 	c.IdleTimeout = temp.IdleTimeout
 
 	// Now unmarshal the embedded LlamaServerOptions
@@ -138,6 +142,11 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 		optionsCopy.RestartDelay = &restartDelay
 	}
 
+	if options.OnDemandStart != nil {
+		onDemandStart := *options.OnDemandStart
+		optionsCopy.OnDemandStart = &onDemandStart
+	}
+
 	if options.IdleTimeout != nil {
 		idleTimeout := *options.IdleTimeout
 		if idleTimeout < 0 {
@@ -172,6 +181,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		options.RestartDelay = &defaultRestartDelay
 	}
 
+	if options.OnDemandStart == nil {
+		defaultOnDemandStart := globalSettings.DefaultOnDemandStart
+		options.OnDemandStart = &defaultOnDemandStart
+	}
+
 	if options.IdleTimeout == nil {
 		defaultIdleTimeout := 0
 		options.IdleTimeout = &defaultIdleTimeout
```
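A minimal standalone sketch of the pointer-field pattern used above: a field left out of the JSON body stays `nil` and later picks up the global default, while an explicit `false` survives. The tiny structs and `applyDefaults` helper below are illustrative stand-ins, not the actual llamactl types.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative stand-ins for CreateInstanceOptions and the global settings.
type createOptions struct {
	OnDemandStart *bool `json:"on_demand_start,omitempty"`
}

type globalSettings struct {
	DefaultOnDemandStart bool
}

// applyDefaults mirrors the applyDefaultOptions logic from the diff above:
// only nil (unset) fields receive the global default.
func applyDefaults(opts *createOptions, g globalSettings) {
	if opts.OnDemandStart == nil {
		def := g.DefaultOnDemandStart
		opts.OnDemandStart = &def
	}
}

func main() {
	g := globalSettings{DefaultOnDemandStart: true}

	for _, body := range []string{`{}`, `{"on_demand_start": false}`} {
		var opts createOptions
		if err := json.Unmarshal([]byte(body), &opts); err != nil {
			panic(err)
		}
		applyDefaults(&opts, g)
		// An empty body resolves to the default (true); an explicit false is kept.
		fmt.Printf("%s -> on_demand_start=%v\n", body, *opts.OnDemandStart)
	}
}
```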
```diff
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -143,6 +144,74 @@ func (i *Process) Stop() error {
 	return nil
 }
 
+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.Running {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Build the health check URL directly
+	host := opts.Host
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
+
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
+	checkHealth := func() bool {
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
+		if err != nil {
+			return false
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+
+		return resp.StatusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
```
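The health check above treats only an HTTP 200 from `/health` as healthy; connection errors, non-200 statuses, and requests slower than the 5-second per-request limit simply keep the poll going until the overall timeout. A throwaway sketch of that contract against `net/http/httptest`, using a local `waitForHealthy` helper rather than the actual `Process.WaitForHealthy` method:

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
	"time"
)

// waitForHealthy is a trimmed-down stand-in for Process.WaitForHealthy:
// poll healthURL roughly once a second until it returns 200 OK or ctx expires.
func waitForHealthy(ctx context.Context, healthURL string) error {
	client := &http.Client{Timeout: 5 * time.Second}
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	for {
		resp, err := client.Get(healthURL)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for %s", healthURL)
		case <-ticker.C:
		}
	}
}

func main() {
	// Simulate a server that only becomes healthy after a couple of polls.
	var healthy atomic.Bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if healthy.Load() {
			w.WriteHeader(http.StatusOK)
			return
		}
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	defer srv.Close()

	go func() {
		time.Sleep(2 * time.Second)
		healthy.Store(true)
	}()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := waitForHealthy(ctx, srv.URL+"/health"); err != nil {
		fmt.Println("not healthy:", err)
		return
	}
	fmt.Println("healthy")
}
```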
```diff
@@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 
 		if !inst.Running {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
+			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+				// If on-demand start is enabled, start the instance
+				if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+					http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+					return
+				}
+
+				// Wait for the instance to become healthy before proceeding
+				if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
+					http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+					return
+				}
+
+			} else {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
 		}
 
 		proxy, err := inst.GetProxy()
```
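End to end, this means the first OpenAI-compatible request to a stopped instance is held until the instance starts and passes its health check (or the on-demand timeout expires), and is then proxied as usual; instances with on-demand start disabled still get the 503. A client-side sketch of that flow; the base URL, `/v1/chat/completions` path, instance name, and bearer key below are placeholders, not values taken from this diff.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"time"
)

func main() {
	// Placeholder values: adjust host/port, instance name, and API key
	// to match your llamactl deployment.
	url := "http://localhost:8080/v1/chat/completions"
	body := []byte(`{
		"model": "my-instance",
		"messages": [{"role": "user", "content": "Hello!"}]
	}`)

	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <inference-api-key>")

	// Allow for the on-demand start: the server may hold the request while
	// the instance launches and becomes healthy (up to
	// on_demand_start_timeout seconds, 120 by default).
	client := &http.Client{Timeout: 3 * time.Minute}

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(out))
}
```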
```diff
@@ -26,6 +26,10 @@ export const basicFieldsConfig: Record<string, {
     placeholder: '60',
     description: 'Time in minutes before instance is considered idle and stopped'
   },
+  on_demand_start: {
+    label: 'On-Demand Start',
+    description: 'Start instance upon receiving OpenAI-compatible API request'
+  },
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
```
```diff
@@ -7,6 +7,7 @@ export const CreateInstanceOptionsSchema = z.object({
 	max_restarts: z.number().optional(),
 	restart_delay: z.number().optional(),
 	idle_timeout: z.number().optional(),
+	on_demand_start: z.boolean().optional(),
 
 	// Common params
 	verbose_prompt: z.boolean().optional(),
```
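Together with the web UI field above, a create/update request can now carry the flag explicitly. A small sketch of what such a request body might look like, built from Go; only a few of the schema's optional fields are shown, and the exact set of keys the UI sends depends on the form, so treat this as illustrative.

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Fields mirror the options schema above: on_demand_start is an optional
	// boolean, idle_timeout a number of minutes, model the model path.
	payload := map[string]any{
		"model":           "/path/to/model.gguf",
		"idle_timeout":    60,
		"on_demand_start": true,
	}

	body, err := json.MarshalIndent(payload, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```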