Merge pull request #23 from lordmathis/feat/start-on-request

feat: On-Demand Instance Start
2025-08-20 16:04:59 +02:00
committed by GitHub
7 changed files with 139 additions and 8 deletions

View File

@@ -12,6 +12,7 @@
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⏰ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Instance configurations persist across server restarts
![Dashboard Screenshot](docs/images/screenshot.png)
@@ -116,6 +117,10 @@ instances:
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
@@ -183,6 +188,8 @@ instances:
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
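These instance defaults apply to every new instance unless overridden in the create request. A minimal sketch of a per-instance override — the `on_demand_start` JSON field is the one this PR adds, while the route, port, and auth header are illustrative assumptions not shown in this diff:

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Disable on-demand start for one instance even though the global
	// default_on_demand_start is true. The on_demand_start field matches
	// CreateInstanceOptions in this PR; the route, port, and auth header
	// are illustrative assumptions.
	body := []byte(`{"on_demand_start": false, "model": "/path/to/model.gguf"}`)
	req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/instances/my-model", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <management-api-key>")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```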
@@ -197,8 +204,11 @@ instances:
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
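These variables are read by `loadEnvVars` (see the config diff below) and take precedence over the YAML file. A minimal sketch, assuming the import path `github.com/lordmathis/llamactl/pkg/config` (not shown in this diff):

```go
package main

import (
	"fmt"
	"os"

	config "github.com/lordmathis/llamactl/pkg/config" // assumed import path
)

func main() {
	// Environment values take precedence over the YAML file.
	os.Setenv("LLAMACTL_DEFAULT_ON_DEMAND_START", "false")
	os.Setenv("LLAMACTL_ON_DEMAND_START_TIMEOUT", "300")

	cfg, err := config.LoadConfig("llamactl.yaml") // LoadConfig(configPath string) (AppConfig, error)
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Instances.DefaultOnDemandStart) // false
	fmt.Println(cfg.Instances.OnDemandStartTimeout) // 300
}
```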
#### Authentication Configuration
```yaml

View File

@@ -67,6 +67,12 @@ type InstancesConfig struct {
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
}
@@ -111,7 +117,9 @@ func LoadConfig(configPath string) (AppConfig, error) {
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Auth: AuthConfig{
RequireInferenceAuth: true,
@@ -221,6 +229,16 @@ func loadEnvVars(cfg *AppConfig) {
cfg.Instances.DefaultRestartDelay = seconds
}
}
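// On-demand start overrides follow the same pattern as the settings above:
// unparsable values are silently ignored and the compiled-in defaults are kept.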
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes

View File

@@ -34,7 +34,9 @@ type CreateInstanceOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
// Timeout
// On-demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"`
// LlamaServerOptions contains the options for the llama server
llamacpp.LlamaServerOptions `json:",inline"`
@@ -46,10 +48,11 @@ type CreateInstanceOptions struct {
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
IdleTimeout *int `json:"idle_timeout,omitempty"`
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
OnDemandStart *bool `json:"on_demand_start,omitempty"`
IdleTimeout *int `json:"idle_timeout,omitempty"`
}
var temp tempCreateOptions
@@ -61,6 +64,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
c.OnDemandStart = temp.OnDemandStart
c.IdleTimeout = temp.IdleTimeout
// Now unmarshal the embedded LlamaServerOptions
@@ -138,6 +142,11 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
optionsCopy.RestartDelay = &restartDelay
}
if options.OnDemandStart != nil {
onDemandStart := *options.OnDemandStart
optionsCopy.OnDemandStart = &onDemandStart
}
if options.IdleTimeout != nil {
idleTimeout := *options.IdleTimeout
if idleTimeout < 0 {
@@ -172,6 +181,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
options.RestartDelay = &defaultRestartDelay
}
if options.OnDemandStart == nil {
defaultOnDemandStart := globalSettings.DefaultOnDemandStart
options.OnDemandStart = &defaultOnDemandStart
}
if options.IdleTimeout == nil {
defaultIdleTimeout := 0
options.IdleTimeout = &defaultIdleTimeout
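Because every option is a pointer, an omitted JSON field stays `nil` and is distinguishable from an explicit `false`; `applyDefaultOptions` fills only the `nil` ones. A test-style sketch of that round trip, assuming the package name and that the second parameter is `*config.InstancesConfig` (the signature is truncated above):

```go
// Test-style sketch; applyDefaultOptions is unexported, so this assumes it
// lives in the same package as CreateInstanceOptions (package name assumed).
package instances

import (
	"encoding/json"
	"testing"

	config "github.com/lordmathis/llamactl/pkg/config" // assumed import path
)

func TestOnDemandStartDefaulting(t *testing.T) {
	// "on_demand_start" is omitted here, so the pointer stays nil (unset)...
	var opts CreateInstanceOptions
	if err := json.Unmarshal([]byte(`{"max_restarts": 3}`), &opts); err != nil {
		t.Fatal(err)
	}
	if opts.OnDemandStart != nil {
		t.Fatal("expected nil (unset) on_demand_start")
	}
	// ...and applyDefaultOptions fills it from the global default.
	gs := &config.InstancesConfig{DefaultOnDemandStart: true}
	applyDefaultOptions(&opts, gs)
	if opts.OnDemandStart == nil || !*opts.OnDemandStart {
		t.Fatal("expected on_demand_start to default to true")
	}
}
```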

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"log"
"net/http"
"os/exec"
"runtime"
"syscall"
@@ -143,6 +144,74 @@ func (i *Process) Stop() error {
return nil
}
func (i *Process) WaitForHealthy(timeout int) error {
if !i.Running {
return fmt.Errorf("instance %s is not running", i.Name)
}
if timeout <= 0 {
timeout = 30 // Default to 30 seconds if no timeout is specified
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
defer cancel()
// Get instance options to build the health check URL
opts := i.GetOptions()
if opts == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Build the health check URL directly
host := opts.Host
if host == "" {
host = "localhost"
}
healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
Timeout: 5 * time.Second, // 5 second timeout per request
}
// Helper function to check health directly
checkHealth := func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
if err != nil {
return false
}
resp, err := client.Do(req)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// Try immediate check first
if checkHealth() {
return nil // Instance is healthy
}
// If immediate check failed, start polling
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
case <-ticker.C:
if checkHealth() {
return nil // Instance is healthy
}
// Continue polling
}
}
}
func (i *Process) monitorProcess() {
defer func() {
i.mu.Lock()

View File

@@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
}
if !inst.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
if opts := inst.GetOptions(); opts != nil && opts.OnDemandStart != nil && *opts.OnDemandStart {
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // wait up to the configured on_demand_start_timeout (default 120s)
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} else {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
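From the client's side the start is transparent: the first OpenAI-compatible request to a stopped instance blocks until the instance reports healthy (bounded by `on_demand_start_timeout`), then proxies as usual. A sketch, assuming the standard `/v1/chat/completions` route and a `localhost:8080` listen address (neither is pinned down in this diff):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"time"
)

func main() {
	// "model" names the llamactl instance; if it is stopped and
	// on_demand_start is set, the server starts it and polls /health
	// before proxying, so this single call can take a while.
	body := []byte(`{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}`)

	client := &http.Client{Timeout: 3 * time.Minute} // leave headroom for on-demand startup
	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <inference-api-key>") // require_inference_auth: true

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```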

View File

@@ -26,6 +26,10 @@ export const basicFieldsConfig: Record<string, {
placeholder: '60',
description: 'Time in minutes before instance is considered idle and stopped'
},
on_demand_start: {
label: 'On-Demand Start',
description: 'Start instance upon receiving OpenAI-compatible API request'
},
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',

View File

@@ -7,6 +7,7 @@ export const CreateInstanceOptionsSchema = z.object({
max_restarts: z.number().optional(),
restart_delay: z.number().optional(),
idle_timeout: z.number().optional(),
on_demand_start: z.boolean().optional(),
// Common params
verbose_prompt: z.boolean().optional(),