From 7b4adfa0cdb12aa7d35fffc53cd97b9f6ff0ac5c Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 13:50:43 +0200 Subject: [PATCH 1/8] Add DefaultOnDemandStart configuration and update instance options --- pkg/config/config.go | 9 +++++++++ pkg/instance/instance.go | 24 +++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 386a708..1e2694e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -67,6 +67,9 @@ type InstancesConfig struct { // Default restart delay for new instances (in seconds) DefaultRestartDelay int `yaml:"default_restart_delay"` + // Default on-demand start setting for new instances + DefaultOnDemandStart bool `yaml:"default_on_demand_start"` + // Interval for checking instance timeouts (in minutes) TimeoutCheckInterval int `yaml:"timeout_check_interval"` } @@ -111,6 +114,7 @@ func LoadConfig(configPath string) (AppConfig, error) { DefaultAutoRestart: true, DefaultMaxRestarts: 3, DefaultRestartDelay: 5, + DefaultOnDemandStart: false, TimeoutCheckInterval: 5, // Check timeouts every 5 minutes }, Auth: AuthConfig{ @@ -221,6 +225,11 @@ func loadEnvVars(cfg *AppConfig) { cfg.Instances.DefaultRestartDelay = seconds } } + if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" { + if b, err := strconv.ParseBool(onDemandStart); err == nil { + cfg.Instances.DefaultOnDemandStart = b + } + } if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" { if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil { cfg.Instances.TimeoutCheckInterval = minutes diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index 5ff4bf5..f690ca1 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -34,7 +34,9 @@ type CreateInstanceOptions struct { AutoRestart *bool `json:"auto_restart,omitempty"` MaxRestarts *int `json:"max_restarts,omitempty"` RestartDelay *int `json:"restart_delay,omitempty"` - // Timeout + // On demand start + OnDemandStart *bool `json:"on_demand_start,omitempty"` + // Idle timeout IdleTimeout *int `json:"idle_timeout,omitempty"` // LlamaServerOptions contains the options for the llama server llamacpp.LlamaServerOptions `json:",inline"` @@ -46,10 +48,11 @@ type CreateInstanceOptions struct { func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error { // First, unmarshal into a temporary struct without the embedded type type tempCreateOptions struct { - AutoRestart *bool `json:"auto_restart,omitempty"` - MaxRestarts *int `json:"max_restarts,omitempty"` - RestartDelay *int `json:"restart_delay,omitempty"` - IdleTimeout *int `json:"idle_timeout,omitempty"` + AutoRestart *bool `json:"auto_restart,omitempty"` + MaxRestarts *int `json:"max_restarts,omitempty"` + RestartDelay *int `json:"restart_delay,omitempty"` + OnDemandStart *bool `json:"on_demand_start,omitempty"` + IdleTimeout *int `json:"idle_timeout,omitempty"` } var temp tempCreateOptions @@ -61,6 +64,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error { c.AutoRestart = temp.AutoRestart c.MaxRestarts = temp.MaxRestarts c.RestartDelay = temp.RestartDelay + c.OnDemandStart = temp.OnDemandStart c.IdleTimeout = temp.IdleTimeout // Now unmarshal the embedded LlamaServerOptions @@ -138,6 +142,11 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create optionsCopy.RestartDelay = &restartDelay } + if options.OnDemandStart != nil { + onDemandStart := *options.OnDemandStart + optionsCopy.OnDemandStart = &onDemandStart + } + if options.IdleTimeout != nil { idleTimeout := *options.IdleTimeout if idleTimeout < 0 { @@ -172,6 +181,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config. options.RestartDelay = &defaultRestartDelay } + if options.OnDemandStart == nil { + defaultOnDemandStart := globalSettings.DefaultOnDemandStart + options.OnDemandStart = &defaultOnDemandStart + } + if options.IdleTimeout == nil { defaultIdleTimeout := 0 options.IdleTimeout = &defaultIdleTimeout From 287a5e0817de2f8ec91aa1e162b78ffbfd04b3ea Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 14:19:12 +0200 Subject: [PATCH 2/8] Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start --- pkg/instance/lifecycle.go | 80 +++++++++++++++++++++++++++++++++++++++ pkg/server/handlers.go | 19 +++++++++- 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 3db3ca7..dd4c2c0 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log" + "net/http" "os/exec" "runtime" "syscall" @@ -143,6 +144,85 @@ func (i *Process) Stop() error { return nil } +func (i *Process) WaitForHealthy(timeout int) error { + if !i.Running { + return fmt.Errorf("instance %s is not running", i.Name) + } + + if timeout <= 0 { + timeout = 30 // Default to 30 seconds if no timeout is specified + } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second) + defer cancel() + + // Get the reverse proxy for this instance + proxy, err := i.GetProxy() + if err != nil { + return fmt.Errorf("failed to get proxy for instance %s: %w", i.Name, err) + } + + // Polling interval + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + // Helper function to check health using the proxy + checkHealth := func() bool { + // Create a request to /health + req, err := http.NewRequestWithContext(ctx, "GET", "/health", nil) + if err != nil { + return false + } + + // Create a custom ResponseRecorder to capture the proxy response + recorder := &healthResponseRecorder{ + statusCode: 0, + headers: make(http.Header), + } + + // Use the proxy to forward the request + proxy.ServeHTTP(recorder, req) + + return recorder.statusCode == http.StatusOK + } + + // Try immediate check first + if checkHealth() { + return nil // Instance is healthy + } + + // If immediate check failed, start polling + for { + select { + case <-ctx.Done(): + return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout) + case <-ticker.C: + if checkHealth() { + return nil // Instance is healthy + } + // Continue polling + } + } +} + +// healthResponseRecorder implements http.ResponseWriter to capture proxy responses +type healthResponseRecorder struct { + statusCode int + headers http.Header +} + +func (r *healthResponseRecorder) Header() http.Header { + return r.headers +} + +func (r *healthResponseRecorder) Write([]byte) (int, error) { + // We don't need to capture the body for health checks + return 0, nil +} + +func (r *healthResponseRecorder) WriteHeader(statusCode int) { + r.statusCode = statusCode +} + func (i *Process) monitorProcess() { defer func() { i.mu.Lock() diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index 0d473e7..843407e 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc { } if !inst.Running { - http.Error(w, "Instance is not running", http.StatusServiceUnavailable) - return + if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart { + // If on-demand start is enabled, start the instance + if _, err := h.InstanceManager.StartInstance(modelName); err != nil { + http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) + return + } + + // Wait for the instance to become healthy before proceeding + if err := inst.WaitForHealthy(120); err != nil { // 2 minutes timeout + http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) + return + } + + } else { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } } proxy, err := inst.GetProxy() From 496ab3aa5d1eb34b2bf6ccee1bf24ef0055d79bf Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 14:22:55 +0200 Subject: [PATCH 3/8] Update README to clarify on-demand instance start feature --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 65dfb42..d2d22b9 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ 🔐 **API Key Authentication**: Separate keys for management vs inference access 📊 **Instance Monitoring**: Health checks, auto-restart, log management ⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period +💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests 💾 **State Persistence**: Ensure instances remain intact across server restarts ![Dashboard Screenshot](docs/images/screenshot.png) From ddb54763f61d2f786b19dbbf80beae9127647641 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 14:25:43 +0200 Subject: [PATCH 4/8] Add OnDemandStartTimeout configuration and update OpenAIProxy to use it --- pkg/config/config.go | 13 +++++++++++-- pkg/server/handlers.go | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 1e2694e..4486fa5 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -70,6 +70,9 @@ type InstancesConfig struct { // Default on-demand start setting for new instances DefaultOnDemandStart bool `yaml:"default_on_demand_start"` + // How long to wait for an instance to start on demand (in seconds) + OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"` + // Interval for checking instance timeouts (in minutes) TimeoutCheckInterval int `yaml:"timeout_check_interval"` } @@ -114,8 +117,9 @@ func LoadConfig(configPath string) (AppConfig, error) { DefaultAutoRestart: true, DefaultMaxRestarts: 3, DefaultRestartDelay: 5, - DefaultOnDemandStart: false, - TimeoutCheckInterval: 5, // Check timeouts every 5 minutes + DefaultOnDemandStart: true, + OnDemandStartTimeout: 120, // 2 minutes + TimeoutCheckInterval: 5, // Check timeouts every 5 minutes }, Auth: AuthConfig{ RequireInferenceAuth: true, @@ -230,6 +234,11 @@ func loadEnvVars(cfg *AppConfig) { cfg.Instances.DefaultOnDemandStart = b } } + if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" { + if seconds, err := strconv.Atoi(onDemandTimeout); err == nil { + cfg.Instances.OnDemandStartTimeout = seconds + } + } if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" { if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil { cfg.Instances.TimeoutCheckInterval = minutes diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index 843407e..6306a31 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -583,7 +583,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc { } // Wait for the instance to become healthy before proceeding - if err := inst.WaitForHealthy(120); err != nil { // 2 minutes timeout + if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) return } From 4bc9362f7aa5cd4a064068717c00f2fe1eea837c Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 14:41:42 +0200 Subject: [PATCH 5/8] Add default on-demand start settings and timeout configuration to README --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index d2d22b9..3a5d29d 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,10 @@ instances: default_auto_restart: true # Auto-restart new instances by default default_max_restarts: 3 # Max restarts for new instances default_restart_delay: 5 # Restart delay (seconds) for new instances + default_on_demand_start: true # Default on-demand start setting + on_demand_start_timeout: 120 # Default on-demand start timeout in seconds + timeout_check_interval: 5 # Idle instance timeout check in minutes + auth: require_inference_auth: true # Require auth for inference endpoints @@ -184,6 +188,8 @@ instances: default_auto_restart: true # Default auto-restart setting default_max_restarts: 3 # Default maximum restart attempts default_restart_delay: 5 # Default restart delay in seconds + default_on_demand_start: true # Default on-demand start setting + on_demand_start_timeout: 120 # Default on-demand start timeout in seconds timeout_check_interval: 5 # Default instance timeout check interval in minutes ``` @@ -198,8 +204,11 @@ instances: - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds +- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false) +- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes + #### Authentication Configuration ```yaml From 8265a94bf74bbd2677a74cd933776f5e21ef795b Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 14:56:11 +0200 Subject: [PATCH 6/8] Add on-demand start configuration to instance options and basic fields --- webui/src/lib/zodFormUtils.ts | 5 +++++ webui/src/schemas/instanceOptions.ts | 1 + 2 files changed, 6 insertions(+) diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts index 5a964bb..2e09b9c 100644 --- a/webui/src/lib/zodFormUtils.ts +++ b/webui/src/lib/zodFormUtils.ts @@ -1,4 +1,5 @@ import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions' +import { on } from 'events' // Only define the basic fields we want to show by default export const basicFieldsConfig: Record Date: Wed, 20 Aug 2025 15:58:08 +0200 Subject: [PATCH 7/8] Refactor WaitForHealthy method to use direct health check URL and simplify health check logic --- pkg/instance/lifecycle.go | 63 ++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index dd4c2c0..84d5ea3 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -152,37 +152,42 @@ func (i *Process) WaitForHealthy(timeout int) error { if timeout <= 0 { timeout = 30 // Default to 30 seconds if no timeout is specified } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second) defer cancel() - // Get the reverse proxy for this instance - proxy, err := i.GetProxy() - if err != nil { - return fmt.Errorf("failed to get proxy for instance %s: %w", i.Name, err) + // Get instance options to build the health check URL + opts := i.GetOptions() + if opts == nil { + return fmt.Errorf("instance %s has no options set", i.Name) } - // Polling interval - ticker := time.NewTicker(1 * time.Second) - defer ticker.Stop() + // Build the health check URL directly + host := opts.Host + if host == "" { + host = "localhost" + } + healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port) - // Helper function to check health using the proxy + // Create a dedicated HTTP client for health checks + client := &http.Client{ + Timeout: 5 * time.Second, // 5 second timeout per request + } + + // Helper function to check health directly checkHealth := func() bool { - // Create a request to /health - req, err := http.NewRequestWithContext(ctx, "GET", "/health", nil) + req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil) if err != nil { return false } - // Create a custom ResponseRecorder to capture the proxy response - recorder := &healthResponseRecorder{ - statusCode: 0, - headers: make(http.Header), + resp, err := client.Do(req) + if err != nil { + return false } + defer resp.Body.Close() - // Use the proxy to forward the request - proxy.ServeHTTP(recorder, req) - - return recorder.statusCode == http.StatusOK + return resp.StatusCode == http.StatusOK } // Try immediate check first @@ -191,6 +196,9 @@ func (i *Process) WaitForHealthy(timeout int) error { } // If immediate check failed, start polling + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + for { select { case <-ctx.Done(): @@ -204,25 +212,6 @@ func (i *Process) WaitForHealthy(timeout int) error { } } -// healthResponseRecorder implements http.ResponseWriter to capture proxy responses -type healthResponseRecorder struct { - statusCode int - headers http.Header -} - -func (r *healthResponseRecorder) Header() http.Header { - return r.headers -} - -func (r *healthResponseRecorder) Write([]byte) (int, error) { - // We don't need to capture the body for health checks - return 0, nil -} - -func (r *healthResponseRecorder) WriteHeader(statusCode int) { - r.statusCode = statusCode -} - func (i *Process) monitorProcess() { defer func() { i.mu.Lock() From 9181c3d7bc1caf0ab038fd5e1ce69e97187cf396 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 16:03:09 +0200 Subject: [PATCH 8/8] Remove unused import from zodFormUtils.ts --- webui/src/lib/zodFormUtils.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts index 2e09b9c..660c609 100644 --- a/webui/src/lib/zodFormUtils.ts +++ b/webui/src/lib/zodFormUtils.ts @@ -1,5 +1,4 @@ import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions' -import { on } from 'events' // Only define the basic fields we want to show by default export const basicFieldsConfig: Record