From 7b4adfa0cdb12aa7d35fffc53cd97b9f6ff0ac5c Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 13:50:43 +0200
Subject: [PATCH 1/8] Add DefaultOnDemandStart configuration and update
 instance options

---
 pkg/config/config.go     |  9 +++++++++
 pkg/instance/instance.go | 24 +++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index 386a708..1e2694e 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -67,6 +67,9 @@ type InstancesConfig struct {
 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
 
+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+
 	// Interval for checking instance timeouts (in minutes)
 	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }
@@ -111,6 +114,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
+			DefaultOnDemandStart: false,
 			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
@@ -221,6 +225,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
 	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
 		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
 			cfg.Instances.TimeoutCheckInterval = minutes
diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go
index 5ff4bf5..f690ca1 100644
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -34,7 +34,9 @@ type CreateInstanceOptions struct {
 	AutoRestart  *bool `json:"auto_restart,omitempty"`
 	MaxRestarts  *int  `json:"max_restarts,omitempty"`
 	RestartDelay *int  `json:"restart_delay,omitempty"`
-	// Timeout
+	// On demand start
+	OnDemandStart *bool `json:"on_demand_start,omitempty"`
+	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"`
 	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
@@ -46,10 +48,11 @@ type CreateInstanceOptions struct {
 func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	// First, unmarshal into a temporary struct without the embedded type
 	type tempCreateOptions struct {
-		AutoRestart  *bool `json:"auto_restart,omitempty"`
-		MaxRestarts  *int  `json:"max_restarts,omitempty"`
-		RestartDelay *int  `json:"restart_delay,omitempty"`
-		IdleTimeout  *int  `json:"idle_timeout,omitempty"`
+		AutoRestart   *bool `json:"auto_restart,omitempty"`
+		MaxRestarts   *int  `json:"max_restarts,omitempty"`
+		RestartDelay  *int  `json:"restart_delay,omitempty"`
+		OnDemandStart *bool `json:"on_demand_start,omitempty"`
+		IdleTimeout   *int  `json:"idle_timeout,omitempty"`
 	}
 
 	var temp tempCreateOptions
@@ -61,6 +64,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
+	c.OnDemandStart = temp.OnDemandStart
 	c.IdleTimeout = temp.IdleTimeout
 
 	// Now unmarshal the embedded LlamaServerOptions
@@ -138,6 +142,11 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 			optionsCopy.RestartDelay = &restartDelay
 		}
 
+		if options.OnDemandStart != nil {
+			onDemandStart := *options.OnDemandStart
+			optionsCopy.OnDemandStart = &onDemandStart
+		}
+
 		if options.IdleTimeout != nil {
 			idleTimeout := *options.IdleTimeout
 			if idleTimeout < 0 {
@@ -172,6 +181,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		options.RestartDelay = &defaultRestartDelay
 	}
 
+	if options.OnDemandStart == nil {
+		defaultOnDemandStart := globalSettings.DefaultOnDemandStart
+		options.OnDemandStart = &defaultOnDemandStart
+	}
+
 	if options.IdleTimeout == nil {
 		defaultIdleTimeout := 0
 		options.IdleTimeout = &defaultIdleTimeout

From 287a5e0817de2f8ec91aa1e162b78ffbfd04b3ea Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 14:19:12 +0200
Subject: [PATCH 2/8] Implement WaitForHealthy method and enhance OpenAIProxy
 to support on-demand instance start

---
 pkg/instance/lifecycle.go | 80 +++++++++++++++++++++++++++++++++++++++
 pkg/server/handlers.go    | 19 +++++++++-
 2 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go
index 3db3ca7..dd4c2c0 100644
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -143,6 +144,85 @@ func (i *Process) Stop() error {
 	return nil
 }
 
+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.Running {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get the reverse proxy for this instance
+	proxy, err := i.GetProxy()
+	if err != nil {
+		return fmt.Errorf("failed to get proxy for instance %s: %w", i.Name, err)
+	}
+
+	// Polling interval
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	// Helper function to check health using the proxy
+	checkHealth := func() bool {
+		// Create a request to /health
+		req, err := http.NewRequestWithContext(ctx, "GET", "/health", nil)
+		if err != nil {
+			return false
+		}
+
+		// Create a custom ResponseRecorder to capture the proxy response
+		recorder := &healthResponseRecorder{
+			statusCode: 0,
+			headers:    make(http.Header),
+		}
+
+		// Use the proxy to forward the request
+		proxy.ServeHTTP(recorder, req)
+
+		return recorder.statusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
+// healthResponseRecorder implements http.ResponseWriter to capture proxy responses
+type healthResponseRecorder struct {
+	statusCode int
+	headers    http.Header
+}
+
+func (r *healthResponseRecorder) Header() http.Header {
+	return r.headers
+}
+
+func (r *healthResponseRecorder) Write([]byte) (int, error) {
+	// We don't need to capture the body for health checks
+	return 0, nil
+}
+
+func (r *healthResponseRecorder) WriteHeader(statusCode int) {
+	r.statusCode = statusCode
+}
+
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go
index 0d473e7..843407e 100644
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 
 		if !inst.Running {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
+			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+				// If on-demand start is enabled, start the instance
+				if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+					http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+					return
+				}
+
+				// Wait for the instance to become healthy before proceeding
+				if err := inst.WaitForHealthy(120); err != nil { // 2 minutes timeout
+					http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+					return
+				}
+
+			} else {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
 		}
 
 		proxy, err := inst.GetProxy()

From 496ab3aa5d1eb34b2bf6ccee1bf24ef0055d79bf Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 14:22:55 +0200
Subject: [PATCH 3/8] Update README to clarify on-demand instance start feature

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 65dfb42..d2d22b9 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@
 🔐 **API Key Authentication**: Separate keys for management vs inference access  
 📊 **Instance Monitoring**: Health checks, auto-restart, log management  
 ⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period  
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests  
 💾 **State Persistence**: Ensure instances remain intact across server restarts  
 
 ![Dashboard Screenshot](docs/images/screenshot.png)

From ddb54763f61d2f786b19dbbf80beae9127647641 Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 14:25:43 +0200
Subject: [PATCH 4/8] Add OnDemandStartTimeout configuration and update
 OpenAIProxy to use it

---
 pkg/config/config.go   | 13 +++++++++++--
 pkg/server/handlers.go |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index 1e2694e..4486fa5 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -70,6 +70,9 @@ type InstancesConfig struct {
 	// Default on-demand start setting for new instances
 	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
 
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+
 	// Interval for checking instance timeouts (in minutes)
 	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }
@@ -114,8 +117,9 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
-			DefaultOnDemandStart: false,
-			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
+			DefaultOnDemandStart: true,
+			OnDemandStartTimeout: 120, // 2 minutes
+			TimeoutCheckInterval: 5,   // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth:  true,
@@ -230,6 +234,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultOnDemandStart = b
 		}
 	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
 	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
 		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
 			cfg.Instances.TimeoutCheckInterval = minutes
diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go
index 843407e..6306a31 100644
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -583,7 +583,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 				}
 
 				// Wait for the instance to become healthy before proceeding
-				if err := inst.WaitForHealthy(120); err != nil { // 2 minutes timeout
+				if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout in seconds, from on_demand_start_timeout config
 					http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
 					return
 				}

From 4bc9362f7aa5cd4a064068717c00f2fe1eea837c Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 14:41:42 +0200
Subject: [PATCH 5/8] Add default on-demand start settings and timeout
 configuration to README

---
 README.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/README.md b/README.md
index d2d22b9..3a5d29d 100644
--- a/README.md
+++ b/README.md
@@ -117,6 +117,10 @@ instances:
   default_auto_restart: true     # Auto-restart new instances by default
   default_max_restarts: 3        # Max restarts for new instances
   default_restart_delay: 5       # Restart delay (seconds) for new instances
+  default_on_demand_start: true  # Default on-demand start setting
+  on_demand_start_timeout: 120   # Default on-demand start timeout in seconds
+  timeout_check_interval: 5      # Idle instance timeout check in minutes
+
 
 auth:
   require_inference_auth: true   # Require auth for inference endpoints
@@ -184,6 +188,8 @@ instances:
   default_auto_restart: true                        # Default auto-restart setting
   default_max_restarts: 3                           # Default maximum restart attempts
   default_restart_delay: 5                          # Default restart delay in seconds
+  default_on_demand_start: true                     # Default on-demand start setting
+  on_demand_start_timeout: 120                      # Default on-demand start timeout in seconds
   timeout_check_interval: 5                         # Default instance timeout check interval in minutes
 ```
 
@@ -198,8 +204,11 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
+- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
 - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
 
+
 #### Authentication Configuration
 
 ```yaml

From 8265a94bf74bbd2677a74cd933776f5e21ef795b Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 14:56:11 +0200
Subject: [PATCH 6/8] Add on-demand start configuration to instance options and
 basic fields

---
 webui/src/lib/zodFormUtils.ts        | 5 +++++
 webui/src/schemas/instanceOptions.ts | 1 +
 2 files changed, 6 insertions(+)

diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts
index 5a964bb..2e09b9c 100644
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -1,4 +1,5 @@
 import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
+import { on } from 'events'
 
 // Only define the basic fields we want to show by default
 export const basicFieldsConfig: Record<string, {
@@ -26,6 +27,10 @@ export const basicFieldsConfig: Record<string, {
     placeholder: '60',
     description: 'Time in minutes before instance is considered idle and stopped'
   },
+  on_demand_start: {
+    label: 'On-Demand Start',
+    description: 'Start instance upon receiving OpenAI-compatible API request'
+  },
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts
index 08f5ae0..f3ef90b 100644
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -7,6 +7,7 @@ export const CreateInstanceOptionsSchema = z.object({
   max_restarts: z.number().optional(),
   restart_delay: z.number().optional(),
   idle_timeout: z.number().optional(),
+  on_demand_start: z.boolean().optional(),
 
   // Common params
   verbose_prompt: z.boolean().optional(),

From 1939b45312372a944f5d72e94fcd6cfc50263401 Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 15:58:08 +0200
Subject: [PATCH 7/8] Refactor WaitForHealthy method to use direct health check
 URL and simplify health check logic

---
 pkg/instance/lifecycle.go | 63 ++++++++++++++++-----------------------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go
index dd4c2c0..84d5ea3 100644
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -152,37 +152,42 @@ func (i *Process) WaitForHealthy(timeout int) error {
 	if timeout <= 0 {
 		timeout = 30 // Default to 30 seconds if no timeout is specified
 	}
+
 	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
 	defer cancel()
 
-	// Get the reverse proxy for this instance
-	proxy, err := i.GetProxy()
-	if err != nil {
-		return fmt.Errorf("failed to get proxy for instance %s: %w", i.Name, err)
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
 	}
 
-	// Polling interval
-	ticker := time.NewTicker(1 * time.Second)
-	defer ticker.Stop()
+	// Build the health check URL directly
+	host := opts.Host
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
 
-	// Helper function to check health using the proxy
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
 	checkHealth := func() bool {
-		// Create a request to /health
-		req, err := http.NewRequestWithContext(ctx, "GET", "/health", nil)
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
 		if err != nil {
 			return false
 		}
 
-		// Create a custom ResponseRecorder to capture the proxy response
-		recorder := &healthResponseRecorder{
-			statusCode: 0,
-			headers:    make(http.Header),
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
 		}
+		defer resp.Body.Close()
 
-		// Use the proxy to forward the request
-		proxy.ServeHTTP(recorder, req)
-
-		return recorder.statusCode == http.StatusOK
+		return resp.StatusCode == http.StatusOK
 	}
 
 	// Try immediate check first
@@ -191,6 +196,9 @@ func (i *Process) WaitForHealthy(timeout int) error {
 	}
 
 	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
 	for {
 		select {
 		case <-ctx.Done():
@@ -204,25 +212,6 @@ func (i *Process) WaitForHealthy(timeout int) error {
 	}
 }
 
-// healthResponseRecorder implements http.ResponseWriter to capture proxy responses
-type healthResponseRecorder struct {
-	statusCode int
-	headers    http.Header
-}
-
-func (r *healthResponseRecorder) Header() http.Header {
-	return r.headers
-}
-
-func (r *healthResponseRecorder) Write([]byte) (int, error) {
-	// We don't need to capture the body for health checks
-	return 0, nil
-}
-
-func (r *healthResponseRecorder) WriteHeader(statusCode int) {
-	r.statusCode = statusCode
-}
-
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()

From 9181c3d7bc1caf0ab038fd5e1ce69e97187cf396 Mon Sep 17 00:00:00 2001
From: LordMathis <matus@namesny.com>
Date: Wed, 20 Aug 2025 16:03:09 +0200
Subject: [PATCH 8/8] Remove unused import from zodFormUtils.ts

---
 webui/src/lib/zodFormUtils.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts
index 2e09b9c..660c609 100644
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -1,5 +1,4 @@
 import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
-import { on } from 'events'
 
 // Only define the basic fields we want to show by default
 export const basicFieldsConfig: Record<string, {