Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-07 09:34:22 +00:00)
Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
README.md

````diff
@@ -12,6 +12,7 @@
 🔐 **API Key Authentication**: Separate keys for management vs inference access
 📊 **Instance Monitoring**: Health checks, auto-restart, log management
 ⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
 💾 **State Persistence**: Ensure instances remain intact across server restarts
 
 
@@ -116,6 +117,10 @@ instances:
   default_auto_restart: true # Auto-restart new instances by default
   default_max_restarts: 3 # Max restarts for new instances
   default_restart_delay: 5 # Restart delay (seconds) for new instances
+  default_on_demand_start: true # Default on-demand start setting
+  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
+  timeout_check_interval: 5 # Idle instance timeout check in minutes
 
 auth:
   require_inference_auth: true # Require auth for inference endpoints
@@ -183,6 +188,8 @@ instances:
   default_auto_restart: true # Default auto-restart setting
   default_max_restarts: 3 # Default maximum restart attempts
   default_restart_delay: 5 # Default restart delay in seconds
+  default_on_demand_start: true # Default on-demand start setting
+  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
   timeout_check_interval: 5 # Default instance timeout check interval in minutes
 ```
 
@@ -197,8 +204,11 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
+- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
 - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
 
 #### Authentication Configuration
 
 ```yaml
````
```diff
@@ -67,6 +67,12 @@ type InstancesConfig struct {
 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
 
+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+
 	// Interval for checking instance timeouts (in minutes)
 	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }
@@ -111,7 +117,9 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
-			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
+			DefaultOnDemandStart: true,
+			OnDemandStartTimeout: 120, // 2 minutes
+			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth: true,
@@ -221,6 +229,16 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
 	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
 		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
 			cfg.Instances.TimeoutCheckInterval = minutes
```
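For reference, a minimal, self-contained sketch of the precedence the hunks above imply (built-in default, then environment override) for the two new settings. The field names and `LLAMACTL_*` variable names come from the diff; the standalone `main` wrapper and trimmed-down struct are illustrative only, not the actual llamactl package.

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// Illustrative subset of the instances configuration; only the two
// on-demand fields from the diff above are reproduced here.
type instancesConfig struct {
	DefaultOnDemandStart bool
	OnDemandStartTimeout int
}

func main() {
	// Built-in defaults, matching the values set in LoadConfig above.
	cfg := instancesConfig{
		DefaultOnDemandStart: true,
		OnDemandStartTimeout: 120, // 2 minutes
	}

	// Environment variables override the defaults, mirroring loadEnvVars:
	// unparsable values are ignored and the previous value is kept.
	if v := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); v != "" {
		if b, err := strconv.ParseBool(v); err == nil {
			cfg.DefaultOnDemandStart = b
		}
	}
	if v := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); v != "" {
		if seconds, err := strconv.Atoi(v); err == nil {
			cfg.OnDemandStartTimeout = seconds
		}
	}

	fmt.Printf("on-demand start: %v, timeout: %ds\n",
		cfg.DefaultOnDemandStart, cfg.OnDemandStartTimeout)
}
```

In llamactl itself, values from the YAML config file loaded by `LoadConfig` presumably sit between these two steps.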
```diff
@@ -34,7 +34,9 @@ type CreateInstanceOptions struct {
 	AutoRestart  *bool `json:"auto_restart,omitempty"`
 	MaxRestarts  *int  `json:"max_restarts,omitempty"`
 	RestartDelay *int  `json:"restart_delay,omitempty"`
-	// Timeout
+	// On demand start
+	OnDemandStart *bool `json:"on_demand_start,omitempty"`
+	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"`
 	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
@@ -46,10 +48,11 @@ type CreateInstanceOptions struct {
 func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	// First, unmarshal into a temporary struct without the embedded type
 	type tempCreateOptions struct {
 		AutoRestart  *bool `json:"auto_restart,omitempty"`
 		MaxRestarts  *int  `json:"max_restarts,omitempty"`
 		RestartDelay *int  `json:"restart_delay,omitempty"`
-		IdleTimeout  *int  `json:"idle_timeout,omitempty"`
+		OnDemandStart *bool `json:"on_demand_start,omitempty"`
+		IdleTimeout   *int  `json:"idle_timeout,omitempty"`
 	}
 
 	var temp tempCreateOptions
@@ -61,6 +64,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
+	c.OnDemandStart = temp.OnDemandStart
 	c.IdleTimeout = temp.IdleTimeout
 
 	// Now unmarshal the embedded LlamaServerOptions
@@ -138,6 +142,11 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 		optionsCopy.RestartDelay = &restartDelay
 	}
 
+	if options.OnDemandStart != nil {
+		onDemandStart := *options.OnDemandStart
+		optionsCopy.OnDemandStart = &onDemandStart
+	}
+
 	if options.IdleTimeout != nil {
 		idleTimeout := *options.IdleTimeout
 		if idleTimeout < 0 {
@@ -172,6 +181,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		options.RestartDelay = &defaultRestartDelay
 	}
 
+	if options.OnDemandStart == nil {
+		defaultOnDemandStart := globalSettings.DefaultOnDemandStart
+		options.OnDemandStart = &defaultOnDemandStart
+	}
+
 	if options.IdleTimeout == nil {
 		defaultIdleTimeout := 0
 		options.IdleTimeout = &defaultIdleTimeout
```
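A minimal standalone sketch of the pointer-field pattern used above: a field left out of the JSON body stays `nil` and later picks up the global default, while an explicit `false` survives. The tiny structs and `applyDefaults` helper below are illustrative stand-ins, not the actual llamactl types.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative stand-ins for CreateInstanceOptions and the global settings.
type createOptions struct {
	OnDemandStart *bool `json:"on_demand_start,omitempty"`
}

type globalSettings struct {
	DefaultOnDemandStart bool
}

// applyDefaults mirrors the applyDefaultOptions logic from the diff above:
// only nil (unset) fields receive the global default.
func applyDefaults(opts *createOptions, g globalSettings) {
	if opts.OnDemandStart == nil {
		def := g.DefaultOnDemandStart
		opts.OnDemandStart = &def
	}
}

func main() {
	g := globalSettings{DefaultOnDemandStart: true}

	for _, body := range []string{`{}`, `{"on_demand_start": false}`} {
		var opts createOptions
		if err := json.Unmarshal([]byte(body), &opts); err != nil {
			panic(err)
		}
		applyDefaults(&opts, g)
		// An empty body resolves to the default (true); an explicit false is kept.
		fmt.Printf("%s -> on_demand_start=%v\n", body, *opts.OnDemandStart)
	}
}
```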
```diff
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -143,6 +144,74 @@ func (i *Process) Stop() error {
 	return nil
 }
 
+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.Running {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Build the health check URL directly
+	host := opts.Host
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
+
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
+	checkHealth := func() bool {
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
+		if err != nil {
+			return false
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+
+		return resp.StatusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
```
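The health check above treats only an HTTP 200 from `/health` as healthy; connection errors, non-200 statuses, and requests slower than the 5-second per-request limit simply keep the poll going until the overall timeout. A throwaway sketch of that contract against `net/http/httptest`, using a local `waitForHealthy` helper rather than the actual `Process.WaitForHealthy` method:

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
	"time"
)

// waitForHealthy is a trimmed-down stand-in for Process.WaitForHealthy:
// poll healthURL roughly once a second until it returns 200 OK or ctx expires.
func waitForHealthy(ctx context.Context, healthURL string) error {
	client := &http.Client{Timeout: 5 * time.Second}
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	for {
		resp, err := client.Get(healthURL)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("timed out waiting for %s", healthURL)
		case <-ticker.C:
		}
	}
}

func main() {
	// Simulate a server that only becomes healthy after a couple of polls.
	var healthy atomic.Bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if healthy.Load() {
			w.WriteHeader(http.StatusOK)
			return
		}
		w.WriteHeader(http.StatusServiceUnavailable)
	}))
	defer srv.Close()

	go func() {
		time.Sleep(2 * time.Second)
		healthy.Store(true)
	}()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := waitForHealthy(ctx, srv.URL+"/health"); err != nil {
		fmt.Println("not healthy:", err)
		return
	}
	fmt.Println("healthy")
}
```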
```diff
@@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 
 		if !inst.Running {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
+			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+				// If on-demand start is enabled, start the instance
+				if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+					http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+					return
+				}
+
+				// Wait for the instance to become healthy before proceeding
+				if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
+					http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+					return
+				}
+
+			} else {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
 		}
 
 		proxy, err := inst.GetProxy()
```
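End to end, this means the first OpenAI-compatible request to a stopped instance is held until the instance starts and passes its health check (or the on-demand timeout expires), and is then proxied as usual; instances with on-demand start disabled still get the 503. A client-side sketch of that flow; the base URL, `/v1/chat/completions` path, instance name, and bearer key below are placeholders, not values taken from this diff.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"time"
)

func main() {
	// Placeholder values: adjust host/port, instance name, and API key
	// to match your llamactl deployment.
	url := "http://localhost:8080/v1/chat/completions"
	body := []byte(`{
		"model": "my-instance",
		"messages": [{"role": "user", "content": "Hello!"}]
	}`)

	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <inference-api-key>")

	// Allow for the on-demand start: the server may hold the request while
	// the instance launches and becomes healthy (up to
	// on_demand_start_timeout seconds, 120 by default).
	client := &http.Client{Timeout: 3 * time.Minute}

	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(out))
}
```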
```diff
@@ -26,6 +26,10 @@ export const basicFieldsConfig: Record<string, {
     placeholder: '60',
     description: 'Time in minutes before instance is considered idle and stopped'
   },
+  on_demand_start: {
+    label: 'On-Demand Start',
+    description: 'Start instance upon receiving OpenAI-compatible API request'
+  },
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
```
```diff
@@ -7,6 +7,7 @@ export const CreateInstanceOptionsSchema = z.object({
 	max_restarts: z.number().optional(),
 	restart_delay: z.number().optional(),
 	idle_timeout: z.number().optional(),
+	on_demand_start: z.boolean().optional(),
 
 	// Common params
 	verbose_prompt: z.boolean().optional(),
```
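Together with the web UI field above, a create/update request can now carry the flag explicitly. A small sketch of what such a request body might look like, built from Go; only a few of the schema's optional fields are shown, and the exact set of keys the UI sends depends on the form, so treat this as illustrative.

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Fields mirror the options schema above: on_demand_start is an optional
	// boolean, idle_timeout a number of minutes, model the model path.
	payload := map[string]any{
		"model":           "/path/to/model.gguf",
		"idle_timeout":    60,
		"on_demand_start": true,
	}

	body, err := json.MarshalIndent(payload, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```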