mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 09:04:27 +00:00
Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
This commit is contained in:
25
README.md
25
README.md
@@ -11,7 +11,8 @@
|
|||||||
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
|
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
|
||||||
🔐 **API Key Authentication**: Separate keys for management vs inference access
|
🔐 **API Key Authentication**: Separate keys for management vs inference access
|
||||||
📊 **Instance Monitoring**: Health checks, auto-restart, log management
|
📊 **Instance Monitoring**: Health checks, auto-restart, log management
|
||||||
⚡ **Persistent State**: Instances survive server restarts
|
⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
|
||||||
|
💾 **State Persistence**: Ensure instances remain intact across server restarts
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -172,16 +173,17 @@ server:
|
|||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
instances:
|
instances:
|
||||||
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
|
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
|
||||||
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
|
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
|
||||||
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
|
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
|
||||||
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
|
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
|
||||||
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
|
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
|
||||||
max_instances: -1 # Maximum instances (-1 = unlimited)
|
max_instances: -1 # Maximum instances (-1 = unlimited)
|
||||||
llama_executable: "llama-server" # Path to llama-server executable
|
llama_executable: "llama-server" # Path to llama-server executable
|
||||||
default_auto_restart: true # Default auto-restart setting
|
default_auto_restart: true # Default auto-restart setting
|
||||||
default_max_restarts: 3 # Default maximum restart attempts
|
default_max_restarts: 3 # Default maximum restart attempts
|
||||||
default_restart_delay: 5 # Default restart delay in seconds
|
default_restart_delay: 5 # Default restart delay in seconds
|
||||||
|
timeout_check_interval: 5 # Interval between idle-timeout checks, in minutes (default: 5)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Environment Variables:**
|
**Environment Variables:**
|
||||||
@@ -195,6 +197,7 @@ instances:
|
|||||||
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
|
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
|
||||||
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
|
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
|
||||||
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
|
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
|
||||||
|
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Interval between instance idle-timeout checks, in minutes
|
||||||
|
|
||||||
#### Authentication Configuration
|
#### Authentication Configuration
|
||||||
|
|
||||||
|
|||||||
@@ -66,6 +66,9 @@ type InstancesConfig struct {
|
|||||||
|
|
||||||
// Default restart delay for new instances (in seconds)
|
// Default restart delay for new instances (in seconds)
|
||||||
DefaultRestartDelay int `yaml:"default_restart_delay"`
|
DefaultRestartDelay int `yaml:"default_restart_delay"`
|
||||||
|
|
||||||
|
// Interval for checking instance timeouts (in minutes)
|
||||||
|
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AuthConfig contains authentication settings
|
// AuthConfig contains authentication settings
|
||||||
@@ -98,16 +101,17 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
EnableSwagger: false,
|
EnableSwagger: false,
|
||||||
},
|
},
|
||||||
Instances: InstancesConfig{
|
Instances: InstancesConfig{
|
||||||
PortRange: [2]int{8000, 9000},
|
PortRange: [2]int{8000, 9000},
|
||||||
DataDir: getDefaultDataDirectory(),
|
DataDir: getDefaultDataDirectory(),
|
||||||
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
|
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
|
||||||
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
|
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
|
||||||
AutoCreateDirs: true,
|
AutoCreateDirs: true,
|
||||||
MaxInstances: -1, // -1 means unlimited
|
MaxInstances: -1, // -1 means unlimited
|
||||||
LlamaExecutable: "llama-server",
|
LlamaExecutable: "llama-server",
|
||||||
DefaultAutoRestart: true,
|
DefaultAutoRestart: true,
|
||||||
DefaultMaxRestarts: 3,
|
DefaultMaxRestarts: 3,
|
||||||
DefaultRestartDelay: 5,
|
DefaultRestartDelay: 5,
|
||||||
|
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
|
||||||
},
|
},
|
||||||
Auth: AuthConfig{
|
Auth: AuthConfig{
|
||||||
RequireInferenceAuth: true,
|
RequireInferenceAuth: true,
|
||||||
@@ -217,6 +221,11 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
cfg.Instances.DefaultRestartDelay = seconds
|
cfg.Instances.DefaultRestartDelay = seconds
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
|
||||||
|
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
|
||||||
|
cfg.Instances.TimeoutCheckInterval = minutes
|
||||||
|
}
|
||||||
|
}
|
||||||
// Auth config
|
// Auth config
|
||||||
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
|
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
|
||||||
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
|
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
|
||||||
|
|||||||
@@ -13,16 +13,30 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TimeProvider abstracts the clock so that time-dependent logic can be driven
// by a controllable time source in tests.
type TimeProvider interface {
	// Now reports the current time according to this provider.
	Now() time.Time
}

// realTimeProvider implements TimeProvider using the actual system clock.
type realTimeProvider struct{}

// Compile-time check that realTimeProvider satisfies TimeProvider.
var _ TimeProvider = realTimeProvider{}

// Now returns the current wall-clock time as reported by time.Now.
func (realTimeProvider) Now() time.Time {
	return time.Now()
}
|
||||||
|
|
||||||
type CreateInstanceOptions struct {
|
type CreateInstanceOptions struct {
|
||||||
// Auto restart
|
// Auto restart
|
||||||
AutoRestart *bool `json:"auto_restart,omitempty"`
|
AutoRestart *bool `json:"auto_restart,omitempty"`
|
||||||
MaxRestarts *int `json:"max_restarts,omitempty"`
|
MaxRestarts *int `json:"max_restarts,omitempty"`
|
||||||
// RestartDelay duration in seconds
|
RestartDelay *int `json:"restart_delay,omitempty"`
|
||||||
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
|
// Timeout
|
||||||
|
IdleTimeout *int `json:"idle_timeout,omitempty"`
|
||||||
|
// LlamaServerOptions contains the options for the llama server
|
||||||
llamacpp.LlamaServerOptions `json:",inline"`
|
llamacpp.LlamaServerOptions `json:",inline"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,7 +48,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
|
|||||||
type tempCreateOptions struct {
|
type tempCreateOptions struct {
|
||||||
AutoRestart *bool `json:"auto_restart,omitempty"`
|
AutoRestart *bool `json:"auto_restart,omitempty"`
|
||||||
MaxRestarts *int `json:"max_restarts,omitempty"`
|
MaxRestarts *int `json:"max_restarts,omitempty"`
|
||||||
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
|
RestartDelay *int `json:"restart_delay,omitempty"`
|
||||||
|
IdleTimeout *int `json:"idle_timeout,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var temp tempCreateOptions
|
var temp tempCreateOptions
|
||||||
@@ -46,6 +61,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
|
|||||||
c.AutoRestart = temp.AutoRestart
|
c.AutoRestart = temp.AutoRestart
|
||||||
c.MaxRestarts = temp.MaxRestarts
|
c.MaxRestarts = temp.MaxRestarts
|
||||||
c.RestartDelay = temp.RestartDelay
|
c.RestartDelay = temp.RestartDelay
|
||||||
|
c.IdleTimeout = temp.IdleTimeout
|
||||||
|
|
||||||
// Now unmarshal the embedded LlamaServerOptions
|
// Now unmarshal the embedded LlamaServerOptions
|
||||||
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
|
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
|
||||||
@@ -83,6 +99,10 @@ type Process struct {
|
|||||||
// Restart control
|
// Restart control
|
||||||
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
|
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
|
||||||
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
|
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
|
||||||
|
|
||||||
|
// Timeout management
|
||||||
|
lastRequestTime atomic.Int64 // Unix timestamp of last request
|
||||||
|
timeProvider TimeProvider `json:"-"` // Time provider for testing
|
||||||
}
|
}
|
||||||
|
|
||||||
// validateAndCopyOptions validates and creates a deep copy of the provided options
|
// validateAndCopyOptions validates and creates a deep copy of the provided options
|
||||||
@@ -117,6 +137,15 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
|
|||||||
}
|
}
|
||||||
optionsCopy.RestartDelay = &restartDelay
|
optionsCopy.RestartDelay = &restartDelay
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if options.IdleTimeout != nil {
|
||||||
|
idleTimeout := *options.IdleTimeout
|
||||||
|
if idleTimeout < 0 {
|
||||||
|
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
|
||||||
|
idleTimeout = 0
|
||||||
|
}
|
||||||
|
optionsCopy.IdleTimeout = &idleTimeout
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return optionsCopy
|
return optionsCopy
|
||||||
@@ -142,6 +171,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
|
|||||||
defaultRestartDelay := globalSettings.DefaultRestartDelay
|
defaultRestartDelay := globalSettings.DefaultRestartDelay
|
||||||
options.RestartDelay = &defaultRestartDelay
|
options.RestartDelay = &defaultRestartDelay
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if options.IdleTimeout == nil {
|
||||||
|
defaultIdleTimeout := 0
|
||||||
|
options.IdleTimeout = &defaultIdleTimeout
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewInstance creates a new instance with the given name, log path, and options
|
// NewInstance creates a new instance with the given name, log path, and options
|
||||||
@@ -158,10 +192,8 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C
|
|||||||
options: optionsCopy,
|
options: optionsCopy,
|
||||||
globalSettings: globalSettings,
|
globalSettings: globalSettings,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
|
timeProvider: realTimeProvider{},
|
||||||
Running: false,
|
Created: time.Now().Unix(),
|
||||||
|
|
||||||
Created: time.Now().Unix(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -189,6 +221,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
|
|||||||
i.proxy = nil
|
i.proxy = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTimeProvider sets a custom time provider for testing
|
||||||
|
func (i *Process) SetTimeProvider(tp TimeProvider) {
|
||||||
|
i.timeProvider = tp
|
||||||
|
}
|
||||||
|
|
||||||
// GetProxy returns the reverse proxy for this instance, creating it if needed
|
// GetProxy returns the reverse proxy for this instance, creating it if needed
|
||||||
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
||||||
i.mu.Lock()
|
i.mu.Lock()
|
||||||
|
|||||||
@@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNewInstance_ValidationAndDefaults(t *testing.T) {
|
|
||||||
globalSettings := &config.InstancesConfig{
|
|
||||||
LogsDir: "/tmp/test",
|
|
||||||
DefaultAutoRestart: true,
|
|
||||||
DefaultMaxRestarts: 3,
|
|
||||||
DefaultRestartDelay: 5,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test with invalid negative values
|
|
||||||
invalidMaxRestarts := -5
|
|
||||||
invalidRestartDelay := -10
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
MaxRestarts: &invalidMaxRestarts,
|
|
||||||
RestartDelay: &invalidRestartDelay,
|
|
||||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
|
||||||
Model: "/path/to/model.gguf",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
instance := instance.NewInstance("test-instance", globalSettings, options)
|
|
||||||
opts := instance.GetOptions()
|
|
||||||
|
|
||||||
// Check that negative values were corrected to 0
|
|
||||||
if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
|
|
||||||
t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
|
|
||||||
}
|
|
||||||
if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
|
|
||||||
t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSetOptions(t *testing.T) {
|
func TestSetOptions(t *testing.T) {
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
LogsDir: "/tmp/test",
|
LogsDir: "/tmp/test",
|
||||||
@@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSetOptions_NilOptions(t *testing.T) {
|
|
||||||
globalSettings := &config.InstancesConfig{
|
|
||||||
LogsDir: "/tmp/test",
|
|
||||||
DefaultAutoRestart: true,
|
|
||||||
DefaultMaxRestarts: 3,
|
|
||||||
DefaultRestartDelay: 5,
|
|
||||||
}
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
|
||||||
Model: "/path/to/model.gguf",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
instance := instance.NewInstance("test-instance", globalSettings, options)
|
|
||||||
originalOptions := instance.GetOptions()
|
|
||||||
|
|
||||||
// Try to set nil options
|
|
||||||
instance.SetOptions(nil)
|
|
||||||
|
|
||||||
// Options should remain unchanged
|
|
||||||
currentOptions := instance.GetOptions()
|
|
||||||
if currentOptions.Model != originalOptions.Model {
|
|
||||||
t.Error("Options should not change when setting nil options")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetProxy(t *testing.T) {
|
func TestGetProxy(t *testing.T) {
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
LogsDir: "/tmp/test",
|
LogsDir: "/tmp/test",
|
||||||
@@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUnmarshalJSON_PartialOptions(t *testing.T) {
|
|
||||||
jsonData := `{
|
|
||||||
"name": "test-instance",
|
|
||||||
"running": false,
|
|
||||||
"options": {
|
|
||||||
"model": "/path/to/model.gguf"
|
|
||||||
}
|
|
||||||
}`
|
|
||||||
|
|
||||||
var inst instance.Process
|
|
||||||
err := json.Unmarshal([]byte(jsonData), &inst)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("JSON unmarshal failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
opts := inst.GetOptions()
|
|
||||||
if opts.Model != "/path/to/model.gguf" {
|
|
||||||
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: Defaults are NOT applied during unmarshaling
|
|
||||||
// They should only be applied by NewInstance or SetOptions
|
|
||||||
if opts.AutoRestart != nil {
|
|
||||||
t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUnmarshalJSON_NoOptions(t *testing.T) {
|
|
||||||
jsonData := `{
|
|
||||||
"name": "test-instance",
|
|
||||||
"running": false
|
|
||||||
}`
|
|
||||||
|
|
||||||
var inst instance.Process
|
|
||||||
err := json.Unmarshal([]byte(jsonData), &inst)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("JSON unmarshal failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if inst.Name != "test-instance" {
|
|
||||||
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
|
|
||||||
}
|
|
||||||
if inst.Running {
|
|
||||||
t.Error("Expected running to be false")
|
|
||||||
}
|
|
||||||
|
|
||||||
opts := inst.GetOptions()
|
|
||||||
if opts != nil {
|
|
||||||
t.Error("Expected options to be nil when not provided in JSON")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCreateInstanceOptionsValidation(t *testing.T) {
|
func TestCreateInstanceOptionsValidation(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
|||||||
expectedMax int
|
expectedMax int
|
||||||
expectedDelay int
|
expectedDelay int
|
||||||
}{
|
}{
|
||||||
{
|
|
||||||
name: "nil values",
|
|
||||||
maxRestarts: nil,
|
|
||||||
restartDelay: nil,
|
|
||||||
expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure
|
|
||||||
expectedDelay: 0,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
name: "valid positive values",
|
name: "valid positive values",
|
||||||
maxRestarts: testutil.IntPtr(10),
|
maxRestarts: testutil.IntPtr(10),
|
||||||
@@ -424,20 +306,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
|||||||
instance := instance.NewInstance("test", globalSettings, options)
|
instance := instance.NewInstance("test", globalSettings, options)
|
||||||
opts := instance.GetOptions()
|
opts := instance.GetOptions()
|
||||||
|
|
||||||
if tt.maxRestarts != nil {
|
if opts.MaxRestarts == nil {
|
||||||
if opts.MaxRestarts == nil {
|
t.Error("Expected MaxRestarts to be set")
|
||||||
t.Error("Expected MaxRestarts to be set")
|
} else if *opts.MaxRestarts != tt.expectedMax {
|
||||||
} else if *opts.MaxRestarts != tt.expectedMax {
|
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
|
||||||
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if tt.restartDelay != nil {
|
if opts.RestartDelay == nil {
|
||||||
if opts.RestartDelay == nil {
|
t.Error("Expected RestartDelay to be set")
|
||||||
t.Error("Expected RestartDelay to be set")
|
} else if *opts.RestartDelay != tt.expectedDelay {
|
||||||
} else if *opts.RestartDelay != tt.expectedDelay {
|
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
|
||||||
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,6 +30,9 @@ func (i *Process) Start() error {
|
|||||||
i.restarts = 0
|
i.restarts = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize last request time to current time when starting
|
||||||
|
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
|
||||||
|
|
||||||
// Create log files
|
// Create log files
|
||||||
if err := i.logger.Create(); err != nil {
|
if err := i.logger.Create(); err != nil {
|
||||||
return fmt.Errorf("failed to create log files: %w", err)
|
return fmt.Errorf("failed to create log files: %w", err)
|
||||||
|
|||||||
28
pkg/instance/timeout.go
Normal file
28
pkg/instance/timeout.go
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
package instance
|
||||||
|
|
||||||
|
// UpdateLastRequestTime updates the last request access time for the instance via proxy
|
||||||
|
func (i *Process) UpdateLastRequestTime() {
|
||||||
|
i.mu.Lock()
|
||||||
|
defer i.mu.Unlock()
|
||||||
|
|
||||||
|
lastRequestTime := i.timeProvider.Now().Unix()
|
||||||
|
i.lastRequestTime.Store(lastRequestTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *Process) ShouldTimeout() bool {
|
||||||
|
i.mu.RLock()
|
||||||
|
defer i.mu.RUnlock()
|
||||||
|
|
||||||
|
if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the last request time exceeds the idle timeout
|
||||||
|
lastRequest := i.lastRequestTime.Load()
|
||||||
|
idleTimeoutMinutes := *i.options.IdleTimeout
|
||||||
|
|
||||||
|
// Convert timeout from minutes to seconds for comparison
|
||||||
|
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
|
||||||
|
|
||||||
|
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
|
||||||
|
}
|
||||||
195
pkg/instance/timeout_test.go
Normal file
195
pkg/instance/timeout_test.go
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
package instance_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"llamactl/pkg/backends/llamacpp"
|
||||||
|
"llamactl/pkg/config"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"llamactl/pkg/testutil"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MockTimeProvider is a test clock: it reports whatever time was last set on
// it, at whole-second resolution.
type MockTimeProvider struct {
	currentTime atomic.Int64 // Unix timestamp
}

// NewMockTimeProvider returns a mock clock initialised to t.
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
	provider := new(MockTimeProvider)
	provider.currentTime.Store(t.Unix())
	return provider
}

// Now returns the currently configured time, reconstructed from the stored
// Unix seconds.
func (m *MockTimeProvider) Now() time.Time {
	seconds := m.currentTime.Load()
	return time.Unix(seconds, 0)
}

// SetTime moves the mock clock to t, keeping only its Unix-second component.
func (m *MockTimeProvider) SetTime(t time.Time) {
	seconds := t.Unix()
	m.currentTime.Store(seconds)
}
|
||||||
|
|
||||||
|
// Timeout-related tests
|
||||||
|
|
||||||
|
func TestUpdateLastRequestTime(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
|
||||||
|
// Test that UpdateLastRequestTime doesn't panic
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldTimeout_NotRunning(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
idleTimeout := 1 // 1 minute
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
IdleTimeout: &idleTimeout,
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
|
||||||
|
// Instance is not running, should not timeout regardless of configuration
|
||||||
|
if inst.ShouldTimeout() {
|
||||||
|
t.Error("Non-running instance should never timeout")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
idleTimeout *int
|
||||||
|
}{
|
||||||
|
{"nil timeout", nil},
|
||||||
|
{"zero timeout", testutil.IntPtr(0)},
|
||||||
|
{"negative timeout", testutil.IntPtr(-5)},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
IdleTimeout: tt.idleTimeout,
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
// Simulate running state
|
||||||
|
inst.Running = true
|
||||||
|
|
||||||
|
if inst.ShouldTimeout() {
|
||||||
|
t.Errorf("Instance with %s should not timeout", tt.name)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
idleTimeout := 5 // 5 minutes
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
IdleTimeout: &idleTimeout,
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
inst.Running = true
|
||||||
|
|
||||||
|
// Update last request time to now
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
// Should not timeout immediately
|
||||||
|
if inst.ShouldTimeout() {
|
||||||
|
t.Error("Instance should not timeout when last request was recent")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
idleTimeout := 1 // 1 minute
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
IdleTimeout: &idleTimeout,
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
inst.Running = true
|
||||||
|
|
||||||
|
// Use MockTimeProvider to simulate old last request time
|
||||||
|
mockTime := NewMockTimeProvider(time.Now())
|
||||||
|
inst.SetTimeProvider(mockTime)
|
||||||
|
|
||||||
|
// Set last request time to now
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
// Advance time by 2 minutes (exceeds 1 minute timeout)
|
||||||
|
mockTime.SetTime(time.Now().Add(2 * time.Minute))
|
||||||
|
|
||||||
|
if !inst.ShouldTimeout() {
|
||||||
|
t.Error("Instance should timeout when last request exceeds idle timeout")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTimeoutConfiguration_Validation(t *testing.T) {
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
inputTimeout *int
|
||||||
|
expectedTimeout int
|
||||||
|
}{
|
||||||
|
{"default value when nil", nil, 0},
|
||||||
|
{"positive value", testutil.IntPtr(10), 10},
|
||||||
|
{"zero value", testutil.IntPtr(0), 0},
|
||||||
|
{"negative value gets corrected", testutil.IntPtr(-5), 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
IdleTimeout: tt.inputTimeout,
|
||||||
|
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||||
|
opts := inst.GetOptions()
|
||||||
|
|
||||||
|
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
|
||||||
|
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// InstanceManager defines the interface for managing instances of the llama server.
|
// InstanceManager defines the interface for managing instances of the llama server.
|
||||||
@@ -31,20 +32,48 @@ type instanceManager struct {
|
|||||||
instances map[string]*instance.Process
|
instances map[string]*instance.Process
|
||||||
ports map[int]bool
|
ports map[int]bool
|
||||||
instancesConfig config.InstancesConfig
|
instancesConfig config.InstancesConfig
|
||||||
|
|
||||||
|
// Timeout checker
|
||||||
|
timeoutChecker *time.Ticker
|
||||||
|
shutdownChan chan struct{}
|
||||||
|
shutdownDone chan struct{}
|
||||||
|
isShutdown bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewInstanceManager creates a new instance of InstanceManager.
|
// NewInstanceManager creates a new instance of InstanceManager.
|
||||||
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
|
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
|
||||||
|
if instancesConfig.TimeoutCheckInterval <= 0 {
|
||||||
|
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
||||||
|
}
|
||||||
im := &instanceManager{
|
im := &instanceManager{
|
||||||
instances: make(map[string]*instance.Process),
|
instances: make(map[string]*instance.Process),
|
||||||
ports: make(map[int]bool),
|
ports: make(map[int]bool),
|
||||||
instancesConfig: instancesConfig,
|
instancesConfig: instancesConfig,
|
||||||
|
|
||||||
|
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
||||||
|
shutdownChan: make(chan struct{}),
|
||||||
|
shutdownDone: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load existing instances from disk
|
// Load existing instances from disk
|
||||||
if err := im.loadInstances(); err != nil {
|
if err := im.loadInstances(); err != nil {
|
||||||
log.Printf("Error loading instances: %v", err)
|
log.Printf("Error loading instances: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start the timeout checker goroutine after initialization is complete
|
||||||
|
go func() {
|
||||||
|
defer close(im.shutdownDone)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-im.timeoutChecker.C:
|
||||||
|
im.checkAllTimeouts()
|
||||||
|
case <-im.shutdownChan:
|
||||||
|
return // Exit goroutine on shutdown
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
return im
|
return im
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -94,6 +123,27 @@ func (im *instanceManager) Shutdown() {
|
|||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
|
// Check if already shutdown
|
||||||
|
if im.isShutdown {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
im.isShutdown = true
|
||||||
|
|
||||||
|
// Signal the timeout checker to stop
|
||||||
|
close(im.shutdownChan)
|
||||||
|
|
||||||
|
// Release lock temporarily to wait for goroutine
|
||||||
|
im.mu.Unlock()
|
||||||
|
// Wait for the timeout checker goroutine to actually stop
|
||||||
|
<-im.shutdownDone
|
||||||
|
// Reacquire lock
|
||||||
|
im.mu.Lock()
|
||||||
|
|
||||||
|
// Now stop the ticker
|
||||||
|
if im.timeoutChecker != nil {
|
||||||
|
im.timeoutChecker.Stop()
|
||||||
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(len(im.instances))
|
wg.Add(len(im.instances))
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
return nil, fmt.Errorf("instance options cannot be nil")
|
return nil, fmt.Errorf("instance options cannot be nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
|
||||||
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
|
||||||
}
|
|
||||||
|
|
||||||
name, err := validation.ValidateInstanceName(name)
|
name, err := validation.ValidateInstanceName(name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
|
// Check max instances limit after acquiring the lock
|
||||||
|
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
||||||
|
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
||||||
|
}
|
||||||
|
|
||||||
// Check if instance with this name already exists
|
// Check if instance with this name already exists
|
||||||
if im.instances[name] != nil {
|
if im.instances[name] != nil {
|
||||||
return nil, fmt.Errorf("instance with name %s already exists", name)
|
return nil, fmt.Errorf("instance with name %s already exists", name)
|
||||||
|
|||||||
26
pkg/manager/timeout.go
Normal file
26
pkg/manager/timeout.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package manager
|
||||||
|
|
||||||
|
import "log"
|
||||||
|
|
||||||
|
func (im *instanceManager) checkAllTimeouts() {
|
||||||
|
im.mu.RLock()
|
||||||
|
var timeoutInstances []string
|
||||||
|
|
||||||
|
// Identify instances that should timeout
|
||||||
|
for _, inst := range im.instances {
|
||||||
|
if inst.ShouldTimeout() {
|
||||||
|
timeoutInstances = append(timeoutInstances, inst.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
im.mu.RUnlock() // Release read lock before calling StopInstance
|
||||||
|
|
||||||
|
// Stop the timed-out instances
|
||||||
|
for _, name := range timeoutInstances {
|
||||||
|
log.Printf("Instance %s has timed out, stopping it", name)
|
||||||
|
if _, err := im.StopInstance(name); err != nil {
|
||||||
|
log.Printf("Error stopping instance %s: %v", name, err)
|
||||||
|
} else {
|
||||||
|
log.Printf("Instance %s stopped successfully", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -472,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
|||||||
proxyPath = "/" + proxyPath
|
proxyPath = "/" + proxyPath
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
// Modify the request to remove the proxy prefix
|
// Modify the request to remove the proxy prefix
|
||||||
originalPath := r.URL.Path
|
originalPath := r.URL.Path
|
||||||
r.URL.Path = proxyPath
|
r.URL.Path = proxyPath
|
||||||
@@ -582,6 +585,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
// Recreate the request body from the bytes we read
|
// Recreate the request body from the bytes we read
|
||||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||||
r.ContentLength = int64(len(bodyBytes))
|
r.ContentLength = int64(len(bodyBytes))
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
|
import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
|
||||||
|
|
||||||
// Only define the basic fields we want to show by default
|
// Only define the basic fields we want to show by default
|
||||||
export const basicFieldsConfig: Record<string, {
|
export const basicFieldsConfig: Record<string, {
|
||||||
label: string
|
label: string
|
||||||
description?: string
|
description?: string
|
||||||
placeholder?: string
|
placeholder?: string
|
||||||
@@ -21,6 +21,11 @@ export const basicFieldsConfig: Record<string, {
|
|||||||
placeholder: '5',
|
placeholder: '5',
|
||||||
description: 'Delay in seconds before attempting restart'
|
description: 'Delay in seconds before attempting restart'
|
||||||
},
|
},
|
||||||
|
idle_timeout: {
|
||||||
|
label: 'Idle Timeout (minutes)',
|
||||||
|
placeholder: '60',
|
||||||
|
description: 'Time in minutes before instance is considered idle and stopped'
|
||||||
|
},
|
||||||
model: {
|
model: {
|
||||||
label: 'Model Path',
|
label: 'Model Path',
|
||||||
placeholder: '/path/to/model.gguf',
|
placeholder: '/path/to/model.gguf',
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ export const CreateInstanceOptionsSchema = z.object({
|
|||||||
auto_restart: z.boolean().optional(),
|
auto_restart: z.boolean().optional(),
|
||||||
max_restarts: z.number().optional(),
|
max_restarts: z.number().optional(),
|
||||||
restart_delay: z.number().optional(),
|
restart_delay: z.number().optional(),
|
||||||
|
idle_timeout: z.number().optional(),
|
||||||
|
|
||||||
// Common params
|
// Common params
|
||||||
verbose_prompt: z.boolean().optional(),
|
verbose_prompt: z.boolean().optional(),
|
||||||
|
|||||||
Reference in New Issue
Block a user