Merge pull request #22 from lordmathis/feat/timeout

feat: Implement idle instance timeout
2025-12-24 10:04:26 +00:00 · 2025-08-20 13:34:38 +02:00
parent eb9599f26a 7194e1fdd1
commit 651c8b9b2c
14 changed files with 666 additions and 807 deletions
--- a/README.md
+++ b/README.md
@@ -11,7 +11,8 @@
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)  
 🔐 **API Key Authentication**: Separate keys for management vs inference access  
 📊 **Instance Monitoring**: Health checks, auto-restart, log management  
-⚡ **Persistent State**: Instances survive server restarts
+⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period  
 💾 **State Persistence**: Ensure instances remain intact across server restarts  
 ![Dashboard Screenshot](docs/images/screenshot.png)
@@ -172,16 +173,17 @@ server:
 ```yaml
 instances:
-  port_range: [8000, 9000]           # Port range for instances (default: [8000, 9000])
+  port_range: [8000, 9000]                          # Port range for instances (default: [8000, 9000])
-  data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
+  data_dir: "~/.local/share/llamactl"               # Directory for all llamactl data (default varies by OS)
-  configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
+  configs_dir: "~/.local/share/llamactl/instances"  # Directory for instance configs (default: data_dir/instances)
-  logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
+  logs_dir: "~/.local/share/llamactl/logs"          # Directory for instance logs (default: data_dir/logs)
-  auto_create_dirs: true             # Automatically create data/config/logs directories (default: true)
+  auto_create_dirs: true                            # Automatically create data/config/logs directories (default: true)
-  max_instances: -1                  # Maximum instances (-1 = unlimited)
+  max_instances: -1                                 # Maximum instances (-1 = unlimited)
-  llama_executable: "llama-server"   # Path to llama-server executable
+  llama_executable: "llama-server"                  # Path to llama-server executable
-  default_auto_restart: true         # Default auto-restart setting
+  default_auto_restart: true                        # Default auto-restart setting
-  default_max_restarts: 3            # Default maximum restart attempts
+  default_max_restarts: 3                           # Default maximum restart attempts
-  default_restart_delay: 5           # Default restart delay in seconds
+  default_restart_delay: 5                          # Default restart delay in seconds
  timeout_check_interval: 5                         # Interval between idle-instance timeout checks, in minutes (default: 5)
 ```
 **Environment Variables:**
@@ -195,6 +197,7 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
 - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Interval between idle-instance timeout checks, in minutes
 #### Authentication Configuration
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -66,6 +66,9 @@ type InstancesConfig struct {
 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
 	// Interval for checking instance timeouts (in minutes)
 	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }
 // AuthConfig contains authentication settings
@@ -98,16 +101,17 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			EnableSwagger:  false,
 		},
 		Instances: InstancesConfig{
-			PortRange:           [2]int{8000, 9000},
+			PortRange:            [2]int{8000, 9000},
-			DataDir:             getDefaultDataDirectory(),
+			DataDir:              getDefaultDataDirectory(),
-			InstancesDir:        filepath.Join(getDefaultDataDirectory(), "instances"),
+			InstancesDir:         filepath.Join(getDefaultDataDirectory(), "instances"),
-			LogsDir:             filepath.Join(getDefaultDataDirectory(), "logs"),
+			LogsDir:              filepath.Join(getDefaultDataDirectory(), "logs"),
-			AutoCreateDirs:      true,
+			AutoCreateDirs:       true,
-			MaxInstances:        -1, // -1 means unlimited
+			MaxInstances:         -1, // -1 means unlimited
-			LlamaExecutable:     "llama-server",
+			LlamaExecutable:      "llama-server",
-			DefaultAutoRestart:  true,
+			DefaultAutoRestart:   true,
-			DefaultMaxRestarts:  3,
+			DefaultMaxRestarts:   3,
-			DefaultRestartDelay: 5,
+			DefaultRestartDelay:  5,
 			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth:  true,
@@ -217,6 +221,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
 	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
 		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
 			cfg.Instances.TimeoutCheckInterval = minutes
 		}
 	}
 	// Auth config
 	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
 		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -13,16 +13,30 @@ import (
 	"net/url"
 	"os/exec"
 	"sync"
 	"sync/atomic"
 	"time"
 )
 // TimeProvider abstracts the source of the current time so that tests can
 // substitute a mock clock for the system clock.
 type TimeProvider interface {
 	// Now returns the provider's notion of the current time.
 	Now() time.Time
 }
 // realTimeProvider is the TimeProvider backed by the system clock.
 type realTimeProvider struct{}
 // Now reports the current time as returned by time.Now.
 func (p realTimeProvider) Now() time.Time {
 	current := time.Now()
 	return current
 }
 type CreateInstanceOptions struct {
 	// Auto restart
-	AutoRestart *bool `json:"auto_restart,omitempty"`
+	AutoRestart  *bool `json:"auto_restart,omitempty"`
-	MaxRestarts *int  `json:"max_restarts,omitempty"`
+	MaxRestarts  *int  `json:"max_restarts,omitempty"`
-	// RestartDelay duration in seconds
+	RestartDelay *int  `json:"restart_delay,omitempty"`
-	RestartDelay *int `json:"restart_delay_seconds,omitempty"`
+	// Timeout
-
+	IdleTimeout *int `json:"idle_timeout,omitempty"`
 	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
 }
@@ -34,7 +48,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	type tempCreateOptions struct {
 		AutoRestart  *bool `json:"auto_restart,omitempty"`
 		MaxRestarts  *int  `json:"max_restarts,omitempty"`
-		RestartDelay *int  `json:"restart_delay_seconds,omitempty"`
+		RestartDelay *int  `json:"restart_delay,omitempty"`
 		IdleTimeout  *int  `json:"idle_timeout,omitempty"`
 	}
 	var temp tempCreateOptions
@@ -46,6 +61,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
 	c.IdleTimeout = temp.IdleTimeout
 	// Now unmarshal the embedded LlamaServerOptions
 	if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
@@ -83,6 +99,10 @@ type Process struct {
 	// Restart control
 	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
 	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
 	// Timeout management
 	lastRequestTime atomic.Int64 // Unix timestamp of last request
 	timeProvider    TimeProvider `json:"-"` // Time provider for testing
 }
 // validateAndCopyOptions validates and creates a deep copy of the provided options
@@ -117,6 +137,15 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 			}
 			optionsCopy.RestartDelay = &restartDelay
 		}
 		if options.IdleTimeout != nil {
 			idleTimeout := *options.IdleTimeout
 			if idleTimeout < 0 {
 				log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
 				idleTimeout = 0
 			}
 			optionsCopy.IdleTimeout = &idleTimeout
 		}
 	}
 	return optionsCopy
@@ -142,6 +171,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		defaultRestartDelay := globalSettings.DefaultRestartDelay
 		options.RestartDelay = &defaultRestartDelay
 	}
 	if options.IdleTimeout == nil {
 		defaultIdleTimeout := 0
 		options.IdleTimeout = &defaultIdleTimeout
 	}
 }
 // NewInstance creates a new instance with the given name, log path, and options
@@ -158,10 +192,8 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C
 		options:        optionsCopy,
 		globalSettings: globalSettings,
 		logger:         logger,
-
+		timeProvider:   realTimeProvider{},
-		Running: false,
+		Created:        time.Now().Unix(),
 		Created: time.Now().Unix(),
 	}
 }
@@ -189,6 +221,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
 	i.proxy = nil
 }
 // SetTimeProvider replaces the clock used for idle-timeout bookkeeping.
 // It exists so tests can control the passage of time.
 // The field is written while holding i.mu because the timeout methods read
 // i.timeProvider under that same lock.
 func (i *Process) SetTimeProvider(tp TimeProvider) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	i.timeProvider = tp
 }
 // GetProxy returns the reverse proxy for this instance, creating it if needed
 func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	i.mu.Lock()
--- a/pkg/instance/instance_test.go
+++ b/pkg/instance/instance_test.go
@@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 	}
 }
 // TestNewInstance_ValidationAndDefaults verifies that negative MaxRestarts
 // and RestartDelay values passed to NewInstance are corrected to 0.
 func TestNewInstance_ValidationAndDefaults(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
 		DefaultAutoRestart:  true,
 		DefaultMaxRestarts:  3,
 		DefaultRestartDelay: 5,
 	}
 	// Test with invalid negative values
 	invalidMaxRestarts := -5
 	invalidRestartDelay := -10
 	options := &instance.CreateInstanceOptions{
 		MaxRestarts:  &invalidMaxRestarts,
 		RestartDelay: &invalidRestartDelay,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst := instance.NewInstance("test-instance", globalSettings, options)
 	opts := inst.GetOptions()
 	// Check that negative values were corrected to 0. The nil and wrong-value
 	// cases are reported separately so the message shows the actual integer
 	// rather than a pointer address.
 	if opts.MaxRestarts == nil {
 		t.Error("Expected MaxRestarts to be corrected to 0, got nil")
 	} else if *opts.MaxRestarts != 0 {
 		t.Errorf("Expected MaxRestarts to be corrected to 0, got %d", *opts.MaxRestarts)
 	}
 	if opts.RestartDelay == nil {
 		t.Error("Expected RestartDelay to be corrected to 0, got nil")
 	} else if *opts.RestartDelay != 0 {
 		t.Errorf("Expected RestartDelay to be corrected to 0, got %d", *opts.RestartDelay)
 	}
 }
 func TestSetOptions(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
@@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) {
 	}
 }
 // TestSetOptions_NilOptions verifies that SetOptions(nil) leaves the
 // instance's configured model unchanged.
 func TestSetOptions_NilOptions(t *testing.T) {
 	settings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
 		DefaultAutoRestart:  true,
 		DefaultMaxRestarts:  3,
 		DefaultRestartDelay: 5,
 	}
 	inst := instance.NewInstance("test-instance", settings, &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	})
 	before := inst.GetOptions()
 	// Try to set nil options
 	inst.SetOptions(nil)
 	// Options should remain unchanged
 	after := inst.GetOptions()
 	if after.Model == before.Model {
 		return
 	}
 	t.Error("Options should not change when setting nil options")
 }
 func TestGetProxy(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
@@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) {
 	}
 }
 // TestUnmarshalJSON_PartialOptions decodes a Process whose options contain
 // only a model path and checks that no defaults are filled in by decoding.
 func TestUnmarshalJSON_PartialOptions(t *testing.T) {
 	const payload = `{
 		"name": "test-instance",
 		"running": false,
 		"options": {
 			"model": "/path/to/model.gguf"
 		}
 	}`
 	var decoded instance.Process
 	if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
 		t.Fatalf("JSON unmarshal failed: %v", err)
 	}
 	got := decoded.GetOptions()
 	if got.Model != "/path/to/model.gguf" {
 		t.Errorf("Expected model '/path/to/model.gguf', got %q", got.Model)
 	}
 	// Note: Defaults are NOT applied during unmarshaling
 	// They should only be applied by NewInstance or SetOptions
 	if got.AutoRestart != nil {
 		t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
 	}
 }
 // TestUnmarshalJSON_NoOptions decodes a Process that has no "options" key
 // and checks the name, the running flag, and that GetOptions returns nil.
 func TestUnmarshalJSON_NoOptions(t *testing.T) {
 	const payload = `{
 		"name": "test-instance",
 		"running": false
 	}`
 	var decoded instance.Process
 	if err := json.Unmarshal([]byte(payload), &decoded); err != nil {
 		t.Fatalf("JSON unmarshal failed: %v", err)
 	}
 	if got := decoded.Name; got != "test-instance" {
 		t.Errorf("Expected name 'test-instance', got %q", got)
 	}
 	if decoded.Running {
 		t.Error("Expected running to be false")
 	}
 	if decoded.GetOptions() != nil {
 		t.Error("Expected options to be nil when not provided in JSON")
 	}
 }
 func TestCreateInstanceOptionsValidation(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		expectedMax   int
 		expectedDelay int
 	}{
 		{
 			name:          "nil values",
 			maxRestarts:   nil,
 			restartDelay:  nil,
 			expectedMax:   0, // Should remain nil, but we can't easily test nil in this structure
 			expectedDelay: 0,
 		},
 		{
 			name:          "valid positive values",
 			maxRestarts:   testutil.IntPtr(10),
@@ -424,20 +306,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 			instance := instance.NewInstance("test", globalSettings, options)
 			opts := instance.GetOptions()
-			if tt.maxRestarts != nil {
+			if opts.MaxRestarts == nil {
-				if opts.MaxRestarts == nil {
+				t.Error("Expected MaxRestarts to be set")
-					t.Error("Expected MaxRestarts to be set")
+			} else if *opts.MaxRestarts != tt.expectedMax {
-				} else if *opts.MaxRestarts != tt.expectedMax {
+				t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
 					t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
 				}
 			}
-			if tt.restartDelay != nil {
+			if opts.RestartDelay == nil {
-				if opts.RestartDelay == nil {
+				t.Error("Expected RestartDelay to be set")
-					t.Error("Expected RestartDelay to be set")
+			} else if *opts.RestartDelay != tt.expectedDelay {
-				} else if *opts.RestartDelay != tt.expectedDelay {
+				t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
 					t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
 				}
 			}
 		})
 	}
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -30,6 +30,9 @@ func (i *Process) Start() error {
 		i.restarts = 0
 	}
 	// Initialize last request time to current time when starting
 	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
--- a/pkg/instance/timeout.go
+++ b/pkg/instance/timeout.go
@@ -0,0 +1,28 @@
 package instance
 // UpdateLastRequestTime records the current time, in Unix seconds, as the
 // time of the most recent request proxied to this instance.
 // Only a read lock is taken: the lock guards the read of i.timeProvider,
 // while lastRequestTime is an atomic value, so concurrent requests do not
 // need to serialize on the exclusive lock.
 func (i *Process) UpdateLastRequestTime() {
 	i.mu.RLock()
 	defer i.mu.RUnlock()
 	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
 }
 // ShouldTimeout reports whether the instance has been idle for longer than
 // its IdleTimeout option, which is expressed in minutes.
 // It returns false when the instance is not running, has no options, or has
 // no positive idle timeout configured.
 func (i *Process) ShouldTimeout() bool {
 	i.mu.RLock()
 	defer i.mu.RUnlock()
 	// options may be nil (for example on a Process decoded without options),
 	// so guard before dereferencing it.
 	if !i.Running || i.options == nil || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
 		return false
 	}
 	// Check if the last request time exceeds the idle timeout
 	lastRequest := i.lastRequestTime.Load()
 	// Convert timeout from minutes to seconds for comparison. Widen to int64
 	// before multiplying so a large value cannot overflow a 32-bit int.
 	idleTimeoutSeconds := int64(*i.options.IdleTimeout) * 60
 	return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
 }
--- a/pkg/instance/timeout_test.go
+++ b/pkg/instance/timeout_test.go
@@ -0,0 +1,195 @@
 package instance_test
 import (
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/testutil"
 	"sync/atomic"
 	"testing"
 	"time"
 )
 // MockTimeProvider is a test clock whose time only changes when SetTime is
 // called. The time is stored as Unix seconds in an atomic value.
 type MockTimeProvider struct {
 	currentTime atomic.Int64 // Unix timestamp
 }
 // NewMockTimeProvider returns a mock clock set to t, truncated to whole seconds.
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	mock := new(MockTimeProvider)
 	mock.currentTime.Store(t.Unix())
 	return mock
 }
 // Now returns the mock clock's current time at one-second resolution.
 func (m *MockTimeProvider) Now() time.Time {
 	seconds := m.currentTime.Load()
 	return time.Unix(seconds, 0)
 }
 // SetTime moves the mock clock to t, truncated to whole seconds.
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	seconds := t.Unix()
 	m.currentTime.Store(seconds)
 }
 // Timeout-related tests
 // TestUpdateLastRequestTime is a smoke test: it passes as long as
 // UpdateLastRequestTime returns without panicking on a new instance.
 func TestUpdateLastRequestTime(t *testing.T) {
 	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	opts := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	// Test that UpdateLastRequestTime doesn't panic
 	instance.NewInstance("test-instance", settings, opts).UpdateLastRequestTime()
 }
 // TestShouldTimeout_NotRunning checks that an instance which has not been
 // marked running does not report a timeout even with IdleTimeout set.
 func TestShouldTimeout_NotRunning(t *testing.T) {
 	oneMinute := 1 // 1 minute
 	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	inst := instance.NewInstance("test-instance", settings, &instance.CreateInstanceOptions{
 		IdleTimeout: &oneMinute,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	})
 	// Instance is not running, should not timeout regardless of configuration
 	if timedOut := inst.ShouldTimeout(); timedOut {
 		t.Error("Non-running instance should never timeout")
 	}
 }
 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
 	tests := []struct {
 		name        string
 		idleTimeout *int
 	}{
 		{"nil timeout", nil},
 		{"zero timeout", testutil.IntPtr(0)},
 		{"negative timeout", testutil.IntPtr(-5)},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			options := &instance.CreateInstanceOptions{
 				IdleTimeout: tt.idleTimeout,
 				LlamaServerOptions: llamacpp.LlamaServerOptions{
 					Model: "/path/to/model.gguf",
 				},
 			}
 			inst := instance.NewInstance("test-instance", globalSettings, options)
 			// Simulate running state
 			inst.Running = true
 			if inst.ShouldTimeout() {
 				t.Errorf("Instance with %s should not timeout", tt.name)
 			}
 		})
 	}
 }
 // TestShouldTimeout_WithinTimeLimit checks that a running instance with a
 // five-minute idle timeout does not time out right after a request.
 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 	fiveMinutes := 5 // 5 minutes
 	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	inst := instance.NewInstance("test-instance", settings, &instance.CreateInstanceOptions{
 		IdleTimeout: &fiveMinutes,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	})
 	inst.Running = true
 	// Update last request time to now
 	inst.UpdateLastRequestTime()
 	// Should not timeout immediately
 	if timedOut := inst.ShouldTimeout(); timedOut {
 		t.Error("Instance should not timeout when last request was recent")
 	}
 }
 // TestShouldTimeout_ExceedsTimeLimit uses a mock clock to move two minutes
 // past the last request and expects a one-minute idle timeout to trigger.
 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 	oneMinute := 1 // 1 minute
 	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	inst := instance.NewInstance("test-instance", settings, &instance.CreateInstanceOptions{
 		IdleTimeout: &oneMinute,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	})
 	inst.Running = true
 	// Use MockTimeProvider to simulate old last request time
 	clock := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(clock)
 	// Set last request time to now
 	inst.UpdateLastRequestTime()
 	// Advance time by 2 minutes (exceeds 1 minute timeout)
 	later := time.Now().Add(2 * time.Minute)
 	clock.SetTime(later)
 	if timedOut := inst.ShouldTimeout(); !timedOut {
 		t.Error("Instance should timeout when last request exceeds idle timeout")
 	}
 }
 // TestTimeoutConfiguration_Validation checks the IdleTimeout value that
 // NewInstance stores: nil becomes 0, non-negative values are kept, and
 // negative values are corrected to 0.
 func TestTimeoutConfiguration_Validation(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
 	tests := []struct {
 		name            string
 		inputTimeout    *int
 		expectedTimeout int
 	}{
 		{"default value when nil", nil, 0},
 		{"positive value", testutil.IntPtr(10), 10},
 		{"zero value", testutil.IntPtr(0), 0},
 		{"negative value gets corrected", testutil.IntPtr(-5), 0},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			options := &instance.CreateInstanceOptions{
 				IdleTimeout: tt.inputTimeout,
 				LlamaServerOptions: llamacpp.LlamaServerOptions{
 					Model: "/path/to/model.gguf",
 				},
 			}
 			inst := instance.NewInstance("test-instance", globalSettings, options)
 			opts := inst.GetOptions()
 			// Report nil and wrong-value separately so the failure message
 			// shows the stored integer instead of a pointer address.
 			if opts.IdleTimeout == nil {
 				t.Errorf("Expected IdleTimeout %d, got nil", tt.expectedTimeout)
 			} else if *opts.IdleTimeout != tt.expectedTimeout {
 				t.Errorf("Expected IdleTimeout %d, got %d", tt.expectedTimeout, *opts.IdleTimeout)
 			}
 		})
 	}
 }
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -10,6 +10,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
 	"time"
 )
 // InstanceManager defines the interface for managing instances of the llama server.
@@ -31,20 +32,48 @@ type instanceManager struct {
 	instances       map[string]*instance.Process
 	ports           map[int]bool
 	instancesConfig config.InstancesConfig
 	// Timeout checker
 	timeoutChecker *time.Ticker
 	shutdownChan   chan struct{}
 	shutdownDone   chan struct{}
 	isShutdown     bool
 }
 // NewInstanceManager builds an InstanceManager from instancesConfig, loads
 // existing instances from disk, and starts the background goroutine that
 // periodically checks for idle instances.
 // A non-positive TimeoutCheckInterval is replaced with 5 minutes.
 func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
 	checkEvery := time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute
 	manager := &instanceManager{
 		instances:       make(map[string]*instance.Process),
 		ports:           make(map[int]bool),
 		instancesConfig: instancesConfig,
 		timeoutChecker:  time.NewTicker(checkEvery),
 		shutdownChan:    make(chan struct{}),
 		shutdownDone:    make(chan struct{}),
 	}
 	// Load existing instances from disk; a failure is logged, not returned.
 	if loadErr := manager.loadInstances(); loadErr != nil {
 		log.Printf("Error loading instances: %v", loadErr)
 	}
 	// Start the timeout checker goroutine after initialization is complete.
 	// It closes shutdownDone on exit so callers can wait for it to finish.
 	go func() {
 		defer close(manager.shutdownDone)
 		for {
 			select {
 			case <-manager.shutdownChan:
 				return // Exit goroutine on shutdown
 			case <-manager.timeoutChecker.C:
 				manager.checkAllTimeouts()
 			}
 		}
 	}()
 	return manager
 }
@@ -94,6 +123,27 @@ func (im *instanceManager) Shutdown() {
 	im.mu.Lock()
 	defer im.mu.Unlock()
 	// Check if already shutdown
 	if im.isShutdown {
 		return
 	}
 	im.isShutdown = true
 	// Signal the timeout checker to stop
 	close(im.shutdownChan)
 	// Release lock temporarily to wait for goroutine
 	im.mu.Unlock()
 	// Wait for the timeout checker goroutine to actually stop
 	<-im.shutdownDone
 	// Reacquire lock
 	im.mu.Lock()
 	// Now stop the ticker
 	if im.timeoutChecker != nil {
 		im.timeoutChecker.Stop()
 	}
 	var wg sync.WaitGroup
 	wg.Add(len(im.instances))
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}
 	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
 		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
 	}
 	name, err := validation.ValidateInstanceName(name)
 	if err != nil {
 		return nil, err
@@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 	im.mu.Lock()
 	defer im.mu.Unlock()
 	// Check max instances limit after acquiring the lock
 	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
 		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
 	}
 	// Check if instance with this name already exists
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -0,0 +1,26 @@
 package manager
 import "log"
 // checkAllTimeouts stops every instance whose ShouldTimeout reports true.
 // Instance names are collected under the manager's read lock, and the lock
 // is released before StopInstance is called for each of them.
 func (im *instanceManager) checkAllTimeouts() {
 	var expired []string
 	im.mu.RLock()
 	// Identify instances that should timeout
 	for _, candidate := range im.instances {
 		if !candidate.ShouldTimeout() {
 			continue
 		}
 		expired = append(expired, candidate.Name)
 	}
 	im.mu.RUnlock() // Release read lock before calling StopInstance
 	// Stop the timed-out instances
 	for _, name := range expired {
 		log.Printf("Instance %s has timed out, stopping it", name)
 		_, stopErr := im.StopInstance(name)
 		if stopErr != nil {
 			log.Printf("Error stopping instance %s: %v", name, stopErr)
 			continue
 		}
 		log.Printf("Instance %s stopped successfully", name)
 	}
 }
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -472,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 			proxyPath = "/" + proxyPath
 		}
 		// Update the last request time for the instance
 		inst.UpdateLastRequestTime()
 		// Modify the request to remove the proxy prefix
 		originalPath := r.URL.Path
 		r.URL.Path = proxyPath
@@ -582,6 +585,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}
 		// Update last request time for the instance
 		inst.UpdateLastRequestTime()
 		// Recreate the request body from the bytes we read
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -1,7 +1,7 @@
 import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
 // Only define the basic fields we want to show by default
-export const basicFieldsConfig: Record<string, { 
+export const basicFieldsConfig: Record<string, {
  label: string
  description?: string
  placeholder?: string
@@ -21,6 +21,11 @@ export const basicFieldsConfig: Record<string, {
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped'
  },
  model: {
    label: 'Model Path',
    placeholder: '/path/to/model.gguf',
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -6,6 +6,7 @@ export const CreateInstanceOptionsSchema = z.object({
  auto_restart: z.boolean().optional(),
  max_restarts: z.number().optional(),
  restart_delay: z.number().optional(),
  idle_timeout: z.number().optional(),
  // Common params
  verbose_prompt: z.boolean().optional(),