Merge pull request #22 from lordmathis/feat/timeout

feat: Implement idle instance timeout
2025-12-24 10:04:26 +00:00 · 2025-08-20 13:34:38 +02:00
parent eb9599f26a 7194e1fdd1
commit 651c8b9b2c
14 changed files with 666 additions and 807 deletions
--- a/README.md
+++ b/README.md
@@ -11,7 +11,8 @@
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)  
 🔐 **API Key Authentication**: Separate keys for management vs inference access  
 📊 **Instance Monitoring**: Health checks, auto-restart, log management  
-⚡ **Persistent State**: Instances survive server restarts
+⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period  
+💾 **State Persistence**: Ensure instances remain intact across server restarts  

 ![Dashboard Screenshot](docs/images/screenshot.png)

@@ -182,6 +183,7 @@ instances:
  default_auto_restart: true                        # Default auto-restart setting
  default_max_restarts: 3                           # Default maximum restart attempts
  default_restart_delay: 5                          # Default restart delay in seconds
+  timeout_check_interval: 5                         # Interval between idle-timeout checks, in minutes
 ```

 **Environment Variables:**
@@ -195,6 +197,7 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Interval between idle-timeout checks, in minutes

 #### Authentication Configuration

--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -66,6 +66,9 @@ type InstancesConfig struct {

 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
+
+	// Interval for checking instance timeouts (in minutes)
+	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }

 // AuthConfig contains authentication settings
@@ -108,6 +111,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
+			TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth:  true,
@@ -217,6 +221,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
+		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
+			cfg.Instances.TimeoutCheckInterval = minutes
+		}
+	}
 	// Auth config
 	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
 		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -13,16 +13,30 @@ import (
 	"net/url"
 	"os/exec"
 	"sync"
+	"sync/atomic"
 	"time"
 )

+// TimeProvider abstracts the clock so tests can substitute a mock time source.
+type TimeProvider interface {
+	Now() time.Time
+}
+
+// realTimeProvider is the TimeProvider backed by the system clock (time.Now).
+type realTimeProvider struct{}
+
+func (realTimeProvider) Now() time.Time {
+	return time.Now()
+}
+
 type CreateInstanceOptions struct {
 	// Auto restart
 	AutoRestart  *bool `json:"auto_restart,omitempty"`
 	MaxRestarts  *int  `json:"max_restarts,omitempty"`
-	// RestartDelay duration in seconds
-	RestartDelay *int `json:"restart_delay_seconds,omitempty"`
-
+	RestartDelay *int  `json:"restart_delay,omitempty"`
+	// IdleTimeout is the idle period, in minutes, after which a running instance times out (0 disables it)
+	IdleTimeout *int `json:"idle_timeout,omitempty"`
+	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
 }

@@ -34,7 +48,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	type tempCreateOptions struct {
 		AutoRestart  *bool `json:"auto_restart,omitempty"`
 		MaxRestarts  *int  `json:"max_restarts,omitempty"`
-		RestartDelay *int  `json:"restart_delay_seconds,omitempty"`
+		RestartDelay *int  `json:"restart_delay,omitempty"`
+		IdleTimeout  *int  `json:"idle_timeout,omitempty"`
 	}

 	var temp tempCreateOptions
@@ -46,6 +61,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
+	c.IdleTimeout = temp.IdleTimeout

 	// Now unmarshal the embedded LlamaServerOptions
 	if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
@@ -83,6 +99,10 @@ type Process struct {
 	// Restart control
 	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
 	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
+
+	// Timeout management
+	lastRequestTime atomic.Int64 // Unix timestamp of last request
+	timeProvider    TimeProvider `json:"-"` // Time provider for testing
 }

 // validateAndCopyOptions validates and creates a deep copy of the provided options
@@ -117,6 +137,15 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 			}
 			optionsCopy.RestartDelay = &restartDelay
 		}
+
+		if options.IdleTimeout != nil {
+			idleTimeout := *options.IdleTimeout
+			if idleTimeout < 0 {
+				log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
+				idleTimeout = 0
+			}
+			optionsCopy.IdleTimeout = &idleTimeout
+		}
 	}

 	return optionsCopy
@@ -142,6 +171,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		defaultRestartDelay := globalSettings.DefaultRestartDelay
 		options.RestartDelay = &defaultRestartDelay
 	}
+
+	if options.IdleTimeout == nil {
+		defaultIdleTimeout := 0
+		options.IdleTimeout = &defaultIdleTimeout
+	}
 }

 // NewInstance creates a new instance with the given name, log path, and options
@@ -158,9 +192,7 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C
 		options:        optionsCopy,
 		globalSettings: globalSettings,
 		logger:         logger,
-
-		Running: false,
-
+		timeProvider:   realTimeProvider{},
 		Created:        time.Now().Unix(),
 	}
 }
@@ -189,6 +221,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
 	i.proxy = nil
 }

+// SetTimeProvider swaps in a custom clock for tests; it does not lock i.mu, so call it before concurrent use.
+func (i *Process) SetTimeProvider(tp TimeProvider) {
+	i.timeProvider = tp
+}
+
 // GetProxy returns the reverse proxy for this instance, creating it if needed
 func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	i.mu.Lock()
--- a/pkg/instance/instance_test.go
+++ b/pkg/instance/instance_test.go
@@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 	}
 }

-func TestNewInstance_ValidationAndDefaults(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	// Test with invalid negative values
-	invalidMaxRestarts := -5
-	invalidRestartDelay := -10
-
-	options := &instance.CreateInstanceOptions{
-		MaxRestarts:  &invalidMaxRestarts,
-		RestartDelay: &invalidRestartDelay,
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	opts := instance.GetOptions()
-
-	// Check that negative values were corrected to 0
-	if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
-		t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
-	}
-	if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
-		t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
-	}
-}
-
 func TestSetOptions(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
@@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) {
 	}
 }

-func TestSetOptions_NilOptions(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	originalOptions := instance.GetOptions()
-
-	// Try to set nil options
-	instance.SetOptions(nil)
-
-	// Options should remain unchanged
-	currentOptions := instance.GetOptions()
-	if currentOptions.Model != originalOptions.Model {
-		t.Error("Options should not change when setting nil options")
-	}
-}
-
 func TestGetProxy(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
@@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) {
 	}
 }

-func TestUnmarshalJSON_PartialOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false,
-		"options": {
-			"model": "/path/to/model.gguf"
-		}
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	opts := inst.GetOptions()
-	if opts.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
-	}
-
-	// Note: Defaults are NOT applied during unmarshaling
-	// They should only be applied by NewInstance or SetOptions
-	if opts.AutoRestart != nil {
-		t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
-	}
-}
-
-func TestUnmarshalJSON_NoOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	if inst.Name != "test-instance" {
-		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
-	}
-	if inst.Running {
-		t.Error("Expected running to be false")
-	}
-
-	opts := inst.GetOptions()
-	if opts != nil {
-		t.Error("Expected options to be nil when not provided in JSON")
-	}
-}
-
 func TestCreateInstanceOptionsValidation(t *testing.T) {
 	tests := []struct {
 		name          string
@@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		expectedMax   int
 		expectedDelay int
 	}{
-		{
-			name:          "nil values",
-			maxRestarts:   nil,
-			restartDelay:  nil,
-			expectedMax:   0, // Should remain nil, but we can't easily test nil in this structure
-			expectedDelay: 0,
-		},
 		{
 			name:          "valid positive values",
 			maxRestarts:   testutil.IntPtr(10),
@@ -424,21 +306,17 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 			instance := instance.NewInstance("test", globalSettings, options)
 			opts := instance.GetOptions()

-			if tt.maxRestarts != nil {
 			if opts.MaxRestarts == nil {
 				t.Error("Expected MaxRestarts to be set")
 			} else if *opts.MaxRestarts != tt.expectedMax {
 				t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
 			}
-			}

-			if tt.restartDelay != nil {
 			if opts.RestartDelay == nil {
 				t.Error("Expected RestartDelay to be set")
 			} else if *opts.RestartDelay != tt.expectedDelay {
 				t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
 			}
-			}
 		})
 	}
 }
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -30,6 +30,9 @@ func (i *Process) Start() error {
 		i.restarts = 0
 	}

+	// Initialize last request time to current time when starting
+	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
+
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
--- a/pkg/instance/timeout.go
+++ b/pkg/instance/timeout.go
@@ -0,0 +1,28 @@
+package instance
+
+// UpdateLastRequestTime records the time provider's current time as the moment
+// of the last request. The timestamp is an atomic, so a read lock suffices; it
+// only guards the read of i.timeProvider and does not block concurrent readers.
+func (i *Process) UpdateLastRequestTime() {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
+}
+
+// ShouldTimeout reports whether the instance is running, has a positive idle
+// timeout (in minutes) configured, and has been idle longer than that timeout.
+func (i *Process) ShouldTimeout() bool {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+
+	// options may be nil (presumably for a Process decoded without options).
+	if !i.Running || i.options == nil || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
+		return false
+	}
+
+	// Widen to int64 before converting minutes to seconds so the product
+	// cannot overflow a 32-bit int.
+	idleTimeoutSeconds := int64(*i.options.IdleTimeout) * 60
+
+	return i.timeProvider.Now().Unix()-i.lastRequestTime.Load() > idleTimeoutSeconds
+}
--- a/pkg/instance/timeout_test.go
+++ b/pkg/instance/timeout_test.go
@@ -0,0 +1,195 @@
+package instance_test
+
+import (
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/testutil"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// MockTimeProvider is a settable TimeProvider for tests. It keeps the time as
+// a Unix timestamp, so sub-second precision is not retained.
+type MockTimeProvider struct {
+	currentTime atomic.Int64 // Unix timestamp
+}
+
+// NewMockTimeProvider returns a mock clock initially set to t.
+func NewMockTimeProvider(t time.Time) *MockTimeProvider {
+	clock := new(MockTimeProvider)
+	clock.SetTime(t)
+	return clock
+}
+
+// Now returns the mock's current time at whole-second resolution.
+func (m *MockTimeProvider) Now() time.Time { return time.Unix(m.currentTime.Load(), 0) }
+
+// SetTime moves the mock clock to t.
+func (m *MockTimeProvider) SetTime(t time.Time) { m.currentTime.Store(t.Unix()) }
+
+// Timeout-related tests
+
+// TestUpdateLastRequestTime is a smoke test: calling UpdateLastRequestTime on
+// a freshly created instance must complete without panicking.
+func TestUpdateLastRequestTime(t *testing.T) {
+	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+	opts := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	proc := instance.NewInstance("test-instance", settings, opts)
+
+	// No assertion is made here; the call itself panicking is what would
+	// fail this test.
+	proc.UpdateLastRequestTime()
+}
+
+// TestShouldTimeout_NotRunning checks that an instance that is not running
+// never reports a timeout, even with an idle timeout configured.
+func TestShouldTimeout_NotRunning(t *testing.T) {
+	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+
+	minutes := 1
+	opts := &instance.CreateInstanceOptions{
+		IdleTimeout: &minutes,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	// Running is never set here, so the instance counts as stopped.
+	proc := instance.NewInstance("test-instance", settings, opts)
+
+	if proc.ShouldTimeout() {
+		t.Error("Non-running instance should never timeout")
+	}
+}
+
+// TestShouldTimeout_NoTimeoutConfigured checks that a running instance never
+// reports a timeout when its idle timeout is nil, zero, or negative.
+func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
+	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+
+	cases := []struct {
+		name        string
+		idleTimeout *int
+	}{
+		{name: "nil timeout", idleTimeout: nil},
+		{name: "zero timeout", idleTimeout: testutil.IntPtr(0)},
+		{name: "negative timeout", idleTimeout: testutil.IntPtr(-5)},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			opts := &instance.CreateInstanceOptions{
+				IdleTimeout: tc.idleTimeout,
+				LlamaServerOptions: llamacpp.LlamaServerOptions{
+					Model: "/path/to/model.gguf",
+				},
+			}
+
+			proc := instance.NewInstance("test-instance", settings, opts)
+			proc.Running = true // pretend the process has been started
+
+			if !proc.ShouldTimeout() {
+				return
+			}
+			t.Errorf("Instance with %s should not timeout", tc.name)
+		})
+	}
+}
+
+// TestShouldTimeout_WithinTimeLimit checks that a running instance with a
+// 5-minute idle timeout does not time out right after a request is recorded.
+func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
+	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+
+	minutes := 5
+	opts := &instance.CreateInstanceOptions{
+		IdleTimeout: &minutes,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	proc := instance.NewInstance("test-instance", settings, opts)
+	proc.Running = true
+
+	// Record a request at the current time, then check straight away.
+	proc.UpdateLastRequestTime()
+	timedOut := proc.ShouldTimeout()
+
+	if timedOut {
+		t.Error("Instance should not timeout when last request was recent")
+	}
+}
+
+// TestShouldTimeout_ExceedsTimeLimit checks that a running instance with a
+// 1-minute idle timeout reports a timeout once the mock clock is moved two
+// minutes past the recorded request.
+func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
+	settings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+
+	minutes := 1
+	opts := &instance.CreateInstanceOptions{
+		IdleTimeout: &minutes,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	proc := instance.NewInstance("test-instance", settings, opts)
+	proc.Running = true
+
+	// Drive the instance from a mock clock and record a request at "now".
+	clock := NewMockTimeProvider(time.Now())
+	proc.SetTimeProvider(clock)
+	proc.UpdateLastRequestTime()
+
+	// Jump the clock two minutes ahead, past the one-minute idle timeout.
+	later := time.Now().Add(2 * time.Minute)
+	clock.SetTime(later)
+
+	if timedOut := proc.ShouldTimeout(); !timedOut {
+		t.Error("Instance should timeout when last request exceeds idle timeout")
+	}
+}
+
+// TestTimeoutConfiguration_Validation checks how NewInstance normalizes IdleTimeout.
+func TestTimeoutConfiguration_Validation(t *testing.T) {
+	globalSettings := &config.InstancesConfig{LogsDir: "/tmp/test"}
+
+	tests := []struct {
+		name            string
+		inputTimeout    *int
+		expectedTimeout int
+	}{
+		{"default value when nil", nil, 0},
+		{"positive value", testutil.IntPtr(10), 10},
+		{"zero value", testutil.IntPtr(0), 0},
+		{"negative value gets corrected", testutil.IntPtr(-5), 0},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			options := &instance.CreateInstanceOptions{
+				IdleTimeout: tt.inputTimeout,
+				LlamaServerOptions: llamacpp.LlamaServerOptions{
+					Model: "/path/to/model.gguf",
+				},
+			}
+			opts := instance.NewInstance("test-instance", globalSettings, options).GetOptions()
+			// Dereference before printing; %v on the pointer would show an address.
+			if opts.IdleTimeout == nil {
+				t.Fatalf("Expected IdleTimeout %d, got nil", tt.expectedTimeout)
+			}
+			if got := *opts.IdleTimeout; got != tt.expectedTimeout {
+				t.Errorf("Expected IdleTimeout %d, got %d", tt.expectedTimeout, got)
+			}
+		})
+	}
+}
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -10,6 +10,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"time"
 )

 // InstanceManager defines the interface for managing instances of the llama server.
@@ -31,20 +32,48 @@ type instanceManager struct {
 	instances       map[string]*instance.Process
 	ports           map[int]bool
 	instancesConfig config.InstancesConfig
+
+	// Timeout checker
+	timeoutChecker *time.Ticker
+	shutdownChan   chan struct{}
+	shutdownDone   chan struct{}
+	isShutdown     bool
 }

 // NewInstanceManager creates a new instance of InstanceManager.
 func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+	if instancesConfig.TimeoutCheckInterval <= 0 {
+		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
+	}
 	im := &instanceManager{
 		instances:       make(map[string]*instance.Process),
 		ports:           make(map[int]bool),
 		instancesConfig: instancesConfig,
+
+		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
+		shutdownChan:   make(chan struct{}),
+		shutdownDone:   make(chan struct{}),
 	}

 	// Load existing instances from disk
 	if err := im.loadInstances(); err != nil {
 		log.Printf("Error loading instances: %v", err)
 	}
+
+	// Start the timeout checker goroutine after initialization is complete
+	go func() {
+		defer close(im.shutdownDone)
+
+		for {
+			select {
+			case <-im.timeoutChecker.C:
+				im.checkAllTimeouts()
+			case <-im.shutdownChan:
+				return // Exit goroutine on shutdown
+			}
+		}
+	}()
+
 	return im
 }

@@ -94,6 +123,27 @@ func (im *instanceManager) Shutdown() {
 	im.mu.Lock()
 	defer im.mu.Unlock()

+	// Check if already shutdown
+	if im.isShutdown {
+		return
+	}
+	im.isShutdown = true
+
+	// Signal the timeout checker to stop
+	close(im.shutdownChan)
+
+	// Release lock temporarily to wait for goroutine
+	im.mu.Unlock()
+	// Wait for the timeout checker goroutine to actually stop
+	<-im.shutdownDone
+	// Reacquire lock
+	im.mu.Lock()
+
+	// Now stop the ticker
+	if im.timeoutChecker != nil {
+		im.timeoutChecker.Stop()
+	}
+
 	var wg sync.WaitGroup
 	wg.Add(len(im.instances))

--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}

-	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-	}
-
 	name, err := validation.ValidateInstanceName(name)
 	if err != nil {
 		return nil, err
@@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 	im.mu.Lock()
 	defer im.mu.Unlock()

+	// Check max instances limit after acquiring the lock
+	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+	}
+
 	// Check if instance with this name already exists
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -0,0 +1,26 @@
+package manager
+
+import "log"
+
+// checkAllTimeouts collects, under the read lock, the names of instances whose
+// ShouldTimeout is true, then stops each one after the lock has been released.
+func (im *instanceManager) checkAllTimeouts() {
+	var idle []string
+	im.mu.RLock()
+	for _, proc := range im.instances {
+		if proc.ShouldTimeout() {
+			idle = append(idle, proc.Name)
+		}
+	}
+	im.mu.RUnlock()
+
+	for _, name := range idle {
+		log.Printf("Instance %s has timed out, stopping it", name)
+		_, err := im.StopInstance(name)
+		if err != nil {
+			log.Printf("Error stopping instance %s: %v", name, err)
+			continue
+		}
+		log.Printf("Instance %s stopped successfully", name)
+	}
+}
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -472,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 			proxyPath = "/" + proxyPath
 		}

+		// Update the last request time for the instance
+		inst.UpdateLastRequestTime()
+
 		// Modify the request to remove the proxy prefix
 		originalPath := r.URL.Path
 		r.URL.Path = proxyPath
@@ -582,6 +585,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

+		// Update last request time for the instance
+		inst.UpdateLastRequestTime()
+
 		// Recreate the request body from the bytes we read
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -21,6 +21,11 @@ export const basicFieldsConfig: Record<string, {
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
+  idle_timeout: {
+    label: 'Idle Timeout (minutes)',
+    placeholder: '60',
+    description: 'Time in minutes before instance is considered idle and stopped'
+  },
  model: {
    label: 'Model Path',
    placeholder: '/path/to/model.gguf',
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -6,6 +6,7 @@ export const CreateInstanceOptionsSchema = z.object({
  auto_restart: z.boolean().optional(),
  max_restarts: z.number().optional(),
  restart_delay: z.number().optional(),
+  idle_timeout: z.number().optional(),

  // Common params
  verbose_prompt: z.boolean().optional(),