Merge pull request #26 from lordmathis/feat/lru-eviction

feat: Implement least recently used instance eviction
2025-12-24 10:04:26 +00:00 · 2025-08-31 12:44:06 +02:00
parent 8fdebb069c 9579930a6a
commit 9a4dafeee8
10 changed files with 663 additions and 358 deletions
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)  
 🔐 **API Key Authentication**: Separate keys for management vs inference access  
 📊 **Instance Monitoring**: Health checks, auto-restart, log management  
-⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period  
+⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits  
 💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests  
 💾 **State Persistence**: Ensure instances remain intact across server restarts  
@@ -113,6 +113,8 @@ instances:
  logs_dir: ~/.local/share/llamactl/logs    # Logs directory
  auto_create_dirs: true         # Auto-create data/config/logs dirs if missing
  max_instances: -1              # Max instances (-1 = unlimited)
  max_running_instances: -1      # Max running instances (-1 = unlimited)
  enable_lru_eviction: true      # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true     # Auto-restart new instances by default
  default_max_restarts: 3        # Max restarts for new instances
@@ -184,6 +186,8 @@ instances:
  logs_dir: "~/.local/share/llamactl/logs"          # Directory for instance logs (default: data_dir/logs)
  auto_create_dirs: true                            # Automatically create data/config/logs directories (default: true)
  max_instances: -1                                 # Maximum instances (-1 = unlimited)
  max_running_instances: -1                         # Maximum running instances (-1 = unlimited)
  enable_lru_eviction: true                         # Enable LRU eviction for idle instances
  llama_executable: "llama-server"                  # Path to llama-server executable
  default_auto_restart: true                        # Default auto-restart setting
  default_max_restarts: 3                           # Default maximum restart attempts
@@ -200,6 +204,8 @@ instances:
 - `LLAMACTL_LOGS_DIR` - Log directory path
 - `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
 - `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
 - `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
 - `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
 - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -58,6 +58,9 @@ type InstancesConfig struct {
 	// Maximum number of instances that can be running at the same time
 	MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
 	// Enable LRU eviction for instance logs
 	EnableLRUEviction bool `yaml:"enable_lru_eviction"`
 	// Path to llama-server executable
 	LlamaExecutable string `yaml:"llama_executable"`
@@ -117,6 +120,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			AutoCreateDirs:       true,
 			MaxInstances:         -1, // -1 means unlimited
 			MaxRunningInstances:  -1, // -1 means unlimited
 			EnableLRUEviction:    true,
 			LlamaExecutable:      "llama-server",
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
@@ -220,6 +224,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.MaxRunningInstances = m
 		}
 	}
 	if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
 		if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
 			cfg.Instances.EnableLRUEviction = b
 		}
 	}
 	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
 		cfg.Instances.LlamaExecutable = llamaExec
 	}
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -110,19 +110,25 @@ func (i *Process) Stop() error {
 	i.mu.Unlock()
-	// Stop the process with SIGINT
+	// Stop the process with SIGINT if cmd exists
-	if i.cmd.Process != nil {
+	if i.cmd != nil && i.cmd.Process != nil {
 		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
 		}
 	}
 	// If no process exists, we can return immediately
 	if i.cmd == nil || monitorDone == nil {
 		i.logger.Close()
 		return nil
 	}
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
-		if i.cmd.Process != nil {
+		if i.cmd != nil && i.cmd.Process != nil {
 			killErr := i.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -144,6 +150,10 @@ func (i *Process) Stop() error {
 	return nil
 }
 func (i *Process) LastRequestTime() int64 {
 	return i.lastRequestTime.Load()
 }
 func (i *Process) WaitForHealthy(timeout int) error {
 	if !i.IsRunning() {
 		return fmt.Errorf("instance %s is not running", i.Name)
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -21,7 +21,9 @@ type InstanceManager interface {
 	UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
 	DeleteInstance(name string) error
 	StartInstance(name string) (*instance.Process, error)
 	IsMaxRunningInstancesReached() bool
 	StopInstance(name string) (*instance.Process, error)
 	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Process, error)
 	GetInstanceLogs(name string) (string, error)
 	Shutdown()
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -11,7 +11,6 @@ import (
 	"strings"
 	"sync"
 	"testing"
 	"time"
 )
 func TestNewInstanceManager(t *testing.T) {
@@ -26,13 +25,13 @@ func TestNewInstanceManager(t *testing.T) {
 		TimeoutCheckInterval: 5,
 	}
-	manager := manager.NewInstanceManager(cfg)
+	mgr := manager.NewInstanceManager(cfg)
-	if manager == nil {
+	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
 	// Test initial state
-	instances, err := manager.ListInstances()
+	instances, err := mgr.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
@@ -41,217 +40,6 @@ func TestNewInstanceManager(t *testing.T) {
 	}
 }
 func TestCreateInstance_Success(t *testing.T) {
 	manager := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
 	inst, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
 	}
 	if inst.GetStatus() != instance.Stopped {
 		t.Error("New instance should not be running")
 	}
 	if inst.GetOptions().Port != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
 	}
 }
 func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	// Test duplicate names
 	mngr := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	_, err := mngr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("First CreateInstance failed: %v", err)
 	}
 	// Try to create duplicate
 	_, err = mngr.CreateInstance("test-instance", options)
 	if err == nil {
 		t.Error("Expected error for duplicate instance name")
 	}
 	if !strings.Contains(err.Error(), "already exists") {
 		t.Errorf("Expected duplicate name error, got: %v", err)
 	}
 	// Test max instances limit
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
 	limitedManager := manager.NewInstanceManager(cfg)
 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {
 		t.Fatalf("CreateInstance 1 failed: %v", err)
 	}
 	// This should fail due to max instances limit
 	_, err = limitedManager.CreateInstance("instance2", options)
 	if err == nil {
 		t.Error("Expected error when exceeding max instances limit")
 	}
 	if !strings.Contains(err.Error(), "maximum number of instances") {
 		t.Errorf("Expected max instances error, got: %v", err)
 	}
 }
 func TestPortManagement(t *testing.T) {
 	manager := createTestManager()
 	// Test auto port assignment
 	options1 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst1, err := manager.CreateInstance("instance1", options1)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	port1 := inst1.GetOptions().Port
 	if port1 < 8000 || port1 > 9000 {
 		t.Errorf("Expected port in range 8000-9000, got %d", port1)
 	}
 	// Test port conflict detection
 	options2 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model2.gguf",
 			Port:  port1, // Same port - should conflict
 		},
 	}
 	_, err = manager.CreateInstance("instance2", options2)
 	if err == nil {
 		t.Error("Expected error for port conflict")
 	}
 	if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
 		t.Errorf("Expected port conflict error, got: %v", err)
 	}
 	// Test port release on deletion
 	specificPort := 8080
 	options3 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  specificPort,
 		},
 	}
 	_, err = manager.CreateInstance("port-test", options3)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	err = manager.DeleteInstance("port-test")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	// Should be able to create new instance with same port
 	_, err = manager.CreateInstance("new-port-test", options3)
 	if err != nil {
 		t.Errorf("Expected to reuse port after deletion, got error: %v", err)
 	}
 }
 func TestInstanceOperations(t *testing.T) {
 	manager := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	// Create instance
 	created, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Get instance
 	retrieved, err := manager.GetInstance("test-instance")
 	if err != nil {
 		t.Fatalf("GetInstance failed: %v", err)
 	}
 	if retrieved.Name != created.Name {
 		t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
 	}
 	// Update instance
 	newOptions := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/new-model.gguf",
 			Port:  8081,
 		},
 	}
 	updated, err := manager.UpdateInstance("test-instance", newOptions)
 	if err != nil {
 		t.Fatalf("UpdateInstance failed: %v", err)
 	}
 	if updated.GetOptions().Model != "/path/to/new-model.gguf" {
 		t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
 	}
 	// List instances
 	instances, err := manager.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
 	if len(instances) != 1 {
 		t.Errorf("Expected 1 instance, got %d", len(instances))
 	}
 	// Delete instance
 	err = manager.DeleteInstance("test-instance")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	_, err = manager.GetInstance("test-instance")
 	if err == nil {
 		t.Error("Instance should not exist after deletion")
 	}
 	// Test operations on non-existent instances
 	_, err = manager.GetInstance("nonexistent")
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 	err = manager.DeleteInstance("nonexistent")
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 	_, err = manager.UpdateInstance("nonexistent", options)
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 }
 func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()
@@ -312,102 +100,9 @@ func TestPersistence(t *testing.T) {
 	}
 }
 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		TimeoutCheckInterval: 10,
 		MaxInstances:         5,
 	}
 	manager := manager.NewInstanceManager(cfg)
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
 	manager.Shutdown() // Clean up
 	// Test timeout configuration and logic without starting the actual process
 	testManager := createTestManager()
 	defer testManager.Shutdown()
 	idleTimeout := 1 // 1 minute
 	options := &instance.CreateInstanceOptions{
 		IdleTimeout: &idleTimeout,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst, err := testManager.CreateInstance("timeout-test", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Test timeout configuration is properly set
 	if inst.GetOptions().IdleTimeout == nil {
 		t.Fatal("Instance should have idle timeout configured")
 	}
 	if *inst.GetOptions().IdleTimeout != 1 {
 		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
 	}
 	// Test timeout logic without actually starting the process
 	// Create a mock time provider to simulate timeout
 	mockTime := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(mockTime)
 	// Set instance to running state so timeout logic can work
 	inst.SetStatus(instance.Running)
 	// Simulate instance being "running" for timeout check (without actual process)
 	// We'll test the ShouldTimeout logic directly
 	inst.UpdateLastRequestTime()
 	// Initially should not timeout (just updated)
 	if inst.ShouldTimeout() {
 		t.Error("Instance should not timeout immediately after request")
 	}
 	// Advance time to trigger timeout
 	mockTime.SetTime(time.Now().Add(2 * time.Minute))
 	// Now it should timeout
 	if !inst.ShouldTimeout() {
 		t.Error("Instance should timeout after idle period")
 	}
 	// Reset running state to avoid shutdown issues
 	inst.SetStatus(instance.Stopped)
 	// Test that instance without timeout doesn't timeout
 	noTimeoutOptions := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 		// No IdleTimeout set
 	}
 	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	noTimeoutInst.SetTimeProvider(mockTime)
 	noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
 	noTimeoutInst.UpdateLastRequestTime()
 	// Even with time advanced, should not timeout
 	if noTimeoutInst.ShouldTimeout() {
 		t.Error("Instance without timeout configuration should never timeout")
 	}
 	// Reset running state to avoid shutdown issues
 	noTimeoutInst.SetStatus(instance.Stopped)
 }
 func TestConcurrentAccess(t *testing.T) {
-	manager := createTestManager()
+	mgr := createTestManager()
-	defer manager.Shutdown()
+	defer mgr.Shutdown()
 	// Test concurrent operations
 	var wg sync.WaitGroup
@@ -424,7 +119,7 @@ func TestConcurrentAccess(t *testing.T) {
 				},
 			}
 			instanceName := fmt.Sprintf("concurrent-test-%d", index)
-			if _, err := manager.CreateInstance(instanceName, options); err != nil {
+			if _, err := mgr.CreateInstance(instanceName, options); err != nil {
 				errChan <- err
 			}
 		}(i)
@@ -435,7 +130,7 @@ func TestConcurrentAccess(t *testing.T) {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
-			if _, err := manager.ListInstances(); err != nil {
+			if _, err := mgr.ListInstances(); err != nil {
 				errChan <- err
 			}
 		}()
@@ -451,7 +146,7 @@ func TestConcurrentAccess(t *testing.T) {
 }
 func TestShutdown(t *testing.T) {
-	manager := createTestManager()
+	mgr := createTestManager()
 	// Create test instance
 	options := &instance.CreateInstanceOptions{
@@ -459,16 +154,16 @@ func TestShutdown(t *testing.T) {
 			Model: "/path/to/model.gguf",
 		},
 	}
-	_, err := manager.CreateInstance("test-instance", options)
+	_, err := mgr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Shutdown should not panic
-	manager.Shutdown()
+	mgr.Shutdown()
 	// Multiple shutdowns should not panic
-	manager.Shutdown()
+	mgr.Shutdown()
 }
 // Helper function to create a test manager with standard config
@@ -485,25 +180,3 @@ func createTestManager() manager.InstanceManager {
 	}
 	return manager.NewInstanceManager(cfg)
 }
 // Helper for timeout tests
 type MockTimeProvider struct {
 	currentTime time.Time
 	mu          sync.RWMutex
 }
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	return &MockTimeProvider{currentTime: t}
 }
 func (m *MockTimeProvider) Now() time.Time {
 	m.mu.RLock()
 	defer m.mu.RUnlock()
 	return m.currentTime
 }
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.currentTime = t
 }
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -174,6 +174,7 @@ func (im *instanceManager) DeleteInstance(name string) error {
 func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
 	instance, exists := im.instances[name]
 	maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
 	im.mu.RUnlock()
 	if !exists {
@@ -183,7 +184,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
 		return instance, fmt.Errorf("instance with name %s is already running", name)
 	}
-	if len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 {
+	if maxRunningExceeded {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
 	}
@@ -201,6 +202,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
 	return instance, nil
 }
 func (im *instanceManager) IsMaxRunningInstancesReached() bool {
 	im.mu.RLock()
 	defer im.mu.RUnlock()
 	if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
 		return true
 	}
 	return false
 }
 // StopInstance stops a running instance and returns it.
 func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -0,0 +1,221 @@
 package manager_test
 import (
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"strings"
 	"testing"
 )
 func TestCreateInstance_Success(t *testing.T) {
 	manager := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
 	inst, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
 	}
 	if inst.GetStatus() != instance.Stopped {
 		t.Error("New instance should not be running")
 	}
 	if inst.GetOptions().Port != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
 	}
 }
 func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	// Test duplicate names
 	mngr := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	_, err := mngr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("First CreateInstance failed: %v", err)
 	}
 	// Try to create duplicate
 	_, err = mngr.CreateInstance("test-instance", options)
 	if err == nil {
 		t.Error("Expected error for duplicate instance name")
 	}
 	if !strings.Contains(err.Error(), "already exists") {
 		t.Errorf("Expected duplicate name error, got: %v", err)
 	}
 	// Test max instances limit
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
 	limitedManager := manager.NewInstanceManager(cfg)
 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {
 		t.Fatalf("CreateInstance 1 failed: %v", err)
 	}
 	// This should fail due to max instances limit
 	_, err = limitedManager.CreateInstance("instance2", options)
 	if err == nil {
 		t.Error("Expected error when exceeding max instances limit")
 	}
 	if !strings.Contains(err.Error(), "maximum number of instances") {
 		t.Errorf("Expected max instances error, got: %v", err)
 	}
 }
 func TestPortManagement(t *testing.T) {
 	manager := createTestManager()
 	// Test auto port assignment
 	options1 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst1, err := manager.CreateInstance("instance1", options1)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	port1 := inst1.GetOptions().Port
 	if port1 < 8000 || port1 > 9000 {
 		t.Errorf("Expected port in range 8000-9000, got %d", port1)
 	}
 	// Test port conflict detection
 	options2 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model2.gguf",
 			Port:  port1, // Same port - should conflict
 		},
 	}
 	_, err = manager.CreateInstance("instance2", options2)
 	if err == nil {
 		t.Error("Expected error for port conflict")
 	}
 	if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
 		t.Errorf("Expected port conflict error, got: %v", err)
 	}
 	// Test port release on deletion
 	specificPort := 8080
 	options3 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  specificPort,
 		},
 	}
 	_, err = manager.CreateInstance("port-test", options3)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	err = manager.DeleteInstance("port-test")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	// Should be able to create new instance with same port
 	_, err = manager.CreateInstance("new-port-test", options3)
 	if err != nil {
 		t.Errorf("Expected to reuse port after deletion, got error: %v", err)
 	}
 }
 func TestInstanceOperations(t *testing.T) {
 	manager := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	// Create instance
 	created, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Get instance
 	retrieved, err := manager.GetInstance("test-instance")
 	if err != nil {
 		t.Fatalf("GetInstance failed: %v", err)
 	}
 	if retrieved.Name != created.Name {
 		t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
 	}
 	// Update instance
 	newOptions := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/new-model.gguf",
 			Port:  8081,
 		},
 	}
 	updated, err := manager.UpdateInstance("test-instance", newOptions)
 	if err != nil {
 		t.Fatalf("UpdateInstance failed: %v", err)
 	}
 	if updated.GetOptions().Model != "/path/to/new-model.gguf" {
 		t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
 	}
 	// List instances
 	instances, err := manager.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
 	if len(instances) != 1 {
 		t.Errorf("Expected 1 instance, got %d", len(instances))
 	}
 	// Delete instance
 	err = manager.DeleteInstance("test-instance")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	_, err = manager.GetInstance("test-instance")
 	if err == nil {
 		t.Error("Instance should not exist after deletion")
 	}
 	// Test operations on non-existent instances
 	_, err = manager.GetInstance("nonexistent")
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 	err = manager.DeleteInstance("nonexistent")
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 	_, err = manager.UpdateInstance("nonexistent", options)
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 }
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -1,6 +1,10 @@
 package manager
-import "log"
+import (
 	"fmt"
 	"llamactl/pkg/instance"
 	"log"
 )
 func (im *instanceManager) checkAllTimeouts() {
 	im.mu.RLock()
@@ -24,3 +28,37 @@ func (im *instanceManager) checkAllTimeouts() {
 		}
 	}
 }
 // EvictLRUInstance finds and stops the least recently used running instance.
 func (im *instanceManager) EvictLRUInstance() error {
 	im.mu.RLock()
 	var lruInstance *instance.Process
 	for name, _ := range im.runningInstances {
 		inst := im.instances[name]
 		if inst == nil {
 			continue
 		}
 		if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
 			continue // Skip instances without idle timeout
 		}
 		if lruInstance == nil {
 			lruInstance = inst
 		}
 		if inst.LastRequestTime() < lruInstance.LastRequestTime() {
 			lruInstance = inst
 		}
 	}
 	im.mu.RUnlock()
 	if lruInstance == nil {
 		return fmt.Errorf("failed to find lru instance")
 	}
 	// Evict Instance
 	_, err := im.StopInstance(lruInstance.Name)
 	return err
 }
--- a/pkg/manager/timeout_test.go
+++ b/pkg/manager/timeout_test.go
@@ -0,0 +1,321 @@
 package manager_test
 import (
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"sync"
 	"testing"
 	"time"
 )
 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		TimeoutCheckInterval: 10,
 		MaxInstances:         5,
 	}
 	manager := manager.NewInstanceManager(cfg)
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
 	manager.Shutdown() // Clean up
 	// Test timeout configuration and logic without starting the actual process
 	testManager := createTestManager()
 	defer testManager.Shutdown()
 	idleTimeout := 1 // 1 minute
 	options := &instance.CreateInstanceOptions{
 		IdleTimeout: &idleTimeout,
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst, err := testManager.CreateInstance("timeout-test", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Test timeout configuration is properly set
 	if inst.GetOptions().IdleTimeout == nil {
 		t.Fatal("Instance should have idle timeout configured")
 	}
 	if *inst.GetOptions().IdleTimeout != 1 {
 		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
 	}
 	// Test timeout logic without actually starting the process
 	// Create a mock time provider to simulate timeout
 	mockTime := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(mockTime)
 	// Set instance to running state so timeout logic can work
 	inst.SetStatus(instance.Running)
 	// Simulate instance being "running" for timeout check (without actual process)
 	// We'll test the ShouldTimeout logic directly
 	inst.UpdateLastRequestTime()
 	// Initially should not timeout (just updated)
 	if inst.ShouldTimeout() {
 		t.Error("Instance should not timeout immediately after request")
 	}
 	// Advance time to trigger timeout
 	mockTime.SetTime(time.Now().Add(2 * time.Minute))
 	// Now it should timeout
 	if !inst.ShouldTimeout() {
 		t.Error("Instance should timeout after idle period")
 	}
 	// Reset running state to avoid shutdown issues
 	inst.SetStatus(instance.Stopped)
 	// Test that instance without timeout doesn't timeout
 	noTimeoutOptions := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 		// No IdleTimeout set
 	}
 	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	noTimeoutInst.SetTimeProvider(mockTime)
 	noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
 	noTimeoutInst.UpdateLastRequestTime()
 	// Even with time advanced, should not timeout
 	if noTimeoutInst.ShouldTimeout() {
 		t.Error("Instance without timeout configuration should never timeout")
 	}
 	// Reset running state to avoid shutdown issues
 	noTimeoutInst.SetStatus(instance.Stopped)
 }
 func TestEvictLRUInstance_Success(t *testing.T) {
 	manager := createTestManager()
 	// Don't defer manager.Shutdown() - we'll handle cleanup manually
 	// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
 	options1 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model1.gguf",
 		},
 		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
 	}
 	options2 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model2.gguf",
 		},
 		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
 	}
 	options3 := &instance.CreateInstanceOptions{
 		LlamaServerOptions: llamacpp.LlamaServerOptions{
 			Model: "/path/to/model3.gguf",
 		},
 		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
 	}
 	inst1, err := manager.CreateInstance("instance-1", options1)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	inst2, err := manager.CreateInstance("instance-2", options2)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	inst3, err := manager.CreateInstance("instance-3", options3)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Set up mock time and set instances to running
 	mockTime := NewMockTimeProvider(time.Now())
 	inst1.SetTimeProvider(mockTime)
 	inst2.SetTimeProvider(mockTime)
 	inst3.SetTimeProvider(mockTime)
 	inst1.SetStatus(instance.Running)
 	inst2.SetStatus(instance.Running)
 	inst3.SetStatus(instance.Running)
 	// Set different last request times (oldest to newest)
 	// inst1: oldest (will be evicted)
 	inst1.UpdateLastRequestTime()
 	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
 	inst2.UpdateLastRequestTime()
 	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
 	inst3.UpdateLastRequestTime()
 	// Evict LRU instance (should be inst1)
 	err = manager.EvictLRUInstance()
 	if err != nil {
 		t.Fatalf("EvictLRUInstance failed: %v", err)
 	}
 	// Verify inst1 is stopped
 	if inst1.IsRunning() {
 		t.Error("Expected instance-1 to be stopped after eviction")
 	}
 	// Verify inst2 and inst3 are still running
 	if !inst2.IsRunning() {
 		t.Error("Expected instance-2 to still be running")
 	}
 	if !inst3.IsRunning() {
 		t.Error("Expected instance-3 to still be running")
 	}
 	// Clean up manually - set all to stopped and then shutdown
 	inst2.SetStatus(instance.Stopped)
 	inst3.SetStatus(instance.Stopped)
 }
 func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
 	// Helper function to create instances with different timeout configurations
 	createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
 		options := &instance.CreateInstanceOptions{
 			LlamaServerOptions: llamacpp.LlamaServerOptions{
 				Model: model,
 			},
 			IdleTimeout: timeout,
 		}
 		inst, err := manager.CreateInstance(name, options)
 		if err != nil {
 			t.Fatalf("CreateInstance failed: %v", err)
 		}
 		return inst
 	}
 	t.Run("no running instances", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		err := manager.EvictLRUInstance()
 		if err == nil {
 			t.Error("Expected error when no running instances exist")
 		}
 		if err.Error() != "failed to find lru instance" {
 			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
 		}
 	})
 	t.Run("only instances without timeout", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		// Create instances with various non-eligible timeout configurations
 		zeroTimeout := 0
 		negativeTimeout := -1
 		inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
 		inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
 		inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
 		// Set instances to running
 		instances := []*instance.Process{inst1, inst2, inst3}
 		for _, inst := range instances {
 			inst.SetStatus(instance.Running)
 		}
 		defer func() {
 			// Reset instances to stopped to avoid shutdown panics
 			for _, inst := range instances {
 				inst.SetStatus(instance.Stopped)
 			}
 		}()
 		// Try to evict - should fail because no eligible instances
 		err := manager.EvictLRUInstance()
 		if err == nil {
 			t.Error("Expected error when no eligible instances exist")
 		}
 		if err.Error() != "failed to find lru instance" {
 			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
 		}
 		// Verify all instances are still running
 		for i, inst := range instances {
 			if !inst.IsRunning() {
 				t.Errorf("Expected instance %d to still be running", i+1)
 			}
 		}
 	})
 	t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		// Create mix of instances: some with timeout enabled, some disabled
 		validTimeout := 1
 		zeroTimeout := 0
 		instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
 		instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
 		instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
 		// Set all instances to running
 		instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
 		for _, inst := range instances {
 			inst.SetStatus(instance.Running)
 			inst.UpdateLastRequestTime()
 		}
 		defer func() {
 			// Reset instances to stopped to avoid shutdown panics
 			for _, inst := range instances {
 				if inst.IsRunning() {
 					inst.SetStatus(instance.Stopped)
 				}
 			}
 		}()
 		// Evict LRU instance - should only consider the one with timeout
 		err := manager.EvictLRUInstance()
 		if err != nil {
 			t.Fatalf("EvictLRUInstance failed: %v", err)
 		}
 		// Verify only the instance with timeout was evicted
 		if instWithTimeout.IsRunning() {
 			t.Error("Expected with-timeout instance to be stopped after eviction")
 		}
 		if !instNoTimeout1.IsRunning() {
 			t.Error("Expected no-timeout-1 instance to still be running")
 		}
 		if !instNoTimeout2.IsRunning() {
 			t.Error("Expected no-timeout-2 instance to still be running")
 		}
 	})
 }
 // Helper for timeout tests
 type MockTimeProvider struct {
 	currentTime time.Time
 	mu          sync.RWMutex
 }
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	return &MockTimeProvider{currentTime: t}
 }
 func (m *MockTimeProvider) Now() time.Time {
 	m.mu.RLock()
 	defer m.mu.RUnlock()
 	return m.currentTime
 }
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.currentTime = t
 }
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -581,7 +581,25 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 		if !inst.IsRunning() {
-			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+			allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
 			if !allowOnDemand {
 				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 				return
 			}
 			if h.InstanceManager.IsMaxRunningInstancesReached() {
 				if h.cfg.Instances.EnableLRUEviction {
 					err := h.InstanceManager.EvictLRUInstance()
 					if err != nil {
 						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
 						return
 					}
 				} else {
 					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
 					return
 				}
 			}
 			// If on-demand start is enabled, start the instance
 			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
 				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
@@ -593,11 +611,6 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
 				return
 			}
 			} else {
 				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 				return
 			}
 		}
 		proxy, err := inst.GetProxy()