diff --git a/README.md b/README.md index 3a5d29d..d9edfd5 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) 🔐 **API Key Authentication**: Separate keys for management vs inference access 📊 **Instance Monitoring**: Health checks, auto-restart, log management -⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period +⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits 💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests 💾 **State Persistence**: Ensure instances remain intact across server restarts @@ -113,6 +113,8 @@ instances: logs_dir: ~/.local/share/llamactl/logs # Logs directory auto_create_dirs: true # Auto-create data/config/logs dirs if missing max_instances: -1 # Max instances (-1 = unlimited) + max_running_instances: -1 # Max running instances (-1 = unlimited) + enable_lru_eviction: true # Enable LRU eviction for idle instances llama_executable: llama-server # Path to llama-server executable default_auto_restart: true # Auto-restart new instances by default default_max_restarts: 3 # Max restarts for new instances @@ -184,6 +186,8 @@ instances: logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs) auto_create_dirs: true # Automatically create data/config/logs directories (default: true) max_instances: -1 # Maximum instances (-1 = unlimited) + max_running_instances: -1 # Maximum running instances (-1 = unlimited) + enable_lru_eviction: true # Enable LRU eviction for idle instances llama_executable: "llama-server" # Path to llama-server executable default_auto_restart: true # Default auto-restart setting default_max_restarts: 3 # Default maximum restart attempts @@ -200,6 +204,8 @@ instances: - `LLAMACTL_LOGS_DIR` - Log directory path - `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false) - `LLAMACTL_MAX_INSTANCES` - Maximum number of instances +- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances +- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts diff --git a/pkg/config/config.go b/pkg/config/config.go index 89080ed..5017662 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -58,6 +58,9 @@ type InstancesConfig struct { // Maximum number of instances that can be running at the same time MaxRunningInstances int `yaml:"max_running_instances,omitempty"` + // Enable LRU eviction for idle instances + EnableLRUEviction bool `yaml:"enable_lru_eviction"` + // Path to llama-server executable LlamaExecutable string `yaml:"llama_executable"` @@ -117,6 +120,7 @@ func LoadConfig(configPath string) (AppConfig, error) { AutoCreateDirs: true, MaxInstances: -1, // -1 means unlimited MaxRunningInstances: -1, // -1 means unlimited + EnableLRUEviction: true, LlamaExecutable: "llama-server", DefaultAutoRestart: true, DefaultMaxRestarts: 3, @@ -220,6 +224,11 @@ func loadEnvVars(cfg *AppConfig) { cfg.Instances.MaxRunningInstances = m } } + if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" { + if b, err := strconv.ParseBool(enableLRUEviction); err == nil { + cfg.Instances.EnableLRUEviction = b + } + } if llamaExec :=
os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" { cfg.Instances.LlamaExecutable = llamaExec } diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 4de9dde..91bea6c 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -110,19 +110,25 @@ func (i *Process) Stop() error { i.mu.Unlock() - // Stop the process with SIGINT - if i.cmd.Process != nil { + // Stop the process with SIGINT if cmd exists + if i.cmd != nil && i.cmd.Process != nil { if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil { log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err) } } + // If no process exists, we can return immediately + if i.cmd == nil || monitorDone == nil { + i.logger.Close() + return nil + } + select { case <-monitorDone: // Process exited normally case <-time.After(30 * time.Second): // Force kill if it doesn't exit within 30 seconds - if i.cmd.Process != nil { + if i.cmd != nil && i.cmd.Process != nil { killErr := i.cmd.Process.Kill() if killErr != nil { log.Printf("Failed to force kill instance %s: %v", i.Name, killErr) @@ -144,6 +150,10 @@ func (i *Process) Stop() error { return nil } +func (i *Process) LastRequestTime() int64 { + return i.lastRequestTime.Load() +} + func (i *Process) WaitForHealthy(timeout int) error { if !i.IsRunning() { return fmt.Errorf("instance %s is not running", i.Name) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 5d60f3a..390cefe 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -21,7 +21,9 @@ type InstanceManager interface { UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) DeleteInstance(name string) error StartInstance(name string) (*instance.Process, error) + IsMaxRunningInstancesReached() bool StopInstance(name string) (*instance.Process, error) + EvictLRUInstance() error RestartInstance(name string) (*instance.Process, error) GetInstanceLogs(name string) (string, error) Shutdown() diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index df675dc..a0d5492 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -11,7 +11,6 @@ import ( "strings" "sync" "testing" - "time" ) func TestNewInstanceManager(t *testing.T) { @@ -26,13 +25,13 @@ func TestNewInstanceManager(t *testing.T) { TimeoutCheckInterval: 5, } - manager := manager.NewInstanceManager(cfg) - if manager == nil { + mgr := manager.NewInstanceManager(cfg) + if mgr == nil { t.Fatal("NewInstanceManager returned nil") } // Test initial state - instances, err := manager.ListInstances() + instances, err := mgr.ListInstances() if err != nil { t.Fatalf("ListInstances failed: %v", err) } @@ -41,217 +40,6 @@ func TestNewInstanceManager(t *testing.T) { } } -func TestCreateInstance_Success(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - - inst, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - if inst.Name != "test-instance" { - t.Errorf("Expected instance name 'test-instance', got %q", inst.Name) - } - if inst.GetStatus() != instance.Stopped { - t.Error("New instance should not be running") - } - if inst.GetOptions().Port != 8080 { - t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port) - } -} - -func TestCreateInstance_ValidationAndLimits(t *testing.T) { - // Test duplicate names - mngr := 
createTestManager() - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - _, err := mngr.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("First CreateInstance failed: %v", err) - } - - // Try to create duplicate - _, err = mngr.CreateInstance("test-instance", options) - if err == nil { - t.Error("Expected error for duplicate instance name") - } - if !strings.Contains(err.Error(), "already exists") { - t.Errorf("Expected duplicate name error, got: %v", err) - } - - // Test max instances limit - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - MaxInstances: 1, // Very low limit for testing - TimeoutCheckInterval: 5, - } - limitedManager := manager.NewInstanceManager(cfg) - - _, err = limitedManager.CreateInstance("instance1", options) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - // This should fail due to max instances limit - _, err = limitedManager.CreateInstance("instance2", options) - if err == nil { - t.Error("Expected error when exceeding max instances limit") - } - if !strings.Contains(err.Error(), "maximum number of instances") { - t.Errorf("Expected max instances error, got: %v", err) - } -} - -func TestPortManagement(t *testing.T) { - manager := createTestManager() - - // Test auto port assignment - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - inst1, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - port1 := inst1.GetOptions().Port - if port1 < 8000 || port1 > 9000 { - t.Errorf("Expected port in range 8000-9000, got %d", port1) - } - - // Test port conflict detection - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model2.gguf", - Port: port1, // Same port - should conflict - }, - } - - _, err = manager.CreateInstance("instance2", options2) - if err == nil { - t.Error("Expected error for port conflict") - } - if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") { - t.Errorf("Expected port conflict error, got: %v", err) - } - - // Test port release on deletion - specificPort := 8080 - options3 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: specificPort, - }, - } - - _, err = manager.CreateInstance("port-test", options3) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - err = manager.DeleteInstance("port-test") - if err != nil { - t.Fatalf("DeleteInstance failed: %v", err) - } - - // Should be able to create new instance with same port - _, err = manager.CreateInstance("new-port-test", options3) - if err != nil { - t.Errorf("Expected to reuse port after deletion, got error: %v", err) - } -} - -func TestInstanceOperations(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create instance - created, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Get instance - retrieved, err := manager.GetInstance("test-instance") - if err != nil { - t.Fatalf("GetInstance failed: %v", err) - } - if retrieved.Name != created.Name { - t.Errorf("Expected name %q, 
got %q", created.Name, retrieved.Name) - } - - // Update instance - newOptions := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/new-model.gguf", - Port: 8081, - }, - } - - updated, err := manager.UpdateInstance("test-instance", newOptions) - if err != nil { - t.Fatalf("UpdateInstance failed: %v", err) - } - if updated.GetOptions().Model != "/path/to/new-model.gguf" { - t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model) - } - - // List instances - instances, err := manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) - } - if len(instances) != 1 { - t.Errorf("Expected 1 instance, got %d", len(instances)) - } - - // Delete instance - err = manager.DeleteInstance("test-instance") - if err != nil { - t.Fatalf("DeleteInstance failed: %v", err) - } - - _, err = manager.GetInstance("test-instance") - if err == nil { - t.Error("Instance should not exist after deletion") - } - - // Test operations on non-existent instances - _, err = manager.GetInstance("nonexistent") - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } - - err = manager.DeleteInstance("nonexistent") - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } - - _, err = manager.UpdateInstance("nonexistent", options) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } -} - func TestPersistence(t *testing.T) { tempDir := t.TempDir() @@ -312,102 +100,9 @@ func TestPersistence(t *testing.T) { } } -func TestTimeoutFunctionality(t *testing.T) { - // Test timeout checker initialization - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - TimeoutCheckInterval: 10, - MaxInstances: 5, - } - - manager := manager.NewInstanceManager(cfg) - if manager == nil { - t.Fatal("Manager should be initialized with timeout checker") - } - manager.Shutdown() // Clean up - - // Test timeout configuration and logic without starting the actual process - testManager := createTestManager() - defer testManager.Shutdown() - - idleTimeout := 1 // 1 minute - options := &instance.CreateInstanceOptions{ - IdleTimeout: &idleTimeout, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - inst, err := testManager.CreateInstance("timeout-test", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Test timeout configuration is properly set - if inst.GetOptions().IdleTimeout == nil { - t.Fatal("Instance should have idle timeout configured") - } - if *inst.GetOptions().IdleTimeout != 1 { - t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout) - } - - // Test timeout logic without actually starting the process - // Create a mock time provider to simulate timeout - mockTime := NewMockTimeProvider(time.Now()) - inst.SetTimeProvider(mockTime) - - // Set instance to running state so timeout logic can work - inst.SetStatus(instance.Running) - - // Simulate instance being "running" for timeout check (without actual process) - // We'll test the ShouldTimeout logic directly - inst.UpdateLastRequestTime() - - // Initially should not timeout (just updated) - if inst.ShouldTimeout() { - t.Error("Instance should not timeout immediately after request") - } - - // Advance time to trigger timeout - mockTime.SetTime(time.Now().Add(2 * 
time.Minute)) - - // Now it should timeout - if !inst.ShouldTimeout() { - t.Error("Instance should timeout after idle period") - } - - // Reset running state to avoid shutdown issues - inst.SetStatus(instance.Stopped) - - // Test that instance without timeout doesn't timeout - noTimeoutOptions := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - // No IdleTimeout set - } - - noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - noTimeoutInst.SetTimeProvider(mockTime) - noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check - noTimeoutInst.UpdateLastRequestTime() - - // Even with time advanced, should not timeout - if noTimeoutInst.ShouldTimeout() { - t.Error("Instance without timeout configuration should never timeout") - } - - // Reset running state to avoid shutdown issues - noTimeoutInst.SetStatus(instance.Stopped) -} - func TestConcurrentAccess(t *testing.T) { - manager := createTestManager() - defer manager.Shutdown() + mgr := createTestManager() + defer mgr.Shutdown() // Test concurrent operations var wg sync.WaitGroup @@ -424,7 +119,7 @@ func TestConcurrentAccess(t *testing.T) { }, } instanceName := fmt.Sprintf("concurrent-test-%d", index) - if _, err := manager.CreateInstance(instanceName, options); err != nil { + if _, err := mgr.CreateInstance(instanceName, options); err != nil { errChan <- err } }(i) @@ -435,7 +130,7 @@ func TestConcurrentAccess(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - if _, err := manager.ListInstances(); err != nil { + if _, err := mgr.ListInstances(); err != nil { errChan <- err } }() @@ -451,7 +146,7 @@ func TestConcurrentAccess(t *testing.T) { } func TestShutdown(t *testing.T) { - manager := createTestManager() + mgr := createTestManager() // Create test instance options := &instance.CreateInstanceOptions{ @@ -459,16 +154,16 @@ func TestShutdown(t *testing.T) { Model: "/path/to/model.gguf", }, } - _, err := manager.CreateInstance("test-instance", options) + _, err := mgr.CreateInstance("test-instance", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } // Shutdown should not panic - manager.Shutdown() + mgr.Shutdown() // Multiple shutdowns should not panic - manager.Shutdown() + mgr.Shutdown() } // Helper function to create a test manager with standard config @@ -485,25 +180,3 @@ func createTestManager() manager.InstanceManager { } return manager.NewInstanceManager(cfg) } - -// Helper for timeout tests -type MockTimeProvider struct { - currentTime time.Time - mu sync.RWMutex -} - -func NewMockTimeProvider(t time.Time) *MockTimeProvider { - return &MockTimeProvider{currentTime: t} -} - -func (m *MockTimeProvider) Now() time.Time { - m.mu.RLock() - defer m.mu.RUnlock() - return m.currentTime -} - -func (m *MockTimeProvider) SetTime(t time.Time) { - m.mu.Lock() - defer m.mu.Unlock() - m.currentTime = t -} diff --git a/pkg/manager/operations.go b/pkg/manager/operations.go index 21f9727..b347900 100644 --- a/pkg/manager/operations.go +++ b/pkg/manager/operations.go @@ -174,6 +174,7 @@ func (im *instanceManager) DeleteInstance(name string) error { func (im *instanceManager) StartInstance(name string) (*instance.Process, error) { im.mu.RLock() instance, exists := im.instances[name] + maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 im.mu.RUnlock() 
if !exists { @@ -183,7 +184,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error) return instance, fmt.Errorf("instance with name %s is already running", name) } - if len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 { + if maxRunningExceeded { return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances)) } @@ -201,6 +202,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error) return instance, nil } +func (im *instanceManager) IsMaxRunningInstancesReached() bool { + im.mu.RLock() + defer im.mu.RUnlock() + + if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances { + return true + } + + return false +} + // StopInstance stops a running instance and returns it. func (im *instanceManager) StopInstance(name string) (*instance.Process, error) { im.mu.RLock() diff --git a/pkg/manager/operations_test.go b/pkg/manager/operations_test.go new file mode 100644 index 0000000..d045b81 --- /dev/null +++ b/pkg/manager/operations_test.go @@ -0,0 +1,221 @@ +package manager_test + +import ( + "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/config" + "llamactl/pkg/instance" + "llamactl/pkg/manager" + "strings" + "testing" +) + +func TestCreateInstance_Success(t *testing.T) { + manager := createTestManager() + + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + Port: 8080, + }, + } + + inst, err := manager.CreateInstance("test-instance", options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + if inst.Name != "test-instance" { + t.Errorf("Expected instance name 'test-instance', got %q", inst.Name) + } + if inst.GetStatus() != instance.Stopped { + t.Error("New instance should not be running") + } + if inst.GetOptions().Port != 8080 { + t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port) + } +} + +func TestCreateInstance_ValidationAndLimits(t *testing.T) { + // Test duplicate names + mngr := createTestManager() + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + _, err := mngr.CreateInstance("test-instance", options) + if err != nil { + t.Fatalf("First CreateInstance failed: %v", err) + } + + // Try to create duplicate + _, err = mngr.CreateInstance("test-instance", options) + if err == nil { + t.Error("Expected error for duplicate instance name") + } + if !strings.Contains(err.Error(), "already exists") { + t.Errorf("Expected duplicate name error, got: %v", err) + } + + // Test max instances limit + cfg := config.InstancesConfig{ + PortRange: [2]int{8000, 9000}, + MaxInstances: 1, // Very low limit for testing + TimeoutCheckInterval: 5, + } + limitedManager := manager.NewInstanceManager(cfg) + + _, err = limitedManager.CreateInstance("instance1", options) + if err != nil { + t.Fatalf("CreateInstance 1 failed: %v", err) + } + + // This should fail due to max instances limit + _, err = limitedManager.CreateInstance("instance2", options) + if err == nil { + t.Error("Expected error when exceeding max instances limit") + } + if !strings.Contains(err.Error(), "maximum number of instances") { + t.Errorf("Expected max instances error, got: %v", err) + } +} + +func TestPortManagement(t *testing.T) { + manager := createTestManager() + + // Test auto port 
assignment + options1 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst1, err := manager.CreateInstance("instance1", options1) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + port1 := inst1.GetOptions().Port + if port1 < 8000 || port1 > 9000 { + t.Errorf("Expected port in range 8000-9000, got %d", port1) + } + + // Test port conflict detection + options2 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model2.gguf", + Port: port1, // Same port - should conflict + }, + } + + _, err = manager.CreateInstance("instance2", options2) + if err == nil { + t.Error("Expected error for port conflict") + } + if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") { + t.Errorf("Expected port conflict error, got: %v", err) + } + + // Test port release on deletion + specificPort := 8080 + options3 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + Port: specificPort, + }, + } + + _, err = manager.CreateInstance("port-test", options3) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + err = manager.DeleteInstance("port-test") + if err != nil { + t.Fatalf("DeleteInstance failed: %v", err) + } + + // Should be able to create new instance with same port + _, err = manager.CreateInstance("new-port-test", options3) + if err != nil { + t.Errorf("Expected to reuse port after deletion, got error: %v", err) + } +} + +func TestInstanceOperations(t *testing.T) { + manager := createTestManager() + + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + // Create instance + created, err := manager.CreateInstance("test-instance", options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Get instance + retrieved, err := manager.GetInstance("test-instance") + if err != nil { + t.Fatalf("GetInstance failed: %v", err) + } + if retrieved.Name != created.Name { + t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name) + } + + // Update instance + newOptions := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/new-model.gguf", + Port: 8081, + }, + } + + updated, err := manager.UpdateInstance("test-instance", newOptions) + if err != nil { + t.Fatalf("UpdateInstance failed: %v", err) + } + if updated.GetOptions().Model != "/path/to/new-model.gguf" { + t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model) + } + + // List instances + instances, err := manager.ListInstances() + if err != nil { + t.Fatalf("ListInstances failed: %v", err) + } + if len(instances) != 1 { + t.Errorf("Expected 1 instance, got %d", len(instances)) + } + + // Delete instance + err = manager.DeleteInstance("test-instance") + if err != nil { + t.Fatalf("DeleteInstance failed: %v", err) + } + + _, err = manager.GetInstance("test-instance") + if err == nil { + t.Error("Instance should not exist after deletion") + } + + // Test operations on non-existent instances + _, err = manager.GetInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } + + err = manager.DeleteInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not 
found' error, got: %v", err) + } + + _, err = manager.UpdateInstance("nonexistent", options) + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } +} diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go index a28bc67..c982f10 100644 --- a/pkg/manager/timeout.go +++ b/pkg/manager/timeout.go @@ -1,6 +1,10 @@ package manager -import "log" +import ( + "fmt" + "llamactl/pkg/instance" + "log" +) func (im *instanceManager) checkAllTimeouts() { im.mu.RLock() @@ -24,3 +28,37 @@ func (im *instanceManager) checkAllTimeouts() { } } } + +// EvictLRUInstance finds and stops the least recently used running instance. +func (im *instanceManager) EvictLRUInstance() error { + im.mu.RLock() + var lruInstance *instance.Process + + for name, _ := range im.runningInstances { + inst := im.instances[name] + if inst == nil { + continue + } + + if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 { + continue // Skip instances without idle timeout + } + + if lruInstance == nil { + lruInstance = inst + } + + if inst.LastRequestTime() < lruInstance.LastRequestTime() { + lruInstance = inst + } + } + im.mu.RUnlock() + + if lruInstance == nil { + return fmt.Errorf("failed to find lru instance") + } + + // Evict Instance + _, err := im.StopInstance(lruInstance.Name) + return err +} diff --git a/pkg/manager/timeout_test.go b/pkg/manager/timeout_test.go new file mode 100644 index 0000000..41ca188 --- /dev/null +++ b/pkg/manager/timeout_test.go @@ -0,0 +1,321 @@ +package manager_test + +import ( + "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/config" + "llamactl/pkg/instance" + "llamactl/pkg/manager" + "sync" + "testing" + "time" +) + +func TestTimeoutFunctionality(t *testing.T) { + // Test timeout checker initialization + cfg := config.InstancesConfig{ + PortRange: [2]int{8000, 9000}, + TimeoutCheckInterval: 10, + MaxInstances: 5, + } + + manager := manager.NewInstanceManager(cfg) + if manager == nil { + t.Fatal("Manager should be initialized with timeout checker") + } + manager.Shutdown() // Clean up + + // Test timeout configuration and logic without starting the actual process + testManager := createTestManager() + defer testManager.Shutdown() + + idleTimeout := 1 // 1 minute + options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst, err := testManager.CreateInstance("timeout-test", options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Test timeout configuration is properly set + if inst.GetOptions().IdleTimeout == nil { + t.Fatal("Instance should have idle timeout configured") + } + if *inst.GetOptions().IdleTimeout != 1 { + t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout) + } + + // Test timeout logic without actually starting the process + // Create a mock time provider to simulate timeout + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + + // Set instance to running state so timeout logic can work + inst.SetStatus(instance.Running) + + // Simulate instance being "running" for timeout check (without actual process) + // We'll test the ShouldTimeout logic directly + inst.UpdateLastRequestTime() + + // Initially should not timeout (just updated) + if inst.ShouldTimeout() { + t.Error("Instance should not timeout immediately after request") + } + + // Advance time to trigger 
timeout + mockTime.SetTime(time.Now().Add(2 * time.Minute)) + + // Now it should timeout + if !inst.ShouldTimeout() { + t.Error("Instance should timeout after idle period") + } + + // Reset running state to avoid shutdown issues + inst.SetStatus(instance.Stopped) + + // Test that instance without timeout doesn't timeout + noTimeoutOptions := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + // No IdleTimeout set + } + + noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + noTimeoutInst.SetTimeProvider(mockTime) + noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check + noTimeoutInst.UpdateLastRequestTime() + + // Even with time advanced, should not timeout + if noTimeoutInst.ShouldTimeout() { + t.Error("Instance without timeout configuration should never timeout") + } + + // Reset running state to avoid shutdown issues + noTimeoutInst.SetStatus(instance.Stopped) +} + +func TestEvictLRUInstance_Success(t *testing.T) { + manager := createTestManager() + // Don't defer manager.Shutdown() - we'll handle cleanup manually + + // Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic) + options1 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model1.gguf", + }, + IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0 + } + options2 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model2.gguf", + }, + IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0 + } + options3 := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model3.gguf", + }, + IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0 + } + + inst1, err := manager.CreateInstance("instance-1", options1) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + inst2, err := manager.CreateInstance("instance-2", options2) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + inst3, err := manager.CreateInstance("instance-3", options3) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Set up mock time and set instances to running + mockTime := NewMockTimeProvider(time.Now()) + inst1.SetTimeProvider(mockTime) + inst2.SetTimeProvider(mockTime) + inst3.SetTimeProvider(mockTime) + + inst1.SetStatus(instance.Running) + inst2.SetStatus(instance.Running) + inst3.SetStatus(instance.Running) + + // Set different last request times (oldest to newest) + // inst1: oldest (will be evicted) + inst1.UpdateLastRequestTime() + + mockTime.SetTime(mockTime.Now().Add(1 * time.Minute)) + inst2.UpdateLastRequestTime() + + mockTime.SetTime(mockTime.Now().Add(1 * time.Minute)) + inst3.UpdateLastRequestTime() + + // Evict LRU instance (should be inst1) + err = manager.EvictLRUInstance() + if err != nil { + t.Fatalf("EvictLRUInstance failed: %v", err) + } + + // Verify inst1 is stopped + if inst1.IsRunning() { + t.Error("Expected instance-1 to be stopped after eviction") + } + + // Verify inst2 and inst3 are still running + if !inst2.IsRunning() { + t.Error("Expected instance-2 to still be running") + } + if !inst3.IsRunning() { + t.Error("Expected instance-3 to still be running") + } + + // Clean up manually - set all to stopped and then shutdown + 
inst2.SetStatus(instance.Stopped) + inst3.SetStatus(instance.Stopped) +} + +func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) { + // Helper function to create instances with different timeout configurations + createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process { + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: model, + }, + IdleTimeout: timeout, + } + inst, err := manager.CreateInstance(name, options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + return inst + } + + t.Run("no running instances", func(t *testing.T) { + manager := createTestManager() + defer manager.Shutdown() + + err := manager.EvictLRUInstance() + if err == nil { + t.Error("Expected error when no running instances exist") + } + if err.Error() != "failed to find lru instance" { + t.Errorf("Expected 'failed to find lru instance' error, got: %v", err) + } + }) + + t.Run("only instances without timeout", func(t *testing.T) { + manager := createTestManager() + defer manager.Shutdown() + + // Create instances with various non-eligible timeout configurations + zeroTimeout := 0 + negativeTimeout := -1 + inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout) + inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout) + inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil) + + // Set instances to running + instances := []*instance.Process{inst1, inst2, inst3} + for _, inst := range instances { + inst.SetStatus(instance.Running) + } + defer func() { + // Reset instances to stopped to avoid shutdown panics + for _, inst := range instances { + inst.SetStatus(instance.Stopped) + } + }() + + // Try to evict - should fail because no eligible instances + err := manager.EvictLRUInstance() + if err == nil { + t.Error("Expected error when no eligible instances exist") + } + if err.Error() != "failed to find lru instance" { + t.Errorf("Expected 'failed to find lru instance' error, got: %v", err) + } + + // Verify all instances are still running + for i, inst := range instances { + if !inst.IsRunning() { + t.Errorf("Expected instance %d to still be running", i+1) + } + } + }) + + t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) { + manager := createTestManager() + defer manager.Shutdown() + + // Create mix of instances: some with timeout enabled, some disabled + validTimeout := 1 + zeroTimeout := 0 + instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout) + instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout) + instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil) + + // Set all instances to running + instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2} + for _, inst := range instances { + inst.SetStatus(instance.Running) + inst.UpdateLastRequestTime() + } + defer func() { + // Reset instances to stopped to avoid shutdown panics + for _, inst := range instances { + if inst.IsRunning() { + inst.SetStatus(instance.Stopped) + } + } + }() + + // Evict LRU instance - should only consider the one with timeout + err := manager.EvictLRUInstance() + if err != nil { + t.Fatalf("EvictLRUInstance failed: %v", err) + } + + // Verify only the instance 
with timeout was evicted + if instWithTimeout.IsRunning() { + t.Error("Expected with-timeout instance to be stopped after eviction") + } + if !instNoTimeout1.IsRunning() { + t.Error("Expected no-timeout-1 instance to still be running") + } + if !instNoTimeout2.IsRunning() { + t.Error("Expected no-timeout-2 instance to still be running") + } + }) +} + +// Helper for timeout tests +type MockTimeProvider struct { + currentTime time.Time + mu sync.RWMutex +} + +func NewMockTimeProvider(t time.Time) *MockTimeProvider { + return &MockTimeProvider{currentTime: t} +} + +func (m *MockTimeProvider) Now() time.Time { + m.mu.RLock() + defer m.mu.RUnlock() + return m.currentTime +} + +func (m *MockTimeProvider) SetTime(t time.Time) { + m.mu.Lock() + defer m.mu.Unlock() + m.currentTime = t +} diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index 7fe2224..36918ba 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -581,23 +581,36 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc { } if !inst.IsRunning() { - if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart { - // If on-demand start is enabled, start the instance - if _, err := h.InstanceManager.StartInstance(modelName); err != nil { - http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) - return - } - - // Wait for the instance to become healthy before proceeding - if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout - http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) - return - } - - } else { + allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart + if !allowOnDemand { http.Error(w, "Instance is not running", http.StatusServiceUnavailable) return } + + if h.InstanceManager.IsMaxRunningInstancesReached() { + if h.cfg.Instances.EnableLRUEviction { + err := h.InstanceManager.EvictLRUInstance() + if err != nil { + http.Error(w, "Cannot start instance: failed to evict LRU instance: "+err.Error(), http.StatusInternalServerError) + return + } + } else { + http.Error(w, "Cannot start instance: maximum number of running instances reached", http.StatusConflict) + return + } + } + + // On-demand start is enabled; start the instance + if _, err := h.InstanceManager.StartInstance(modelName); err != nil { + http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) + return + } + + // Wait for the instance to become healthy before proceeding + if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout from OnDemandStartTimeout config + http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) + return + } } proxy, err := inst.GetProxy()
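
For reference, a minimal sketch (not part of the patch) of how the new manager methods are intended to compose, mirroring the OpenAIProxy flow above. The package name and the `ensureRunning` helper are illustrative; only `IsMaxRunningInstancesReached`, `EvictLRUInstance`, `StartInstance`, and the `EnableLRUEviction` config field come from this diff.

```go
package example // illustrative only

import (
	"fmt"

	"llamactl/pkg/config"
	"llamactl/pkg/manager"
)

// ensureRunning evicts the least recently used instance only when the
// running-instance limit is reached and LRU eviction is enabled, then
// starts the requested instance.
func ensureRunning(im manager.InstanceManager, cfg config.InstancesConfig, name string) error {
	if im.IsMaxRunningInstancesReached() {
		if !cfg.EnableLRUEviction {
			return fmt.Errorf("maximum number of running instances reached")
		}
		if err := im.EvictLRUInstance(); err != nil {
			return fmt.Errorf("failed to evict LRU instance: %w", err)
		}
	}
	if _, err := im.StartInstance(name); err != nil {
		return fmt.Errorf("failed to start instance %s: %w", name, err)
	}
	return nil
}
```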