From 78eda77e443a07c0ae622682f0133519d6ad6fcb Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 22:49:28 +0200 Subject: [PATCH] Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support --- pkg/manager/manager.go | 32 +- pkg/manager/manager_test.go | 832 +++++++++--------------------------- 2 files changed, 235 insertions(+), 629 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 12d0e77..cf65227 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -35,16 +35,21 @@ type instanceManager struct { // Timeout checker timeoutChecker *time.Ticker + shutdownChan chan struct{} } // NewInstanceManager creates a new instance of InstanceManager. func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager { + if instancesConfig.TimeoutCheckInterval <= 0 { + instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set + } im := &instanceManager{ instances: make(map[string]*instance.Process), ports: make(map[int]bool), instancesConfig: instancesConfig, timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), + shutdownChan: make(chan struct{}), } // Load existing instances from disk @@ -52,9 +57,24 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager log.Printf("Error loading instances: %v", err) } + // Start the timeout checker goroutine after initialization is complete go func() { - for range im.timeoutChecker.C { - im.checkAllTimeouts() + defer func() { + if r := recover(); r != nil { + log.Printf("Timeout checker goroutine recovered from panic: %v", r) + } + }() + + for { + select { + case <-im.timeoutChecker.C: + // Check if manager is still valid before proceeding + if im != nil { + im.checkAllTimeouts() + } + case <-im.shutdownChan: + return // Exit goroutine on shutdown + } } }() @@ -113,6 +133,14 @@ func (im *instanceManager) Shutdown() { im.timeoutChecker = nil } + // Signal the shutdown channel to exit the goroutine + select { + case <-im.shutdownChan: + // Channel already closed + default: + close(im.shutdownChan) + } + var wg sync.WaitGroup wg.Add(len(im.instances)) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 00b9f48..e14286f 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1,16 +1,17 @@ package manager_test import ( - "encoding/json" + "fmt" "llamactl/pkg/backends/llamacpp" "llamactl/pkg/config" "llamactl/pkg/instance" "llamactl/pkg/manager" "os" "path/filepath" - "reflect" "strings" + "sync" "testing" + "time" ) func TestNewInstanceManager(t *testing.T) { @@ -66,397 +67,139 @@ func TestCreateInstance_Success(t *testing.T) { } } -func TestCreateInstance_DuplicateName(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ +func TestCreateInstance_ValidationAndLimits(t *testing.T) { + // Test duplicate names + mngr := createTestManager() + options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create first instance - _, err := manager.CreateInstance("test-instance", options1) + _, err := mngr.CreateInstance("test-instance", options) if err != nil { t.Fatalf("First CreateInstance failed: %v", err) } // Try to create duplicate - _, err = manager.CreateInstance("test-instance", options2) + _, err = mngr.CreateInstance("test-instance", options) if err == nil { t.Error("Expected error for duplicate instance name") } if !strings.Contains(err.Error(), "already exists") { t.Errorf("Expected duplicate name error, got: %v", err) } -} -func TestCreateInstance_MaxInstancesLimit(t *testing.T) { - // Create manager with low max instances limit + // Test max instances limit cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, - MaxInstances: 2, // Very low limit for testing + MaxInstances: 1, // Very low limit for testing TimeoutCheckInterval: 5, } - manager := manager.NewInstanceManager(cfg) + limitedManager := manager.NewInstanceManager(cfg) - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options3 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create instances up to the limit - _, err := manager.CreateInstance("instance1", options1) + _, err = limitedManager.CreateInstance("instance1", options) if err != nil { t.Fatalf("CreateInstance 1 failed: %v", err) } - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - // This should fail due to max instances limit - _, err = manager.CreateInstance("instance3", options3) + _, err = limitedManager.CreateInstance("instance2", options) if err == nil { t.Error("Expected error when exceeding max instances limit") } - if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") { + if !strings.Contains(err.Error(), "maximum number of instances") { t.Errorf("Expected max instances error, got: %v", err) } } -func TestCreateInstance_PortAssignment(t *testing.T) { +func TestPortManagement(t *testing.T) { manager := createTestManager() - // Create instance without specifying port - options := &instance.CreateInstanceOptions{ + // Test auto port assignment + options1 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } - inst, err := manager.CreateInstance("test-instance", options) + inst1, err := manager.CreateInstance("instance1", options1) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Should auto-assign a port in the range - port := inst.GetOptions().Port - if port < 8000 || port > 9000 { - t.Errorf("Expected port in range 8000-9000, got %d", port) - } -} - -func TestCreateInstance_PortConflictDetection(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, // Explicit port - }, + port1 := inst1.GetOptions().Port + if port1 < 8000 || port1 > 9000 { + t.Errorf("Expected port in range 8000-9000, got %d", port1) } + // Test port conflict detection options2 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model2.gguf", - Port: 8080, // Same port - should conflict + Port: port1, // Same port - should conflict }, } - // Create first instance - _, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - // Try to create second instance with same port _, err = manager.CreateInstance("instance2", options2) if err == nil { t.Error("Expected error for port conflict") } - if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") { + if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") { t.Errorf("Expected port conflict error, got: %v", err) } -} - -func TestCreateInstance_MultiplePortAssignment(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create multiple instances and verify they get different ports - instance1, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - instance2, err := manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - port1 := instance1.GetOptions().Port - port2 := instance2.GetOptions().Port - - if port1 == port2 { - t.Errorf("Expected different ports, both got %d", port1) - } -} - -func TestCreateInstance_PortExhaustion(t *testing.T) { - // Create manager with very small port range - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 8001}, // Only 2 ports available - MaxInstances: 10, // Higher than available ports - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } + // Test port release on deletion + specificPort := 8080 options3 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", + Port: specificPort, }, } - // Create instances to exhaust all ports - _, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - // This should fail due to port exhaustion - _, err = manager.CreateInstance("instance3", options3) - if err == nil { - t.Error("Expected error when ports are exhausted") - } - if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") { - t.Errorf("Expected port exhaustion error, got: %v", err) - } -} - -func TestDeleteInstance_PortRelease(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - - // Create instance with specific port - _, err := manager.CreateInstance("test-instance", options) + _, err = manager.CreateInstance("port-test", options3) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Delete the instance - err = manager.DeleteInstance("test-instance") + err = manager.DeleteInstance("port-test") if err != nil { t.Fatalf("DeleteInstance failed: %v", err) } // Should be able to create new instance with same port - _, err = manager.CreateInstance("new-instance", options) + _, err = manager.CreateInstance("new-port-test", options3) if err != nil { t.Errorf("Expected to reuse port after deletion, got error: %v", err) } } -func TestGetInstance_Success(t *testing.T) { +func TestInstanceOperations(t *testing.T) { manager := createTestManager() - // Create an instance first options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } + + // Create instance created, err := manager.CreateInstance("test-instance", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Retrieve it + // Get instance retrieved, err := manager.GetInstance("test-instance") if err != nil { t.Fatalf("GetInstance failed: %v", err) } - if retrieved.Name != created.Name { t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name) } -} -func TestGetInstance_NotFound(t *testing.T) { - manager := createTestManager() - - _, err := manager.GetInstance("nonexistent") - if err == nil { - t.Error("Expected error for nonexistent instance") - } - if !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } -} - -func TestListInstances(t *testing.T) { - manager := createTestManager() - - // Initially empty - instances, err := manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) - } - if len(instances) != 0 { - t.Errorf("Expected 0 instances, got %d", len(instances)) - } - - // Create some instances - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - _, err = manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - // List should return both - instances, err = manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) - } - if len(instances) != 2 { - t.Errorf("Expected 2 instances, got %d", len(instances)) - } - - // Check names are present - names := make(map[string]bool) - for _, inst := range instances { - names[inst.Name] = true - } - if !names["instance1"] || !names["instance2"] { - t.Error("Expected both instance1 and instance2 in list") - } -} - -func TestDeleteInstance_Success(t *testing.T) { - manager := createTestManager() - - // Create an instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Delete it - err = manager.DeleteInstance("test-instance") - if err != nil { - t.Fatalf("DeleteInstance failed: %v", err) - } - - // Should no longer exist - _, err = manager.GetInstance("test-instance") - if err == nil { - t.Error("Instance should not exist after deletion") - } -} - -func TestDeleteInstance_NotFound(t *testing.T) { - manager := createTestManager() - - err := manager.DeleteInstance("nonexistent") - if err == nil { - t.Error("Expected error for deleting nonexistent instance") - } - if !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } -} - -func TestUpdateInstance_Success(t *testing.T) { - manager := createTestManager() - - // Create an instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Update it + // Update instance newOptions := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/new-model.gguf", @@ -468,35 +211,48 @@ func TestUpdateInstance_Success(t *testing.T) { if err != nil { t.Fatalf("UpdateInstance failed: %v", err) } - if updated.GetOptions().Model != "/path/to/new-model.gguf" { t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model) } - if updated.GetOptions().Port != 8081 { - t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port) + + // List instances + instances, err := manager.ListInstances() + if err != nil { + t.Fatalf("ListInstances failed: %v", err) } -} - -func TestUpdateInstance_NotFound(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, + if len(instances) != 1 { + t.Errorf("Expected 1 instance, got %d", len(instances)) } - _, err := manager.UpdateInstance("nonexistent", options) + // Delete instance + err = manager.DeleteInstance("test-instance") + if err != nil { + t.Fatalf("DeleteInstance failed: %v", err) + } + + _, err = manager.GetInstance("test-instance") if err == nil { - t.Error("Expected error for updating nonexistent instance") + t.Error("Instance should not exist after deletion") } - if !strings.Contains(err.Error(), "not found") { + + // Test operations on non-existent instances + _, err = manager.GetInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } + + err = manager.DeleteInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } + + _, err = manager.UpdateInstance("nonexistent", options) + if err == nil || !strings.Contains(err.Error(), "not found") { t.Errorf("Expected 'not found' error, got: %v", err) } } -func TestPersistence_InstancePersistedOnCreation(t *testing.T) { - // Create temporary directory for persistence +func TestPersistence(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ @@ -505,8 +261,9 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { MaxInstances: 10, TimeoutCheckInterval: 5, } - manager := manager.NewInstanceManager(cfg) + // Test instance persistence on creation + manager1 := manager.NewInstanceManager(cfg) options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", @@ -514,8 +271,7 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { }, } - // Create instance - _, err := manager.CreateInstance("test-instance", options) + _, err := manager1.CreateInstance("test-instance", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } @@ -526,366 +282,166 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { t.Errorf("Expected persistence file %s to exist", expectedPath) } - // Verify file contains correct data - data, err := os.ReadFile(expectedPath) + // Test loading instances from disk + manager2 := manager.NewInstanceManager(cfg) + instances, err := manager2.ListInstances() if err != nil { - t.Fatalf("Failed to read persistence file: %v", err) + t.Fatalf("ListInstances failed: %v", err) + } + if len(instances) != 1 { + t.Fatalf("Expected 1 loaded instance, got %d", len(instances)) + } + if instances[0].Name != "test-instance" { + t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name) } - var persistedInstance map[string]interface{} - if err := json.Unmarshal(data, &persistedInstance); err != nil { - t.Fatalf("Failed to unmarshal persisted data: %v", err) + // Test port map populated from loaded instances (port conflict should be detected) + _, err = manager2.CreateInstance("new-instance", options) // Same port + if err == nil || !strings.Contains(err.Error(), "port") { + t.Errorf("Expected port conflict error, got: %v", err) } - if persistedInstance["name"] != "test-instance" { - t.Errorf("Expected name 'test-instance', got %v", persistedInstance["name"]) - } -} - -func TestPersistence_InstancePersistedOnUpdate(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Create instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Update instance - newOptions := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/new-model.gguf", - Port: 8081, - }, - } - _, err = manager.UpdateInstance("test-instance", newOptions) - if err != nil { - t.Fatalf("UpdateInstance failed: %v", err) - } - - // Verify persistence file was updated - expectedPath := filepath.Join(tempDir, "test-instance.json") - data, err := os.ReadFile(expectedPath) - if err != nil { - t.Fatalf("Failed to read persistence file: %v", err) - } - - var persistedInstance map[string]interface{} - if err := json.Unmarshal(data, &persistedInstance); err != nil { - t.Fatalf("Failed to unmarshal persisted data: %v", err) - } - - // Check that the options were updated - options_data, ok := persistedInstance["options"].(map[string]interface{}) - if !ok { - t.Fatal("Expected options to be present in persisted data") - } - - if options_data["model"] != "/path/to/new-model.gguf" { - t.Errorf("Expected updated model '/path/to/new-model.gguf', got %v", options_data["model"]) - } -} - -func TestPersistence_InstanceFileDeletedOnDeletion(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Create instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - expectedPath := filepath.Join(tempDir, "test-instance.json") - - // Verify file exists - if _, err := os.Stat(expectedPath); os.IsNotExist(err) { - t.Fatal("Expected persistence file to exist before deletion") - } - - // Delete instance - err = manager.DeleteInstance("test-instance") + // Test file deletion on instance deletion + err = manager2.DeleteInstance("test-instance") if err != nil { t.Fatalf("DeleteInstance failed: %v", err) } - // Verify file was deleted if _, err := os.Stat(expectedPath); !os.IsNotExist(err) { t.Error("Expected persistence file to be deleted") } } -func TestPersistence_InstancesLoadedFromDisk(t *testing.T) { - tempDir := t.TempDir() - - // Create JSON files manually (simulating previous run) - instance1JSON := `{ - "name": "instance1", - "running": false, - "options": { - "model": "/path/to/model1.gguf", - "port": 8080 - } - }` - - instance2JSON := `{ - "name": "instance2", - "running": false, - "options": { - "model": "/path/to/model2.gguf", - "port": 8081 - } - }` - - // Write JSON files - err := os.WriteFile(filepath.Join(tempDir, "instance1.json"), []byte(instance1JSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - err = os.WriteFile(filepath.Join(tempDir, "instance2.json"), []byte(instance2JSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - // Create manager - should load instances from disk +func TestTimeoutFunctionality(t *testing.T) { + // Test timeout checker initialization cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, + TimeoutCheckInterval: 10, + MaxInstances: 5, } + manager := manager.NewInstanceManager(cfg) - - // Verify instances were loaded - instances, err := manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) + if manager == nil { + t.Fatal("Manager should be initialized with timeout checker") } + manager.Shutdown() // Clean up - if len(instances) != 2 { - t.Fatalf("Expected 2 loaded instances, got %d", len(instances)) - } + // Test timeout behavior with actual timeout logic + testManager := createTestManager() + defer testManager.Shutdown() - // Check instances by name - instancesByName := make(map[string]*instance.Process) - for _, inst := range instances { - instancesByName[inst.Name] = inst - } - - instance1, exists := instancesByName["instance1"] - if !exists { - t.Error("Expected instance1 to be loaded") - } else { - if instance1.GetOptions().Model != "/path/to/model1.gguf" { - t.Errorf("Expected model '/path/to/model1.gguf', got %q", instance1.GetOptions().Model) - } - if instance1.GetOptions().Port != 8080 { - t.Errorf("Expected port 8080, got %d", instance1.GetOptions().Port) - } - } - - instance2, exists := instancesByName["instance2"] - if !exists { - t.Error("Expected instance2 to be loaded") - } else { - if instance2.GetOptions().Model != "/path/to/model2.gguf" { - t.Errorf("Expected model '/path/to/model2.gguf', got %q", instance2.GetOptions().Model) - } - if instance2.GetOptions().Port != 8081 { - t.Errorf("Expected port 8081, got %d", instance2.GetOptions().Port) - } - } -} - -func TestPersistence_PortMapPopulatedFromLoadedInstances(t *testing.T) { - tempDir := t.TempDir() - - // Create JSON file with specific port - instanceJSON := `{ - "name": "test-instance", - "running": false, - "options": { - "model": "/path/to/model.gguf", - "port": 8080 - } - }` - - err := os.WriteFile(filepath.Join(tempDir, "test-instance.json"), []byte(instanceJSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - // Create manager - should load instance and register port - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Try to create new instance with same port - should fail due to conflict + idleTimeout := 1 // 1 minute options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model2.gguf", - Port: 8080, // Same port as loaded instance + Model: "/path/to/model.gguf", }, } - _, err = manager.CreateInstance("new-instance", options) - if err == nil { - t.Error("Expected error for port conflict with loaded instance") - } - if !strings.Contains(err.Error(), "port") || !strings.Contains(err.Error(), "in use") { - t.Errorf("Expected port conflict error, got: %v", err) - } -} - -func TestPersistence_CompleteInstanceDataRoundTrip(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, - TimeoutCheckInterval: 5, - } - - // Create first manager and instance with comprehensive options - manager1 := manager.NewInstanceManager(cfg) - - autoRestart := false - maxRestarts := 10 - restartDelay := 30 - - originalOptions := &instance.CreateInstanceOptions{ - AutoRestart: &autoRestart, - MaxRestarts: &maxRestarts, - RestartDelay: &restartDelay, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - Host: "localhost", - CtxSize: 4096, - GPULayers: 32, - Temperature: 0.7, - TopK: 40, - TopP: 0.9, - Verbose: true, - FlashAttn: false, - Lora: []string{"adapter1.bin", "adapter2.bin"}, - HFRepo: "microsoft/DialoGPT-medium", - }, - } - - originalInstance, err := manager1.CreateInstance("roundtrip-test", originalOptions) + inst, err := testManager.CreateInstance("timeout-test", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Create second manager (simulating restart) - should load the instance - manager2 := manager.NewInstanceManager(cfg) - - loadedInstance, err := manager2.GetInstance("roundtrip-test") + _, err = testManager.StartInstance("timeout-test") if err != nil { - t.Fatalf("GetInstance failed after reload: %v", err) + t.Fatalf("StartInstance failed: %v", err) } - // Compare all data - if loadedInstance.Name != originalInstance.Name { - t.Errorf("Name mismatch: original=%q, loaded=%q", originalInstance.Name, loadedInstance.Name) + // Create a mock time provider to simulate timeout + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + inst.UpdateLastRequestTime() + + // Advance time to trigger timeout + mockTime.SetTime(time.Now().Add(2 * time.Minute)) + + // Verify the instance should timeout + if !inst.ShouldTimeout() { + t.Fatal("Instance should be configured to timeout") } - originalOpts := originalInstance.GetOptions() - loadedOpts := loadedInstance.GetOptions() - - // Compare restart options - if *loadedOpts.AutoRestart != *originalOpts.AutoRestart { - t.Errorf("AutoRestart mismatch: original=%v, loaded=%v", *originalOpts.AutoRestart, *loadedOpts.AutoRestart) - } - if *loadedOpts.MaxRestarts != *originalOpts.MaxRestarts { - t.Errorf("MaxRestarts mismatch: original=%v, loaded=%v", *originalOpts.MaxRestarts, *loadedOpts.MaxRestarts) - } - if *loadedOpts.RestartDelay != *originalOpts.RestartDelay { - t.Errorf("RestartDelay mismatch: original=%v, loaded=%v", *originalOpts.RestartDelay, *loadedOpts.RestartDelay) + // Test stopping timed out instance + _, err = testManager.StopInstance("timeout-test") + if err != nil { + t.Fatalf("StopInstance failed: %v", err) } - // Compare llama server options - if loadedOpts.Model != originalOpts.Model { - t.Errorf("Model mismatch: original=%q, loaded=%q", originalOpts.Model, loadedOpts.Model) + stoppedInst, err := testManager.GetInstance("timeout-test") + if err != nil { + t.Fatalf("GetInstance failed: %v", err) } - if loadedOpts.Port != originalOpts.Port { - t.Errorf("Port mismatch: original=%d, loaded=%d", originalOpts.Port, loadedOpts.Port) + if stoppedInst.Running { + t.Error("Instance should not be running after timeout stop") } - if loadedOpts.Host != originalOpts.Host { - t.Errorf("Host mismatch: original=%q, loaded=%q", originalOpts.Host, loadedOpts.Host) - } - if loadedOpts.CtxSize != originalOpts.CtxSize { - t.Errorf("CtxSize mismatch: original=%d, loaded=%d", originalOpts.CtxSize, loadedOpts.CtxSize) - } - if loadedOpts.GPULayers != originalOpts.GPULayers { - t.Errorf("GPULayers mismatch: original=%d, loaded=%d", originalOpts.GPULayers, loadedOpts.GPULayers) - } - if loadedOpts.Temperature != originalOpts.Temperature { - t.Errorf("Temperature mismatch: original=%f, loaded=%f", originalOpts.Temperature, loadedOpts.Temperature) - } - if loadedOpts.TopK != originalOpts.TopK { - t.Errorf("TopK mismatch: original=%d, loaded=%d", originalOpts.TopK, loadedOpts.TopK) - } - if loadedOpts.TopP != originalOpts.TopP { - t.Errorf("TopP mismatch: original=%f, loaded=%f", originalOpts.TopP, loadedOpts.TopP) - } - if loadedOpts.Verbose != originalOpts.Verbose { - t.Errorf("Verbose mismatch: original=%v, loaded=%v", originalOpts.Verbose, loadedOpts.Verbose) - } - if loadedOpts.FlashAttn != originalOpts.FlashAttn { - t.Errorf("FlashAttn mismatch: original=%v, loaded=%v", originalOpts.FlashAttn, loadedOpts.FlashAttn) - } - if loadedOpts.HFRepo != originalOpts.HFRepo { - t.Errorf("HFRepo mismatch: original=%q, loaded=%q", originalOpts.HFRepo, loadedOpts.HFRepo) +} + +func TestConcurrentAccess(t *testing.T) { + manager := createTestManager() + defer manager.Shutdown() + + // Test concurrent operations + var wg sync.WaitGroup + errChan := make(chan error, 10) + + // Concurrent instance creation + for i := 0; i < 5; i++ { + wg.Add(1) + go func(index int) { + defer wg.Done() + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + instanceName := fmt.Sprintf("concurrent-test-%d", index) + if _, err := manager.CreateInstance(instanceName, options); err != nil { + errChan <- err + } + }(i) } - // Compare slice fields - if !reflect.DeepEqual(loadedOpts.Lora, originalOpts.Lora) { - t.Errorf("Lora mismatch: original=%v, loaded=%v", originalOpts.Lora, loadedOpts.Lora) + // Concurrent list operations + for i := 0; i < 3; i++ { + wg.Add(1) + go func() { + defer wg.Done() + if _, err := manager.ListInstances(); err != nil { + errChan <- err + } + }() } - // Verify created timestamp is preserved - if loadedInstance.Created != originalInstance.Created { - t.Errorf("Created timestamp mismatch: original=%d, loaded=%d", originalInstance.Created, loadedInstance.Created) + wg.Wait() + close(errChan) + + // Check for any errors during concurrent access + for err := range errChan { + t.Errorf("Concurrent access error: %v", err) } } +func TestShutdown(t *testing.T) { + manager := createTestManager() + + // Create test instance + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + _, err := manager.CreateInstance("test-instance", options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Shutdown should not panic + manager.Shutdown() + + // Multiple shutdowns should not panic + manager.Shutdown() +} + // Helper function to create a test manager with standard config func createTestManager() manager.InstanceManager { cfg := config.InstancesConfig{ @@ -900,3 +456,25 @@ func createTestManager() manager.InstanceManager { } return manager.NewInstanceManager(cfg) } + +// Helper for timeout tests +type MockTimeProvider struct { + currentTime time.Time + mu sync.RWMutex +} + +func NewMockTimeProvider(t time.Time) *MockTimeProvider { + return &MockTimeProvider{currentTime: t} +} + +func (m *MockTimeProvider) Now() time.Time { + m.mu.RLock() + defer m.mu.RUnlock() + return m.currentTime +} + +func (m *MockTimeProvider) SetTime(t time.Time) { + m.mu.Lock() + defer m.mu.Unlock() + m.currentTime = t +}