From 902be409d5533ad24e6deaccc5d92432e6e655ca Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 19:06:09 +0200 Subject: [PATCH 01/15] Add IdleTimeout option to CreateInstanceOptions and update JSON handling --- pkg/instance/instance.go | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index dd4a45f..2f795a1 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -18,11 +18,12 @@ import ( type CreateInstanceOptions struct { // Auto restart - AutoRestart *bool `json:"auto_restart,omitempty"` - MaxRestarts *int `json:"max_restarts,omitempty"` - // RestartDelay duration in seconds - RestartDelay *int `json:"restart_delay_seconds,omitempty"` - + AutoRestart *bool `json:"auto_restart,omitempty"` + MaxRestarts *int `json:"max_restarts,omitempty"` + RestartDelay *int `json:"restart_delay,omitempty"` + // Timeout + IdleTimeout *int `json:"idle_timeout,omitempty"` + // LlamaServerOptions contains the options for the llama server llamacpp.LlamaServerOptions `json:",inline"` } @@ -34,7 +35,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error { type tempCreateOptions struct { AutoRestart *bool `json:"auto_restart,omitempty"` MaxRestarts *int `json:"max_restarts,omitempty"` - RestartDelay *int `json:"restart_delay_seconds,omitempty"` + RestartDelay *int `json:"restart_delay,omitempty"` + IdleTimeout *int `json:"idle_timeout,omitempty"` } var temp tempCreateOptions @@ -46,6 +48,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error { c.AutoRestart = temp.AutoRestart c.MaxRestarts = temp.MaxRestarts c.RestartDelay = temp.RestartDelay + c.IdleTimeout = temp.IdleTimeout // Now unmarshal the embedded LlamaServerOptions if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil { @@ -117,6 +120,15 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create } optionsCopy.RestartDelay = &restartDelay } + + if options.IdleTimeout != nil { + idleTimeout := *options.IdleTimeout + if idleTimeout < 0 { + log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 seconds", name, idleTimeout) + idleTimeout = 0 + } + optionsCopy.IdleTimeout = &idleTimeout + } } return optionsCopy @@ -142,6 +154,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config. defaultRestartDelay := globalSettings.DefaultRestartDelay options.RestartDelay = &defaultRestartDelay } + + if options.IdleTimeout == nil { + defaultIdleTimeout := 0 // Default to 0 seconds if not set + options.IdleTimeout = &defaultIdleTimeout + } } // NewInstance creates a new instance with the given name, log path, and options From ccffbca6b23fa8ae5c8473ab6a628826ef263c41 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 19:26:21 +0200 Subject: [PATCH 02/15] Add timeout check interval and update instance configuration --- README.md | 22 ++++++++++++---------- pkg/config/config.go | 29 +++++++++++++++++++---------- pkg/instance/instance.go | 4 ++-- pkg/instance/timeout.go | 1 + 4 files changed, 34 insertions(+), 22 deletions(-) create mode 100644 pkg/instance/timeout.go diff --git a/README.md b/README.md index e944e5f..697a384 100644 --- a/README.md +++ b/README.md @@ -172,16 +172,17 @@ server: ```yaml instances: - port_range: [8000, 9000] # Port range for instances (default: [8000, 9000]) - data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS) - configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances) - logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs) - auto_create_dirs: true # Automatically create data/config/logs directories (default: true) - max_instances: -1 # Maximum instances (-1 = unlimited) - llama_executable: "llama-server" # Path to llama-server executable - default_auto_restart: true # Default auto-restart setting - default_max_restarts: 3 # Default maximum restart attempts - default_restart_delay: 5 # Default restart delay in seconds + port_range: [8000, 9000] # Port range for instances (default: [8000, 9000]) + data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS) + configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances) + logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs) + auto_create_dirs: true # Automatically create data/config/logs directories (default: true) + max_instances: -1 # Maximum instances (-1 = unlimited) + llama_executable: "llama-server" # Path to llama-server executable + default_auto_restart: true # Default auto-restart setting + default_max_restarts: 3 # Default maximum restart attempts + default_restart_delay: 5 # Default restart delay in seconds + timeout_check_interval: 5 # Default instance timeout check interval in minutes ``` **Environment Variables:** @@ -195,6 +196,7 @@ instances: - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds +- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes #### Authentication Configuration diff --git a/pkg/config/config.go b/pkg/config/config.go index fd33715..386a708 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -66,6 +66,9 @@ type InstancesConfig struct { // Default restart delay for new instances (in seconds) DefaultRestartDelay int `yaml:"default_restart_delay"` + + // Interval for checking instance timeouts (in minutes) + TimeoutCheckInterval int `yaml:"timeout_check_interval"` } // AuthConfig contains authentication settings @@ -98,16 +101,17 @@ func LoadConfig(configPath string) (AppConfig, error) { EnableSwagger: false, }, Instances: InstancesConfig{ - PortRange: [2]int{8000, 9000}, - DataDir: getDefaultDataDirectory(), - InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"), - LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"), - AutoCreateDirs: true, - MaxInstances: -1, // -1 means unlimited - LlamaExecutable: "llama-server", - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, + PortRange: [2]int{8000, 9000}, + DataDir: getDefaultDataDirectory(), + InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"), + LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"), + AutoCreateDirs: true, + MaxInstances: -1, // -1 means unlimited + LlamaExecutable: "llama-server", + DefaultAutoRestart: true, + DefaultMaxRestarts: 3, + DefaultRestartDelay: 5, + TimeoutCheckInterval: 5, // Check timeouts every 5 minutes }, Auth: AuthConfig{ RequireInferenceAuth: true, @@ -217,6 +221,11 @@ func loadEnvVars(cfg *AppConfig) { cfg.Instances.DefaultRestartDelay = seconds } } + if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" { + if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil { + cfg.Instances.TimeoutCheckInterval = minutes + } + } // Auth config if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" { if b, err := strconv.ParseBool(requireInferenceAuth); err == nil { diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index 2f795a1..7d5d7c5 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -124,7 +124,7 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create if options.IdleTimeout != nil { idleTimeout := *options.IdleTimeout if idleTimeout < 0 { - log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 seconds", name, idleTimeout) + log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout) idleTimeout = 0 } optionsCopy.IdleTimeout = &idleTimeout @@ -156,7 +156,7 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config. } if options.IdleTimeout == nil { - defaultIdleTimeout := 0 // Default to 0 seconds if not set + defaultIdleTimeout := 0 options.IdleTimeout = &defaultIdleTimeout } } diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go new file mode 100644 index 0000000..1807170 --- /dev/null +++ b/pkg/instance/timeout.go @@ -0,0 +1 @@ +package instance From e4e7a82294659c9b385db860c12413a1ca8f2e64 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 19:44:57 +0200 Subject: [PATCH 03/15] Implement last request time tracking for instance management --- pkg/instance/instance.go | 4 ++++ pkg/instance/lifecycle.go | 9 +++++++++ pkg/instance/timeout.go | 1 - pkg/server/handlers.go | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) delete mode 100644 pkg/instance/timeout.go diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index 7d5d7c5..b167fec 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -13,6 +13,7 @@ import ( "net/url" "os/exec" "sync" + "sync/atomic" "time" ) @@ -86,6 +87,9 @@ type Process struct { // Restart control restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion + + // Timeout management + lastRequestTime atomic.Int64 // Unix timestamp of last request } // validateAndCopyOptions validates and creates a deep copy of the provided options diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 1442b6e..58b52eb 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -140,6 +140,15 @@ func (i *Process) Stop() error { return nil } +// UpdateLastRequestTime updates the last request access time for the instance via proxy +func (i *Process) UpdateLastRequestTime() { + i.mu.Lock() + defer i.mu.Unlock() + + lastRequestTime := time.Now().Unix() + i.lastRequestTime.Store(lastRequestTime) +} + func (i *Process) monitorProcess() { defer func() { i.mu.Lock() diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go deleted file mode 100644 index 1807170..0000000 --- a/pkg/instance/timeout.go +++ /dev/null @@ -1 +0,0 @@ -package instance diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index 0bb5285..0d473e7 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -472,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc { proxyPath = "/" + proxyPath } + // Update the last request time for the instance + inst.UpdateLastRequestTime() + // Modify the request to remove the proxy prefix originalPath := r.URL.Path r.URL.Path = proxyPath @@ -582,6 +585,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc { return } + // Update last request time for the instance + inst.UpdateLastRequestTime() + // Recreate the request body from the bytes we read r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) r.ContentLength = int64(len(bodyBytes)) From c734bcae4ad36374d8f75101032d427e3e5b6aaf Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 20:37:20 +0200 Subject: [PATCH 04/15] Move UpdateLastRequestTime method to timeout.go and add ShouldTimeout method for idle timeout handling --- pkg/instance/lifecycle.go | 9 --------- pkg/instance/timeout.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 pkg/instance/timeout.go diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 58b52eb..1442b6e 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -140,15 +140,6 @@ func (i *Process) Stop() error { return nil } -// UpdateLastRequestTime updates the last request access time for the instance via proxy -func (i *Process) UpdateLastRequestTime() { - i.mu.Lock() - defer i.mu.Unlock() - - lastRequestTime := time.Now().Unix() - i.lastRequestTime.Store(lastRequestTime) -} - func (i *Process) monitorProcess() { defer func() { i.mu.Lock() diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go new file mode 100644 index 0000000..fd9eeb5 --- /dev/null +++ b/pkg/instance/timeout.go @@ -0,0 +1,28 @@ +package instance + +import "time" + +// UpdateLastRequestTime updates the last request access time for the instance via proxy +func (i *Process) UpdateLastRequestTime() { + i.mu.Lock() + defer i.mu.Unlock() + + lastRequestTime := time.Now().Unix() + i.lastRequestTime.Store(lastRequestTime) +} + +func (i *Process) ShouldTimeout() bool { + i.mu.RLock() + defer i.mu.RUnlock() + + // If idle timeout is not set, no timeout + if i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 { + return false + } + + // Check if the last request time exceeds the idle timeout + lastRequest := i.lastRequestTime.Load() + idleTimeout := *i.options.IdleTimeout + + return (time.Now().Unix() - lastRequest) > int64(idleTimeout) +} From 5e3a28398dbb6de3d743a3a0c9bd43a03cdfbf94 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 21:10:48 +0200 Subject: [PATCH 05/15] Implement periodic timeout checking for instances --- pkg/instance/timeout.go | 3 +-- pkg/manager/manager.go | 19 +++++++++++++++++++ pkg/manager/timeout.go | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 pkg/manager/timeout.go diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go index fd9eeb5..4b7a8c8 100644 --- a/pkg/instance/timeout.go +++ b/pkg/instance/timeout.go @@ -15,8 +15,7 @@ func (i *Process) ShouldTimeout() bool { i.mu.RLock() defer i.mu.RUnlock() - // If idle timeout is not set, no timeout - if i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 { + if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 { return false } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 43b1fea..12d0e77 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -10,6 +10,7 @@ import ( "path/filepath" "strings" "sync" + "time" ) // InstanceManager defines the interface for managing instances of the llama server. @@ -31,6 +32,9 @@ type instanceManager struct { instances map[string]*instance.Process ports map[int]bool instancesConfig config.InstancesConfig + + // Timeout checker + timeoutChecker *time.Ticker } // NewInstanceManager creates a new instance of InstanceManager. @@ -39,12 +43,21 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager instances: make(map[string]*instance.Process), ports: make(map[int]bool), instancesConfig: instancesConfig, + + timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), } // Load existing instances from disk if err := im.loadInstances(); err != nil { log.Printf("Error loading instances: %v", err) } + + go func() { + for range im.timeoutChecker.C { + im.checkAllTimeouts() + } + }() + return im } @@ -94,6 +107,12 @@ func (im *instanceManager) Shutdown() { im.mu.Lock() defer im.mu.Unlock() + // Stop the timeout checker + if im.timeoutChecker != nil { + im.timeoutChecker.Stop() + im.timeoutChecker = nil + } + var wg sync.WaitGroup wg.Add(len(im.instances)) diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go new file mode 100644 index 0000000..090311b --- /dev/null +++ b/pkg/manager/timeout.go @@ -0,0 +1,19 @@ +package manager + +import "log" + +func (im *instanceManager) checkAllTimeouts() { + im.mu.RLock() + defer im.mu.RUnlock() + + for _, inst := range im.instances { + if inst.ShouldTimeout() { + log.Printf("Instance %s has timed out, stopping it", inst.Name) + if proc, err := im.StopInstance(inst.Name); err != nil { + log.Printf("Error stopping instance %s: %v", inst.Name, err) + } else { + log.Printf("Instance %s stopped successfully, process: %v", inst.Name, proc) + } + } + } +} From c45fa13206245945602824acf2801348ab0598b2 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 21:15:28 +0200 Subject: [PATCH 06/15] Initialize last request time on instance start and update timeout handling logic --- pkg/instance/lifecycle.go | 3 +++ pkg/instance/timeout.go | 7 +++++-- pkg/manager/timeout.go | 21 ++++++++++++++------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 1442b6e..abd6cc2 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -30,6 +30,9 @@ func (i *Process) Start() error { i.restarts = 0 } + // Initialize last request time to current time when starting + i.lastRequestTime.Store(time.Now().Unix()) + // Create log files if err := i.logger.Create(); err != nil { return fmt.Errorf("failed to create log files: %w", err) diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go index 4b7a8c8..e17da4d 100644 --- a/pkg/instance/timeout.go +++ b/pkg/instance/timeout.go @@ -21,7 +21,10 @@ func (i *Process) ShouldTimeout() bool { // Check if the last request time exceeds the idle timeout lastRequest := i.lastRequestTime.Load() - idleTimeout := *i.options.IdleTimeout + idleTimeoutMinutes := *i.options.IdleTimeout - return (time.Now().Unix() - lastRequest) > int64(idleTimeout) + // Convert timeout from minutes to seconds for comparison + idleTimeoutSeconds := int64(idleTimeoutMinutes * 60) + + return (time.Now().Unix() - lastRequest) > idleTimeoutSeconds } diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go index 090311b..6b67fa2 100644 --- a/pkg/manager/timeout.go +++ b/pkg/manager/timeout.go @@ -4,16 +4,23 @@ import "log" func (im *instanceManager) checkAllTimeouts() { im.mu.RLock() - defer im.mu.RUnlock() + var timeoutInstances []string + // Identify instances that should timeout for _, inst := range im.instances { if inst.ShouldTimeout() { - log.Printf("Instance %s has timed out, stopping it", inst.Name) - if proc, err := im.StopInstance(inst.Name); err != nil { - log.Printf("Error stopping instance %s: %v", inst.Name, err) - } else { - log.Printf("Instance %s stopped successfully, process: %v", inst.Name, proc) - } + timeoutInstances = append(timeoutInstances, inst.Name) + } + } + im.mu.RUnlock() // Release read lock before calling StopInstance + + // Stop the timed-out instances + for _, name := range timeoutInstances { + log.Printf("Instance %s has timed out, stopping it", name) + if proc, err := im.StopInstance(name); err != nil { + log.Printf("Error stopping instance %s: %v", name, err) + } else { + log.Printf("Instance %s stopped successfully, process: %v", name, proc) } } } From 41eaebc927f7f4da04e068b773f5e7378483cacf Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 21:42:52 +0200 Subject: [PATCH 07/15] Add TimeoutCheckInterval to instance configuration in tests --- pkg/manager/manager_test.go | 88 +++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index c2f953c..00b9f48 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -15,13 +15,14 @@ import ( func TestNewInstanceManager(t *testing.T) { cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - LogsDir: "/tmp/test", - MaxInstances: 5, - LlamaExecutable: "llama-server", - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, + PortRange: [2]int{8000, 9000}, + LogsDir: "/tmp/test", + MaxInstances: 5, + LlamaExecutable: "llama-server", + DefaultAutoRestart: true, + DefaultMaxRestarts: 3, + DefaultRestartDelay: 5, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -99,8 +100,9 @@ func TestCreateInstance_DuplicateName(t *testing.T) { func TestCreateInstance_MaxInstancesLimit(t *testing.T) { // Create manager with low max instances limit cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - MaxInstances: 2, // Very low limit for testing + PortRange: [2]int{8000, 9000}, + MaxInstances: 2, // Very low limit for testing + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -235,8 +237,9 @@ func TestCreateInstance_MultiplePortAssignment(t *testing.T) { func TestCreateInstance_PortExhaustion(t *testing.T) { // Create manager with very small port range cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 8001}, // Only 2 ports available - MaxInstances: 10, // Higher than available ports + PortRange: [2]int{8000, 8001}, // Only 2 ports available + MaxInstances: 10, // Higher than available ports + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -497,9 +500,10 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -542,9 +546,10 @@ func TestPersistence_InstancePersistedOnUpdate(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -599,9 +604,10 @@ func TestPersistence_InstanceFileDeletedOnDeletion(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -670,9 +676,10 @@ func TestPersistence_InstancesLoadedFromDisk(t *testing.T) { // Create manager - should load instances from disk cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -737,9 +744,10 @@ func TestPersistence_PortMapPopulatedFromLoadedInstances(t *testing.T) { // Create manager - should load instance and register port cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + TimeoutCheckInterval: 5, } manager := manager.NewInstanceManager(cfg) @@ -764,12 +772,13 @@ func TestPersistence_CompleteInstanceDataRoundTrip(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, + PortRange: [2]int{8000, 9000}, + InstancesDir: tempDir, + MaxInstances: 10, + DefaultAutoRestart: true, + DefaultMaxRestarts: 3, + DefaultRestartDelay: 5, + TimeoutCheckInterval: 5, } // Create first manager and instance with comprehensive options @@ -880,13 +889,14 @@ func TestPersistence_CompleteInstanceDataRoundTrip(t *testing.T) { // Helper function to create a test manager with standard config func createTestManager() manager.InstanceManager { cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - LogsDir: "/tmp/test", - MaxInstances: 10, - LlamaExecutable: "llama-server", - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, + PortRange: [2]int{8000, 9000}, + LogsDir: "/tmp/test", + MaxInstances: 10, + LlamaExecutable: "llama-server", + DefaultAutoRestart: true, + DefaultMaxRestarts: 3, + DefaultRestartDelay: 5, + TimeoutCheckInterval: 5, } return manager.NewInstanceManager(cfg) } From d70bb634cdc46fd4fc03b2718ddda5ec11e7fafa Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 21:50:16 +0200 Subject: [PATCH 08/15] Implement instance tests for timeout --- pkg/instance/instance.go | 24 ++- pkg/instance/lifecycle.go | 2 +- pkg/instance/timeout.go | 6 +- pkg/instance/timeout_test.go | 311 +++++++++++++++++++++++++++++++++++ 4 files changed, 334 insertions(+), 9 deletions(-) create mode 100644 pkg/instance/timeout_test.go diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index b167fec..5ff4bf5 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -17,6 +17,18 @@ import ( "time" ) +// TimeProvider interface allows for testing with mock time +type TimeProvider interface { + Now() time.Time +} + +// realTimeProvider implements TimeProvider using the actual time +type realTimeProvider struct{} + +func (realTimeProvider) Now() time.Time { + return time.Now() +} + type CreateInstanceOptions struct { // Auto restart AutoRestart *bool `json:"auto_restart,omitempty"` @@ -90,6 +102,7 @@ type Process struct { // Timeout management lastRequestTime atomic.Int64 // Unix timestamp of last request + timeProvider TimeProvider `json:"-"` // Time provider for testing } // validateAndCopyOptions validates and creates a deep copy of the provided options @@ -179,10 +192,8 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C options: optionsCopy, globalSettings: globalSettings, logger: logger, - - Running: false, - - Created: time.Now().Unix(), + timeProvider: realTimeProvider{}, + Created: time.Now().Unix(), } } @@ -210,6 +221,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) { i.proxy = nil } +// SetTimeProvider sets a custom time provider for testing +func (i *Process) SetTimeProvider(tp TimeProvider) { + i.timeProvider = tp +} + // GetProxy returns the reverse proxy for this instance, creating it if needed func (i *Process) GetProxy() (*httputil.ReverseProxy, error) { i.mu.Lock() diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index abd6cc2..3db3ca7 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -31,7 +31,7 @@ func (i *Process) Start() error { } // Initialize last request time to current time when starting - i.lastRequestTime.Store(time.Now().Unix()) + i.lastRequestTime.Store(i.timeProvider.Now().Unix()) // Create log files if err := i.logger.Create(); err != nil { diff --git a/pkg/instance/timeout.go b/pkg/instance/timeout.go index e17da4d..19344e6 100644 --- a/pkg/instance/timeout.go +++ b/pkg/instance/timeout.go @@ -1,13 +1,11 @@ package instance -import "time" - // UpdateLastRequestTime updates the last request access time for the instance via proxy func (i *Process) UpdateLastRequestTime() { i.mu.Lock() defer i.mu.Unlock() - lastRequestTime := time.Now().Unix() + lastRequestTime := i.timeProvider.Now().Unix() i.lastRequestTime.Store(lastRequestTime) } @@ -26,5 +24,5 @@ func (i *Process) ShouldTimeout() bool { // Convert timeout from minutes to seconds for comparison idleTimeoutSeconds := int64(idleTimeoutMinutes * 60) - return (time.Now().Unix() - lastRequest) > idleTimeoutSeconds + return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds } diff --git a/pkg/instance/timeout_test.go b/pkg/instance/timeout_test.go new file mode 100644 index 0000000..1a9579f --- /dev/null +++ b/pkg/instance/timeout_test.go @@ -0,0 +1,311 @@ +package instance_test + +import ( + "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/config" + "llamactl/pkg/instance" + "llamactl/pkg/testutil" + "sync/atomic" + "testing" + "time" +) + +// MockTimeProvider implements TimeProvider for testing +type MockTimeProvider struct { + currentTime atomic.Int64 // Unix timestamp +} + +func NewMockTimeProvider(t time.Time) *MockTimeProvider { + m := &MockTimeProvider{} + m.currentTime.Store(t.Unix()) + return m +} + +func (m *MockTimeProvider) Now() time.Time { + return time.Unix(m.currentTime.Load(), 0) +} + +func (m *MockTimeProvider) SetTime(t time.Time) { + m.currentTime.Store(t.Unix()) +} + +// Timeout-related tests + +func TestUpdateLastRequestTime(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + + // Test that UpdateLastRequestTime doesn't panic + inst.UpdateLastRequestTime() + + // Test concurrent calls to ensure thread safety + done := make(chan bool, 10) + for i := 0; i < 10; i++ { + go func() { + inst.UpdateLastRequestTime() + done <- true + }() + } + + // Wait for all goroutines to complete + for i := 0; i < 10; i++ { + <-done + } +} + +func TestShouldTimeout_NotRunning(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + idleTimeout := 1 // 1 minute + options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + + // Instance is not running, should not timeout regardless of configuration + if inst.ShouldTimeout() { + t.Error("Non-running instance should never timeout") + } +} + +func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + tests := []struct { + name string + idleTimeout *int + }{ + {"nil timeout", nil}, + {"zero timeout", testutil.IntPtr(0)}, + {"negative timeout", testutil.IntPtr(-5)}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + options := &instance.CreateInstanceOptions{ + IdleTimeout: tt.idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + // Simulate running state + inst.Running = true + + if inst.ShouldTimeout() { + t.Errorf("Instance with %s should not timeout", tt.name) + } + }) + } +} + +func TestShouldTimeout_WithinTimeLimit(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + idleTimeout := 5 // 5 minutes + options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + inst.Running = true + + // Update last request time to now + inst.UpdateLastRequestTime() + + // Should not timeout immediately + if inst.ShouldTimeout() { + t.Error("Instance should not timeout when last request was recent") + } +} + +func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + idleTimeout := 1 // 1 minute + options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + inst.Running = true + + // Use MockTimeProvider to simulate old last request time + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + + // Set last request time to now + inst.UpdateLastRequestTime() + + // Advance time by 2 minutes (exceeds 1 minute timeout) + mockTime.SetTime(time.Now().Add(2 * time.Minute)) + + if !inst.ShouldTimeout() { + t.Error("Instance should timeout when last request exceeds idle timeout") + } +} + +func TestShouldTimeout_TimeUnitConversion(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + tests := []struct { + name string + idleTimeout int // in minutes + lastRequestAge int // seconds ago + shouldTimeout bool + }{ + {"exactly at timeout boundary", 2, 120, false}, // 2 minutes = 120 seconds + {"just over timeout", 2, 121, true}, // 121 seconds > 120 seconds + {"well under timeout", 5, 60, false}, // 1 minute < 5 minutes + {"well over timeout", 1, 300, true}, // 5 minutes > 1 minute + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + options := &instance.CreateInstanceOptions{ + IdleTimeout: &tt.idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + inst.Running = true + + // Use MockTimeProvider to control time + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + + // Set last request time to now + inst.UpdateLastRequestTime() + + // Advance time by the specified amount + mockTime.SetTime(time.Now().Add(time.Duration(tt.lastRequestAge) * time.Second)) + + result := inst.ShouldTimeout() + if result != tt.shouldTimeout { + t.Errorf("Expected timeout=%v for %s, got %v", tt.shouldTimeout, tt.name, result) + } + }) + } +} + +func TestInstanceTimeoutInitialization(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + idleTimeout := 5 + options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + inst.Running = true + + // Use MockTimeProvider to control time and verify initialization + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + + // Update last request time (simulates what Start() does) + inst.UpdateLastRequestTime() + + // Should not timeout immediately after initialization with a 5-minute timeout + if inst.ShouldTimeout() { + t.Error("Fresh instance should not timeout immediately") + } + + // Now advance time by 6 minutes and verify it would timeout + mockTime.SetTime(time.Now().Add(6 * time.Minute)) + if !inst.ShouldTimeout() { + t.Error("Instance should timeout after exceeding idle timeout period") + } +} + +func TestTimeoutConfiguration_DefaultValue(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + // Create instance without specifying idle timeout + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + opts := inst.GetOptions() + + // Default should be 0 (disabled) + if opts.IdleTimeout == nil || *opts.IdleTimeout != 0 { + t.Errorf("Expected default IdleTimeout to be 0, got %v", opts.IdleTimeout) + } +} + +func TestTimeoutConfiguration_Validation(t *testing.T) { + globalSettings := &config.InstancesConfig{ + LogsDir: "/tmp/test", + } + + tests := []struct { + name string + inputTimeout *int + expectedTimeout int + }{ + {"positive value", testutil.IntPtr(10), 10}, + {"zero value", testutil.IntPtr(0), 0}, + {"negative value gets corrected", testutil.IntPtr(-5), 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + options := &instance.CreateInstanceOptions{ + IdleTimeout: tt.inputTimeout, + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + + inst := instance.NewInstance("test-instance", globalSettings, options) + opts := inst.GetOptions() + + if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout { + t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout) + } + }) + } +} From 78eda77e443a07c0ae622682f0133519d6ad6fcb Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 17 Aug 2025 22:49:28 +0200 Subject: [PATCH 09/15] Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support --- pkg/manager/manager.go | 32 +- pkg/manager/manager_test.go | 832 +++++++++--------------------------- 2 files changed, 235 insertions(+), 629 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 12d0e77..cf65227 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -35,16 +35,21 @@ type instanceManager struct { // Timeout checker timeoutChecker *time.Ticker + shutdownChan chan struct{} } // NewInstanceManager creates a new instance of InstanceManager. func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager { + if instancesConfig.TimeoutCheckInterval <= 0 { + instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set + } im := &instanceManager{ instances: make(map[string]*instance.Process), ports: make(map[int]bool), instancesConfig: instancesConfig, timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), + shutdownChan: make(chan struct{}), } // Load existing instances from disk @@ -52,9 +57,24 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager log.Printf("Error loading instances: %v", err) } + // Start the timeout checker goroutine after initialization is complete go func() { - for range im.timeoutChecker.C { - im.checkAllTimeouts() + defer func() { + if r := recover(); r != nil { + log.Printf("Timeout checker goroutine recovered from panic: %v", r) + } + }() + + for { + select { + case <-im.timeoutChecker.C: + // Check if manager is still valid before proceeding + if im != nil { + im.checkAllTimeouts() + } + case <-im.shutdownChan: + return // Exit goroutine on shutdown + } } }() @@ -113,6 +133,14 @@ func (im *instanceManager) Shutdown() { im.timeoutChecker = nil } + // Signal the shutdown channel to exit the goroutine + select { + case <-im.shutdownChan: + // Channel already closed + default: + close(im.shutdownChan) + } + var wg sync.WaitGroup wg.Add(len(im.instances)) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 00b9f48..e14286f 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1,16 +1,17 @@ package manager_test import ( - "encoding/json" + "fmt" "llamactl/pkg/backends/llamacpp" "llamactl/pkg/config" "llamactl/pkg/instance" "llamactl/pkg/manager" "os" "path/filepath" - "reflect" "strings" + "sync" "testing" + "time" ) func TestNewInstanceManager(t *testing.T) { @@ -66,397 +67,139 @@ func TestCreateInstance_Success(t *testing.T) { } } -func TestCreateInstance_DuplicateName(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ +func TestCreateInstance_ValidationAndLimits(t *testing.T) { + // Test duplicate names + mngr := createTestManager() + options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create first instance - _, err := manager.CreateInstance("test-instance", options1) + _, err := mngr.CreateInstance("test-instance", options) if err != nil { t.Fatalf("First CreateInstance failed: %v", err) } // Try to create duplicate - _, err = manager.CreateInstance("test-instance", options2) + _, err = mngr.CreateInstance("test-instance", options) if err == nil { t.Error("Expected error for duplicate instance name") } if !strings.Contains(err.Error(), "already exists") { t.Errorf("Expected duplicate name error, got: %v", err) } -} -func TestCreateInstance_MaxInstancesLimit(t *testing.T) { - // Create manager with low max instances limit + // Test max instances limit cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, - MaxInstances: 2, // Very low limit for testing + MaxInstances: 1, // Very low limit for testing TimeoutCheckInterval: 5, } - manager := manager.NewInstanceManager(cfg) + limitedManager := manager.NewInstanceManager(cfg) - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options3 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create instances up to the limit - _, err := manager.CreateInstance("instance1", options1) + _, err = limitedManager.CreateInstance("instance1", options) if err != nil { t.Fatalf("CreateInstance 1 failed: %v", err) } - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - // This should fail due to max instances limit - _, err = manager.CreateInstance("instance3", options3) + _, err = limitedManager.CreateInstance("instance2", options) if err == nil { t.Error("Expected error when exceeding max instances limit") } - if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") { + if !strings.Contains(err.Error(), "maximum number of instances") { t.Errorf("Expected max instances error, got: %v", err) } } -func TestCreateInstance_PortAssignment(t *testing.T) { +func TestPortManagement(t *testing.T) { manager := createTestManager() - // Create instance without specifying port - options := &instance.CreateInstanceOptions{ + // Test auto port assignment + options1 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } - inst, err := manager.CreateInstance("test-instance", options) + inst1, err := manager.CreateInstance("instance1", options1) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Should auto-assign a port in the range - port := inst.GetOptions().Port - if port < 8000 || port > 9000 { - t.Errorf("Expected port in range 8000-9000, got %d", port) - } -} - -func TestCreateInstance_PortConflictDetection(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, // Explicit port - }, + port1 := inst1.GetOptions().Port + if port1 < 8000 || port1 > 9000 { + t.Errorf("Expected port in range 8000-9000, got %d", port1) } + // Test port conflict detection options2 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model2.gguf", - Port: 8080, // Same port - should conflict + Port: port1, // Same port - should conflict }, } - // Create first instance - _, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - // Try to create second instance with same port _, err = manager.CreateInstance("instance2", options2) if err == nil { t.Error("Expected error for port conflict") } - if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") { + if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") { t.Errorf("Expected port conflict error, got: %v", err) } -} - -func TestCreateInstance_MultiplePortAssignment(t *testing.T) { - manager := createTestManager() - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - // Create multiple instances and verify they get different ports - instance1, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - instance2, err := manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - port1 := instance1.GetOptions().Port - port2 := instance2.GetOptions().Port - - if port1 == port2 { - t.Errorf("Expected different ports, both got %d", port1) - } -} - -func TestCreateInstance_PortExhaustion(t *testing.T) { - // Create manager with very small port range - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 8001}, // Only 2 ports available - MaxInstances: 10, // Higher than available ports - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } + // Test port release on deletion + specificPort := 8080 options3 := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", + Port: specificPort, }, } - // Create instances to exhaust all ports - _, err := manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - // This should fail due to port exhaustion - _, err = manager.CreateInstance("instance3", options3) - if err == nil { - t.Error("Expected error when ports are exhausted") - } - if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") { - t.Errorf("Expected port exhaustion error, got: %v", err) - } -} - -func TestDeleteInstance_PortRelease(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - - // Create instance with specific port - _, err := manager.CreateInstance("test-instance", options) + _, err = manager.CreateInstance("port-test", options3) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Delete the instance - err = manager.DeleteInstance("test-instance") + err = manager.DeleteInstance("port-test") if err != nil { t.Fatalf("DeleteInstance failed: %v", err) } // Should be able to create new instance with same port - _, err = manager.CreateInstance("new-instance", options) + _, err = manager.CreateInstance("new-port-test", options3) if err != nil { t.Errorf("Expected to reuse port after deletion, got error: %v", err) } } -func TestGetInstance_Success(t *testing.T) { +func TestInstanceOperations(t *testing.T) { manager := createTestManager() - // Create an instance first options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", }, } + + // Create instance created, err := manager.CreateInstance("test-instance", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Retrieve it + // Get instance retrieved, err := manager.GetInstance("test-instance") if err != nil { t.Fatalf("GetInstance failed: %v", err) } - if retrieved.Name != created.Name { t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name) } -} -func TestGetInstance_NotFound(t *testing.T) { - manager := createTestManager() - - _, err := manager.GetInstance("nonexistent") - if err == nil { - t.Error("Expected error for nonexistent instance") - } - if !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } -} - -func TestListInstances(t *testing.T) { - manager := createTestManager() - - // Initially empty - instances, err := manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) - } - if len(instances) != 0 { - t.Errorf("Expected 0 instances, got %d", len(instances)) - } - - // Create some instances - options1 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - options2 := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - _, err = manager.CreateInstance("instance1", options1) - if err != nil { - t.Fatalf("CreateInstance 1 failed: %v", err) - } - - _, err = manager.CreateInstance("instance2", options2) - if err != nil { - t.Fatalf("CreateInstance 2 failed: %v", err) - } - - // List should return both - instances, err = manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) - } - if len(instances) != 2 { - t.Errorf("Expected 2 instances, got %d", len(instances)) - } - - // Check names are present - names := make(map[string]bool) - for _, inst := range instances { - names[inst.Name] = true - } - if !names["instance1"] || !names["instance2"] { - t.Error("Expected both instance1 and instance2 in list") - } -} - -func TestDeleteInstance_Success(t *testing.T) { - manager := createTestManager() - - // Create an instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Delete it - err = manager.DeleteInstance("test-instance") - if err != nil { - t.Fatalf("DeleteInstance failed: %v", err) - } - - // Should no longer exist - _, err = manager.GetInstance("test-instance") - if err == nil { - t.Error("Instance should not exist after deletion") - } -} - -func TestDeleteInstance_NotFound(t *testing.T) { - manager := createTestManager() - - err := manager.DeleteInstance("nonexistent") - if err == nil { - t.Error("Expected error for deleting nonexistent instance") - } - if !strings.Contains(err.Error(), "not found") { - t.Errorf("Expected 'not found' error, got: %v", err) - } -} - -func TestUpdateInstance_Success(t *testing.T) { - manager := createTestManager() - - // Create an instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Update it + // Update instance newOptions := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/new-model.gguf", @@ -468,35 +211,48 @@ func TestUpdateInstance_Success(t *testing.T) { if err != nil { t.Fatalf("UpdateInstance failed: %v", err) } - if updated.GetOptions().Model != "/path/to/new-model.gguf" { t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model) } - if updated.GetOptions().Port != 8081 { - t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port) + + // List instances + instances, err := manager.ListInstances() + if err != nil { + t.Fatalf("ListInstances failed: %v", err) } -} - -func TestUpdateInstance_NotFound(t *testing.T) { - manager := createTestManager() - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, + if len(instances) != 1 { + t.Errorf("Expected 1 instance, got %d", len(instances)) } - _, err := manager.UpdateInstance("nonexistent", options) + // Delete instance + err = manager.DeleteInstance("test-instance") + if err != nil { + t.Fatalf("DeleteInstance failed: %v", err) + } + + _, err = manager.GetInstance("test-instance") if err == nil { - t.Error("Expected error for updating nonexistent instance") + t.Error("Instance should not exist after deletion") } - if !strings.Contains(err.Error(), "not found") { + + // Test operations on non-existent instances + _, err = manager.GetInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } + + err = manager.DeleteInstance("nonexistent") + if err == nil || !strings.Contains(err.Error(), "not found") { + t.Errorf("Expected 'not found' error, got: %v", err) + } + + _, err = manager.UpdateInstance("nonexistent", options) + if err == nil || !strings.Contains(err.Error(), "not found") { t.Errorf("Expected 'not found' error, got: %v", err) } } -func TestPersistence_InstancePersistedOnCreation(t *testing.T) { - // Create temporary directory for persistence +func TestPersistence(t *testing.T) { tempDir := t.TempDir() cfg := config.InstancesConfig{ @@ -505,8 +261,9 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { MaxInstances: 10, TimeoutCheckInterval: 5, } - manager := manager.NewInstanceManager(cfg) + // Test instance persistence on creation + manager1 := manager.NewInstanceManager(cfg) options := &instance.CreateInstanceOptions{ LlamaServerOptions: llamacpp.LlamaServerOptions{ Model: "/path/to/model.gguf", @@ -514,8 +271,7 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { }, } - // Create instance - _, err := manager.CreateInstance("test-instance", options) + _, err := manager1.CreateInstance("test-instance", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } @@ -526,366 +282,166 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) { t.Errorf("Expected persistence file %s to exist", expectedPath) } - // Verify file contains correct data - data, err := os.ReadFile(expectedPath) + // Test loading instances from disk + manager2 := manager.NewInstanceManager(cfg) + instances, err := manager2.ListInstances() if err != nil { - t.Fatalf("Failed to read persistence file: %v", err) + t.Fatalf("ListInstances failed: %v", err) + } + if len(instances) != 1 { + t.Fatalf("Expected 1 loaded instance, got %d", len(instances)) + } + if instances[0].Name != "test-instance" { + t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name) } - var persistedInstance map[string]interface{} - if err := json.Unmarshal(data, &persistedInstance); err != nil { - t.Fatalf("Failed to unmarshal persisted data: %v", err) + // Test port map populated from loaded instances (port conflict should be detected) + _, err = manager2.CreateInstance("new-instance", options) // Same port + if err == nil || !strings.Contains(err.Error(), "port") { + t.Errorf("Expected port conflict error, got: %v", err) } - if persistedInstance["name"] != "test-instance" { - t.Errorf("Expected name 'test-instance', got %v", persistedInstance["name"]) - } -} - -func TestPersistence_InstancePersistedOnUpdate(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Create instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - // Update instance - newOptions := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/new-model.gguf", - Port: 8081, - }, - } - _, err = manager.UpdateInstance("test-instance", newOptions) - if err != nil { - t.Fatalf("UpdateInstance failed: %v", err) - } - - // Verify persistence file was updated - expectedPath := filepath.Join(tempDir, "test-instance.json") - data, err := os.ReadFile(expectedPath) - if err != nil { - t.Fatalf("Failed to read persistence file: %v", err) - } - - var persistedInstance map[string]interface{} - if err := json.Unmarshal(data, &persistedInstance); err != nil { - t.Fatalf("Failed to unmarshal persisted data: %v", err) - } - - // Check that the options were updated - options_data, ok := persistedInstance["options"].(map[string]interface{}) - if !ok { - t.Fatal("Expected options to be present in persisted data") - } - - if options_data["model"] != "/path/to/new-model.gguf" { - t.Errorf("Expected updated model '/path/to/new-model.gguf', got %v", options_data["model"]) - } -} - -func TestPersistence_InstanceFileDeletedOnDeletion(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Create instance - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - _, err := manager.CreateInstance("test-instance", options) - if err != nil { - t.Fatalf("CreateInstance failed: %v", err) - } - - expectedPath := filepath.Join(tempDir, "test-instance.json") - - // Verify file exists - if _, err := os.Stat(expectedPath); os.IsNotExist(err) { - t.Fatal("Expected persistence file to exist before deletion") - } - - // Delete instance - err = manager.DeleteInstance("test-instance") + // Test file deletion on instance deletion + err = manager2.DeleteInstance("test-instance") if err != nil { t.Fatalf("DeleteInstance failed: %v", err) } - // Verify file was deleted if _, err := os.Stat(expectedPath); !os.IsNotExist(err) { t.Error("Expected persistence file to be deleted") } } -func TestPersistence_InstancesLoadedFromDisk(t *testing.T) { - tempDir := t.TempDir() - - // Create JSON files manually (simulating previous run) - instance1JSON := `{ - "name": "instance1", - "running": false, - "options": { - "model": "/path/to/model1.gguf", - "port": 8080 - } - }` - - instance2JSON := `{ - "name": "instance2", - "running": false, - "options": { - "model": "/path/to/model2.gguf", - "port": 8081 - } - }` - - // Write JSON files - err := os.WriteFile(filepath.Join(tempDir, "instance1.json"), []byte(instance1JSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - err = os.WriteFile(filepath.Join(tempDir, "instance2.json"), []byte(instance2JSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - // Create manager - should load instances from disk +func TestTimeoutFunctionality(t *testing.T) { + // Test timeout checker initialization cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, + TimeoutCheckInterval: 10, + MaxInstances: 5, } + manager := manager.NewInstanceManager(cfg) - - // Verify instances were loaded - instances, err := manager.ListInstances() - if err != nil { - t.Fatalf("ListInstances failed: %v", err) + if manager == nil { + t.Fatal("Manager should be initialized with timeout checker") } + manager.Shutdown() // Clean up - if len(instances) != 2 { - t.Fatalf("Expected 2 loaded instances, got %d", len(instances)) - } + // Test timeout behavior with actual timeout logic + testManager := createTestManager() + defer testManager.Shutdown() - // Check instances by name - instancesByName := make(map[string]*instance.Process) - for _, inst := range instances { - instancesByName[inst.Name] = inst - } - - instance1, exists := instancesByName["instance1"] - if !exists { - t.Error("Expected instance1 to be loaded") - } else { - if instance1.GetOptions().Model != "/path/to/model1.gguf" { - t.Errorf("Expected model '/path/to/model1.gguf', got %q", instance1.GetOptions().Model) - } - if instance1.GetOptions().Port != 8080 { - t.Errorf("Expected port 8080, got %d", instance1.GetOptions().Port) - } - } - - instance2, exists := instancesByName["instance2"] - if !exists { - t.Error("Expected instance2 to be loaded") - } else { - if instance2.GetOptions().Model != "/path/to/model2.gguf" { - t.Errorf("Expected model '/path/to/model2.gguf', got %q", instance2.GetOptions().Model) - } - if instance2.GetOptions().Port != 8081 { - t.Errorf("Expected port 8081, got %d", instance2.GetOptions().Port) - } - } -} - -func TestPersistence_PortMapPopulatedFromLoadedInstances(t *testing.T) { - tempDir := t.TempDir() - - // Create JSON file with specific port - instanceJSON := `{ - "name": "test-instance", - "running": false, - "options": { - "model": "/path/to/model.gguf", - "port": 8080 - } - }` - - err := os.WriteFile(filepath.Join(tempDir, "test-instance.json"), []byte(instanceJSON), 0644) - if err != nil { - t.Fatalf("Failed to write test JSON file: %v", err) - } - - // Create manager - should load instance and register port - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - TimeoutCheckInterval: 5, - } - manager := manager.NewInstanceManager(cfg) - - // Try to create new instance with same port - should fail due to conflict + idleTimeout := 1 // 1 minute options := &instance.CreateInstanceOptions{ + IdleTimeout: &idleTimeout, LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model2.gguf", - Port: 8080, // Same port as loaded instance + Model: "/path/to/model.gguf", }, } - _, err = manager.CreateInstance("new-instance", options) - if err == nil { - t.Error("Expected error for port conflict with loaded instance") - } - if !strings.Contains(err.Error(), "port") || !strings.Contains(err.Error(), "in use") { - t.Errorf("Expected port conflict error, got: %v", err) - } -} - -func TestPersistence_CompleteInstanceDataRoundTrip(t *testing.T) { - tempDir := t.TempDir() - - cfg := config.InstancesConfig{ - PortRange: [2]int{8000, 9000}, - InstancesDir: tempDir, - MaxInstances: 10, - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, - TimeoutCheckInterval: 5, - } - - // Create first manager and instance with comprehensive options - manager1 := manager.NewInstanceManager(cfg) - - autoRestart := false - maxRestarts := 10 - restartDelay := 30 - - originalOptions := &instance.CreateInstanceOptions{ - AutoRestart: &autoRestart, - MaxRestarts: &maxRestarts, - RestartDelay: &restartDelay, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - Port: 8080, - Host: "localhost", - CtxSize: 4096, - GPULayers: 32, - Temperature: 0.7, - TopK: 40, - TopP: 0.9, - Verbose: true, - FlashAttn: false, - Lora: []string{"adapter1.bin", "adapter2.bin"}, - HFRepo: "microsoft/DialoGPT-medium", - }, - } - - originalInstance, err := manager1.CreateInstance("roundtrip-test", originalOptions) + inst, err := testManager.CreateInstance("timeout-test", options) if err != nil { t.Fatalf("CreateInstance failed: %v", err) } - // Create second manager (simulating restart) - should load the instance - manager2 := manager.NewInstanceManager(cfg) - - loadedInstance, err := manager2.GetInstance("roundtrip-test") + _, err = testManager.StartInstance("timeout-test") if err != nil { - t.Fatalf("GetInstance failed after reload: %v", err) + t.Fatalf("StartInstance failed: %v", err) } - // Compare all data - if loadedInstance.Name != originalInstance.Name { - t.Errorf("Name mismatch: original=%q, loaded=%q", originalInstance.Name, loadedInstance.Name) + // Create a mock time provider to simulate timeout + mockTime := NewMockTimeProvider(time.Now()) + inst.SetTimeProvider(mockTime) + inst.UpdateLastRequestTime() + + // Advance time to trigger timeout + mockTime.SetTime(time.Now().Add(2 * time.Minute)) + + // Verify the instance should timeout + if !inst.ShouldTimeout() { + t.Fatal("Instance should be configured to timeout") } - originalOpts := originalInstance.GetOptions() - loadedOpts := loadedInstance.GetOptions() - - // Compare restart options - if *loadedOpts.AutoRestart != *originalOpts.AutoRestart { - t.Errorf("AutoRestart mismatch: original=%v, loaded=%v", *originalOpts.AutoRestart, *loadedOpts.AutoRestart) - } - if *loadedOpts.MaxRestarts != *originalOpts.MaxRestarts { - t.Errorf("MaxRestarts mismatch: original=%v, loaded=%v", *originalOpts.MaxRestarts, *loadedOpts.MaxRestarts) - } - if *loadedOpts.RestartDelay != *originalOpts.RestartDelay { - t.Errorf("RestartDelay mismatch: original=%v, loaded=%v", *originalOpts.RestartDelay, *loadedOpts.RestartDelay) + // Test stopping timed out instance + _, err = testManager.StopInstance("timeout-test") + if err != nil { + t.Fatalf("StopInstance failed: %v", err) } - // Compare llama server options - if loadedOpts.Model != originalOpts.Model { - t.Errorf("Model mismatch: original=%q, loaded=%q", originalOpts.Model, loadedOpts.Model) + stoppedInst, err := testManager.GetInstance("timeout-test") + if err != nil { + t.Fatalf("GetInstance failed: %v", err) } - if loadedOpts.Port != originalOpts.Port { - t.Errorf("Port mismatch: original=%d, loaded=%d", originalOpts.Port, loadedOpts.Port) + if stoppedInst.Running { + t.Error("Instance should not be running after timeout stop") } - if loadedOpts.Host != originalOpts.Host { - t.Errorf("Host mismatch: original=%q, loaded=%q", originalOpts.Host, loadedOpts.Host) - } - if loadedOpts.CtxSize != originalOpts.CtxSize { - t.Errorf("CtxSize mismatch: original=%d, loaded=%d", originalOpts.CtxSize, loadedOpts.CtxSize) - } - if loadedOpts.GPULayers != originalOpts.GPULayers { - t.Errorf("GPULayers mismatch: original=%d, loaded=%d", originalOpts.GPULayers, loadedOpts.GPULayers) - } - if loadedOpts.Temperature != originalOpts.Temperature { - t.Errorf("Temperature mismatch: original=%f, loaded=%f", originalOpts.Temperature, loadedOpts.Temperature) - } - if loadedOpts.TopK != originalOpts.TopK { - t.Errorf("TopK mismatch: original=%d, loaded=%d", originalOpts.TopK, loadedOpts.TopK) - } - if loadedOpts.TopP != originalOpts.TopP { - t.Errorf("TopP mismatch: original=%f, loaded=%f", originalOpts.TopP, loadedOpts.TopP) - } - if loadedOpts.Verbose != originalOpts.Verbose { - t.Errorf("Verbose mismatch: original=%v, loaded=%v", originalOpts.Verbose, loadedOpts.Verbose) - } - if loadedOpts.FlashAttn != originalOpts.FlashAttn { - t.Errorf("FlashAttn mismatch: original=%v, loaded=%v", originalOpts.FlashAttn, loadedOpts.FlashAttn) - } - if loadedOpts.HFRepo != originalOpts.HFRepo { - t.Errorf("HFRepo mismatch: original=%q, loaded=%q", originalOpts.HFRepo, loadedOpts.HFRepo) +} + +func TestConcurrentAccess(t *testing.T) { + manager := createTestManager() + defer manager.Shutdown() + + // Test concurrent operations + var wg sync.WaitGroup + errChan := make(chan error, 10) + + // Concurrent instance creation + for i := 0; i < 5; i++ { + wg.Add(1) + go func(index int) { + defer wg.Done() + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + instanceName := fmt.Sprintf("concurrent-test-%d", index) + if _, err := manager.CreateInstance(instanceName, options); err != nil { + errChan <- err + } + }(i) } - // Compare slice fields - if !reflect.DeepEqual(loadedOpts.Lora, originalOpts.Lora) { - t.Errorf("Lora mismatch: original=%v, loaded=%v", originalOpts.Lora, loadedOpts.Lora) + // Concurrent list operations + for i := 0; i < 3; i++ { + wg.Add(1) + go func() { + defer wg.Done() + if _, err := manager.ListInstances(); err != nil { + errChan <- err + } + }() } - // Verify created timestamp is preserved - if loadedInstance.Created != originalInstance.Created { - t.Errorf("Created timestamp mismatch: original=%d, loaded=%d", originalInstance.Created, loadedInstance.Created) + wg.Wait() + close(errChan) + + // Check for any errors during concurrent access + for err := range errChan { + t.Errorf("Concurrent access error: %v", err) } } +func TestShutdown(t *testing.T) { + manager := createTestManager() + + // Create test instance + options := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + } + _, err := manager.CreateInstance("test-instance", options) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Shutdown should not panic + manager.Shutdown() + + // Multiple shutdowns should not panic + manager.Shutdown() +} + // Helper function to create a test manager with standard config func createTestManager() manager.InstanceManager { cfg := config.InstancesConfig{ @@ -900,3 +456,25 @@ func createTestManager() manager.InstanceManager { } return manager.NewInstanceManager(cfg) } + +// Helper for timeout tests +type MockTimeProvider struct { + currentTime time.Time + mu sync.RWMutex +} + +func NewMockTimeProvider(t time.Time) *MockTimeProvider { + return &MockTimeProvider{currentTime: t} +} + +func (m *MockTimeProvider) Now() time.Time { + m.mu.RLock() + defer m.mu.RUnlock() + return m.currentTime +} + +func (m *MockTimeProvider) SetTime(t time.Time) { + m.mu.Lock() + defer m.mu.Unlock() + m.currentTime = t +} From 1aaab96cec7edabd1196527dabb77a446373c29d Mon Sep 17 00:00:00 2001 From: LordMathis Date: Tue, 19 Aug 2025 19:24:54 +0200 Subject: [PATCH 10/15] Add idle timeout configuration to instance options and basic fields --- webui/src/lib/zodFormUtils.ts | 7 ++++++- webui/src/schemas/instanceOptions.ts | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts index e5400d3..5a964bb 100644 --- a/webui/src/lib/zodFormUtils.ts +++ b/webui/src/lib/zodFormUtils.ts @@ -1,7 +1,7 @@ import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions' // Only define the basic fields we want to show by default -export const basicFieldsConfig: Record Date: Tue, 19 Aug 2025 19:25:15 +0200 Subject: [PATCH 11/15] Refactor logging in checkAllTimeouts --- pkg/manager/timeout.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go index 6b67fa2..a28bc67 100644 --- a/pkg/manager/timeout.go +++ b/pkg/manager/timeout.go @@ -17,10 +17,10 @@ func (im *instanceManager) checkAllTimeouts() { // Stop the timed-out instances for _, name := range timeoutInstances { log.Printf("Instance %s has timed out, stopping it", name) - if proc, err := im.StopInstance(name); err != nil { + if _, err := im.StopInstance(name); err != nil { log.Printf("Error stopping instance %s: %v", name, err) } else { - log.Printf("Instance %s stopped successfully, process: %v", name, proc) + log.Printf("Instance %s stopped successfully", name) } } } From eb1d4ab55f64e697cede2a60d66ab0ec9ba0adeb Mon Sep 17 00:00:00 2001 From: LordMathis Date: Tue, 19 Aug 2025 20:52:59 +0200 Subject: [PATCH 12/15] Enhance timeout functionality tests to validate configuration and logic without starting instances --- pkg/manager/manager_test.go | 57 ++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index e14286f..6c48366 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -326,7 +326,7 @@ func TestTimeoutFunctionality(t *testing.T) { } manager.Shutdown() // Clean up - // Test timeout behavior with actual timeout logic + // Test timeout configuration and logic without starting the actual process testManager := createTestManager() defer testManager.Shutdown() @@ -343,37 +343,66 @@ func TestTimeoutFunctionality(t *testing.T) { t.Fatalf("CreateInstance failed: %v", err) } - _, err = testManager.StartInstance("timeout-test") - if err != nil { - t.Fatalf("StartInstance failed: %v", err) + // Test timeout configuration is properly set + if inst.GetOptions().IdleTimeout == nil { + t.Fatal("Instance should have idle timeout configured") + } + if *inst.GetOptions().IdleTimeout != 1 { + t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout) } + // Test timeout logic without actually starting the process // Create a mock time provider to simulate timeout mockTime := NewMockTimeProvider(time.Now()) inst.SetTimeProvider(mockTime) + + // Set instance to running state so timeout logic can work + inst.Running = true + + // Simulate instance being "running" for timeout check (without actual process) + // We'll test the ShouldTimeout logic directly inst.UpdateLastRequestTime() + // Initially should not timeout (just updated) + if inst.ShouldTimeout() { + t.Error("Instance should not timeout immediately after request") + } + // Advance time to trigger timeout mockTime.SetTime(time.Now().Add(2 * time.Minute)) - // Verify the instance should timeout + // Now it should timeout if !inst.ShouldTimeout() { - t.Fatal("Instance should be configured to timeout") + t.Error("Instance should timeout after idle period") } - // Test stopping timed out instance - _, err = testManager.StopInstance("timeout-test") - if err != nil { - t.Fatalf("StopInstance failed: %v", err) + // Reset running state to avoid shutdown issues + inst.Running = false + + // Test that instance without timeout doesn't timeout + noTimeoutOptions := &instance.CreateInstanceOptions{ + LlamaServerOptions: llamacpp.LlamaServerOptions{ + Model: "/path/to/model.gguf", + }, + // No IdleTimeout set } - stoppedInst, err := testManager.GetInstance("timeout-test") + noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions) if err != nil { - t.Fatalf("GetInstance failed: %v", err) + t.Fatalf("CreateInstance failed: %v", err) } - if stoppedInst.Running { - t.Error("Instance should not be running after timeout stop") + + noTimeoutInst.SetTimeProvider(mockTime) + noTimeoutInst.Running = true // Set to running for timeout check + noTimeoutInst.UpdateLastRequestTime() + + // Even with time advanced, should not timeout + if noTimeoutInst.ShouldTimeout() { + t.Error("Instance without timeout configuration should never timeout") } + + // Reset running state to avoid shutdown issues + noTimeoutInst.Running = false } func TestConcurrentAccess(t *testing.T) { From 00a3cba717dd748ee3e422645a40d13567b47ba8 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Tue, 19 Aug 2025 22:34:48 +0200 Subject: [PATCH 13/15] Enhance shutdown handling in InstanceManager with proper synchronization and max instances check --- pkg/manager/manager.go | 41 +++++++++++++++++++++------------------ pkg/manager/operations.go | 9 +++++---- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index cf65227..bac7567 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -36,6 +36,8 @@ type instanceManager struct { // Timeout checker timeoutChecker *time.Ticker shutdownChan chan struct{} + shutdownDone chan struct{} + isShutdown bool } // NewInstanceManager creates a new instance of InstanceManager. @@ -50,6 +52,7 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), shutdownChan: make(chan struct{}), + shutdownDone: make(chan struct{}), } // Load existing instances from disk @@ -59,19 +62,12 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager // Start the timeout checker goroutine after initialization is complete go func() { - defer func() { - if r := recover(); r != nil { - log.Printf("Timeout checker goroutine recovered from panic: %v", r) - } - }() + defer close(im.shutdownDone) for { select { case <-im.timeoutChecker.C: - // Check if manager is still valid before proceeding - if im != nil { - im.checkAllTimeouts() - } + im.checkAllTimeouts() case <-im.shutdownChan: return // Exit goroutine on shutdown } @@ -127,18 +123,25 @@ func (im *instanceManager) Shutdown() { im.mu.Lock() defer im.mu.Unlock() - // Stop the timeout checker + // Check if already shutdown + if im.isShutdown { + return + } + im.isShutdown = true + + // Signal the timeout checker to stop + close(im.shutdownChan) + + // Release lock temporarily to wait for goroutine + im.mu.Unlock() + // Wait for the timeout checker goroutine to actually stop + <-im.shutdownDone + // Reacquire lock + im.mu.Lock() + + // Now stop the ticker if im.timeoutChecker != nil { im.timeoutChecker.Stop() - im.timeoutChecker = nil - } - - // Signal the shutdown channel to exit the goroutine - select { - case <-im.shutdownChan: - // Channel already closed - default: - close(im.shutdownChan) } var wg sync.WaitGroup diff --git a/pkg/manager/operations.go b/pkg/manager/operations.go index baa4eb7..7849152 100644 --- a/pkg/manager/operations.go +++ b/pkg/manager/operations.go @@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI return nil, fmt.Errorf("instance options cannot be nil") } - if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 { - return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances) - } - name, err := validation.ValidateInstanceName(name) if err != nil { return nil, err @@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI im.mu.Lock() defer im.mu.Unlock() + // Check max instances limit after acquiring the lock + if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 { + return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances) + } + // Check if instance with this name already exists if im.instances[name] != nil { return nil, fmt.Errorf("instance with name %s already exists", name) From 492c3ff2709f56142e22f12f072d6ac03e5fad05 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 13:25:56 +0200 Subject: [PATCH 14/15] Remove redundant timeout tests and improve test coverage for instance timeout validation --- pkg/instance/instance_test.go | 138 ++-------------------------------- pkg/instance/timeout_test.go | 118 +---------------------------- 2 files changed, 9 insertions(+), 247 deletions(-) diff --git a/pkg/instance/instance_test.go b/pkg/instance/instance_test.go index d89bad3..3ed6c08 100644 --- a/pkg/instance/instance_test.go +++ b/pkg/instance/instance_test.go @@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) { } } -func TestNewInstance_ValidationAndDefaults(t *testing.T) { - globalSettings := &config.InstancesConfig{ - LogsDir: "/tmp/test", - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, - } - - // Test with invalid negative values - invalidMaxRestarts := -5 - invalidRestartDelay := -10 - - options := &instance.CreateInstanceOptions{ - MaxRestarts: &invalidMaxRestarts, - RestartDelay: &invalidRestartDelay, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - instance := instance.NewInstance("test-instance", globalSettings, options) - opts := instance.GetOptions() - - // Check that negative values were corrected to 0 - if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 { - t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts) - } - if opts.RestartDelay == nil || *opts.RestartDelay != 0 { - t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay) - } -} - func TestSetOptions(t *testing.T) { globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", @@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) { } } -func TestSetOptions_NilOptions(t *testing.T) { - globalSettings := &config.InstancesConfig{ - LogsDir: "/tmp/test", - DefaultAutoRestart: true, - DefaultMaxRestarts: 3, - DefaultRestartDelay: 5, - } - - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - instance := instance.NewInstance("test-instance", globalSettings, options) - originalOptions := instance.GetOptions() - - // Try to set nil options - instance.SetOptions(nil) - - // Options should remain unchanged - currentOptions := instance.GetOptions() - if currentOptions.Model != originalOptions.Model { - t.Error("Options should not change when setting nil options") - } -} - func TestGetProxy(t *testing.T) { globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", @@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) { } } -func TestUnmarshalJSON_PartialOptions(t *testing.T) { - jsonData := `{ - "name": "test-instance", - "running": false, - "options": { - "model": "/path/to/model.gguf" - } - }` - - var inst instance.Process - err := json.Unmarshal([]byte(jsonData), &inst) - if err != nil { - t.Fatalf("JSON unmarshal failed: %v", err) - } - - opts := inst.GetOptions() - if opts.Model != "/path/to/model.gguf" { - t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model) - } - - // Note: Defaults are NOT applied during unmarshaling - // They should only be applied by NewInstance or SetOptions - if opts.AutoRestart != nil { - t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)") - } -} - -func TestUnmarshalJSON_NoOptions(t *testing.T) { - jsonData := `{ - "name": "test-instance", - "running": false - }` - - var inst instance.Process - err := json.Unmarshal([]byte(jsonData), &inst) - if err != nil { - t.Fatalf("JSON unmarshal failed: %v", err) - } - - if inst.Name != "test-instance" { - t.Errorf("Expected name 'test-instance', got %q", inst.Name) - } - if inst.Running { - t.Error("Expected running to be false") - } - - opts := inst.GetOptions() - if opts != nil { - t.Error("Expected options to be nil when not provided in JSON") - } -} - func TestCreateInstanceOptionsValidation(t *testing.T) { tests := []struct { name string @@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) { expectedMax int expectedDelay int }{ - { - name: "nil values", - maxRestarts: nil, - restartDelay: nil, - expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure - expectedDelay: 0, - }, { name: "valid positive values", maxRestarts: testutil.IntPtr(10), @@ -424,20 +306,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) { instance := instance.NewInstance("test", globalSettings, options) opts := instance.GetOptions() - if tt.maxRestarts != nil { - if opts.MaxRestarts == nil { - t.Error("Expected MaxRestarts to be set") - } else if *opts.MaxRestarts != tt.expectedMax { - t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts) - } + if opts.MaxRestarts == nil { + t.Error("Expected MaxRestarts to be set") + } else if *opts.MaxRestarts != tt.expectedMax { + t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts) } - if tt.restartDelay != nil { - if opts.RestartDelay == nil { - t.Error("Expected RestartDelay to be set") - } else if *opts.RestartDelay != tt.expectedDelay { - t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay) - } + if opts.RestartDelay == nil { + t.Error("Expected RestartDelay to be set") + } else if *opts.RestartDelay != tt.expectedDelay { + t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay) } }) } diff --git a/pkg/instance/timeout_test.go b/pkg/instance/timeout_test.go index 1a9579f..5cd67f2 100644 --- a/pkg/instance/timeout_test.go +++ b/pkg/instance/timeout_test.go @@ -46,20 +46,6 @@ func TestUpdateLastRequestTime(t *testing.T) { // Test that UpdateLastRequestTime doesn't panic inst.UpdateLastRequestTime() - - // Test concurrent calls to ensure thread safety - done := make(chan bool, 10) - for i := 0; i < 10; i++ { - go func() { - inst.UpdateLastRequestTime() - done <- true - }() - } - - // Wait for all goroutines to complete - for i := 0; i < 10; i++ { - <-done - } } func TestShouldTimeout_NotRunning(t *testing.T) { @@ -173,109 +159,6 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { } } -func TestShouldTimeout_TimeUnitConversion(t *testing.T) { - globalSettings := &config.InstancesConfig{ - LogsDir: "/tmp/test", - } - - tests := []struct { - name string - idleTimeout int // in minutes - lastRequestAge int // seconds ago - shouldTimeout bool - }{ - {"exactly at timeout boundary", 2, 120, false}, // 2 minutes = 120 seconds - {"just over timeout", 2, 121, true}, // 121 seconds > 120 seconds - {"well under timeout", 5, 60, false}, // 1 minute < 5 minutes - {"well over timeout", 1, 300, true}, // 5 minutes > 1 minute - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - options := &instance.CreateInstanceOptions{ - IdleTimeout: &tt.idleTimeout, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - inst := instance.NewInstance("test-instance", globalSettings, options) - inst.Running = true - - // Use MockTimeProvider to control time - mockTime := NewMockTimeProvider(time.Now()) - inst.SetTimeProvider(mockTime) - - // Set last request time to now - inst.UpdateLastRequestTime() - - // Advance time by the specified amount - mockTime.SetTime(time.Now().Add(time.Duration(tt.lastRequestAge) * time.Second)) - - result := inst.ShouldTimeout() - if result != tt.shouldTimeout { - t.Errorf("Expected timeout=%v for %s, got %v", tt.shouldTimeout, tt.name, result) - } - }) - } -} - -func TestInstanceTimeoutInitialization(t *testing.T) { - globalSettings := &config.InstancesConfig{ - LogsDir: "/tmp/test", - } - - idleTimeout := 5 - options := &instance.CreateInstanceOptions{ - IdleTimeout: &idleTimeout, - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - inst := instance.NewInstance("test-instance", globalSettings, options) - inst.Running = true - - // Use MockTimeProvider to control time and verify initialization - mockTime := NewMockTimeProvider(time.Now()) - inst.SetTimeProvider(mockTime) - - // Update last request time (simulates what Start() does) - inst.UpdateLastRequestTime() - - // Should not timeout immediately after initialization with a 5-minute timeout - if inst.ShouldTimeout() { - t.Error("Fresh instance should not timeout immediately") - } - - // Now advance time by 6 minutes and verify it would timeout - mockTime.SetTime(time.Now().Add(6 * time.Minute)) - if !inst.ShouldTimeout() { - t.Error("Instance should timeout after exceeding idle timeout period") - } -} - -func TestTimeoutConfiguration_DefaultValue(t *testing.T) { - globalSettings := &config.InstancesConfig{ - LogsDir: "/tmp/test", - } - - // Create instance without specifying idle timeout - options := &instance.CreateInstanceOptions{ - LlamaServerOptions: llamacpp.LlamaServerOptions{ - Model: "/path/to/model.gguf", - }, - } - - inst := instance.NewInstance("test-instance", globalSettings, options) - opts := inst.GetOptions() - - // Default should be 0 (disabled) - if opts.IdleTimeout == nil || *opts.IdleTimeout != 0 { - t.Errorf("Expected default IdleTimeout to be 0, got %v", opts.IdleTimeout) - } -} - func TestTimeoutConfiguration_Validation(t *testing.T) { globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", @@ -286,6 +169,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) { inputTimeout *int expectedTimeout int }{ + {"default value when nil", nil, 0}, {"positive value", testutil.IntPtr(10), 10}, {"zero value", testutil.IntPtr(0), 0}, {"negative value gets corrected", testutil.IntPtr(-5), 0}, From 7194e1fdd1cadc39b37f5fde5c1ac423a95e77d0 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 20 Aug 2025 13:32:03 +0200 Subject: [PATCH 15/15] Update README to clarify idle timeout management and state persistence features --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 697a384..65dfb42 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) 🔐 **API Key Authentication**: Separate keys for management vs inference access 📊 **Instance Monitoring**: Health checks, auto-restart, log management -⚡ **Persistent State**: Instances survive server restarts +⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period +💾 **State Persistence**: Ensure instances remain intact across server restarts ![Dashboard Screenshot](docs/images/screenshot.png)