mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 09:04:27 +00:00
Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
This commit is contained in:
@@ -11,7 +11,8 @@
|
||||
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
|
||||
🔐 **API Key Authentication**: Separate keys for management vs inference access
|
||||
📊 **Instance Monitoring**: Health checks, auto-restart, log management
|
||||
⚡ **Persistent State**: Instances survive server restarts
|
||||
⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
|
||||
💾 **State Persistence**: Ensure instances remain intact across server restarts
|
||||
|
||||

|
||||
|
||||
@@ -182,6 +183,7 @@ instances:
|
||||
default_auto_restart: true # Default auto-restart setting
|
||||
default_max_restarts: 3 # Default maximum restart attempts
|
||||
default_restart_delay: 5 # Default restart delay in seconds
|
||||
timeout_check_interval: 5 # How often (in minutes) running instances are checked for idle timeout
|
||||
```
|
||||
|
||||
**Environment Variables:**
|
||||
@@ -195,6 +197,7 @@ instances:
|
||||
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
|
||||
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
|
||||
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
|
||||
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - How often running instances are checked for idle timeout, in minutes
|
||||
|
||||
#### Authentication Configuration
|
||||
|
||||
|
||||
@@ -66,6 +66,9 @@ type InstancesConfig struct {
|
||||
|
||||
// Default restart delay for new instances (in seconds)
|
||||
DefaultRestartDelay int `yaml:"default_restart_delay"`
|
||||
|
||||
// Interval for checking instance timeouts (in minutes)
|
||||
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
|
||||
}
|
||||
|
||||
// AuthConfig contains authentication settings
|
||||
@@ -108,6 +111,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
||||
DefaultAutoRestart: true,
|
||||
DefaultMaxRestarts: 3,
|
||||
DefaultRestartDelay: 5,
|
||||
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
|
||||
},
|
||||
Auth: AuthConfig{
|
||||
RequireInferenceAuth: true,
|
||||
@@ -217,6 +221,11 @@ func loadEnvVars(cfg *AppConfig) {
|
||||
cfg.Instances.DefaultRestartDelay = seconds
|
||||
}
|
||||
}
|
||||
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
|
||||
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
|
||||
cfg.Instances.TimeoutCheckInterval = minutes
|
||||
}
|
||||
}
|
||||
// Auth config
|
||||
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
|
||||
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
|
||||
|
||||
@@ -13,16 +13,30 @@ import (
|
||||
"net/url"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TimeProvider abstracts the clock so that time-dependent logic can be driven
// by a mock clock in tests.
type TimeProvider interface {
	// Now reports the current time according to this provider.
	Now() time.Time
}

// realTimeProvider is the TimeProvider backed by the actual system clock.
type realTimeProvider struct{}

// Compile-time check that realTimeProvider satisfies TimeProvider.
var _ TimeProvider = realTimeProvider{}

// Now returns the current system time.
func (realTimeProvider) Now() time.Time {
	now := time.Now()
	return now
}
|
||||
|
||||
type CreateInstanceOptions struct {
|
||||
// Auto restart
|
||||
AutoRestart *bool `json:"auto_restart,omitempty"`
|
||||
MaxRestarts *int `json:"max_restarts,omitempty"`
|
||||
// RestartDelay duration in seconds
|
||||
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
|
||||
|
||||
RestartDelay *int `json:"restart_delay,omitempty"`
|
||||
// Timeout
|
||||
IdleTimeout *int `json:"idle_timeout,omitempty"`
|
||||
// LlamaServerOptions contains the options for the llama server
|
||||
llamacpp.LlamaServerOptions `json:",inline"`
|
||||
}
|
||||
|
||||
@@ -34,7 +48,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
|
||||
type tempCreateOptions struct {
|
||||
AutoRestart *bool `json:"auto_restart,omitempty"`
|
||||
MaxRestarts *int `json:"max_restarts,omitempty"`
|
||||
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
|
||||
RestartDelay *int `json:"restart_delay,omitempty"`
|
||||
IdleTimeout *int `json:"idle_timeout,omitempty"`
|
||||
}
|
||||
|
||||
var temp tempCreateOptions
|
||||
@@ -46,6 +61,7 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
|
||||
c.AutoRestart = temp.AutoRestart
|
||||
c.MaxRestarts = temp.MaxRestarts
|
||||
c.RestartDelay = temp.RestartDelay
|
||||
c.IdleTimeout = temp.IdleTimeout
|
||||
|
||||
// Now unmarshal the embedded LlamaServerOptions
|
||||
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
|
||||
@@ -83,6 +99,10 @@ type Process struct {
|
||||
// Restart control
|
||||
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
|
||||
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
|
||||
|
||||
// Timeout management
|
||||
lastRequestTime atomic.Int64 // Unix timestamp of last request
|
||||
timeProvider TimeProvider `json:"-"` // Time provider for testing
|
||||
}
|
||||
|
||||
// validateAndCopyOptions validates and creates a deep copy of the provided options
|
||||
@@ -117,6 +137,15 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
|
||||
}
|
||||
optionsCopy.RestartDelay = &restartDelay
|
||||
}
|
||||
|
||||
if options.IdleTimeout != nil {
|
||||
idleTimeout := *options.IdleTimeout
|
||||
if idleTimeout < 0 {
|
||||
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
|
||||
idleTimeout = 0
|
||||
}
|
||||
optionsCopy.IdleTimeout = &idleTimeout
|
||||
}
|
||||
}
|
||||
|
||||
return optionsCopy
|
||||
@@ -142,6 +171,11 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
|
||||
defaultRestartDelay := globalSettings.DefaultRestartDelay
|
||||
options.RestartDelay = &defaultRestartDelay
|
||||
}
|
||||
|
||||
if options.IdleTimeout == nil {
|
||||
defaultIdleTimeout := 0
|
||||
options.IdleTimeout = &defaultIdleTimeout
|
||||
}
|
||||
}
|
||||
|
||||
// NewInstance creates a new instance with the given name, log path, and options
|
||||
@@ -158,9 +192,7 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C
|
||||
options: optionsCopy,
|
||||
globalSettings: globalSettings,
|
||||
logger: logger,
|
||||
|
||||
Running: false,
|
||||
|
||||
timeProvider: realTimeProvider{},
|
||||
Created: time.Now().Unix(),
|
||||
}
|
||||
}
|
||||
@@ -189,6 +221,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
|
||||
i.proxy = nil
|
||||
}
|
||||
|
||||
// SetTimeProvider sets a custom time provider for testing
|
||||
func (i *Process) SetTimeProvider(tp TimeProvider) {
|
||||
i.timeProvider = tp
|
||||
}
|
||||
|
||||
// GetProxy returns the reverse proxy for this instance, creating it if needed
|
||||
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
||||
i.mu.Lock()
|
||||
|
||||
@@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewInstance_ValidationAndDefaults(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
DefaultAutoRestart: true,
|
||||
DefaultMaxRestarts: 3,
|
||||
DefaultRestartDelay: 5,
|
||||
}
|
||||
|
||||
// Test with invalid negative values
|
||||
invalidMaxRestarts := -5
|
||||
invalidRestartDelay := -10
|
||||
|
||||
options := &instance.CreateInstanceOptions{
|
||||
MaxRestarts: &invalidMaxRestarts,
|
||||
RestartDelay: &invalidRestartDelay,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
instance := instance.NewInstance("test-instance", globalSettings, options)
|
||||
opts := instance.GetOptions()
|
||||
|
||||
// Check that negative values were corrected to 0
|
||||
if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
|
||||
t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
|
||||
}
|
||||
if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
|
||||
t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetOptions(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
@@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetOptions_NilOptions(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
DefaultAutoRestart: true,
|
||||
DefaultMaxRestarts: 3,
|
||||
DefaultRestartDelay: 5,
|
||||
}
|
||||
|
||||
options := &instance.CreateInstanceOptions{
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
instance := instance.NewInstance("test-instance", globalSettings, options)
|
||||
originalOptions := instance.GetOptions()
|
||||
|
||||
// Try to set nil options
|
||||
instance.SetOptions(nil)
|
||||
|
||||
// Options should remain unchanged
|
||||
currentOptions := instance.GetOptions()
|
||||
if currentOptions.Model != originalOptions.Model {
|
||||
t.Error("Options should not change when setting nil options")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProxy(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
@@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalJSON_PartialOptions(t *testing.T) {
|
||||
jsonData := `{
|
||||
"name": "test-instance",
|
||||
"running": false,
|
||||
"options": {
|
||||
"model": "/path/to/model.gguf"
|
||||
}
|
||||
}`
|
||||
|
||||
var inst instance.Process
|
||||
err := json.Unmarshal([]byte(jsonData), &inst)
|
||||
if err != nil {
|
||||
t.Fatalf("JSON unmarshal failed: %v", err)
|
||||
}
|
||||
|
||||
opts := inst.GetOptions()
|
||||
if opts.Model != "/path/to/model.gguf" {
|
||||
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
|
||||
}
|
||||
|
||||
// Note: Defaults are NOT applied during unmarshaling
|
||||
// They should only be applied by NewInstance or SetOptions
|
||||
if opts.AutoRestart != nil {
|
||||
t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalJSON_NoOptions(t *testing.T) {
|
||||
jsonData := `{
|
||||
"name": "test-instance",
|
||||
"running": false
|
||||
}`
|
||||
|
||||
var inst instance.Process
|
||||
err := json.Unmarshal([]byte(jsonData), &inst)
|
||||
if err != nil {
|
||||
t.Fatalf("JSON unmarshal failed: %v", err)
|
||||
}
|
||||
|
||||
if inst.Name != "test-instance" {
|
||||
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
|
||||
}
|
||||
if inst.Running {
|
||||
t.Error("Expected running to be false")
|
||||
}
|
||||
|
||||
opts := inst.GetOptions()
|
||||
if opts != nil {
|
||||
t.Error("Expected options to be nil when not provided in JSON")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateInstanceOptionsValidation(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
||||
expectedMax int
|
||||
expectedDelay int
|
||||
}{
|
||||
{
|
||||
name: "nil values",
|
||||
maxRestarts: nil,
|
||||
restartDelay: nil,
|
||||
expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure
|
||||
expectedDelay: 0,
|
||||
},
|
||||
{
|
||||
name: "valid positive values",
|
||||
maxRestarts: testutil.IntPtr(10),
|
||||
@@ -424,21 +306,17 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
||||
instance := instance.NewInstance("test", globalSettings, options)
|
||||
opts := instance.GetOptions()
|
||||
|
||||
if tt.maxRestarts != nil {
|
||||
if opts.MaxRestarts == nil {
|
||||
t.Error("Expected MaxRestarts to be set")
|
||||
} else if *opts.MaxRestarts != tt.expectedMax {
|
||||
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
|
||||
}
|
||||
}
|
||||
|
||||
if tt.restartDelay != nil {
|
||||
if opts.RestartDelay == nil {
|
||||
t.Error("Expected RestartDelay to be set")
|
||||
} else if *opts.RestartDelay != tt.expectedDelay {
|
||||
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,9 @@ func (i *Process) Start() error {
|
||||
i.restarts = 0
|
||||
}
|
||||
|
||||
// Initialize last request time to current time when starting
|
||||
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
|
||||
|
||||
// Create log files
|
||||
if err := i.logger.Create(); err != nil {
|
||||
return fmt.Errorf("failed to create log files: %w", err)
|
||||
|
||||
28
pkg/instance/timeout.go
Normal file
28
pkg/instance/timeout.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package instance
|
||||
|
||||
// UpdateLastRequestTime updates the last request access time for the instance via proxy
|
||||
func (i *Process) UpdateLastRequestTime() {
|
||||
i.mu.Lock()
|
||||
defer i.mu.Unlock()
|
||||
|
||||
lastRequestTime := i.timeProvider.Now().Unix()
|
||||
i.lastRequestTime.Store(lastRequestTime)
|
||||
}
|
||||
|
||||
func (i *Process) ShouldTimeout() bool {
|
||||
i.mu.RLock()
|
||||
defer i.mu.RUnlock()
|
||||
|
||||
if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if the last request time exceeds the idle timeout
|
||||
lastRequest := i.lastRequestTime.Load()
|
||||
idleTimeoutMinutes := *i.options.IdleTimeout
|
||||
|
||||
// Convert timeout from minutes to seconds for comparison
|
||||
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
|
||||
|
||||
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
|
||||
}
|
||||
195
pkg/instance/timeout_test.go
Normal file
195
pkg/instance/timeout_test.go
Normal file
@@ -0,0 +1,195 @@
|
||||
package instance_test
|
||||
|
||||
import (
|
||||
"llamactl/pkg/backends/llamacpp"
|
||||
"llamactl/pkg/config"
|
||||
"llamactl/pkg/instance"
|
||||
"llamactl/pkg/testutil"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MockTimeProvider is a manually controlled clock that implements
// TimeProvider for tests. It stores the time with one-second resolution.
type MockTimeProvider struct {
	currentTime atomic.Int64 // Unix timestamp
}

// NewMockTimeProvider returns a mock clock whose current time is t.
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
	mock := new(MockTimeProvider)
	mock.SetTime(t)
	return mock
}

// Now returns the time most recently stored in the mock clock.
func (m *MockTimeProvider) Now() time.Time {
	seconds := m.currentTime.Load()
	return time.Unix(seconds, 0)
}

// SetTime moves the mock clock to t (sub-second precision is dropped).
func (m *MockTimeProvider) SetTime(t time.Time) {
	seconds := t.Unix()
	m.currentTime.Store(seconds)
}
|
||||
|
||||
// Timeout-related tests
|
||||
|
||||
func TestUpdateLastRequestTime(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
options := &instance.CreateInstanceOptions{
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
|
||||
// Test that UpdateLastRequestTime doesn't panic
|
||||
inst.UpdateLastRequestTime()
|
||||
}
|
||||
|
||||
func TestShouldTimeout_NotRunning(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
idleTimeout := 1 // 1 minute
|
||||
options := &instance.CreateInstanceOptions{
|
||||
IdleTimeout: &idleTimeout,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
|
||||
// Instance is not running, should not timeout regardless of configuration
|
||||
if inst.ShouldTimeout() {
|
||||
t.Error("Non-running instance should never timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
idleTimeout *int
|
||||
}{
|
||||
{"nil timeout", nil},
|
||||
{"zero timeout", testutil.IntPtr(0)},
|
||||
{"negative timeout", testutil.IntPtr(-5)},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
options := &instance.CreateInstanceOptions{
|
||||
IdleTimeout: tt.idleTimeout,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
// Simulate running state
|
||||
inst.Running = true
|
||||
|
||||
if inst.ShouldTimeout() {
|
||||
t.Errorf("Instance with %s should not timeout", tt.name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
idleTimeout := 5 // 5 minutes
|
||||
options := &instance.CreateInstanceOptions{
|
||||
IdleTimeout: &idleTimeout,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
inst.Running = true
|
||||
|
||||
// Update last request time to now
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
// Should not timeout immediately
|
||||
if inst.ShouldTimeout() {
|
||||
t.Error("Instance should not timeout when last request was recent")
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
idleTimeout := 1 // 1 minute
|
||||
options := &instance.CreateInstanceOptions{
|
||||
IdleTimeout: &idleTimeout,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
inst.Running = true
|
||||
|
||||
// Use MockTimeProvider to simulate old last request time
|
||||
mockTime := NewMockTimeProvider(time.Now())
|
||||
inst.SetTimeProvider(mockTime)
|
||||
|
||||
// Set last request time to now
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
// Advance time by 2 minutes (exceeds 1 minute timeout)
|
||||
mockTime.SetTime(time.Now().Add(2 * time.Minute))
|
||||
|
||||
if !inst.ShouldTimeout() {
|
||||
t.Error("Instance should timeout when last request exceeds idle timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTimeoutConfiguration_Validation(t *testing.T) {
|
||||
globalSettings := &config.InstancesConfig{
|
||||
LogsDir: "/tmp/test",
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
inputTimeout *int
|
||||
expectedTimeout int
|
||||
}{
|
||||
{"default value when nil", nil, 0},
|
||||
{"positive value", testutil.IntPtr(10), 10},
|
||||
{"zero value", testutil.IntPtr(0), 0},
|
||||
{"negative value gets corrected", testutil.IntPtr(-5), 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
options := &instance.CreateInstanceOptions{
|
||||
IdleTimeout: tt.inputTimeout,
|
||||
LlamaServerOptions: llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
inst := instance.NewInstance("test-instance", globalSettings, options)
|
||||
opts := inst.GetOptions()
|
||||
|
||||
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
|
||||
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// InstanceManager defines the interface for managing instances of the llama server.
|
||||
@@ -31,20 +32,48 @@ type instanceManager struct {
|
||||
instances map[string]*instance.Process
|
||||
ports map[int]bool
|
||||
instancesConfig config.InstancesConfig
|
||||
|
||||
// Timeout checker
|
||||
timeoutChecker *time.Ticker
|
||||
shutdownChan chan struct{}
|
||||
shutdownDone chan struct{}
|
||||
isShutdown bool
|
||||
}
|
||||
|
||||
// NewInstanceManager creates a new instance of InstanceManager.
|
||||
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
|
||||
if instancesConfig.TimeoutCheckInterval <= 0 {
|
||||
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
||||
}
|
||||
im := &instanceManager{
|
||||
instances: make(map[string]*instance.Process),
|
||||
ports: make(map[int]bool),
|
||||
instancesConfig: instancesConfig,
|
||||
|
||||
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
||||
shutdownChan: make(chan struct{}),
|
||||
shutdownDone: make(chan struct{}),
|
||||
}
|
||||
|
||||
// Load existing instances from disk
|
||||
if err := im.loadInstances(); err != nil {
|
||||
log.Printf("Error loading instances: %v", err)
|
||||
}
|
||||
|
||||
// Start the timeout checker goroutine after initialization is complete
|
||||
go func() {
|
||||
defer close(im.shutdownDone)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-im.timeoutChecker.C:
|
||||
im.checkAllTimeouts()
|
||||
case <-im.shutdownChan:
|
||||
return // Exit goroutine on shutdown
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return im
|
||||
}
|
||||
|
||||
@@ -94,6 +123,27 @@ func (im *instanceManager) Shutdown() {
|
||||
im.mu.Lock()
|
||||
defer im.mu.Unlock()
|
||||
|
||||
// Check if already shutdown
|
||||
if im.isShutdown {
|
||||
return
|
||||
}
|
||||
im.isShutdown = true
|
||||
|
||||
// Signal the timeout checker to stop
|
||||
close(im.shutdownChan)
|
||||
|
||||
// Release lock temporarily to wait for goroutine
|
||||
im.mu.Unlock()
|
||||
// Wait for the timeout checker goroutine to actually stop
|
||||
<-im.shutdownDone
|
||||
// Reacquire lock
|
||||
im.mu.Lock()
|
||||
|
||||
// Now stop the ticker
|
||||
if im.timeoutChecker != nil {
|
||||
im.timeoutChecker.Stop()
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(im.instances))
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
||||
return nil, fmt.Errorf("instance options cannot be nil")
|
||||
}
|
||||
|
||||
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
||||
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
||||
}
|
||||
|
||||
name, err := validation.ValidateInstanceName(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
||||
im.mu.Lock()
|
||||
defer im.mu.Unlock()
|
||||
|
||||
// Check max instances limit after acquiring the lock
|
||||
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
||||
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
||||
}
|
||||
|
||||
// Check if instance with this name already exists
|
||||
if im.instances[name] != nil {
|
||||
return nil, fmt.Errorf("instance with name %s already exists", name)
|
||||
|
||||
26
pkg/manager/timeout.go
Normal file
26
pkg/manager/timeout.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package manager
|
||||
|
||||
import "log"
|
||||
|
||||
func (im *instanceManager) checkAllTimeouts() {
|
||||
im.mu.RLock()
|
||||
var timeoutInstances []string
|
||||
|
||||
// Identify instances that should timeout
|
||||
for _, inst := range im.instances {
|
||||
if inst.ShouldTimeout() {
|
||||
timeoutInstances = append(timeoutInstances, inst.Name)
|
||||
}
|
||||
}
|
||||
im.mu.RUnlock() // Release read lock before calling StopInstance
|
||||
|
||||
// Stop the timed-out instances
|
||||
for _, name := range timeoutInstances {
|
||||
log.Printf("Instance %s has timed out, stopping it", name)
|
||||
if _, err := im.StopInstance(name); err != nil {
|
||||
log.Printf("Error stopping instance %s: %v", name, err)
|
||||
} else {
|
||||
log.Printf("Instance %s stopped successfully", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -472,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
||||
proxyPath = "/" + proxyPath
|
||||
}
|
||||
|
||||
// Update the last request time for the instance
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
// Modify the request to remove the proxy prefix
|
||||
originalPath := r.URL.Path
|
||||
r.URL.Path = proxyPath
|
||||
@@ -582,6 +585,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
// Update last request time for the instance
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
// Recreate the request body from the bytes we read
|
||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||
r.ContentLength = int64(len(bodyBytes))
|
||||
|
||||
@@ -21,6 +21,11 @@ export const basicFieldsConfig: Record<string, {
|
||||
placeholder: '5',
|
||||
description: 'Delay in seconds before attempting restart'
|
||||
},
|
||||
idle_timeout: {
|
||||
label: 'Idle Timeout (minutes)',
|
||||
placeholder: '60',
|
||||
description: 'Time in minutes before instance is considered idle and stopped'
|
||||
},
|
||||
model: {
|
||||
label: 'Model Path',
|
||||
placeholder: '/path/to/model.gguf',
|
||||
|
||||
@@ -6,6 +6,7 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
auto_restart: z.boolean().optional(),
|
||||
max_restarts: z.number().optional(),
|
||||
restart_delay: z.number().optional(),
|
||||
idle_timeout: z.number().optional(),
|
||||
|
||||
// Common params
|
||||
verbose_prompt: z.boolean().optional(),
|
||||
|
||||
Reference in New Issue
Block a user