Merge pull request #26 from lordmathis/feat/lru-eviction

feat: Implement least recently used instance eviction
2025-12-23 01:24:24 +00:00 · 2025-08-31 12:44:06 +02:00
parent 8fdebb069c 9579930a6a
commit 9a4dafeee8
10 changed files with 663 additions and 358 deletions
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)  
 🔐 **API Key Authentication**: Separate keys for management vs inference access  
 📊 **Instance Monitoring**: Health checks, auto-restart, log management  
-⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period  
+⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits  
 💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests  
 💾 **State Persistence**: Ensure instances remain intact across server restarts  

@@ -113,6 +113,8 @@ instances:
  logs_dir: ~/.local/share/llamactl/logs    # Logs directory
  auto_create_dirs: true         # Auto-create data/config/logs dirs if missing
  max_instances: -1              # Max instances (-1 = unlimited)
+  max_running_instances: -1      # Max running instances (-1 = unlimited)
+  enable_lru_eviction: true      # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true     # Auto-restart new instances by default
  default_max_restarts: 3        # Max restarts for new instances
@@ -184,6 +186,8 @@ instances:
  logs_dir: "~/.local/share/llamactl/logs"          # Directory for instance logs (default: data_dir/logs)
  auto_create_dirs: true                            # Automatically create data/config/logs directories (default: true)
  max_instances: -1                                 # Maximum instances (-1 = unlimited)
+  max_running_instances: -1                         # Maximum running instances (-1 = unlimited)
+  enable_lru_eviction: true                         # Enable LRU eviction for idle instances
  llama_executable: "llama-server"                  # Path to llama-server executable
  default_auto_restart: true                        # Default auto-restart setting
  default_max_restarts: 3                           # Default maximum restart attempts
@@ -200,6 +204,8 @@ instances:
 - `LLAMACTL_LOGS_DIR` - Log directory path
 - `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
 - `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
+- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
+- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
 - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -58,6 +58,9 @@ type InstancesConfig struct {
 	// Maximum number of instances that can be running at the same time
 	MaxRunningInstances int `yaml:"max_running_instances,omitempty"`

+	// Enable LRU eviction for instance logs
+	EnableLRUEviction bool `yaml:"enable_lru_eviction"`
+
 	// Path to llama-server executable
 	LlamaExecutable string `yaml:"llama_executable"`

@@ -117,6 +120,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			AutoCreateDirs:       true,
 			MaxInstances:         -1, // -1 means unlimited
 			MaxRunningInstances:  -1, // -1 means unlimited
+			EnableLRUEviction:    true,
 			LlamaExecutable:      "llama-server",
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
@@ -220,6 +224,11 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.MaxRunningInstances = m
 		}
 	}
+	if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
+		if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
+			cfg.Instances.EnableLRUEviction = b
+		}
+	}
 	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
 		cfg.Instances.LlamaExecutable = llamaExec
 	}
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -110,19 +110,25 @@ func (i *Process) Stop() error {

 	i.mu.Unlock()

-	// Stop the process with SIGINT
-	if i.cmd.Process != nil {
+	// Stop the process with SIGINT if cmd exists
+	if i.cmd != nil && i.cmd.Process != nil {
 		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
 		}
 	}

+	// If no process exists, we can return immediately
+	if i.cmd == nil || monitorDone == nil {
+		i.logger.Close()
+		return nil
+	}
+
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
-		if i.cmd.Process != nil {
+		if i.cmd != nil && i.cmd.Process != nil {
 			killErr := i.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -144,6 +150,10 @@ func (i *Process) Stop() error {
 	return nil
 }

+func (i *Process) LastRequestTime() int64 {
+	return i.lastRequestTime.Load()
+}
+
 func (i *Process) WaitForHealthy(timeout int) error {
 	if !i.IsRunning() {
 		return fmt.Errorf("instance %s is not running", i.Name)
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -21,7 +21,9 @@ type InstanceManager interface {
 	UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
 	DeleteInstance(name string) error
 	StartInstance(name string) (*instance.Process, error)
+	IsMaxRunningInstancesReached() bool
 	StopInstance(name string) (*instance.Process, error)
+	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Process, error)
 	GetInstanceLogs(name string) (string, error)
 	Shutdown()
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -11,7 +11,6 @@ import (
 	"strings"
 	"sync"
 	"testing"
-	"time"
 )

 func TestNewInstanceManager(t *testing.T) {
@@ -26,13 +25,13 @@ func TestNewInstanceManager(t *testing.T) {
 		TimeoutCheckInterval: 5,
 	}

-	manager := manager.NewInstanceManager(cfg)
-	if manager == nil {
+	mgr := manager.NewInstanceManager(cfg)
+	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}

 	// Test initial state
-	instances, err := manager.ListInstances()
+	instances, err := mgr.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
@@ -41,217 +40,6 @@ func TestNewInstanceManager(t *testing.T) {
 	}
 }

-func TestCreateInstance_Success(t *testing.T) {
-	manager := createTestManager()
-
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-			Port:  8080,
-		},
-	}
-
-	inst, err := manager.CreateInstance("test-instance", options)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	if inst.Name != "test-instance" {
-		t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
-	}
-	if inst.GetStatus() != instance.Stopped {
-		t.Error("New instance should not be running")
-	}
-	if inst.GetOptions().Port != 8080 {
-		t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
-	}
-}
-
-func TestCreateInstance_ValidationAndLimits(t *testing.T) {
-	// Test duplicate names
-	mngr := createTestManager()
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	_, err := mngr.CreateInstance("test-instance", options)
-	if err != nil {
-		t.Fatalf("First CreateInstance failed: %v", err)
-	}
-
-	// Try to create duplicate
-	_, err = mngr.CreateInstance("test-instance", options)
-	if err == nil {
-		t.Error("Expected error for duplicate instance name")
-	}
-	if !strings.Contains(err.Error(), "already exists") {
-		t.Errorf("Expected duplicate name error, got: %v", err)
-	}
-
-	// Test max instances limit
-	cfg := config.InstancesConfig{
-		PortRange:            [2]int{8000, 9000},
-		MaxInstances:         1, // Very low limit for testing
-		TimeoutCheckInterval: 5,
-	}
-	limitedManager := manager.NewInstanceManager(cfg)
-
-	_, err = limitedManager.CreateInstance("instance1", options)
-	if err != nil {
-		t.Fatalf("CreateInstance 1 failed: %v", err)
-	}
-
-	// This should fail due to max instances limit
-	_, err = limitedManager.CreateInstance("instance2", options)
-	if err == nil {
-		t.Error("Expected error when exceeding max instances limit")
-	}
-	if !strings.Contains(err.Error(), "maximum number of instances") {
-		t.Errorf("Expected max instances error, got: %v", err)
-	}
-}
-
-func TestPortManagement(t *testing.T) {
-	manager := createTestManager()
-
-	// Test auto port assignment
-	options1 := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	inst1, err := manager.CreateInstance("instance1", options1)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	port1 := inst1.GetOptions().Port
-	if port1 < 8000 || port1 > 9000 {
-		t.Errorf("Expected port in range 8000-9000, got %d", port1)
-	}
-
-	// Test port conflict detection
-	options2 := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model2.gguf",
-			Port:  port1, // Same port - should conflict
-		},
-	}
-
-	_, err = manager.CreateInstance("instance2", options2)
-	if err == nil {
-		t.Error("Expected error for port conflict")
-	}
-	if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
-		t.Errorf("Expected port conflict error, got: %v", err)
-	}
-
-	// Test port release on deletion
-	specificPort := 8080
-	options3 := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-			Port:  specificPort,
-		},
-	}
-
-	_, err = manager.CreateInstance("port-test", options3)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	err = manager.DeleteInstance("port-test")
-	if err != nil {
-		t.Fatalf("DeleteInstance failed: %v", err)
-	}
-
-	// Should be able to create new instance with same port
-	_, err = manager.CreateInstance("new-port-test", options3)
-	if err != nil {
-		t.Errorf("Expected to reuse port after deletion, got error: %v", err)
-	}
-}
-
-func TestInstanceOperations(t *testing.T) {
-	manager := createTestManager()
-
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	// Create instance
-	created, err := manager.CreateInstance("test-instance", options)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	// Get instance
-	retrieved, err := manager.GetInstance("test-instance")
-	if err != nil {
-		t.Fatalf("GetInstance failed: %v", err)
-	}
-	if retrieved.Name != created.Name {
-		t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
-	}
-
-	// Update instance
-	newOptions := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/new-model.gguf",
-			Port:  8081,
-		},
-	}
-
-	updated, err := manager.UpdateInstance("test-instance", newOptions)
-	if err != nil {
-		t.Fatalf("UpdateInstance failed: %v", err)
-	}
-	if updated.GetOptions().Model != "/path/to/new-model.gguf" {
-		t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
-	}
-
-	// List instances
-	instances, err := manager.ListInstances()
-	if err != nil {
-		t.Fatalf("ListInstances failed: %v", err)
-	}
-	if len(instances) != 1 {
-		t.Errorf("Expected 1 instance, got %d", len(instances))
-	}
-
-	// Delete instance
-	err = manager.DeleteInstance("test-instance")
-	if err != nil {
-		t.Fatalf("DeleteInstance failed: %v", err)
-	}
-
-	_, err = manager.GetInstance("test-instance")
-	if err == nil {
-		t.Error("Instance should not exist after deletion")
-	}
-
-	// Test operations on non-existent instances
-	_, err = manager.GetInstance("nonexistent")
-	if err == nil || !strings.Contains(err.Error(), "not found") {
-		t.Errorf("Expected 'not found' error, got: %v", err)
-	}
-
-	err = manager.DeleteInstance("nonexistent")
-	if err == nil || !strings.Contains(err.Error(), "not found") {
-		t.Errorf("Expected 'not found' error, got: %v", err)
-	}
-
-	_, err = manager.UpdateInstance("nonexistent", options)
-	if err == nil || !strings.Contains(err.Error(), "not found") {
-		t.Errorf("Expected 'not found' error, got: %v", err)
-	}
-}
-
 func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()

@@ -312,102 +100,9 @@ func TestPersistence(t *testing.T) {
 	}
 }

-func TestTimeoutFunctionality(t *testing.T) {
-	// Test timeout checker initialization
-	cfg := config.InstancesConfig{
-		PortRange:            [2]int{8000, 9000},
-		TimeoutCheckInterval: 10,
-		MaxInstances:         5,
-	}
-
-	manager := manager.NewInstanceManager(cfg)
-	if manager == nil {
-		t.Fatal("Manager should be initialized with timeout checker")
-	}
-	manager.Shutdown() // Clean up
-
-	// Test timeout configuration and logic without starting the actual process
-	testManager := createTestManager()
-	defer testManager.Shutdown()
-
-	idleTimeout := 1 // 1 minute
-	options := &instance.CreateInstanceOptions{
-		IdleTimeout: &idleTimeout,
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	inst, err := testManager.CreateInstance("timeout-test", options)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	// Test timeout configuration is properly set
-	if inst.GetOptions().IdleTimeout == nil {
-		t.Fatal("Instance should have idle timeout configured")
-	}
-	if *inst.GetOptions().IdleTimeout != 1 {
-		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
-	}
-
-	// Test timeout logic without actually starting the process
-	// Create a mock time provider to simulate timeout
-	mockTime := NewMockTimeProvider(time.Now())
-	inst.SetTimeProvider(mockTime)
-
-	// Set instance to running state so timeout logic can work
-	inst.SetStatus(instance.Running)
-
-	// Simulate instance being "running" for timeout check (without actual process)
-	// We'll test the ShouldTimeout logic directly
-	inst.UpdateLastRequestTime()
-
-	// Initially should not timeout (just updated)
-	if inst.ShouldTimeout() {
-		t.Error("Instance should not timeout immediately after request")
-	}
-
-	// Advance time to trigger timeout
-	mockTime.SetTime(time.Now().Add(2 * time.Minute))
-
-	// Now it should timeout
-	if !inst.ShouldTimeout() {
-		t.Error("Instance should timeout after idle period")
-	}
-
-	// Reset running state to avoid shutdown issues
-	inst.SetStatus(instance.Stopped)
-
-	// Test that instance without timeout doesn't timeout
-	noTimeoutOptions := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-		// No IdleTimeout set
-	}
-
-	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
-	if err != nil {
-		t.Fatalf("CreateInstance failed: %v", err)
-	}
-
-	noTimeoutInst.SetTimeProvider(mockTime)
-	noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
-	noTimeoutInst.UpdateLastRequestTime()
-
-	// Even with time advanced, should not timeout
-	if noTimeoutInst.ShouldTimeout() {
-		t.Error("Instance without timeout configuration should never timeout")
-	}
-
-	// Reset running state to avoid shutdown issues
-	noTimeoutInst.SetStatus(instance.Stopped)
-}
-
 func TestConcurrentAccess(t *testing.T) {
-	manager := createTestManager()
-	defer manager.Shutdown()
+	mgr := createTestManager()
+	defer mgr.Shutdown()

 	// Test concurrent operations
 	var wg sync.WaitGroup
@@ -424,7 +119,7 @@ func TestConcurrentAccess(t *testing.T) {
 				},
 			}
 			instanceName := fmt.Sprintf("concurrent-test-%d", index)
-			if _, err := manager.CreateInstance(instanceName, options); err != nil {
+			if _, err := mgr.CreateInstance(instanceName, options); err != nil {
 				errChan <- err
 			}
 		}(i)
@@ -435,7 +130,7 @@ func TestConcurrentAccess(t *testing.T) {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
-			if _, err := manager.ListInstances(); err != nil {
+			if _, err := mgr.ListInstances(); err != nil {
 				errChan <- err
 			}
 		}()
@@ -451,7 +146,7 @@ func TestConcurrentAccess(t *testing.T) {
 }

 func TestShutdown(t *testing.T) {
-	manager := createTestManager()
+	mgr := createTestManager()

 	// Create test instance
 	options := &instance.CreateInstanceOptions{
@@ -459,16 +154,16 @@ func TestShutdown(t *testing.T) {
 			Model: "/path/to/model.gguf",
 		},
 	}
-	_, err := manager.CreateInstance("test-instance", options)
+	_, err := mgr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}

 	// Shutdown should not panic
-	manager.Shutdown()
+	mgr.Shutdown()

 	// Multiple shutdowns should not panic
-	manager.Shutdown()
+	mgr.Shutdown()
 }

 // Helper function to create a test manager with standard config
@@ -485,25 +180,3 @@ func createTestManager() manager.InstanceManager {
 	}
 	return manager.NewInstanceManager(cfg)
 }
-
-// Helper for timeout tests
-type MockTimeProvider struct {
-	currentTime time.Time
-	mu          sync.RWMutex
-}
-
-func NewMockTimeProvider(t time.Time) *MockTimeProvider {
-	return &MockTimeProvider{currentTime: t}
-}
-
-func (m *MockTimeProvider) Now() time.Time {
-	m.mu.RLock()
-	defer m.mu.RUnlock()
-	return m.currentTime
-}
-
-func (m *MockTimeProvider) SetTime(t time.Time) {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	m.currentTime = t
-}
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -174,6 +174,7 @@ func (im *instanceManager) DeleteInstance(name string) error {
 func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
 	instance, exists := im.instances[name]
+	maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
 	im.mu.RUnlock()

 	if !exists {
@@ -183,7 +184,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
 		return instance, fmt.Errorf("instance with name %s is already running", name)
 	}

-	if len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 {
+	if maxRunningExceeded {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
 	}

@@ -201,6 +202,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
 	return instance, nil
 }

+func (im *instanceManager) IsMaxRunningInstancesReached() bool {
+	im.mu.RLock()
+	defer im.mu.RUnlock()
+
+	if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
+		return true
+	}
+
+	return false
+}
+
 // StopInstance stops a running instance and returns it.
 func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -0,0 +1,221 @@
+package manager_test
+
+import (
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/manager"
+	"strings"
+	"testing"
+)
+
+func TestCreateInstance_Success(t *testing.T) {
+	manager := createTestManager()
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  8080,
+		},
+	}
+
+	inst, err := manager.CreateInstance("test-instance", options)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	if inst.Name != "test-instance" {
+		t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
+	}
+	if inst.GetStatus() != instance.Stopped {
+		t.Error("New instance should not be running")
+	}
+	if inst.GetOptions().Port != 8080 {
+		t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
+	}
+}
+
+func TestCreateInstance_ValidationAndLimits(t *testing.T) {
+	// Test duplicate names
+	mngr := createTestManager()
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	_, err := mngr.CreateInstance("test-instance", options)
+	if err != nil {
+		t.Fatalf("First CreateInstance failed: %v", err)
+	}
+
+	// Try to create duplicate
+	_, err = mngr.CreateInstance("test-instance", options)
+	if err == nil {
+		t.Error("Expected error for duplicate instance name")
+	}
+	if !strings.Contains(err.Error(), "already exists") {
+		t.Errorf("Expected duplicate name error, got: %v", err)
+	}
+
+	// Test max instances limit
+	cfg := config.InstancesConfig{
+		PortRange:            [2]int{8000, 9000},
+		MaxInstances:         1, // Very low limit for testing
+		TimeoutCheckInterval: 5,
+	}
+	limitedManager := manager.NewInstanceManager(cfg)
+
+	_, err = limitedManager.CreateInstance("instance1", options)
+	if err != nil {
+		t.Fatalf("CreateInstance 1 failed: %v", err)
+	}
+
+	// This should fail due to max instances limit
+	_, err = limitedManager.CreateInstance("instance2", options)
+	if err == nil {
+		t.Error("Expected error when exceeding max instances limit")
+	}
+	if !strings.Contains(err.Error(), "maximum number of instances") {
+		t.Errorf("Expected max instances error, got: %v", err)
+	}
+}
+
+func TestPortManagement(t *testing.T) {
+	manager := createTestManager()
+
+	// Test auto port assignment
+	options1 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst1, err := manager.CreateInstance("instance1", options1)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	port1 := inst1.GetOptions().Port
+	if port1 < 8000 || port1 > 9000 {
+		t.Errorf("Expected port in range 8000-9000, got %d", port1)
+	}
+
+	// Test port conflict detection
+	options2 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model2.gguf",
+			Port:  port1, // Same port - should conflict
+		},
+	}
+
+	_, err = manager.CreateInstance("instance2", options2)
+	if err == nil {
+		t.Error("Expected error for port conflict")
+	}
+	if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
+		t.Errorf("Expected port conflict error, got: %v", err)
+	}
+
+	// Test port release on deletion
+	specificPort := 8080
+	options3 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  specificPort,
+		},
+	}
+
+	_, err = manager.CreateInstance("port-test", options3)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	err = manager.DeleteInstance("port-test")
+	if err != nil {
+		t.Fatalf("DeleteInstance failed: %v", err)
+	}
+
+	// Should be able to create new instance with same port
+	_, err = manager.CreateInstance("new-port-test", options3)
+	if err != nil {
+		t.Errorf("Expected to reuse port after deletion, got error: %v", err)
+	}
+}
+
+func TestInstanceOperations(t *testing.T) {
+	manager := createTestManager()
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	// Create instance
+	created, err := manager.CreateInstance("test-instance", options)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	// Get instance
+	retrieved, err := manager.GetInstance("test-instance")
+	if err != nil {
+		t.Fatalf("GetInstance failed: %v", err)
+	}
+	if retrieved.Name != created.Name {
+		t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
+	}
+
+	// Update instance
+	newOptions := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/new-model.gguf",
+			Port:  8081,
+		},
+	}
+
+	updated, err := manager.UpdateInstance("test-instance", newOptions)
+	if err != nil {
+		t.Fatalf("UpdateInstance failed: %v", err)
+	}
+	if updated.GetOptions().Model != "/path/to/new-model.gguf" {
+		t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
+	}
+
+	// List instances
+	instances, err := manager.ListInstances()
+	if err != nil {
+		t.Fatalf("ListInstances failed: %v", err)
+	}
+	if len(instances) != 1 {
+		t.Errorf("Expected 1 instance, got %d", len(instances))
+	}
+
+	// Delete instance
+	err = manager.DeleteInstance("test-instance")
+	if err != nil {
+		t.Fatalf("DeleteInstance failed: %v", err)
+	}
+
+	_, err = manager.GetInstance("test-instance")
+	if err == nil {
+		t.Error("Instance should not exist after deletion")
+	}
+
+	// Test operations on non-existent instances
+	_, err = manager.GetInstance("nonexistent")
+	if err == nil || !strings.Contains(err.Error(), "not found") {
+		t.Errorf("Expected 'not found' error, got: %v", err)
+	}
+
+	err = manager.DeleteInstance("nonexistent")
+	if err == nil || !strings.Contains(err.Error(), "not found") {
+		t.Errorf("Expected 'not found' error, got: %v", err)
+	}
+
+	_, err = manager.UpdateInstance("nonexistent", options)
+	if err == nil || !strings.Contains(err.Error(), "not found") {
+		t.Errorf("Expected 'not found' error, got: %v", err)
+	}
+}
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -1,6 +1,10 @@
 package manager

-import "log"
+import (
+	"fmt"
+	"llamactl/pkg/instance"
+	"log"
+)

 func (im *instanceManager) checkAllTimeouts() {
 	im.mu.RLock()
@@ -24,3 +28,37 @@ func (im *instanceManager) checkAllTimeouts() {
 		}
 	}
 }
+
+// EvictLRUInstance finds and stops the least recently used running instance.
+func (im *instanceManager) EvictLRUInstance() error {
+	im.mu.RLock()
+	var lruInstance *instance.Process
+
+	for name, _ := range im.runningInstances {
+		inst := im.instances[name]
+		if inst == nil {
+			continue
+		}
+
+		if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
+			continue // Skip instances without idle timeout
+		}
+
+		if lruInstance == nil {
+			lruInstance = inst
+		}
+
+		if inst.LastRequestTime() < lruInstance.LastRequestTime() {
+			lruInstance = inst
+		}
+	}
+	im.mu.RUnlock()
+
+	if lruInstance == nil {
+		return fmt.Errorf("failed to find lru instance")
+	}
+
+	// Evict Instance
+	_, err := im.StopInstance(lruInstance.Name)
+	return err
+}
--- a/pkg/manager/timeout_test.go
+++ b/pkg/manager/timeout_test.go
@@ -0,0 +1,321 @@
+package manager_test
+
+import (
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/manager"
+	"sync"
+	"testing"
+	"time"
+)
+
+func TestTimeoutFunctionality(t *testing.T) {
+	// Test timeout checker initialization
+	cfg := config.InstancesConfig{
+		PortRange:            [2]int{8000, 9000},
+		TimeoutCheckInterval: 10,
+		MaxInstances:         5,
+	}
+
+	manager := manager.NewInstanceManager(cfg)
+	if manager == nil {
+		t.Fatal("Manager should be initialized with timeout checker")
+	}
+	manager.Shutdown() // Clean up
+
+	// Test timeout configuration and logic without starting the actual process
+	testManager := createTestManager()
+	defer testManager.Shutdown()
+
+	idleTimeout := 1 // 1 minute
+	options := &instance.CreateInstanceOptions{
+		IdleTimeout: &idleTimeout,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst, err := testManager.CreateInstance("timeout-test", options)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	// Test timeout configuration is properly set
+	if inst.GetOptions().IdleTimeout == nil {
+		t.Fatal("Instance should have idle timeout configured")
+	}
+	if *inst.GetOptions().IdleTimeout != 1 {
+		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
+	}
+
+	// Test timeout logic without actually starting the process
+	// Create a mock time provider to simulate timeout
+	mockTime := NewMockTimeProvider(time.Now())
+	inst.SetTimeProvider(mockTime)
+
+	// Set instance to running state so timeout logic can work
+	inst.SetStatus(instance.Running)
+
+	// Simulate instance being "running" for timeout check (without actual process)
+	// We'll test the ShouldTimeout logic directly
+	inst.UpdateLastRequestTime()
+
+	// Initially should not timeout (just updated)
+	if inst.ShouldTimeout() {
+		t.Error("Instance should not timeout immediately after request")
+	}
+
+	// Advance time to trigger timeout
+	mockTime.SetTime(time.Now().Add(2 * time.Minute))
+
+	// Now it should timeout
+	if !inst.ShouldTimeout() {
+		t.Error("Instance should timeout after idle period")
+	}
+
+	// Reset running state to avoid shutdown issues
+	inst.SetStatus(instance.Stopped)
+
+	// Test that instance without timeout doesn't timeout
+	noTimeoutOptions := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+		// No IdleTimeout set
+	}
+
+	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	noTimeoutInst.SetTimeProvider(mockTime)
+	noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
+	noTimeoutInst.UpdateLastRequestTime()
+
+	// Even with time advanced, should not timeout
+	if noTimeoutInst.ShouldTimeout() {
+		t.Error("Instance without timeout configuration should never timeout")
+	}
+
+	// Reset running state to avoid shutdown issues
+	noTimeoutInst.SetStatus(instance.Stopped)
+}
+
+func TestEvictLRUInstance_Success(t *testing.T) {
+	manager := createTestManager()
+	// Don't defer manager.Shutdown() - we'll handle cleanup manually
+
+	// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
+	options1 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model1.gguf",
+		},
+		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
+	}
+	options2 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model2.gguf",
+		},
+		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
+	}
+	options3 := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model3.gguf",
+		},
+		IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
+	}
+
+	inst1, err := manager.CreateInstance("instance-1", options1)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+	inst2, err := manager.CreateInstance("instance-2", options2)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+	inst3, err := manager.CreateInstance("instance-3", options3)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	// Set up mock time and set instances to running
+	mockTime := NewMockTimeProvider(time.Now())
+	inst1.SetTimeProvider(mockTime)
+	inst2.SetTimeProvider(mockTime)
+	inst3.SetTimeProvider(mockTime)
+
+	inst1.SetStatus(instance.Running)
+	inst2.SetStatus(instance.Running)
+	inst3.SetStatus(instance.Running)
+
+	// Set different last request times (oldest to newest)
+	// inst1: oldest (will be evicted)
+	inst1.UpdateLastRequestTime()
+
+	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
+	inst2.UpdateLastRequestTime()
+
+	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
+	inst3.UpdateLastRequestTime()
+
+	// Evict LRU instance (should be inst1)
+	err = manager.EvictLRUInstance()
+	if err != nil {
+		t.Fatalf("EvictLRUInstance failed: %v", err)
+	}
+
+	// Verify inst1 is stopped
+	if inst1.IsRunning() {
+		t.Error("Expected instance-1 to be stopped after eviction")
+	}
+
+	// Verify inst2 and inst3 are still running
+	if !inst2.IsRunning() {
+		t.Error("Expected instance-2 to still be running")
+	}
+	if !inst3.IsRunning() {
+		t.Error("Expected instance-3 to still be running")
+	}
+
+	// Clean up manually - set all to stopped and then shutdown
+	inst2.SetStatus(instance.Stopped)
+	inst3.SetStatus(instance.Stopped)
+}
+
+func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
+	// Helper function to create instances with different timeout configurations
+	createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
+		options := &instance.CreateInstanceOptions{
+			LlamaServerOptions: llamacpp.LlamaServerOptions{
+				Model: model,
+			},
+			IdleTimeout: timeout,
+		}
+		inst, err := manager.CreateInstance(name, options)
+		if err != nil {
+			t.Fatalf("CreateInstance failed: %v", err)
+		}
+		return inst
+	}
+
+	t.Run("no running instances", func(t *testing.T) {
+		manager := createTestManager()
+		defer manager.Shutdown()
+
+		err := manager.EvictLRUInstance()
+		if err == nil {
+			t.Error("Expected error when no running instances exist")
+		}
+		if err.Error() != "failed to find lru instance" {
+			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
+		}
+	})
+
+	t.Run("only instances without timeout", func(t *testing.T) {
+		manager := createTestManager()
+		defer manager.Shutdown()
+
+		// Create instances with various non-eligible timeout configurations
+		zeroTimeout := 0
+		negativeTimeout := -1
+		inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
+		inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
+		inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
+
+		// Set instances to running
+		instances := []*instance.Process{inst1, inst2, inst3}
+		for _, inst := range instances {
+			inst.SetStatus(instance.Running)
+		}
+		defer func() {
+			// Reset instances to stopped to avoid shutdown panics
+			for _, inst := range instances {
+				inst.SetStatus(instance.Stopped)
+			}
+		}()
+
+		// Try to evict - should fail because no eligible instances
+		err := manager.EvictLRUInstance()
+		if err == nil {
+			t.Error("Expected error when no eligible instances exist")
+		}
+		if err.Error() != "failed to find lru instance" {
+			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
+		}
+
+		// Verify all instances are still running
+		for i, inst := range instances {
+			if !inst.IsRunning() {
+				t.Errorf("Expected instance %d to still be running", i+1)
+			}
+		}
+	})
+
+	t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
+		manager := createTestManager()
+		defer manager.Shutdown()
+
+		// Create mix of instances: some with timeout enabled, some disabled
+		validTimeout := 1
+		zeroTimeout := 0
+		instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
+		instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
+		instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
+
+		// Set all instances to running
+		instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
+		for _, inst := range instances {
+			inst.SetStatus(instance.Running)
+			inst.UpdateLastRequestTime()
+		}
+		defer func() {
+			// Reset instances to stopped to avoid shutdown panics
+			for _, inst := range instances {
+				if inst.IsRunning() {
+					inst.SetStatus(instance.Stopped)
+				}
+			}
+		}()
+
+		// Evict LRU instance - should only consider the one with timeout
+		err := manager.EvictLRUInstance()
+		if err != nil {
+			t.Fatalf("EvictLRUInstance failed: %v", err)
+		}
+
+		// Verify only the instance with timeout was evicted
+		if instWithTimeout.IsRunning() {
+			t.Error("Expected with-timeout instance to be stopped after eviction")
+		}
+		if !instNoTimeout1.IsRunning() {
+			t.Error("Expected no-timeout-1 instance to still be running")
+		}
+		if !instNoTimeout2.IsRunning() {
+			t.Error("Expected no-timeout-2 instance to still be running")
+		}
+	})
+}
+
+// Helper for timeout tests
+type MockTimeProvider struct {
+	currentTime time.Time
+	mu          sync.RWMutex
+}
+
+func NewMockTimeProvider(t time.Time) *MockTimeProvider {
+	return &MockTimeProvider{currentTime: t}
+}
+
+func (m *MockTimeProvider) Now() time.Time {
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	return m.currentTime
+}
+
+func (m *MockTimeProvider) SetTime(t time.Time) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.currentTime = t
+}
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -581,7 +581,25 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}

 		if !inst.IsRunning() {
-			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+			allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
+			if !allowOnDemand {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
+
+			if h.InstanceManager.IsMaxRunningInstancesReached() {
+				if h.cfg.Instances.EnableLRUEviction {
+					err := h.InstanceManager.EvictLRUInstance()
+					if err != nil {
+						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
+						return
+					}
+				} else {
+					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+					return
+				}
+			}
+
 			// If on-demand start is enabled, start the instance
 			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
 				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
@@ -593,11 +611,6 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
 				return
 			}
-
-			} else {
-				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-				return
-			}
 		}

 		proxy, err := inst.GetProxy()