Merge pull request #75 from lordmathis/fix/delete-instance

fix: Prevent restarting instance from getting deleted
This commit is contained in:
2025-10-27 19:27:58 +01:00
committed by GitHub
11 changed files with 68 additions and 119 deletions

View File

@@ -5,6 +5,7 @@ import (
"llamactl/pkg/config"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"log"
"net/http"
"os"
"os/signal"
@@ -38,8 +39,7 @@ func main() {
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
log.Printf("Error loading config: %v\nUsing default configuration.", err)
}
// Set version information
@@ -50,13 +50,11 @@ func main() {
// Create the data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
log.Printf("Error creating config directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
}
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
}
}
@@ -81,7 +79,7 @@ func main() {
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
log.Printf("Error starting server: %v\n", err)
}
}()
@@ -90,7 +88,7 @@ func main() {
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
log.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}

View File

@@ -1,6 +1,7 @@
package config
import (
"fmt"
"log"
"os"
"path/filepath"
@@ -231,6 +232,11 @@ func LoadConfig(configPath string) (AppConfig, error) {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
// Validate port range
if cfg.Instances.PortRange[0] <= 0 || cfg.Instances.PortRange[1] <= 0 || cfg.Instances.PortRange[0] >= cfg.Instances.PortRange[1] {
return AppConfig{}, fmt.Errorf("invalid port range: %v", cfg.Instances.PortRange)
}
return cfg, nil
}

View File

@@ -7,13 +7,14 @@ import (
"os"
"strings"
"sync"
"sync/atomic"
"time"
)
type logger struct {
name string
logDir string
logFile *os.File
logFile atomic.Pointer[os.File]
logFilePath string
mu sync.RWMutex
}
@@ -47,11 +48,11 @@ func (i *logger) create() error {
return fmt.Errorf("failed to create stdout log file: %w", err)
}
i.logFile = logFile
i.logFile.Store(logFile)
// Write a startup marker to both files
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
return nil
}
@@ -102,11 +103,12 @@ func (i *logger) close() {
i.mu.Lock()
defer i.mu.Unlock()
if i.logFile != nil {
logFile := i.logFile.Swap(nil)
if logFile != nil {
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
i.logFile.Close()
i.logFile = nil
fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
logFile.Sync() // Ensure all buffered data is written to disk
logFile.Close()
}
}
@@ -117,9 +119,9 @@ func (i *logger) readOutput(reader io.ReadCloser) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if i.logFile != nil {
fmt.Fprintln(i.logFile, line)
i.logFile.Sync() // Ensure data is written to disk
// Use atomic load to avoid lock contention on every line
if logFile := i.logFile.Load(); logFile != nil {
fmt.Fprintln(logFile, line)
}
}
}

View File

@@ -10,7 +10,7 @@ import (
)
func TestInstanceTimeoutLogic(t *testing.T) {
testManager := createTestManager()
testManager := createTestManager(t)
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
@@ -42,7 +42,7 @@ func TestInstanceTimeoutLogic(t *testing.T) {
}
func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
testManager := createTestManager()
testManager := createTestManager(t)
defer testManager.Shutdown()
noTimeoutInst := createInstanceWithTimeout(t, testManager, "no-timeout-test", "/path/to/model.gguf", nil)
@@ -64,7 +64,7 @@ func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
@@ -121,7 +121,7 @@ func TestEvictLRUInstance_Success(t *testing.T) {
}
func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
err := manager.EvictLRUInstance()
@@ -134,7 +134,7 @@ func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
}
func TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled

View File

@@ -54,16 +54,10 @@ func New(globalConfig *config.AppConfig) InstanceManager {
// Initialize port allocator
portRange := globalConfig.Instances.PortRange
ports, err := newPortAllocator(portRange[0], portRange[1])
if err != nil {
log.Fatalf("Failed to create port allocator: %v", err)
}
ports := newPortAllocator(portRange[0], portRange[1])
// Initialize persistence
persistence, err := newInstancePersister(globalConfig.Instances.InstancesDir)
if err != nil {
log.Fatalf("Failed to create instance persister: %v", err)
}
persistence := newInstancePersister(globalConfig.Instances.InstancesDir)
// Initialize remote manager
remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)
@@ -116,7 +110,7 @@ func (im *instanceManager) Shutdown() {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", inst.Name)
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", inst.Name, err)
log.Printf("Error stopping instance %s: %v\n", inst.Name, err)
}
}(inst)
}

View File

@@ -85,7 +85,7 @@ func TestDeleteInstance_RemovesPersistenceFile(t *testing.T) {
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
// Test concurrent operations
@@ -113,7 +113,7 @@ func TestConcurrentAccess(t *testing.T) {
}
// Concurrent list operations
for i := 0; i < 3; i++ {
for range 3 {
wg.Add(1)
go func() {
defer wg.Done()
@@ -134,16 +134,17 @@ func TestConcurrentAccess(t *testing.T) {
// Helper functions for test configuration
func createTestAppConfig(instancesDir string) *config.AppConfig {
// Use 'sleep' as a test command instead of 'llama-server'
// This allows tests to run in CI environments without requiring actual LLM binaries
// The sleep command will be invoked with model paths and other args, which it ignores
// Use 'sh -c "sleep 999999"' as a test command instead of 'llama-server'
// The shell ignores all additional arguments passed after the command
return &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sleep",
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
MLX: config.BackendSettings{
Command: "sleep",
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
},
Instances: config.InstancesConfig{
@@ -162,28 +163,8 @@ func createTestAppConfig(instancesDir string) *config.AppConfig {
}
}
func createTestManager() manager.InstanceManager {
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sleep",
},
MLX: config.BackendSettings{
Command: "sleep",
},
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
MaxRunningInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
func createTestManager(t *testing.T) manager.InstanceManager {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
return manager.New(appConfig)
}

View File

@@ -330,7 +330,8 @@ func (im *instanceManager) DeleteInstance(name string) error {
lock.Lock()
defer im.unlockAndCleanup(name)
if inst.IsRunning() {
status := inst.GetStatus()
if status == instance.Running || status == instance.Restarting {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}

View File

@@ -10,7 +10,7 @@ import (
)
func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
mngr := createTestManager()
mngr := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
@@ -36,6 +36,7 @@ func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
}
func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
tempDir := t.TempDir()
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
@@ -44,6 +45,7 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
},
@@ -77,7 +79,7 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
}
func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
options1 := &instance.Options{
BackendOptions: backends.Options{
@@ -115,7 +117,7 @@ func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
}
func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
@@ -143,7 +145,7 @@ func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
}
func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
@@ -155,15 +157,13 @@ func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
},
}
_, err := mgr.CreateInstance("test-instance", options)
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
_, err = mgr.StartInstance("test-instance")
if err != nil {
t.Fatalf("StartInstance failed: %v", err)
}
// Simulate starting the instance
inst.SetStatus(instance.Running)
// Should fail to delete running instance
err = mgr.DeleteInstance("test-instance")
@@ -173,7 +173,7 @@ func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
}
func TestUpdateInstance(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
@@ -186,14 +186,14 @@ func TestUpdateInstance(t *testing.T) {
},
}
_, err := mgr.CreateInstance("test-instance", options)
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
_, err = mgr.StartInstance("test-instance")
if err != nil {
t.Fatalf("StartInstance failed: %v", err)
// Start the instance (will use 'yes' command from test config)
if err := inst.Start(); err != nil {
t.Fatalf("Failed to start instance: %v", err)
}
// Update running instance with new model
@@ -212,9 +212,9 @@ func TestUpdateInstance(t *testing.T) {
t.Fatalf("UpdateInstance failed: %v", err)
}
// Should still be running after update
// Should be running after update (was running before, should be restarted)
if !updated.IsRunning() {
t.Error("Instance should be running after update")
t.Errorf("Instance should be running after update, got: %v", updated.GetStatus())
}
if updated.GetOptions().BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
@@ -223,7 +223,7 @@ func TestUpdateInstance(t *testing.T) {
}
func TestUpdateInstance_ReleasesOldPort(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{

View File

@@ -15,35 +15,18 @@ import (
type instancePersister struct {
mu sync.Mutex
instancesDir string
enabled bool
}
// newInstancePersister creates a new instance persister.
// If instancesDir is empty, persistence is disabled.
func newInstancePersister(instancesDir string) (*instancePersister, error) {
if instancesDir == "" {
return &instancePersister{
enabled: false,
}, nil
}
// Ensure the instances directory exists
if err := os.MkdirAll(instancesDir, 0755); err != nil {
return nil, fmt.Errorf("failed to create instances directory: %w", err)
}
func newInstancePersister(instancesDir string) *instancePersister {
return &instancePersister{
instancesDir: instancesDir,
enabled: true,
}, nil
}
}
// Save persists an instance to disk with atomic write
func (p *instancePersister) save(inst *instance.Instance) error {
if !p.enabled {
return nil
}
if inst == nil {
return fmt.Errorf("cannot save nil instance")
}
@@ -103,10 +86,6 @@ func (p *instancePersister) save(inst *instance.Instance) error {
// Delete removes an instance's persistence file from disk.
func (p *instancePersister) delete(name string) error {
if !p.enabled {
return nil
}
validatedName, err := p.validateInstanceName(name)
if err != nil {
return err
@@ -131,10 +110,6 @@ func (p *instancePersister) delete(name string) error {
// LoadAll loads all persisted instances from disk.
// Returns a slice of instances and any errors encountered during loading.
func (p *instancePersister) loadAll() ([]*instance.Instance, error) {
if !p.enabled {
return nil, nil
}
p.mu.Lock()
defer p.mu.Unlock()

View File

@@ -24,15 +24,7 @@ type portAllocator struct {
}
// newPortAllocator creates a new port allocator for the given port range.
// Returns an error if the port range is invalid.
func newPortAllocator(minPort, maxPort int) (*portAllocator, error) {
if minPort <= 0 || maxPort <= 0 {
return nil, fmt.Errorf("invalid port range: min=%d, max=%d (must be > 0)", minPort, maxPort)
}
if minPort > maxPort {
return nil, fmt.Errorf("invalid port range: min=%d > max=%d", minPort, maxPort)
}
func newPortAllocator(minPort, maxPort int) *portAllocator {
rangeSize := maxPort - minPort + 1
bitmapSize := (rangeSize + 63) / 64 // Round up to nearest uint64
@@ -42,7 +34,7 @@ func newPortAllocator(minPort, maxPort int) (*portAllocator, error) {
minPort: minPort,
maxPort: maxPort,
rangeSize: rangeSize,
}, nil
}
}
// allocate finds and allocates the first available port for the given instance.

View File

@@ -1,7 +1,7 @@
package server
import (
"fmt"
"log"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
@@ -159,7 +159,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err)
log.Printf("Failed to set up WebUI: %v\n", err)
}
return r