Split large package into subpackages

2025-12-23 17:44:24 +00:00 · 2025-08-04 19:23:56 +02:00
parent a3c44dad1e
commit 6a7a9a2d09
21 changed files with 413 additions and 396 deletions
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -0,0 +1,276 @@
+package instance
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"log"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"os/exec"
+	"sync"
+	"time"
+)
+
+type CreateInstanceOptions struct {
+	// Auto restart
+	AutoRestart *bool `json:"auto_restart,omitempty"`
+	MaxRestarts *int  `json:"max_restarts,omitempty"`
+	// RestartDelay duration in seconds
+	RestartDelay *int `json:"restart_delay_seconds,omitempty"`
+
+	llamacpp.LlamaServerOptions `json:",inline"`
+}
+
+// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
+// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
+// which can interfere with proper unmarshaling of the pointer fields
+func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
+	// First, unmarshal into a temporary struct without the embedded type
+	type tempCreateOptions struct {
+		AutoRestart  *bool `json:"auto_restart,omitempty"`
+		MaxRestarts  *int  `json:"max_restarts,omitempty"`
+		RestartDelay *int  `json:"restart_delay_seconds,omitempty"`
+	}
+
+	var temp tempCreateOptions
+	if err := json.Unmarshal(data, &temp); err != nil {
+		return err
+	}
+
+	// Copy the pointer fields
+	c.AutoRestart = temp.AutoRestart
+	c.MaxRestarts = temp.MaxRestarts
+	c.RestartDelay = temp.RestartDelay
+
+	// Now unmarshal the embedded LlamaServerOptions
+	if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Instance represents a running instance of the llama server
+type Instance struct {
+	Name           string                 `json:"name"`
+	options        *CreateInstanceOptions `json:"-"`
+	globalSettings *config.InstancesConfig
+
+	// Status
+	Running bool `json:"running"`
+
+	// Creation time
+	Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
+
+	// Logging file
+	logger *InstanceLogger `json:"-"`
+
+	// internal
+	cmd      *exec.Cmd              `json:"-"` // Command to run the instance
+	ctx      context.Context        `json:"-"` // Context for managing the instance lifecycle
+	cancel   context.CancelFunc     `json:"-"` // Function to cancel the context
+	stdout   io.ReadCloser          `json:"-"` // Standard output stream
+	stderr   io.ReadCloser          `json:"-"` // Standard error stream
+	mu       sync.RWMutex           `json:"-"` // RWMutex for better read/write separation
+	restarts int                    `json:"-"` // Number of restarts
+	proxy    *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
+
+	// Restart control
+	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
+	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
+}
+
+// validateAndCopyOptions validates and creates a deep copy of the provided options
+// It applies validation rules and returns a safe copy
+func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
+	optionsCopy := &CreateInstanceOptions{}
+
+	if options != nil {
+		// Copy the embedded LlamaServerOptions
+		optionsCopy.LlamaServerOptions = options.LlamaServerOptions
+
+		// Copy and validate pointer fields
+		if options.AutoRestart != nil {
+			autoRestart := *options.AutoRestart
+			optionsCopy.AutoRestart = &autoRestart
+		}
+
+		if options.MaxRestarts != nil {
+			maxRestarts := *options.MaxRestarts
+			if maxRestarts < 0 {
+				log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
+				maxRestarts = 0
+			}
+			optionsCopy.MaxRestarts = &maxRestarts
+		}
+
+		if options.RestartDelay != nil {
+			restartDelay := *options.RestartDelay
+			if restartDelay < 0 {
+				log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
+				restartDelay = 0
+			}
+			optionsCopy.RestartDelay = &restartDelay
+		}
+	}
+
+	return optionsCopy
+}
+
+// applyDefaultOptions applies default values from global settings to any nil options
+func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.InstancesConfig) {
+	if globalSettings == nil {
+		return
+	}
+
+	if options.AutoRestart == nil {
+		defaultAutoRestart := globalSettings.DefaultAutoRestart
+		options.AutoRestart = &defaultAutoRestart
+	}
+
+	if options.MaxRestarts == nil {
+		defaultMaxRestarts := globalSettings.DefaultMaxRestarts
+		options.MaxRestarts = &defaultMaxRestarts
+	}
+
+	if options.RestartDelay == nil {
+		defaultRestartDelay := globalSettings.DefaultRestartDelay
+		options.RestartDelay = &defaultRestartDelay
+	}
+}
+
+// NewInstance creates a new instance with the given name, log path, and options
+func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions) *Instance {
+	// Validate and copy options
+	optionsCopy := validateAndCopyOptions(name, options)
+	// Apply defaults
+	applyDefaultOptions(optionsCopy, globalSettings)
+	// Create the instance logger
+	logger := NewInstanceLogger(name, globalSettings.LogsDir)
+
+	return &Instance{
+		Name:           name,
+		options:        optionsCopy,
+		globalSettings: globalSettings,
+		logger:         logger,
+
+		Running: false,
+
+		Created: time.Now().Unix(),
+	}
+}
+
+func (i *Instance) GetOptions() *CreateInstanceOptions {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	return i.options
+}
+
+func (i *Instance) SetOptions(options *CreateInstanceOptions) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	if options == nil {
+		log.Println("Warning: Attempted to set nil options on instance", i.Name)
+		return
+	}
+
+	// Validate and copy options and apply defaults
+	optionsCopy := validateAndCopyOptions(i.Name, options)
+	applyDefaultOptions(optionsCopy, i.globalSettings)
+
+	i.options = optionsCopy
+	// Clear the proxy so it gets recreated with new options
+	i.proxy = nil
+}
+
+// GetProxy returns the reverse proxy for this instance, creating it if needed
+func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	if i.proxy != nil {
+		return i.proxy, nil
+	}
+
+	if i.options == nil {
+		return nil, fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
+	}
+
+	proxy := httputil.NewSingleHostReverseProxy(targetURL)
+
+	proxy.ModifyResponse = func(resp *http.Response) error {
+		// Remove CORS headers from llama-server response to avoid conflicts
+		// llamactl will add its own CORS headers
+		resp.Header.Del("Access-Control-Allow-Origin")
+		resp.Header.Del("Access-Control-Allow-Methods")
+		resp.Header.Del("Access-Control-Allow-Headers")
+		resp.Header.Del("Access-Control-Allow-Credentials")
+		resp.Header.Del("Access-Control-Max-Age")
+		resp.Header.Del("Access-Control-Expose-Headers")
+		return nil
+	}
+
+	i.proxy = proxy
+
+	return i.proxy, nil
+}
+
+// MarshalJSON implements json.Marshaler for Instance
+func (i *Instance) MarshalJSON() ([]byte, error) {
+	// Use read lock since we're only reading data
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+
+	// Create a temporary struct with exported fields for JSON marshalling
+	temp := struct {
+		Name    string                 `json:"name"`
+		Options *CreateInstanceOptions `json:"options,omitempty"`
+		Running bool                   `json:"running"`
+		Created int64                  `json:"created,omitempty"`
+	}{
+		Name:    i.Name,
+		Options: i.options,
+		Running: i.Running,
+		Created: i.Created,
+	}
+
+	return json.Marshal(temp)
+}
+
+// UnmarshalJSON implements json.Unmarshaler for Instance
+func (i *Instance) UnmarshalJSON(data []byte) error {
+	// Create a temporary struct for unmarshalling
+	temp := struct {
+		Name    string                 `json:"name"`
+		Options *CreateInstanceOptions `json:"options,omitempty"`
+		Running bool                   `json:"running"`
+		Created int64                  `json:"created,omitempty"`
+	}{}
+
+	if err := json.Unmarshal(data, &temp); err != nil {
+		return err
+	}
+
+	// Set the fields
+	i.Name = temp.Name
+	i.Running = temp.Running
+	i.Created = temp.Created
+
+	// Handle options with validation but no defaults
+	if temp.Options != nil {
+		i.options = validateAndCopyOptions(i.Name, temp.Options)
+	}
+
+	return nil
+}
--- a/pkg/instance/instance_test.go
+++ b/pkg/instance/instance_test.go
@@ -0,0 +1,444 @@
+package instance_test
+
+import (
+	"encoding/json"
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/testutil"
+	"testing"
+)
+
+func TestNewInstance(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  8080,
+		},
+	}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options)
+
+	if instance.Name != "test-instance" {
+		t.Errorf("Expected name 'test-instance', got %q", instance.Name)
+	}
+	if instance.Running {
+		t.Error("New instance should not be running")
+	}
+
+	// Check that options were properly set with defaults applied
+	opts := instance.GetOptions()
+	if opts.Model != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
+	}
+	if opts.Port != 8080 {
+		t.Errorf("Expected port 8080, got %d", opts.Port)
+	}
+
+	// Check that defaults were applied
+	if opts.AutoRestart == nil || !*opts.AutoRestart {
+		t.Error("Expected AutoRestart to be true (default)")
+	}
+	if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
+		t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
+	}
+	if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
+		t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
+	}
+}
+
+func TestNewInstance_WithRestartOptions(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	// Override some defaults
+	autoRestart := false
+	maxRestarts := 10
+	restartDelay := 15
+
+	options := &instance.CreateInstanceOptions{
+		AutoRestart:  &autoRestart,
+		MaxRestarts:  &maxRestarts,
+		RestartDelay: &restartDelay,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options)
+	opts := instance.GetOptions()
+
+	// Check that explicit values override defaults
+	if opts.AutoRestart == nil || *opts.AutoRestart {
+		t.Error("Expected AutoRestart to be false (overridden)")
+	}
+	if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
+		t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
+	}
+	if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
+		t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
+	}
+}
+
+func TestNewInstance_ValidationAndDefaults(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	// Test with invalid negative values
+	invalidMaxRestarts := -5
+	invalidRestartDelay := -10
+
+	options := &instance.CreateInstanceOptions{
+		MaxRestarts:  &invalidMaxRestarts,
+		RestartDelay: &invalidRestartDelay,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options)
+	opts := instance.GetOptions()
+
+	// Check that negative values were corrected to 0
+	if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
+		t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
+	}
+	if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
+		t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
+	}
+}
+
+func TestSetOptions(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	initialOptions := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  8080,
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, initialOptions)
+
+	// Update options
+	newOptions := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/new-model.gguf",
+			Port:  8081,
+		},
+	}
+
+	inst.SetOptions(newOptions)
+	opts := inst.GetOptions()
+
+	if opts.Model != "/path/to/new-model.gguf" {
+		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
+	}
+	if opts.Port != 8081 {
+		t.Errorf("Expected updated port 8081, got %d", opts.Port)
+	}
+
+	// Check that defaults are still applied
+	if opts.AutoRestart == nil || !*opts.AutoRestart {
+		t.Error("Expected AutoRestart to be true (default)")
+	}
+}
+
+func TestSetOptions_NilOptions(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options)
+	originalOptions := instance.GetOptions()
+
+	// Try to set nil options
+	instance.SetOptions(nil)
+
+	// Options should remain unchanged
+	currentOptions := instance.GetOptions()
+	if currentOptions.Model != originalOptions.Model {
+		t.Error("Options should not change when setting nil options")
+	}
+}
+
+func TestGetProxy(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Host: "localhost",
+			Port: 8080,
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options)
+
+	// Get proxy for the first time
+	proxy1, err := inst.GetProxy()
+	if err != nil {
+		t.Fatalf("GetProxy failed: %v", err)
+	}
+	if proxy1 == nil {
+		t.Error("Expected proxy to be created")
+	}
+
+	// Get proxy again - should return cached version
+	proxy2, err := inst.GetProxy()
+	if err != nil {
+		t.Fatalf("GetProxy failed: %v", err)
+	}
+	if proxy1 != proxy2 {
+		t.Error("Expected cached proxy to be returned")
+	}
+}
+
+func TestMarshalJSON(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir:             "/tmp/test",
+		DefaultAutoRestart:  true,
+		DefaultMaxRestarts:  3,
+		DefaultRestartDelay: 5,
+	}
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  8080,
+		},
+	}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options)
+
+	data, err := json.Marshal(instance)
+	if err != nil {
+		t.Fatalf("JSON marshal failed: %v", err)
+	}
+
+	// Check that JSON contains expected fields
+	var result map[string]interface{}
+	err = json.Unmarshal(data, &result)
+	if err != nil {
+		t.Fatalf("JSON unmarshal failed: %v", err)
+	}
+
+	if result["name"] != "test-instance" {
+		t.Errorf("Expected name 'test-instance', got %v", result["name"])
+	}
+	if result["running"] != false {
+		t.Errorf("Expected running false, got %v", result["running"])
+	}
+
+	// Check that options are included
+	options_data, ok := result["options"]
+	if !ok {
+		t.Error("Expected options to be included in JSON")
+	}
+	options_map, ok := options_data.(map[string]interface{})
+	if !ok {
+		t.Error("Expected options to be a map")
+	}
+	if options_map["model"] != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
+	}
+}
+
+func TestUnmarshalJSON(t *testing.T) {
+	jsonData := `{
+		"name": "test-instance",
+		"running": true,
+		"options": {
+			"model": "/path/to/model.gguf",
+			"port": 8080,
+			"auto_restart": false,
+			"max_restarts": 5
+		}
+	}`
+
+	var inst instance.Instance
+	err := json.Unmarshal([]byte(jsonData), &inst)
+	if err != nil {
+		t.Fatalf("JSON unmarshal failed: %v", err)
+	}
+
+	if inst.Name != "test-instance" {
+		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
+	}
+	if !inst.Running {
+		t.Error("Expected running to be true")
+	}
+
+	opts := inst.GetOptions()
+	if opts == nil {
+		t.Fatal("Expected options to be set")
+	}
+	if opts.Model != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
+	}
+	if opts.Port != 8080 {
+		t.Errorf("Expected port 8080, got %d", opts.Port)
+	}
+	if opts.AutoRestart == nil || *opts.AutoRestart {
+		t.Error("Expected AutoRestart to be false")
+	}
+	if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
+		t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
+	}
+}
+
+func TestUnmarshalJSON_PartialOptions(t *testing.T) {
+	jsonData := `{
+		"name": "test-instance",
+		"running": false,
+		"options": {
+			"model": "/path/to/model.gguf"
+		}
+	}`
+
+	var inst instance.Instance
+	err := json.Unmarshal([]byte(jsonData), &inst)
+	if err != nil {
+		t.Fatalf("JSON unmarshal failed: %v", err)
+	}
+
+	opts := inst.GetOptions()
+	if opts.Model != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
+	}
+
+	// Note: Defaults are NOT applied during unmarshaling
+	// They should only be applied by NewInstance or SetOptions
+	if opts.AutoRestart != nil {
+		t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
+	}
+}
+
+func TestUnmarshalJSON_NoOptions(t *testing.T) {
+	jsonData := `{
+		"name": "test-instance",
+		"running": false
+	}`
+
+	var inst instance.Instance
+	err := json.Unmarshal([]byte(jsonData), &inst)
+	if err != nil {
+		t.Fatalf("JSON unmarshal failed: %v", err)
+	}
+
+	if inst.Name != "test-instance" {
+		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
+	}
+	if inst.Running {
+		t.Error("Expected running to be false")
+	}
+
+	opts := inst.GetOptions()
+	if opts != nil {
+		t.Error("Expected options to be nil when not provided in JSON")
+	}
+}
+
+func TestCreateInstanceOptionsValidation(t *testing.T) {
+	tests := []struct {
+		name          string
+		maxRestarts   *int
+		restartDelay  *int
+		expectedMax   int
+		expectedDelay int
+	}{
+		{
+			name:          "nil values",
+			maxRestarts:   nil,
+			restartDelay:  nil,
+			expectedMax:   0, // Should remain nil, but we can't easily test nil in this structure
+			expectedDelay: 0,
+		},
+		{
+			name:          "valid positive values",
+			maxRestarts:   testutil.IntPtr(10),
+			restartDelay:  testutil.IntPtr(30),
+			expectedMax:   10,
+			expectedDelay: 30,
+		},
+		{
+			name:          "zero values",
+			maxRestarts:   testutil.IntPtr(0),
+			restartDelay:  testutil.IntPtr(0),
+			expectedMax:   0,
+			expectedDelay: 0,
+		},
+		{
+			name:          "negative values should be corrected",
+			maxRestarts:   testutil.IntPtr(-5),
+			restartDelay:  testutil.IntPtr(-10),
+			expectedMax:   0,
+			expectedDelay: 0,
+		},
+	}
+
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			options := &instance.CreateInstanceOptions{
+				MaxRestarts:  tt.maxRestarts,
+				RestartDelay: tt.restartDelay,
+				LlamaServerOptions: llamacpp.LlamaServerOptions{
+					Model: "/path/to/model.gguf",
+				},
+			}
+
+			instance := instance.NewInstance("test", globalSettings, options)
+			opts := instance.GetOptions()
+
+			if tt.maxRestarts != nil {
+				if opts.MaxRestarts == nil {
+					t.Error("Expected MaxRestarts to be set")
+				} else if *opts.MaxRestarts != tt.expectedMax {
+					t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
+				}
+			}
+
+			if tt.restartDelay != nil {
+				if opts.RestartDelay == nil {
+					t.Error("Expected RestartDelay to be set")
+				} else if *opts.RestartDelay != tt.expectedDelay {
+					t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
+				}
+			}
+		})
+	}
+}
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -0,0 +1,257 @@
+package instance
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os/exec"
+	"runtime"
+	"syscall"
+	"time"
+)
+
+// Start starts the llama server instance and returns an error if it fails.
+func (i *Instance) Start() error {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	if i.Running {
+		return fmt.Errorf("instance %s is already running", i.Name)
+	}
+
+	// Safety check: ensure options are valid
+	if i.options == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Reset restart counter when manually starting (not during auto-restart)
+	// We can detect auto-restart by checking if restartCancel is set
+	if i.restartCancel == nil {
+		i.restarts = 0
+	}
+
+	// Create log files
+	if err := i.logger.Create(); err != nil {
+		return fmt.Errorf("failed to create log files: %w", err)
+	}
+
+	args := i.options.BuildCommandArgs()
+
+	i.ctx, i.cancel = context.WithCancel(context.Background())
+	i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
+
+	if runtime.GOOS != "windows" {
+		setProcAttrs(i.cmd)
+	}
+
+	var err error
+	i.stdout, err = i.cmd.StdoutPipe()
+	if err != nil {
+		i.logger.Close()
+		return fmt.Errorf("failed to get stdout pipe: %w", err)
+	}
+	i.stderr, err = i.cmd.StderrPipe()
+	if err != nil {
+		i.stdout.Close()
+		i.logger.Close()
+		return fmt.Errorf("failed to get stderr pipe: %w", err)
+	}
+
+	if err := i.cmd.Start(); err != nil {
+		return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
+	}
+
+	i.Running = true
+
+	// Create channel for monitor completion signaling
+	i.monitorDone = make(chan struct{})
+
+	go i.logger.readOutput(i.stdout)
+	go i.logger.readOutput(i.stderr)
+
+	go i.monitorProcess()
+
+	return nil
+}
+
+// Stop terminates the subprocess
+func (i *Instance) Stop() error {
+	i.mu.Lock()
+
+	if !i.Running {
+		// Even if not running, cancel any pending restart
+		if i.restartCancel != nil {
+			i.restartCancel()
+			i.restartCancel = nil
+			log.Printf("Cancelled pending restart for instance %s", i.Name)
+		}
+		i.mu.Unlock()
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	// Cancel any pending restart
+	if i.restartCancel != nil {
+		i.restartCancel()
+		i.restartCancel = nil
+	}
+
+	// Set running to false first to signal intentional stop
+	i.Running = false
+
+	// Clean up the proxy
+	i.proxy = nil
+
+	// Get the monitor done channel before releasing the lock
+	monitorDone := i.monitorDone
+
+	i.mu.Unlock()
+
+	// Stop the process with SIGINT
+	if i.cmd.Process != nil {
+		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
+			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
+		}
+	}
+
+	select {
+	case <-monitorDone:
+		// Process exited normally
+	case <-time.After(30 * time.Second):
+		// Force kill if it doesn't exit within 30 seconds
+		if i.cmd.Process != nil {
+			killErr := i.cmd.Process.Kill()
+			if killErr != nil {
+				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
+			}
+			log.Printf("Instance %s did not stop in time, force killed", i.Name)
+
+			// Wait a bit more for the monitor to finish after force kill
+			select {
+			case <-monitorDone:
+				// Monitor completed after force kill
+			case <-time.After(2 * time.Second):
+				log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
+			}
+		}
+	}
+
+	i.logger.Close()
+
+	return nil
+}
+
+func (i *Instance) monitorProcess() {
+	defer func() {
+		i.mu.Lock()
+		if i.monitorDone != nil {
+			close(i.monitorDone)
+			i.monitorDone = nil
+		}
+		i.mu.Unlock()
+	}()
+
+	err := i.cmd.Wait()
+
+	i.mu.Lock()
+
+	// Check if the instance was intentionally stopped
+	if !i.Running {
+		i.mu.Unlock()
+		return
+	}
+
+	i.Running = false
+	i.logger.Close()
+
+	// Cancel any existing restart context since we're handling a new exit
+	if i.restartCancel != nil {
+		i.restartCancel()
+		i.restartCancel = nil
+	}
+
+	// Log the exit
+	if err != nil {
+		log.Printf("Instance %s crashed with error: %v", i.Name, err)
+		// Handle restart while holding the lock, then release it
+		i.handleRestart()
+	} else {
+		log.Printf("Instance %s exited cleanly", i.Name)
+		i.mu.Unlock()
+	}
+}
+
+// handleRestart manages the restart process while holding the lock
+func (i *Instance) handleRestart() {
+	// Validate restart conditions and get safe parameters
+	shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
+	if !shouldRestart {
+		i.mu.Unlock()
+		return
+	}
+
+	i.restarts++
+	log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
+		i.Name, i.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
+
+	// Create a cancellable context for the restart delay
+	restartCtx, cancel := context.WithCancel(context.Background())
+	i.restartCancel = cancel
+
+	// Release the lock before sleeping
+	i.mu.Unlock()
+
+	// Use context-aware sleep so it can be cancelled
+	select {
+	case <-time.After(time.Duration(restartDelay) * time.Second):
+		// Sleep completed normally, continue with restart
+	case <-restartCtx.Done():
+		// Restart was cancelled
+		log.Printf("Restart cancelled for instance %s", i.Name)
+		return
+	}
+
+	// Restart the instance
+	if err := i.Start(); err != nil {
+		log.Printf("Failed to restart instance %s: %v", i.Name, err)
+	} else {
+		log.Printf("Successfully restarted instance %s", i.Name)
+		// Clear the cancel function
+		i.mu.Lock()
+		i.restartCancel = nil
+		i.mu.Unlock()
+	}
+}
+
+// validateRestartConditions checks if the instance should be restarted and returns the parameters
+func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
+	if i.options == nil {
+		log.Printf("Instance %s not restarting: options are nil", i.Name)
+		return false, 0, 0
+	}
+
+	if i.options.AutoRestart == nil || !*i.options.AutoRestart {
+		log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
+		return false, 0, 0
+	}
+
+	if i.options.MaxRestarts == nil {
+		log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
+		return false, 0, 0
+	}
+
+	if i.options.RestartDelay == nil {
+		log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
+		return false, 0, 0
+	}
+
+	// Values are already validated during unmarshaling/SetOptions
+	maxRestarts = *i.options.MaxRestarts
+	restartDelay = *i.options.RestartDelay
+
+	if i.restarts >= maxRestarts {
+		log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, maxRestarts)
+		return false, 0, 0
+	}
+
+	return true, maxRestarts, restartDelay
+}
--- a/pkg/instance/logging.go
+++ b/pkg/instance/logging.go
@@ -0,0 +1,118 @@
+package instance
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"time"
+)
+
+type InstanceLogger struct {
+	name        string
+	logDir      string
+	logFile     *os.File
+	logFilePath string
+}
+
+func NewInstanceLogger(name string, logDir string) *InstanceLogger {
+	return &InstanceLogger{
+		name:   name,
+		logDir: logDir,
+	}
+}
+
+// Create creates and opens the log files for stdout and stderr
+func (i *InstanceLogger) Create() error {
+	if i.logDir == "" {
+		return fmt.Errorf("logDir is empty for instance %s", i.name)
+	}
+
+	// Set up instance logs
+	logPath := i.logDir + "/" + i.name + ".log"
+
+	i.logFilePath = logPath
+	if err := os.MkdirAll(i.logDir, 0755); err != nil {
+		return fmt.Errorf("failed to create log directory: %w", err)
+	}
+
+	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		return fmt.Errorf("failed to create stdout log file: %w", err)
+	}
+
+	i.logFile = logFile
+
+	// Write a startup marker to both files
+	timestamp := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
+
+	return nil
+}
+
+// GetLogs retrieves the last n lines of logs from the instance
+func (i *Instance) GetLogs(num_lines int) (string, error) {
+	i.mu.RLock()
+	logFileName := i.logger.logFilePath
+	i.mu.RUnlock()
+
+	if logFileName == "" {
+		return "", fmt.Errorf("log file not created for instance %s", i.Name)
+	}
+
+	file, err := os.Open(logFileName)
+	if err != nil {
+		return "", fmt.Errorf("failed to open log file: %w", err)
+	}
+	defer file.Close()
+
+	if num_lines <= 0 {
+		content, err := io.ReadAll(file)
+		if err != nil {
+			return "", fmt.Errorf("failed to read log file: %w", err)
+		}
+		return string(content), nil
+	}
+
+	var lines []string
+	scanner := bufio.NewScanner(file)
+
+	// Read all lines into a slice
+	for scanner.Scan() {
+		lines = append(lines, scanner.Text())
+	}
+
+	if err := scanner.Err(); err != nil {
+		return "", fmt.Errorf("error reading file: %w", err)
+	}
+
+	// Return the last N lines
+	start := max(len(lines)-num_lines, 0)
+
+	return strings.Join(lines[start:], "\n"), nil
+}
+
+// closeLogFile closes the log files
+func (i *InstanceLogger) Close() {
+	if i.logFile != nil {
+		timestamp := time.Now().Format("2006-01-02 15:04:05")
+		fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
+		i.logFile.Close()
+		i.logFile = nil
+	}
+}
+
+// readOutput reads from the given reader and writes lines to the log file
+func (i *InstanceLogger) readOutput(reader io.ReadCloser) {
+	defer reader.Close()
+
+	scanner := bufio.NewScanner(reader)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if i.logFile != nil {
+			fmt.Fprintln(i.logFile, line)
+			i.logFile.Sync() // Ensure data is written to disk
+		}
+	}
+}
--- a/pkg/instance/process_group_unix.go
+++ b/pkg/instance/process_group_unix.go
@@ -0,0 +1,15 @@
+//go:build !windows
+
+package instance
+
+import (
+	"os/exec"
+	"syscall"
+)
+
+func setProcAttrs(cmd *exec.Cmd) {
+	if cmd.SysProcAttr == nil {
+		cmd.SysProcAttr = &syscall.SysProcAttr{}
+	}
+	cmd.SysProcAttr.Setpgid = true
+}
--- a/pkg/instance/process_group_windows.go
+++ b/pkg/instance/process_group_windows.go
@@ -0,0 +1,9 @@
+//go:build windows
+
+package instance
+
+import "os/exec"
+
+func setProcAttrs(cmd *exec.Cmd) {
+	// No-op on Windows
+}