mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 00:54:23 +00:00
Split large package into subpackages
This commit is contained in:
257
pkg/instance/lifecycle.go
Normal file
257
pkg/instance/lifecycle.go
Normal file
@@ -0,0 +1,257 @@
|
||||
package instance
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Start starts the llama server instance and returns an error if it fails.
|
||||
func (i *Instance) Start() error {
|
||||
i.mu.Lock()
|
||||
defer i.mu.Unlock()
|
||||
|
||||
if i.Running {
|
||||
return fmt.Errorf("instance %s is already running", i.Name)
|
||||
}
|
||||
|
||||
// Safety check: ensure options are valid
|
||||
if i.options == nil {
|
||||
return fmt.Errorf("instance %s has no options set", i.Name)
|
||||
}
|
||||
|
||||
// Reset restart counter when manually starting (not during auto-restart)
|
||||
// We can detect auto-restart by checking if restartCancel is set
|
||||
if i.restartCancel == nil {
|
||||
i.restarts = 0
|
||||
}
|
||||
|
||||
// Create log files
|
||||
if err := i.logger.Create(); err != nil {
|
||||
return fmt.Errorf("failed to create log files: %w", err)
|
||||
}
|
||||
|
||||
args := i.options.BuildCommandArgs()
|
||||
|
||||
i.ctx, i.cancel = context.WithCancel(context.Background())
|
||||
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
setProcAttrs(i.cmd)
|
||||
}
|
||||
|
||||
var err error
|
||||
i.stdout, err = i.cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
i.logger.Close()
|
||||
return fmt.Errorf("failed to get stdout pipe: %w", err)
|
||||
}
|
||||
i.stderr, err = i.cmd.StderrPipe()
|
||||
if err != nil {
|
||||
i.stdout.Close()
|
||||
i.logger.Close()
|
||||
return fmt.Errorf("failed to get stderr pipe: %w", err)
|
||||
}
|
||||
|
||||
if err := i.cmd.Start(); err != nil {
|
||||
return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
|
||||
}
|
||||
|
||||
i.Running = true
|
||||
|
||||
// Create channel for monitor completion signaling
|
||||
i.monitorDone = make(chan struct{})
|
||||
|
||||
go i.logger.readOutput(i.stdout)
|
||||
go i.logger.readOutput(i.stderr)
|
||||
|
||||
go i.monitorProcess()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop terminates the subprocess
|
||||
func (i *Instance) Stop() error {
|
||||
i.mu.Lock()
|
||||
|
||||
if !i.Running {
|
||||
// Even if not running, cancel any pending restart
|
||||
if i.restartCancel != nil {
|
||||
i.restartCancel()
|
||||
i.restartCancel = nil
|
||||
log.Printf("Cancelled pending restart for instance %s", i.Name)
|
||||
}
|
||||
i.mu.Unlock()
|
||||
return fmt.Errorf("instance %s is not running", i.Name)
|
||||
}
|
||||
|
||||
// Cancel any pending restart
|
||||
if i.restartCancel != nil {
|
||||
i.restartCancel()
|
||||
i.restartCancel = nil
|
||||
}
|
||||
|
||||
// Set running to false first to signal intentional stop
|
||||
i.Running = false
|
||||
|
||||
// Clean up the proxy
|
||||
i.proxy = nil
|
||||
|
||||
// Get the monitor done channel before releasing the lock
|
||||
monitorDone := i.monitorDone
|
||||
|
||||
i.mu.Unlock()
|
||||
|
||||
// Stop the process with SIGINT
|
||||
if i.cmd.Process != nil {
|
||||
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
|
||||
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-monitorDone:
|
||||
// Process exited normally
|
||||
case <-time.After(30 * time.Second):
|
||||
// Force kill if it doesn't exit within 30 seconds
|
||||
if i.cmd.Process != nil {
|
||||
killErr := i.cmd.Process.Kill()
|
||||
if killErr != nil {
|
||||
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
|
||||
}
|
||||
log.Printf("Instance %s did not stop in time, force killed", i.Name)
|
||||
|
||||
// Wait a bit more for the monitor to finish after force kill
|
||||
select {
|
||||
case <-monitorDone:
|
||||
// Monitor completed after force kill
|
||||
case <-time.After(2 * time.Second):
|
||||
log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i.logger.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *Instance) monitorProcess() {
|
||||
defer func() {
|
||||
i.mu.Lock()
|
||||
if i.monitorDone != nil {
|
||||
close(i.monitorDone)
|
||||
i.monitorDone = nil
|
||||
}
|
||||
i.mu.Unlock()
|
||||
}()
|
||||
|
||||
err := i.cmd.Wait()
|
||||
|
||||
i.mu.Lock()
|
||||
|
||||
// Check if the instance was intentionally stopped
|
||||
if !i.Running {
|
||||
i.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
i.Running = false
|
||||
i.logger.Close()
|
||||
|
||||
// Cancel any existing restart context since we're handling a new exit
|
||||
if i.restartCancel != nil {
|
||||
i.restartCancel()
|
||||
i.restartCancel = nil
|
||||
}
|
||||
|
||||
// Log the exit
|
||||
if err != nil {
|
||||
log.Printf("Instance %s crashed with error: %v", i.Name, err)
|
||||
// Handle restart while holding the lock, then release it
|
||||
i.handleRestart()
|
||||
} else {
|
||||
log.Printf("Instance %s exited cleanly", i.Name)
|
||||
i.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// handleRestart manages the restart process while holding the lock
|
||||
func (i *Instance) handleRestart() {
|
||||
// Validate restart conditions and get safe parameters
|
||||
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
|
||||
if !shouldRestart {
|
||||
i.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
i.restarts++
|
||||
log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
|
||||
i.Name, i.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
|
||||
|
||||
// Create a cancellable context for the restart delay
|
||||
restartCtx, cancel := context.WithCancel(context.Background())
|
||||
i.restartCancel = cancel
|
||||
|
||||
// Release the lock before sleeping
|
||||
i.mu.Unlock()
|
||||
|
||||
// Use context-aware sleep so it can be cancelled
|
||||
select {
|
||||
case <-time.After(time.Duration(restartDelay) * time.Second):
|
||||
// Sleep completed normally, continue with restart
|
||||
case <-restartCtx.Done():
|
||||
// Restart was cancelled
|
||||
log.Printf("Restart cancelled for instance %s", i.Name)
|
||||
return
|
||||
}
|
||||
|
||||
// Restart the instance
|
||||
if err := i.Start(); err != nil {
|
||||
log.Printf("Failed to restart instance %s: %v", i.Name, err)
|
||||
} else {
|
||||
log.Printf("Successfully restarted instance %s", i.Name)
|
||||
// Clear the cancel function
|
||||
i.mu.Lock()
|
||||
i.restartCancel = nil
|
||||
i.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// validateRestartConditions checks if the instance should be restarted and returns the parameters
|
||||
func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
|
||||
if i.options == nil {
|
||||
log.Printf("Instance %s not restarting: options are nil", i.Name)
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
if i.options.AutoRestart == nil || !*i.options.AutoRestart {
|
||||
log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
if i.options.MaxRestarts == nil {
|
||||
log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
if i.options.RestartDelay == nil {
|
||||
log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
// Values are already validated during unmarshaling/SetOptions
|
||||
maxRestarts = *i.options.MaxRestarts
|
||||
restartDelay = *i.options.RestartDelay
|
||||
|
||||
if i.restarts >= maxRestarts {
|
||||
log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, maxRestarts)
|
||||
return false, 0, 0
|
||||
}
|
||||
|
||||
return true, maxRestarts, restartDelay
|
||||
}
|
||||
Reference in New Issue
Block a user