mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-05 16:44:22 +00:00
153 lines
3.7 KiB
Go
153 lines
3.7 KiB
Go
package manager
|
|
|
|
import (
|
|
"fmt"
|
|
"llamactl/pkg/instance"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// lifecycleManager handles background timeout checking and LRU eviction.
|
|
// It properly coordinates shutdown to prevent races with the timeout checker.
|
|
type lifecycleManager struct {
|
|
registry *instanceRegistry
|
|
manager InstanceManager // For calling Stop/Evict operations
|
|
|
|
ticker *time.Ticker
|
|
checkInterval time.Duration
|
|
enableLRU bool
|
|
|
|
shutdownChan chan struct{}
|
|
shutdownDone chan struct{}
|
|
shutdownOnce sync.Once
|
|
}
|
|
|
|
// newLifecycleManager creates a new lifecycle manager.
|
|
func newLifecycleManager(
|
|
registry *instanceRegistry,
|
|
manager InstanceManager,
|
|
checkInterval time.Duration,
|
|
enableLRU bool,
|
|
) *lifecycleManager {
|
|
if checkInterval <= 0 {
|
|
checkInterval = 5 * time.Minute // Default to 5 minutes
|
|
}
|
|
|
|
return &lifecycleManager{
|
|
registry: registry,
|
|
manager: manager,
|
|
ticker: time.NewTicker(checkInterval),
|
|
checkInterval: checkInterval,
|
|
enableLRU: enableLRU,
|
|
shutdownChan: make(chan struct{}),
|
|
shutdownDone: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Start begins the timeout checking loop in a goroutine.
|
|
func (l *lifecycleManager) start() {
|
|
go l.timeoutCheckLoop()
|
|
}
|
|
|
|
// Stop gracefully stops the lifecycle manager.
|
|
// This ensures the timeout checker completes before instance cleanup begins.
|
|
func (l *lifecycleManager) stop() {
|
|
l.shutdownOnce.Do(func() {
|
|
close(l.shutdownChan)
|
|
<-l.shutdownDone // Wait for checker to finish (prevents shutdown race)
|
|
l.ticker.Stop()
|
|
})
|
|
}
|
|
|
|
// timeoutCheckLoop is the main loop that periodically checks for timeouts.
|
|
func (l *lifecycleManager) timeoutCheckLoop() {
|
|
defer close(l.shutdownDone) // Signal completion
|
|
|
|
for {
|
|
select {
|
|
case <-l.ticker.C:
|
|
l.checkTimeouts()
|
|
case <-l.shutdownChan:
|
|
return // Exit goroutine on shutdown
|
|
}
|
|
}
|
|
}
|
|
|
|
// checkTimeouts checks all instances for timeout and stops those that have timed out.
|
|
func (l *lifecycleManager) checkTimeouts() {
|
|
// Get all instances from registry
|
|
instances := l.registry.list()
|
|
|
|
var timeoutInstances []string
|
|
|
|
// Identify instances that should timeout
|
|
for _, inst := range instances {
|
|
// Skip remote instances - they are managed by their respective nodes
|
|
if inst.IsRemote() {
|
|
continue
|
|
}
|
|
|
|
// Only check running instances
|
|
if !l.registry.isRunning(inst.Name) {
|
|
continue
|
|
}
|
|
|
|
if inst.ShouldTimeout() {
|
|
timeoutInstances = append(timeoutInstances, inst.Name)
|
|
}
|
|
}
|
|
|
|
// Stop the timed-out instances
|
|
for _, name := range timeoutInstances {
|
|
log.Printf("Instance %s has timed out, stopping it", name)
|
|
if _, err := l.manager.StopInstance(name); err != nil {
|
|
log.Printf("Error stopping instance %s: %v", name, err)
|
|
} else {
|
|
log.Printf("Instance %s stopped successfully", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// EvictLRU finds and stops the least recently used running instance.
|
|
// This is called when max running instances limit is reached.
|
|
func (l *lifecycleManager) evictLRU() error {
|
|
if !l.enableLRU {
|
|
return fmt.Errorf("LRU eviction is not enabled")
|
|
}
|
|
|
|
// Get all running instances
|
|
runningInstances := l.registry.listRunning()
|
|
|
|
var lruInstance *instance.Instance
|
|
|
|
for _, inst := range runningInstances {
|
|
// Skip remote instances - they are managed by their respective nodes
|
|
if inst.IsRemote() {
|
|
continue
|
|
}
|
|
|
|
// Skip instances without idle timeout
|
|
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
|
|
continue
|
|
}
|
|
|
|
if lruInstance == nil {
|
|
lruInstance = inst
|
|
}
|
|
|
|
if inst.LastRequestTime() < lruInstance.LastRequestTime() {
|
|
lruInstance = inst
|
|
}
|
|
}
|
|
|
|
if lruInstance == nil {
|
|
return fmt.Errorf("failed to find lru instance")
|
|
}
|
|
|
|
// Evict the LRU instance
|
|
log.Printf("Evicting LRU instance %s", lruInstance.Name)
|
|
_, err := l.manager.StopInstance(lruInstance.Name)
|
|
return err
|
|
}
|