Files
llamactl/pkg/server/handlers.go

70 lines
1.8 KiB
Go

package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"time"
)
// errorResponse is the JSON body emitted for a failed request.
type errorResponse struct {
	// Error carries the error code; it is always serialized under "error".
	Error string `json:"error"`
	// Details carries optional extra text; it is omitted from the JSON
	// output when empty.
	Details string `json:"details,omitempty"`
}
// writeError sends a JSON error body to the client.
//
// It sets the Content-Type header to application/json, writes the given HTTP
// status code, and then encodes an errorResponse whose Error field is code and
// whose Details field is details.
func writeError(w http.ResponseWriter, status int, code, details string) {
	payload := errorResponse{
		Error:   code,
		Details: details,
	}

	// Headers must be set before WriteHeader; later changes are not sent.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	// The status line has already been written, so an encoding or write
	// failure cannot be reported to the client; the error is intentionally
	// discarded.
	encoder := json.NewEncoder(w)
	_ = encoder.Encode(payload)
}
// Handler bundles the dependencies shared by the server's request handlers.
type Handler struct {
	// InstanceManager is used to start instances, query whether the
	// running-instance limit has been reached, and evict instances.
	InstanceManager manager.InstanceManager

	// cfg is the application configuration; its Instances section supplies
	// the LRU-eviction switch and the on-demand start timeout.
	cfg config.AppConfig

	// httpClient is the HTTP client created by NewHandler.
	// NOTE(review): presumably used for outbound requests to instances;
	// confirm against the callers, which are not visible here.
	httpClient *http.Client
}
// NewHandler returns a Handler wired to the given instance manager and
// application configuration. The handler gets its own HTTP client with a
// 30-second overall request timeout.
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
	client := &http.Client{Timeout: 30 * time.Second}

	handler := &Handler{
		InstanceManager: im,
		cfg:             cfg,
		httpClient:      client,
	}
	return handler
}
// ensureInstanceRunning starts inst on demand and waits until it is healthy.
//
// The steps, in order:
//  1. Reject the request unless the instance's options explicitly enable
//     on-demand start.
//  2. If the manager reports that the running-instance limit is reached,
//     evict the LRU instance when eviction is enabled in the configuration;
//     otherwise fail.
//  3. Start the instance through the manager.
//  4. Wait for the instance to report healthy, bounded by the configured
//     on-demand start timeout.
//
// It returns nil on success, or an error describing the step that failed.
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
	// On-demand start is opt-in: missing options or an unset flag count as
	// disabled.
	opts := inst.GetOptions()
	if opts == nil || opts.OnDemandStart == nil || !*opts.OnDemandStart {
		return fmt.Errorf("instance is not running and on-demand start is not enabled")
	}

	// Make room for the new instance when the limit has been hit.
	if h.InstanceManager.IsMaxRunningInstancesReached() {
		if !h.cfg.Instances.EnableLRUEviction {
			return fmt.Errorf("cannot start instance, maximum number of instances reached")
		}
		if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
			return fmt.Errorf("cannot start instance, failed to evict instance: %w", evictErr)
		}
	}

	// Start the instance; only the error from StartInstance is needed here.
	_, startErr := h.InstanceManager.StartInstance(inst.Name)
	if startErr != nil {
		return fmt.Errorf("failed to start instance: %w", startErr)
	}

	// Block until the instance is healthy or the timeout elapses.
	healthErr := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout)
	if healthErr != nil {
		return fmt.Errorf("instance failed to become healthy: %w", healthErr)
	}

	return nil
}