mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 00:54:23 +00:00
Implement ensureInstanceRunning helper
This commit is contained in:
@@ -1,12 +1,26 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
"llamactl/pkg/manager"
|
"llamactl/pkg/manager"
|
||||||
"net/http"
|
"net/http"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// errorResponse is the JSON body used to report a failure.
type errorResponse struct {
	// Error is always serialized, under the "error" key.
	Error string `json:"error"`

	// Details is serialized under "details" and left out entirely when empty.
	Details string `json:"details,omitempty"`
}
|
||||||
|
|
||||||
|
func writeError(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
|
||||||
type Handler struct {
|
type Handler struct {
|
||||||
InstanceManager manager.InstanceManager
|
InstanceManager manager.InstanceManager
|
||||||
cfg config.AppConfig
|
cfg config.AppConfig
|
||||||
@@ -22,3 +36,34 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
|
||||||
|
options := inst.GetOptions()
|
||||||
|
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
|
||||||
|
if !allowOnDemand {
|
||||||
|
return fmt.Errorf("instance is not running and on-demand start is not enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||||
|
if h.cfg.Instances.EnableLRUEviction {
|
||||||
|
err := h.InstanceManager.EvictLRUInstance()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("cannot start instance, failed to evict instance: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("cannot start instance, maximum number of instances reached")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If on-demand start is enabled, start the instance
|
||||||
|
if _, err := h.InstanceManager.StartInstance(inst.Name); err != nil {
|
||||||
|
return fmt.Errorf("failed to start instance: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the instance to become healthy before proceeding
|
||||||
|
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
|
||||||
|
return fmt.Errorf("instance failed to become healthy: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -18,17 +18,6 @@ type ParseCommandRequest struct {
|
|||||||
Command string `json:"command"`
|
Command string `json:"command"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// errorResponse is the JSON body used to report a failure.
type errorResponse struct {
	// Error is always serialized, under the "error" key.
	Error string `json:"error"`

	// Details is serialized under "details" and left out entirely when empty.
	Details string `json:"details,omitempty"`
}
|
|
||||||
|
|
||||||
func writeError(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
@@ -60,35 +49,10 @@ func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if !inst.IsRemote() && !inst.IsRunning() {
|
if !inst.IsRemote() && !inst.IsRunning() && onDemandStart {
|
||||||
|
err := h.ensureInstanceRunning(inst)
|
||||||
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
|
||||||
if h.cfg.Instances.EnableLRUEviction {
|
|
||||||
err := h.InstanceManager.EvictLRUInstance()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "Failed to ensure instance is running: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If on-demand start is enabled, start the instance
|
|
||||||
if _, err := h.InstanceManager.StartInstance(validatedName); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,20 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI format
|
||||||
|
type OpenAIListInstancesResponse struct {
|
||||||
|
Object string `json:"object"`
|
||||||
|
Data []OpenAIInstance `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenAIInstance describes one instance (model) entry in OpenAI format.
type OpenAIInstance struct {
	// ID is serialized under the "id" key.
	ID string `json:"id"`

	// Object is serialized under the "object" key.
	Object string `json:"object"`

	// Created is serialized under the "created" key.
	// NOTE(review): presumably a Unix timestamp, as in the OpenAI API; confirm.
	Created int64 `json:"created"`

	// OwnedBy is serialized under the "owned_by" key.
	OwnedBy string `json:"owned_by"`
}
|
||||||
|
|
||||||
// OpenAIListInstances godoc
|
// OpenAIListInstances godoc
|
||||||
// @Summary List instances in OpenAI-compatible format
|
// @Summary List instances in OpenAI-compatible format
|
||||||
// @Description Returns a list of instances in a format compatible with OpenAI API
|
// @Description Returns a list of instances in a format compatible with OpenAI API
|
||||||
@@ -97,35 +111,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !inst.IsRemote() && !inst.IsRunning() {
|
if !inst.IsRemote() && !inst.IsRunning() {
|
||||||
options := inst.GetOptions()
|
err := h.ensureInstanceRunning(inst)
|
||||||
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
|
|
||||||
if !allowOnDemand {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
|
||||||
if h.cfg.Instances.EnableLRUEviction {
|
|
||||||
err := h.InstanceManager.EvictLRUInstance()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
http.Error(w, "Failed to ensure instance is running: "+err.Error(), http.StatusInternalServerError)
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If on-demand start is enabled, start the instance
|
|
||||||
if _, err := h.InstanceManager.StartInstance(validatedName); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
package server
|
|
||||||
|
|
||||||
type OpenAIListInstancesResponse struct {
|
|
||||||
Object string `json:"object"`
|
|
||||||
Data []OpenAIInstance `json:"data"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAIInstance is a single entry of an instance listing.
type OpenAIInstance struct {
	// ID is serialized under the "id" key.
	ID string `json:"id"`

	// Object is serialized under the "object" key.
	Object string `json:"object"`

	// Created is serialized under the "created" key.
	// NOTE(review): presumably a Unix timestamp, as in the OpenAI API; confirm.
	Created int64 `json:"created"`

	// OwnedBy is serialized under the "owned_by" key.
	OwnedBy string `json:"owned_by"`
}
|
|
||||||
Reference in New Issue
Block a user