mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 00:54:23 +00:00
Enhance instance management: improve on-demand start handling and add LRU eviction logic
This commit is contained in:
@@ -581,23 +581,36 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
if !inst.IsRunning() {
|
||||||
if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
|
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
|
||||||
// If on-demand start is enabled, start the instance
|
if !allowOnDemand {
|
||||||
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||||
|
if h.cfg.Instances.EnableLRUEviction {
|
||||||
|
err := h.InstanceManager.EvictLRUInstance()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If on-demand start is enabled, start the instance
|
||||||
|
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the instance to become healthy before proceeding
|
||||||
|
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
||||||
|
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
proxy, err := inst.GetProxy()
|
proxy, err := inst.GetProxy()
|
||||||
|
|||||||
Reference in New Issue
Block a user