Enhance instance management: improve on-demand start handling and add LRU eviction logic

2025-08-30 23:13:08 +02:00
parent 58cb36bd18
commit 4581d67165


@@ -581,7 +581,25 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
		}
		if !inst.IsRunning() {
			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
			allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
			if !allowOnDemand {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}
			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if h.cfg.Instances.EnableLRUEviction {
					err := h.InstanceManager.EvictLRUInstance()
					if err != nil {
						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
						return
					}
				} else {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
			}
			// If on-demand start is enabled, start the instance
			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
@@ -593,11 +611,6 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} else {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
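
The handler change leans on pieces this diff does not show: the EnableLRUEviction flag under the Instances config and the manager methods IsMaxRunningInstancesReached and EvictLRUInstance. The sketch below is one plausible shape for that manager-side logic, assuming the manager records a last-request timestamp for each running instance; apart from those three names, every identifier here is illustrative rather than taken from the repository.

package manager

import (
	"errors"
	"sync"
	"time"
)

// InstancesConfig sketches the section read as h.cfg.Instances above; only
// EnableLRUEviction appears in this diff, the other field is assumed.
type InstancesConfig struct {
	MaxRunningInstances int
	EnableLRUEviction   bool
}

// Instance stands in for the real managed llama-server instance type.
type Instance struct{ Name string }

// instanceManager is a hypothetical manager that tracks running instances
// and the time each one last served a proxied request.
type instanceManager struct {
	mu          sync.Mutex
	maxRunning  int
	running     map[string]*Instance
	lastRequest map[string]time.Time
}

// IsMaxRunningInstancesReached reports whether starting one more instance
// would exceed the configured limit.
func (m *instanceManager) IsMaxRunningInstancesReached() bool {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.maxRunning > 0 && len(m.running) >= m.maxRunning
}

// EvictLRUInstance stops the running instance with the oldest last-request
// timestamp, freeing a slot for the on-demand start in the handler above.
func (m *instanceManager) EvictLRUInstance() error {
	m.mu.Lock()
	defer m.mu.Unlock()
	if len(m.running) == 0 {
		return errors.New("no running instances to evict")
	}
	var lruName string
	var lruTime time.Time
	first := true
	for name := range m.running {
		if t := m.lastRequest[name]; first || t.Before(lruTime) {
			lruName, lruTime, first = name, t, false
		}
	}
	// stopLocked is an assumed internal helper that stops the process and
	// drops the instance from both maps.
	return m.stopLocked(lruName)
}

func (m *instanceManager) stopLocked(name string) error {
	delete(m.running, name)
	delete(m.lastRequest, name)
	return nil
}

Under these assumptions eviction is a linear scan over running instances, which is cheap for the handful of llama-server processes a single host can realistically run.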