diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go index 390ecb0..a60c6ce 100644 --- a/pkg/server/handlers_backends.go +++ b/pkg/server/handlers_backends.go @@ -66,17 +66,16 @@ func (h *Handler) LlamaCppUIProxy() http.HandlerFunc { return } - proxy, err := inst.GetProxy() - if err != nil { - writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error()) - return - } - if !inst.IsRemote() { h.stripLlamaCppPrefix(r, inst.Name) } - proxy.ServeHTTP(w, r) + // Use instance's ServeHTTP which tracks inflight requests and handles shutting down state + err = inst.ServeHTTP(w, r) + if err != nil { + // Error is already handled in ServeHTTP (response written) + return + } } } @@ -118,17 +117,16 @@ func (h *Handler) LlamaCppProxy() http.HandlerFunc { } } - proxy, err := inst.GetProxy() - if err != nil { - writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error()) - return - } - if !inst.IsRemote() { h.stripLlamaCppPrefix(r, inst.Name) } - proxy.ServeHTTP(w, r) + // Use instance's ServeHTTP which tracks inflight requests and handles shutting down state + err = inst.ServeHTTP(w, r) + if err != nil { + // Error is already handled in ServeHTTP (response written) + return + } } } diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go index 0480f22..43bed3e 100644 --- a/pkg/server/handlers_instances.go +++ b/pkg/server/handlers_instances.go @@ -332,12 +332,6 @@ func (h *Handler) InstanceProxy() http.HandlerFunc { return } - proxy, err := inst.GetProxy() - if err != nil { - writeError(w, http.StatusInternalServerError, "proxy_failed", "Failed to get proxy: "+err.Error()) - return - } - if !inst.IsRemote() { // Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", inst.Name) @@ -348,6 +342,11 @@ func (h *Handler) InstanceProxy() http.HandlerFunc { r.Header.Set("X-Forwarded-Host", r.Header.Get("Host")) 
r.Header.Set("X-Forwarded-Proto", "http") - proxy.ServeHTTP(w, r) + // Use instance's ServeHTTP which tracks inflight requests and handles shutting down state + err = inst.ServeHTTP(w, r) + if err != nil { + // Error is already handled in ServeHTTP (response written) + return + } } } diff --git a/pkg/server/handlers_openai.go b/pkg/server/handlers_openai.go index d221200..0937e6a 100644 --- a/pkg/server/handlers_openai.go +++ b/pkg/server/handlers_openai.go @@ -114,16 +114,15 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc { } } - proxy, err := inst.GetProxy() - if err != nil { - writeError(w, http.StatusInternalServerError, "proxy_failed", err.Error()) - return - } - // Recreate the request body from the bytes we read r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) r.ContentLength = int64(len(bodyBytes)) - proxy.ServeHTTP(w, r) + // Use instance's ServeHTTP which tracks inflight requests and handles shutting down state + err = inst.ServeHTTP(w, r) + if err != nil { + // Error is already handled in ServeHTTP (response written) + return + } } }