Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start

This commit is contained in:
2025-08-20 14:19:12 +02:00
parent 7b4adfa0cd
commit 287a5e0817
2 changed files with 97 additions and 2 deletions

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"log" "log"
"net/http"
"os/exec" "os/exec"
"runtime" "runtime"
"syscall" "syscall"
@@ -143,6 +144,85 @@ func (i *Process) Stop() error {
return nil return nil
} }
// WaitForHealthy blocks until the instance answers GET /health with
// 200 OK, or until the timeout elapses.
//
// timeout is expressed in seconds; a value <= 0 selects the default of 30
// seconds. The probe is sent through the handler returned by GetProxy: once
// immediately, then once per second. An error is returned if the instance is
// not marked as running, if the proxy cannot be obtained, or if no probe
// succeeded before the deadline.
func (i *Process) WaitForHealthy(timeout int) error {
	if !i.Running {
		return fmt.Errorf("instance %s is not running", i.Name)
	}

	// Non-positive timeouts fall back to the 30 second default.
	if timeout <= 0 {
		timeout = 30
	}

	// One deadline bounds the whole wait, including any in-flight probe.
	deadlineCtx, release := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
	defer release()

	forwarder, err := i.GetProxy()
	if err != nil {
		return fmt.Errorf("failed to get proxy for instance %s: %w", i.Name, err)
	}

	pollTick := time.NewTicker(1 * time.Second)
	defer pollTick.Stop()

	// probe sends a single GET /health through the proxy and reports whether
	// the recorded status was 200 OK. Any failure counts as "not healthy yet".
	probe := func() bool {
		healthReq, reqErr := http.NewRequestWithContext(deadlineCtx, "GET", "/health", nil)
		if reqErr != nil {
			return false
		}

		// Only the status code matters, so the response is captured by a
		// minimal in-memory ResponseWriter.
		rec := &healthResponseRecorder{headers: make(http.Header)}
		forwarder.ServeHTTP(rec, healthReq)

		return rec.statusCode == http.StatusOK
	}

	// The first probe runs right away; each later probe waits for a tick,
	// unless the deadline fires first.
	for healthy := probe(); !healthy; healthy = probe() {
		select {
		case <-deadlineCtx.Done():
			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
		case <-pollTick.C:
			// Fall through to the next probe.
		}
	}

	return nil
}
// healthResponseRecorder is a minimal http.ResponseWriter that remembers the
// status code of a response and discards its body. It is used to run a health
// probe through an http.Handler without a real client connection.
//
// The zero value is ready to use.
type healthResponseRecorder struct {
	statusCode int
	headers    http.Header
}

// Compile-time check that the recorder satisfies http.ResponseWriter.
var _ http.ResponseWriter = (*healthResponseRecorder)(nil)

// Header returns the header map handlers may write to. The map is allocated
// lazily so that a zero-value recorder does not hand out a nil map.
func (r *healthResponseRecorder) Header() http.Header {
	if r.headers == nil {
		r.headers = make(http.Header)
	}
	return r.headers
}

// Write discards the body but reports every byte as written.
//
// Returning fewer bytes than len(p) with a nil error violates the io.Writer
// contract and makes copy loops fail with io.ErrShortWrite, so the full
// length is always reported. As with a real ResponseWriter, a Write before
// any WriteHeader implies a 200 OK status.
func (r *healthResponseRecorder) Write(p []byte) (int, error) {
	if r.statusCode == 0 {
		r.statusCode = http.StatusOK
	}
	return len(p), nil
}

// WriteHeader records the response status code. The most recent call wins.
func (r *healthResponseRecorder) WriteHeader(statusCode int) {
	r.statusCode = statusCode
}
func (i *Process) monitorProcess() { func (i *Process) monitorProcess() {
defer func() { defer func() {
i.mu.Lock() i.mu.Lock()

View File

@@ -575,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
} }
if !inst.Running { if !inst.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable) if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
return // If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(120); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} else {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
} }
proxy, err := inst.GetProxy() proxy, err := inst.GetProxy()