From 37107f76d513502e1a902626d557cb7caa3ce652 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sat, 19 Jul 2025 13:39:36 +0200 Subject: [PATCH] Set up instance proxy --- server/pkg/handlers.go | 55 ++++++++++++++++++++++++++++++++++++++++++ server/pkg/instance.go | 43 +++++++++++++++++++++++++++------ server/pkg/routes.go | 27 +++------------------ 3 files changed, 94 insertions(+), 31 deletions(-) diff --git a/server/pkg/handlers.go b/server/pkg/handlers.go index 269be60..c03c467 100644 --- a/server/pkg/handlers.go +++ b/server/pkg/handlers.go @@ -2,8 +2,10 @@ package llamactl import ( "encoding/json" + "fmt" "net/http" "os/exec" + "strings" "github.com/go-chi/chi/v5" ) @@ -338,3 +340,56 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { w.WriteHeader(http.StatusNoContent) } } + +func (h *Handler) ProxyToInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + instance, err := h.InstanceManager.GetInstance(name) + if err != nil { + http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) + return + } + + if !instance.Running { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } + + // Get the cached proxy for this instance + proxy, err := instance.GetProxy() + if err != nil { + http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) + return + } + + // Strip the "/api/v1/instances//proxy" prefix from the request URL + prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name) + proxyPath := r.URL.Path[len(prefix):] + + // Ensure the proxy path starts with "/" + if !strings.HasPrefix(proxyPath, "/") { + proxyPath = "/" + proxyPath + } + + // Modify the request to remove the proxy prefix + originalPath := r.URL.Path + r.URL.Path = proxyPath + + // Set forwarded headers + r.Header.Set("X-Forwarded-Host", 
r.Host) + r.Header.Set("X-Forwarded-Proto", "http") + + // Restore original path for logging purposes + defer func() { + r.URL.Path = originalPath + }() + + // Forward the request using the cached proxy + proxy.ServeHTTP(w, r) + } +} diff --git a/server/pkg/instance.go b/server/pkg/instance.go index 7f9c27d..c2f2fe6 100644 --- a/server/pkg/instance.go +++ b/server/pkg/instance.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "log" + "net/http/httputil" + "net/url" "os/exec" "sync" "time" @@ -24,13 +26,14 @@ type Instance struct { StdErrChan chan string `json:"-"` // Channel for sending error messages // internal - cmd *exec.Cmd `json:"-"` // Command to run the instance - ctx context.Context `json:"-"` // Context for managing the instance lifecycle - cancel context.CancelFunc `json:"-"` // Function to cancel the context - stdout io.ReadCloser `json:"-"` // Standard output stream - stderr io.ReadCloser `json:"-"` // Standard error stream - mu sync.Mutex `json:"-"` // Mutex for synchronizing access to the instance - restarts int `json:"-"` // Number of restarts + cmd *exec.Cmd `json:"-"` // Command to run the instance + ctx context.Context `json:"-"` // Context for managing the instance lifecycle + cancel context.CancelFunc `json:"-"` // Function to cancel the context + stdout io.ReadCloser `json:"-"` // Standard output stream + stderr io.ReadCloser `json:"-"` // Standard error stream + mu sync.Mutex `json:"-"` // Mutex for synchronizing access to the instance + restarts int `json:"-"` // Number of restarts + proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance } func NewInstance(name string, options *InstanceOptions) *Instance { @@ -59,6 +62,29 @@ func (i *Instance) SetOptions(options *InstanceOptions) { return } i.options = options + // Clear the proxy so it gets recreated with new options + i.proxy = nil +} + +// GetProxy returns the reverse proxy for this instance, creating it if needed +func (i *Instance) GetProxy() 
(*httputil.ReverseProxy, error) { + i.mu.Lock() + defer i.mu.Unlock() + + if i.proxy == nil { + if i.options == nil { + return nil, fmt.Errorf("instance %s has no options set", i.Name) + } + + targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port)) + if err != nil { + return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err) + } + + i.proxy = httputil.NewSingleHostReverseProxy(targetURL) + } + + return i.proxy, nil } func (i *Instance) Start() error { @@ -110,6 +136,9 @@ func (i *Instance) Stop() error { // Cancel the context to signal termination i.cancel() + // Clean up the proxy + i.proxy = nil + // Wait for process to exit (with timeout) done := make(chan error, 1) go func() { diff --git a/server/pkg/routes.go b/server/pkg/routes.go index 5aaf943..502971b 100644 --- a/server/pkg/routes.go +++ b/server/pkg/routes.go @@ -40,30 +40,9 @@ func SetupRouter(handler *Handler) *chi.Mux { // r.Get("/logs", handler.GetInstanceLogs()) // Get instance logs // Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server) - // r.Get("/health", handler.ProxyHealthCheck()) // Health check - // r.Post("/completion", handler.ProxyCompletion()) // Text completion - // r.Post("/tokenize", handler.ProxyTokenize()) // Tokenize text - // r.Post("/detokenize", handler.ProxyDetokenize()) // Detokenize tokens - // r.Post("/apply-template", handler.ProxyApplyTemplate()) // Apply chat template - // r.Post("/embedding", handler.ProxyEmbedding()) // Generate embeddings - // r.Post("/reranking", handler.ProxyReranking()) // Rerank documents - // r.Post("/rerank", handler.ProxyRerank()) // Rerank documents (alias) - // r.Post("/infill", handler.ProxyInfill()) // Code infilling - // r.Get("/props", handler.ProxyGetProps()) // Get server properties - // r.Post("/props", handler.ProxySetProps()) // Set server properties - // r.Post("/embeddings", handler.ProxyEmbeddings()) // Non-OpenAI embeddings - // r.Get("/slots", 
handler.ProxyGetSlots()) // Get slots state - // r.Get("/metrics", handler.ProxyGetMetrics()) // Prometheus metrics - // r.Post("/slots/{slot_id}", handler.ProxySlotAction()) // Slot actions (save/restore/erase) - // r.Get("/lora-adapters", handler.ProxyGetLoraAdapters()) // Get LoRA adapters - // r.Post("/lora-adapters", handler.ProxySetLoraAdapters()) // Set LoRA adapters - - // OpenAI-compatible endpoints (proxied to the actual llama.cpp server) - // r.Post("/v1/completions", handler.ProxyV1Completions()) // OpenAI completions - // r.Post("/v1/chat/completions", handler.ProxyV1ChatCompletions()) // OpenAI chat completions - // r.Post("/v1/embeddings", handler.ProxyV1Embeddings()) // OpenAI embeddings - // r.Post("/v1/rerank", handler.ProxyV1Rerank()) // OpenAI rerank - // r.Post("/v1/reranking", handler.ProxyV1Reranking()) // OpenAI reranking + r.Route("/proxy", func(r chi.Router) { + r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests + }) }) }) })