Mirror of https://github.com/lordmathis/llamactl.git
Set up instance proxy

@@ -2,8 +2,10 @@ package llamactl
 
 import (
 	"encoding/json"
+	"fmt"
 	"net/http"
 	"os/exec"
+	"strings"
 
 	"github.com/go-chi/chi/v5"
 )

@@ -338,3 +340,56 @@ func (h *Handler) DeleteInstance() http.HandlerFunc {
 		w.WriteHeader(http.StatusNoContent)
 	}
 }
+
+func (h *Handler) ProxyToInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		instance, err := h.InstanceManager.GetInstance(name)
+		if err != nil {
+			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		if !instance.Running {
+			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+			return
+		}
+
+		// Get the cached proxy for this instance
+		proxy, err := instance.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
+		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
+		proxyPath := r.URL.Path[len(prefix):]
+
+		// Ensure the proxy path starts with "/"
+		if !strings.HasPrefix(proxyPath, "/") {
+			proxyPath = "/" + proxyPath
+		}
+
+		// Modify the request to remove the proxy prefix
+		originalPath := r.URL.Path
+		r.URL.Path = proxyPath
+
+		// Set forwarded headers
+		r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
+		r.Header.Set("X-Forwarded-Proto", "http")
+
+		// Restore original path for logging purposes
+		defer func() {
+			r.URL.Path = originalPath
+		}()
+
+		// Forward the request using the cached proxy
+		proxy.ServeHTTP(w, r)
+	}
+}

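The path rewrite in ProxyToInstance is plain string slicing. A small runnable sketch of that rewrite, with a made-up instance name ("llama1") and request path standing in for chi.URLParam(r, "name") and r.URL.Path:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical values; in the handler these come from
	// chi.URLParam(r, "name") and r.URL.Path.
	name := "llama1"
	requestPath := "/api/v1/instances/llama1/proxy/v1/chat/completions"

	// Drop the management-API prefix, as ProxyToInstance does...
	prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
	proxyPath := requestPath[len(prefix):]

	// ...and guarantee a leading slash before forwarding upstream.
	if !strings.HasPrefix(proxyPath, "/") {
		proxyPath = "/" + proxyPath
	}

	fmt.Println(proxyPath) // prints "/v1/chat/completions"
}
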
@@ -7,6 +7,8 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"net/http/httputil"
+	"net/url"
 	"os/exec"
 	"sync"
 	"time"

@@ -31,6 +33,7 @@ type Instance struct {
 	stderr   io.ReadCloser `json:"-"` // Standard error stream
 	mu       sync.Mutex    `json:"-"` // Mutex for synchronizing access to the instance
 	restarts int           `json:"-"` // Number of restarts
+	proxy    *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
 }
 
 func NewInstance(name string, options *InstanceOptions) *Instance {

@@ -59,6 +62,29 @@ func (i *Instance) SetOptions(options *InstanceOptions) {
 		return
 	}
 	i.options = options
+	// Clear the proxy so it gets recreated with new options
+	i.proxy = nil
+}
+
+// GetProxy returns the reverse proxy for this instance, creating it if needed
+func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	if i.proxy == nil {
+		if i.options == nil {
+			return nil, fmt.Errorf("instance %s has no options set", i.Name)
+		}
+
+		targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
+		}
+
+		i.proxy = httputil.NewSingleHostReverseProxy(targetURL)
+	}
+
+	return i.proxy, nil
 }
 
 func (i *Instance) Start() error {

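GetProxy builds the reverse proxy on first use and caches it on the struct under the instance mutex; SetOptions clears the cache so a reconfigured instance gets a proxy for its new host and port. A runnable sketch of the same create-and-cache pattern, using a simplified stand-in type (inst, with bare host/port fields and a made-up port) rather than the real Instance:

package main

import (
	"fmt"
	"net/http/httputil"
	"net/url"
	"sync"
)

// inst is an illustrative stand-in for Instance: just enough
// state to show the lazy create-and-cache behavior of GetProxy.
type inst struct {
	mu    sync.Mutex
	host  string
	port  int
	proxy *httputil.ReverseProxy
}

func (i *inst) getProxy() (*httputil.ReverseProxy, error) {
	i.mu.Lock()
	defer i.mu.Unlock()
	if i.proxy == nil {
		target, err := url.Parse(fmt.Sprintf("http://%s:%d", i.host, i.port))
		if err != nil {
			return nil, err
		}
		i.proxy = httputil.NewSingleHostReverseProxy(target)
	}
	return i.proxy, nil
}

func main() {
	i := &inst{host: "127.0.0.1", port: 8081} // hypothetical target
	p1, _ := i.getProxy()                     // first call builds the proxy
	p2, _ := i.getProxy()                     // second call returns the cached one
	fmt.Println(p1 == p2)                     // prints "true"
}
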
@@ -110,6 +136,9 @@ func (i *Instance) Stop() error {
 	// Cancel the context to signal termination
 	i.cancel()
+
+	// Clean up the proxy
+	i.proxy = nil
 
 	// Wait for process to exit (with timeout)
 	done := make(chan error, 1)
 	go func() {

@@ -40,30 +40,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
 				// r.Get("/logs", handler.GetInstanceLogs()) // Get instance logs
 
 				// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
-				// r.Get("/health", handler.ProxyHealthCheck()) // Health check
-				// r.Post("/completion", handler.ProxyCompletion()) // Text completion
-				// r.Post("/tokenize", handler.ProxyTokenize()) // Tokenize text
-				// r.Post("/detokenize", handler.ProxyDetokenize()) // Detokenize tokens
-				// r.Post("/apply-template", handler.ProxyApplyTemplate()) // Apply chat template
-				// r.Post("/embedding", handler.ProxyEmbedding()) // Generate embeddings
-				// r.Post("/reranking", handler.ProxyReranking()) // Rerank documents
-				// r.Post("/rerank", handler.ProxyRerank()) // Rerank documents (alias)
-				// r.Post("/infill", handler.ProxyInfill()) // Code infilling
-				// r.Get("/props", handler.ProxyGetProps()) // Get server properties
-				// r.Post("/props", handler.ProxySetProps()) // Set server properties
-				// r.Post("/embeddings", handler.ProxyEmbeddings()) // Non-OpenAI embeddings
-				// r.Get("/slots", handler.ProxyGetSlots()) // Get slots state
-				// r.Get("/metrics", handler.ProxyGetMetrics()) // Prometheus metrics
-				// r.Post("/slots/{slot_id}", handler.ProxySlotAction()) // Slot actions (save/restore/erase)
-				// r.Get("/lora-adapters", handler.ProxyGetLoraAdapters()) // Get LoRA adapters
-				// r.Post("/lora-adapters", handler.ProxySetLoraAdapters()) // Set LoRA adapters
-
-				// OpenAI-compatible endpoints (proxied to the actual llama.cpp server)
-				// r.Post("/v1/completions", handler.ProxyV1Completions()) // OpenAI completions
-				// r.Post("/v1/chat/completions", handler.ProxyV1ChatCompletions()) // OpenAI chat completions
-				// r.Post("/v1/embeddings", handler.ProxyV1Embeddings()) // OpenAI embeddings
-				// r.Post("/v1/rerank", handler.ProxyV1Rerank()) // OpenAI rerank
-				// r.Post("/v1/reranking", handler.ProxyV1Reranking()) // OpenAI reranking
+				r.Route("/proxy", func(r chi.Router) {
+					r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests
+				})
 			})
 		})
 	})

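chi's "/*" wildcard matches any remaining subpath, and HandleFunc registers it for every HTTP method, so the single ProxyToInstance handler covers all of the per-endpoint stubs deleted above. A runnable sketch of the same mounting, with an echo handler standing in for the real proxy handler and a made-up instance name:

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"

	"github.com/go-chi/chi/v5"
)

func main() {
	r := chi.NewRouter()
	r.Route("/api/v1/instances/{name}", func(r chi.Router) {
		r.Route("/proxy", func(r chi.Router) {
			// Catches GET/POST/... for any subpath under /proxy.
			r.HandleFunc("/*", func(w http.ResponseWriter, req *http.Request) {
				fmt.Fprintf(w, "instance=%s subpath=%s",
					chi.URLParam(req, "name"), chi.URLParam(req, "*"))
			})
		})
	})

	req := httptest.NewRequest(http.MethodPost,
		"/api/v1/instances/llama1/proxy/v1/chat/completions", nil)
	rec := httptest.NewRecorder()
	r.ServeHTTP(rec, req)
	fmt.Println(rec.Body.String()) // instance=llama1 subpath=v1/chat/completions
}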