From 37107f76d513502e1a902626d557cb7caa3ce652 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sat, 19 Jul 2025 13:39:36 +0200 Subject: [PATCH] Set up instance proxy --- server/pkg/handlers.go | 55 ++++++++++++++++++++++++++++++++++++++++++ server/pkg/instance.go | 43 +++++++++++++++++++++++++++------ server/pkg/routes.go | 27 +++------------------ 3 files changed, 94 insertions(+), 31 deletions(-) diff --git a/server/pkg/handlers.go b/server/pkg/handlers.go index 269be60..c03c467 100644 --- a/server/pkg/handlers.go +++ b/server/pkg/handlers.go @@ -2,8 +2,10 @@ package llamactl import ( "encoding/json" + "fmt" "net/http" "os/exec" + "strings" "github.com/go-chi/chi/v5" ) @@ -338,3 +340,56 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { w.WriteHeader(http.StatusNoContent) } } + +func (h *Handler) ProxyToInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + instance, err := h.InstanceManager.GetInstance(name) + if err != nil { + http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) + return + } + + if !instance.Running { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } + + // Get the cached proxy for this instance + proxy, err := instance.GetProxy() + if err != nil { + http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) + return + } + + // Strip the "/api/v1/instances//proxy" prefix from the request URL + prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name) + proxyPath := r.URL.Path[len(prefix):] + + // Ensure the proxy path starts with "/" + if !strings.HasPrefix(proxyPath, "/") { + proxyPath = "/" + proxyPath + } + + // Modify the request to remove the proxy prefix + originalPath := r.URL.Path + r.URL.Path = proxyPath + + // Set forwarded headers + r.Header.Set("X-Forwarded-Host", 
r.Host) + r.Header.Set("X-Forwarded-Proto", "http") + + // Restore original path for logging purposes + defer func() { + r.URL.Path = originalPath + }() + + // Forward the request using the cached proxy + proxy.ServeHTTP(w, r) + } +} diff --git a/server/pkg/instance.go b/server/pkg/instance.go index 7f9c27d..c2f2fe6 100644 --- a/server/pkg/instance.go +++ b/server/pkg/instance.go @@ -7,6 +7,8 @@ import ( "fmt" "io" "log" + "net/http/httputil" + "net/url" "os/exec" "sync" "time" @@ -24,13 +26,14 @@ type Instance struct { StdErrChan chan string `json:"-"` // Channel for sending error messages // internal - cmd *exec.Cmd `json:"-"` // Command to run the instance - ctx context.Context `json:"-"` // Context for managing the instance lifecycle - cancel context.CancelFunc `json:"-"` // Function to cancel the context - stdout io.ReadCloser `json:"-"` // Standard output stream - stderr io.ReadCloser `json:"-"` // Standard error stream - mu sync.Mutex `json:"-"` // Mutex for synchronizing access to the instance - restarts int `json:"-"` // Number of restarts + cmd *exec.Cmd `json:"-"` // Command to run the instance + ctx context.Context `json:"-"` // Context for managing the instance lifecycle + cancel context.CancelFunc `json:"-"` // Function to cancel the context + stdout io.ReadCloser `json:"-"` // Standard output stream + stderr io.ReadCloser `json:"-"` // Standard error stream + mu sync.Mutex `json:"-"` // Mutex for synchronizing access to the instance + restarts int `json:"-"` // Number of restarts + proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance } func NewInstance(name string, options *InstanceOptions) *Instance { @@ -59,6 +62,29 @@ func (i *Instance) SetOptions(options *InstanceOptions) { return } i.options = options + // Clear the proxy so it gets recreated with new options + i.proxy = nil +} + +// GetProxy returns the reverse proxy for this instance, creating it if needed +func (i *Instance) GetProxy() 
(*httputil.ReverseProxy, error) { + i.mu.Lock() + defer i.mu.Unlock() + + if i.proxy == nil { + if i.options == nil { + return nil, fmt.Errorf("instance %s has no options set", i.Name) + } + + targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port)) + if err != nil { + return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err) + } + + i.proxy = httputil.NewSingleHostReverseProxy(targetURL) + } + + return i.proxy, nil } func (i *Instance) Start() error { @@ -110,6 +136,9 @@ func (i *Instance) Stop() error { // Cancel the context to signal termination i.cancel() + // Clean up the proxy + i.proxy = nil + // Wait for process to exit (with timeout) done := make(chan error, 1) go func() { diff --git a/server/pkg/routes.go b/server/pkg/routes.go index 5aaf943..502971b 100644 --- a/server/pkg/routes.go +++ b/server/pkg/routes.go @@ -40,30 +40,9 @@ func SetupRouter(handler *Handler) *chi.Mux { // r.Get("/logs", handler.GetInstanceLogs()) // Get instance logs // Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server) - // r.Get("/health", handler.ProxyHealthCheck()) // Health check - // r.Post("/completion", handler.ProxyCompletion()) // Text completion - // r.Post("/tokenize", handler.ProxyTokenize()) // Tokenize text - // r.Post("/detokenize", handler.ProxyDetokenize()) // Detokenize tokens - // r.Post("/apply-template", handler.ProxyApplyTemplate()) // Apply chat template - // r.Post("/embedding", handler.ProxyEmbedding()) // Generate embeddings - // r.Post("/reranking", handler.ProxyReranking()) // Rerank documents - // r.Post("/rerank", handler.ProxyRerank()) // Rerank documents (alias) - // r.Post("/infill", handler.ProxyInfill()) // Code infilling - // r.Get("/props", handler.ProxyGetProps()) // Get server properties - // r.Post("/props", handler.ProxySetProps()) // Set server properties - // r.Post("/embeddings", handler.ProxyEmbeddings()) // Non-OpenAI embeddings - // r.Get("/slots", 
handler.ProxyGetSlots()) // Get slots state - // r.Get("/metrics", handler.ProxyGetMetrics()) // Prometheus metrics - // r.Post("/slots/{slot_id}", handler.ProxySlotAction()) // Slot actions (save/restore/erase) - // r.Get("/lora-adapters", handler.ProxyGetLoraAdapters()) // Get LoRA adapters - // r.Post("/lora-adapters", handler.ProxySetLoraAdapters()) // Set LoRA adapters - - // OpenAI-compatible endpoints (proxied to the actual llama.cpp server) - // r.Post("/v1/completions", handler.ProxyV1Completions()) // OpenAI completions - // r.Post("/v1/chat/completions", handler.ProxyV1ChatCompletions()) // OpenAI chat completions - // r.Post("/v1/embeddings", handler.ProxyV1Embeddings()) // OpenAI embeddings - // r.Post("/v1/rerank", handler.ProxyV1Rerank()) // OpenAI rerank - // r.Post("/v1/reranking", handler.ProxyV1Reranking()) // OpenAI reranking + r.Route("/proxy", func(r chi.Router) { + r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests + }) }) }) })