Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/

Anuruth Lertpiya
2025-10-05 14:28:33 +00:00
parent db9eebeb8b
commit fa43f9e967
2 changed files with 131 additions and 21 deletions
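
As a usage sketch (not part of the commit): with these routes in place, a client can reach a model's native llama.cpp endpoints through the proxy. The host, model name "qwen", and API key below are placeholder assumptions, not values taken from this commit.

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// GET the native llama.cpp /props endpoint through the /llama-cpp/{name}/ proxy.
	req, err := http.NewRequest("GET", "http://localhost:8080/llama-cpp/qwen/props", nil)
	if err != nil {
		panic(err)
	}
	// Only needed when RequireInferenceAuth is enabled; the key is a placeholder.
	req.Header.Set("Authorization", "Bearer sk-inference-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}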


@@ -103,6 +103,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
	})

	r.Route("/llama-cpp/{name}", func(r chi.Router) {
		// Public Routes

		// Allow the llama.cpp server to serve its own WebUI if it is running.
		// Don't auto-start the server since it can be accessed without an API key.
		r.Get("/", handler.LlamaCppProxy(false))

		// Private Routes
		r.Group(func(r chi.Router) {
			if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
				r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
			}

			// This handler auto-starts the server if it's not running.
			llamaCppHandler := handler.LlamaCppProxy(true)

			// llama.cpp server-specific proxy endpoints
			r.Get("/props", llamaCppHandler)
			// The /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
			r.Get("/slots", llamaCppHandler)
			r.Post("/apply-template", llamaCppHandler)
			r.Post("/completion", llamaCppHandler)
			r.Post("/detokenize", llamaCppHandler)
			r.Post("/embeddings", llamaCppHandler)
			r.Post("/infill", llamaCppHandler)
			r.Post("/metrics", llamaCppHandler)
			r.Post("/props", llamaCppHandler)
			r.Post("/reranking", llamaCppHandler)
			r.Post("/tokenize", llamaCppHandler)

			// OpenAI-compatible proxy endpoint
			// Handles all POST requests to /v1/*, including:
			//   - /v1/completions
			//   - /v1/chat/completions
			//   - /v1/embeddings
			//   - /v1/rerank
			//   - /v1/reranking
			// llamaCppHandler is used here because some users of llama.cpp endpoints depend
			// on the "model" field being optional, and handler.OpenAIProxy requires it.
			r.Post("/v1/*", llamaCppHandler)
		})
	})

	// Serve WebUI files
	if err := webui.SetupWebUI(r); err != nil {
		fmt.Printf("Failed to set up WebUI: %v\n", err)
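
The /v1/* route above is worth a note: because it is backed by llamaCppHandler rather than handler.OpenAIProxy, requests may omit the "model" field and the target model is taken from the {name} path segment instead. A minimal sketch of such a request follows; the host, model name, and API key are again placeholder assumptions.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Chat completion body with no "model" field; the proxy path selects the model.
	payload := []byte(`{"messages":[{"role":"user","content":"Hello"}]}`)

	req, err := http.NewRequest("POST",
		"http://localhost:8080/llama-cpp/qwen/v1/chat/completions",
		bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// Only needed when RequireInferenceAuth is enabled; the key is a placeholder.
	req.Header.Set("Authorization", "Bearer sk-inference-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}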