mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 00:54:23 +00:00
Merge branch 'main' into feat/multi-host
This commit is contained in:
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
||||
r.Use(cors.Handler(cors.Options{
|
||||
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
|
||||
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
|
||||
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
|
||||
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
|
||||
ExposedHeaders: []string{"Link"},
|
||||
AllowCredentials: false,
|
||||
MaxAge: 300,
|
||||
@@ -112,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
||||
|
||||
})
|
||||
|
||||
r.Route("/llama-cpp/{name}", func(r chi.Router) {
|
||||
|
||||
// Public Routes
|
||||
// Allow llama-cpp server to serve its own WebUI if it is running.
|
||||
// Don't auto start the server since it can be accessed without an API key
|
||||
r.Get("/", handler.LlamaCppProxy(false))
|
||||
|
||||
// Private Routes
|
||||
r.Group(func(r chi.Router) {
|
||||
|
||||
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
|
||||
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
|
||||
}
|
||||
|
||||
// This handler auto-starts the server if it's not running
|
||||
llamaCppHandler := handler.LlamaCppProxy(true)
|
||||
|
||||
// llama.cpp server specific proxy endpoints
|
||||
r.Get("/props", llamaCppHandler)
|
||||
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
|
||||
r.Get("/slots", llamaCppHandler)
|
||||
r.Post("/apply-template", llamaCppHandler)
|
||||
r.Post("/completion", llamaCppHandler)
|
||||
r.Post("/detokenize", llamaCppHandler)
|
||||
r.Post("/embeddings", llamaCppHandler)
|
||||
r.Post("/infill", llamaCppHandler)
|
||||
r.Post("/metrics", llamaCppHandler)
|
||||
r.Post("/props", llamaCppHandler)
|
||||
r.Post("/reranking", llamaCppHandler)
|
||||
r.Post("/tokenize", llamaCppHandler)
|
||||
|
||||
// OpenAI-compatible proxy endpoint
|
||||
// Handles all POST requests to /v1/*, including:
|
||||
// - /v1/completions
|
||||
// - /v1/chat/completions
|
||||
// - /v1/embeddings
|
||||
// - /v1/rerank
|
||||
// - /v1/reranking
|
||||
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
|
||||
// on "model" field being optional, and handler.OpenAIProxy requires it.
|
||||
r.Post("/v1/*", llamaCppHandler)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
// Serve WebUI files
|
||||
if err := webui.SetupWebUI(r); err != nil {
|
||||
fmt.Printf("Failed to set up WebUI: %v\n", err)
|
||||
|
||||
Reference in New Issue
Block a user