package llamactl

import (
	"github.com/go-chi/chi/v5"
	"github.com/go-chi/chi/v5/middleware"
	httpSwagger "github.com/swaggo/http-swagger"

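	// Blank import loads the swag-generated API docs so /swagger/doc.json can be served.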
_ "llamactl/docs"
|
|
)
|
|
|
|
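// SetupRouter builds the chi router: it attaches request-logging middleware,
// serves the Swagger UI, and registers the /api/v1 routes for the given handler.
//
// Typical wiring (illustrative sketch only; the Handler construction and listen
// address below are assumptions, not part of this file):
//
//	handler := &Handler{}
//	r := SetupRouter(handler)
//	log.Fatal(http.ListenAndServe(":8080", r))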
func SetupRouter(handler *Handler) *chi.Mux {
	r := chi.NewRouter()
	r.Use(middleware.Logger)

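	// Serve the Swagger UI at /swagger/, pointed at the generated /swagger/doc.json spec.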
r.Get("/swagger/*", httpSwagger.Handler(
|
|
httpSwagger.URL("/swagger/doc.json"),
|
|
))
|
|
|
|
	// Define routes
	r.Route("/api/v1", func(r chi.Router) {
		r.Route("/server", func(r chi.Router) {
			r.Get("/help", handler.HelpHandler())
			r.Get("/version", handler.VersionHandler())
			r.Get("/devices", handler.ListDevicesHandler())
		})

		// Instance management endpoints
		r.Route("/instances", func(r chi.Router) {
			// r.Get("/", handler.ListInstances())   // List all instances
			// r.Post("/", handler.CreateInstance()) // Create and start new instance

r.Route("/{id}", func(r chi.Router) {
|
|
// Instance management
|
|
// r.Get("/", handler.GetInstance()) // Get instance details
|
|
// r.Put("/", handler.UpdateInstance()) // Update instance configuration
|
|
// r.Delete("/", handler.DeleteInstance()) // Stop and remove instance
|
|
// r.Post("/start", handler.StartInstance()) // Start stopped instance
|
|
// r.Post("/stop", handler.StopInstance()) // Stop running instance
|
|
// r.Post("/restart", handler.RestartInstance()) // Restart instance
|
|
// r.Get("/logs", handler.GetInstanceLogs()) // Get instance logs
|
|
|
|
				// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
				// r.Get("/health", handler.ProxyHealthCheck())             // Health check
				// r.Post("/completion", handler.ProxyCompletion())         // Text completion
				// r.Post("/tokenize", handler.ProxyTokenize())             // Tokenize text
				// r.Post("/detokenize", handler.ProxyDetokenize())         // Detokenize tokens
				// r.Post("/apply-template", handler.ProxyApplyTemplate())  // Apply chat template
				// r.Post("/embedding", handler.ProxyEmbedding())           // Generate embeddings
				// r.Post("/reranking", handler.ProxyReranking())           // Rerank documents
				// r.Post("/rerank", handler.ProxyRerank())                 // Rerank documents (alias)
				// r.Post("/infill", handler.ProxyInfill())                 // Code infilling
				// r.Get("/props", handler.ProxyGetProps())                 // Get server properties
				// r.Post("/props", handler.ProxySetProps())                // Set server properties
				// r.Post("/embeddings", handler.ProxyEmbeddings())         // Non-OpenAI embeddings
				// r.Get("/slots", handler.ProxyGetSlots())                 // Get slots state
				// r.Get("/metrics", handler.ProxyGetMetrics())             // Prometheus metrics
				// r.Post("/slots/{slot_id}", handler.ProxySlotAction())    // Slot actions (save/restore/erase)
				// r.Get("/lora-adapters", handler.ProxyGetLoraAdapters())  // Get LoRA adapters
				// r.Post("/lora-adapters", handler.ProxySetLoraAdapters()) // Set LoRA adapters

				// OpenAI-compatible endpoints (proxied to the actual llama.cpp server)
				// r.Post("/v1/completions", handler.ProxyV1Completions())          // OpenAI completions
				// r.Post("/v1/chat/completions", handler.ProxyV1ChatCompletions()) // OpenAI chat completions
				// r.Post("/v1/embeddings", handler.ProxyV1Embeddings())            // OpenAI embeddings
				// r.Post("/v1/rerank", handler.ProxyV1Rerank())                    // OpenAI rerank
				// r.Post("/v1/reranking", handler.ProxyV1Reranking())              // OpenAI reranking
			})
		})
	})

	// OpenAI-compatible endpoints (model name in request body determines routing)
	// r.Post("/v1/completions", handler.OpenAICompletions())          // Route based on model name in request
	// r.Post("/v1/chat/completions", handler.OpenAIChatCompletions()) // Route based on model name in request
	// r.Post("/v1/embeddings", handler.OpenAIEmbeddings())            // Route based on model name in request (if supported)
	// r.Post("/v1/rerank", handler.OpenAIRerank())                    // Route based on model name in request (if supported)
	// r.Post("/v1/reranking", handler.OpenAIReranking())              // Route based on model name in request (if supported)

	return r
}