diff --git a/server/pkg/routes.go b/server/pkg/routes.go
index 9163b55..81dfb33 100644
--- a/server/pkg/routes.go
+++ b/server/pkg/routes.go
@@ -25,16 +25,55 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		})
 
 		// Instance management endpoints
-		// r.Get("/instances", handler.ListInstances())                 // List all instances
-		// r.Post("/instances", handler.CreateInstance())               // Create and start new instance
-		// r.Get("/instances/{id}", handler.GetInstance())              // Get instance details
-		// r.Put("/instances/{id}", handler.UpdateInstance())           // Update instance configuration
-		// r.Delete("/instances/{id}", handler.DeleteInstance())        // Stop and remove instance
-		// r.Post("/instances/{id}/start", handler.StartInstance())     // Start stopped instance
-		// r.Post("/instances/{id}/stop", handler.StopInstance())       // Stop running instance
-		// r.Post("/instances/{id}/restart", handler.RestartInstance()) // Restart instance
-		// r.Get("/instances/{id}/logs", handler.GetInstanceLogs())     // Get instance logs
+		r.Route("/instances", func(r chi.Router) {
+			// r.Get("/", handler.ListInstances())   // List all instances
+			// r.Post("/", handler.CreateInstance()) // Create and start new instance
+
+			r.Route("/{id}", func(r chi.Router) {
+				// Instance management
+				// r.Get("/", handler.GetInstance())       // Get instance details
+				// r.Put("/", handler.UpdateInstance())    // Update instance configuration
+				// r.Delete("/", handler.DeleteInstance()) // Stop and remove instance
+				// r.Post("/start", handler.StartInstance())     // Start stopped instance
+				// r.Post("/stop", handler.StopInstance())       // Stop running instance
+				// r.Post("/restart", handler.RestartInstance()) // Restart instance
+				// r.Get("/logs", handler.GetInstanceLogs())     // Get instance logs
+
+				// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
+				// r.Get("/health", handler.ProxyHealthCheck())             // Health check
+				// r.Post("/completion", handler.ProxyCompletion())         // Text completion
+				// r.Post("/tokenize", handler.ProxyTokenize())             // Tokenize text
+				// r.Post("/detokenize", handler.ProxyDetokenize())         // Detokenize tokens
+				// r.Post("/apply-template", handler.ProxyApplyTemplate())  // Apply chat template
+				// r.Post("/embedding", handler.ProxyEmbedding())           // Generate embeddings
+				// r.Post("/reranking", handler.ProxyReranking())           // Rerank documents
+				// r.Post("/rerank", handler.ProxyRerank())                 // Rerank documents (alias)
+				// r.Post("/infill", handler.ProxyInfill())                 // Code infilling
+				// r.Get("/props", handler.ProxyGetProps())                 // Get server properties
+				// r.Post("/props", handler.ProxySetProps())                // Set server properties
+				// r.Post("/embeddings", handler.ProxyEmbeddings())         // Non-OpenAI embeddings
+				// r.Get("/slots", handler.ProxyGetSlots())                 // Get slots state
+				// r.Get("/metrics", handler.ProxyGetMetrics())             // Prometheus metrics
+				// r.Post("/slots/{slot_id}", handler.ProxySlotAction())    // Slot actions (save/restore/erase)
+				// r.Get("/lora-adapters", handler.ProxyGetLoraAdapters())  // Get LoRA adapters
+				// r.Post("/lora-adapters", handler.ProxySetLoraAdapters()) // Set LoRA adapters
+
+				// OpenAI-compatible endpoints (proxied to the actual llama.cpp server)
+				// r.Post("/v1/completions", handler.ProxyV1Completions())          // OpenAI completions
+				// r.Post("/v1/chat/completions", handler.ProxyV1ChatCompletions()) // OpenAI chat completions
+				// r.Post("/v1/embeddings", handler.ProxyV1Embeddings())            // OpenAI embeddings
+				// r.Post("/v1/rerank", handler.ProxyV1Rerank())                    // OpenAI rerank
+				// r.Post("/v1/reranking", handler.ProxyV1Reranking())              // OpenAI reranking
+			})
+		})
 	})
 
+	// OpenAI-compatible endpoints (model name in request body determines routing)
+	// r.Post("/v1/completions", handler.OpenAICompletions())          // Route based on model name in request
+	// r.Post("/v1/chat/completions", handler.OpenAIChatCompletions()) // Route based on model name in request
+	// r.Post("/v1/embeddings", handler.OpenAIEmbeddings())            // Route based on model name in request (if supported)
+	// r.Post("/v1/rerank", handler.OpenAIRerank())                    // Route based on model name in request (if supported)
+	// r.Post("/v1/reranking", handler.OpenAIReranking())              // Route based on model name in request (if supported)
+
 	return r
 }
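
All of these handlers are still commented out. For discussion, a minimal sketch of what one per-instance proxy handler could look like, assuming the `Handler` keeps a registry that maps an instance ID to the port of its running llama.cpp process (`instancePort` is a hypothetical helper, and the package name is assumed from the directory):

```go
package pkg

import (
	"fmt"
	"net/http"
	"net/http/httputil"
	"net/url"
	"strings"

	"github.com/go-chi/chi/v5"
)

// proxyToInstance forwards a request under /instances/{id}/... to the
// llama.cpp server backing that instance, rewriting the path so the
// upstream sees its native endpoints (/health, /completion, /v1/...).
func (h *Handler) proxyToInstance(w http.ResponseWriter, r *http.Request) {
	id := chi.URLParam(r, "id")

	// instancePort is a hypothetical lookup in the Handler's registry
	// of running llama.cpp processes.
	port, ok := h.instancePort(id)
	if !ok {
		http.Error(w, "instance not found", http.StatusNotFound)
		return
	}

	target, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", port))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Strip everything up to and including "/instances/{id}" so that, e.g.,
	// ".../instances/abc/v1/chat/completions" reaches the upstream as
	// "/v1/chat/completions", wherever this router happens to be mounted.
	prefix := "/instances/" + id
	if i := strings.Index(r.URL.Path, prefix); i >= 0 {
		r.URL.Path = r.URL.Path[i+len(prefix):]
	}
	if r.URL.Path == "" {
		r.URL.Path = "/"
	}

	httputil.NewSingleHostReverseProxy(target).ServeHTTP(w, r)
}
```

Since the path is rewritten generically, a single catch-all such as `r.HandleFunc("/*", handler.proxyToInstance)` inside the `/{id}` group could cover every proxied path with one registration, at the cost of losing the explicit route table above.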
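
The top-level `/v1/*` endpoints have to pick an instance from the `model` field in the request body rather than from the URL. A hedged sketch of that lookup, again with a hypothetical registry helper (`instanceForModel`) and an assumed package name; the body is restored after decoding since reading it consumes the stream:

```go
package pkg

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/http/httputil"
	"net/url"
)

// openAIProxy handles the top-level OpenAI-compatible endpoints: it peeks
// at the "model" field of the JSON body to choose the target instance,
// restores the body, and forwards the request unchanged.
func (h *Handler) openAIProxy(w http.ResponseWriter, r *http.Request) {
	body, err := io.ReadAll(r.Body)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	var req struct {
		Model string `json:"model"`
	}
	if err := json.Unmarshal(body, &req); err != nil || req.Model == "" {
		http.Error(w, "request body must include a model name", http.StatusBadRequest)
		return
	}

	// instanceForModel is a hypothetical lookup from model name to the
	// port of the instance serving that model.
	port, ok := h.instanceForModel(req.Model)
	if !ok {
		http.Error(w, "no running instance serves model "+req.Model, http.StatusNotFound)
		return
	}

	target, err := url.Parse(fmt.Sprintf("http://127.0.0.1:%d", port))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// Restore the consumed body; the /v1/... path already matches what
	// the llama.cpp server expects, so no path rewrite is needed here.
	r.Body = io.NopCloser(bytes.NewReader(body))
	r.ContentLength = int64(len(body))
	httputil.NewSingleHostReverseProxy(target).ServeHTTP(w, r)
}
```

All five top-level routes could then share this one handler, since the upstream path passes through unchanged.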