Implement model management for llama.cpp instances

2025-12-23 01:24:24 +00:00 · 2025-12-18 19:14:20 +01:00
parent f3c02b4939
commit fd9e651e09
11 changed files with 990 additions and 11 deletions
--- a/pkg/server/handlers_backends.go
+++ b/pkg/server/handlers_backends.go
@@ -5,9 +5,12 @@ import (
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
+	"log"
 	"net/http"
 	"os/exec"
 	"strings"
+
+	"github.com/go-chi/chi/v5"
 )

 // ParseCommandRequest represents the request body for backend command parsing
@@ -306,3 +309,115 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
 }
+
+// LlamaCppListModels godoc
+// @Summary List models in a llama.cpp instance
+// @Description Returns a list of models available in the specified llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} map[string]any "Models list response"
+// @Failure 400 {string} string "Invalid instance"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /api/v1/llama-cpp/{name}/models [get]
+func (h *Handler) LlamaCppListModels() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		inst, err := h.getInstance(r)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
+			return
+		}
+
+		models, err := inst.GetModels()
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "get_models_failed", err.Error())
+			return
+		}
+
+		response := map[string]any{
+			"object": "list",
+			"data":   models,
+		}
+
+		writeJSON(w, http.StatusOK, response)
+	}
+}
+
+// LlamaCppLoadModel godoc
+// @Summary Load a model in a llama.cpp instance
+// @Description Loads the specified model in the given llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param model path string true "Model Name"
+// @Success 200 {object} map[string]string "Success message"
+// @Failure 400 {string} string "Invalid request"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /api/v1/llama-cpp/{name}/models/{model}/load [post]
+func (h *Handler) LlamaCppLoadModel() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		inst, err := h.getInstance(r)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
+			return
+		}
+
+		modelName := chi.URLParam(r, "model")
+
+		if err := inst.LoadModel(modelName); err != nil {
+			writeError(w, http.StatusBadRequest, "load_model_failed", err.Error())
+			return
+		}
+
+		// Refresh the model registry
+		if err := h.InstanceManager.RefreshModelRegistry(inst); err != nil {
+			log.Printf("Warning: failed to refresh model registry after load: %v", err)
+		}
+
+		writeJSON(w, http.StatusOK, map[string]string{
+			"status":  "success",
+			"message": fmt.Sprintf("Model %s loaded successfully", modelName),
+		})
+	}
+}
+
+// LlamaCppUnloadModel godoc
+// @Summary Unload a model in a llama.cpp instance
+// @Description Unloads the specified model in the given llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param model path string true "Model Name"
+// @Success 200 {object} map[string]string "Success message"
+// @Failure 400 {string} string "Invalid request"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /api/v1/llama-cpp/{name}/models/{model}/unload [post]
+func (h *Handler) LlamaCppUnloadModel() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		inst, err := h.getInstance(r)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
+			return
+		}
+
+		modelName := chi.URLParam(r, "model")
+
+		if err := inst.UnloadModel(modelName); err != nil {
+			writeError(w, http.StatusBadRequest, "unload_model_failed", err.Error())
+			return
+		}
+
+		// Refresh the model registry
+		if err := h.InstanceManager.RefreshModelRegistry(inst); err != nil {
+			log.Printf("Warning: failed to refresh model registry after unload: %v", err)
+		}
+
+		writeJSON(w, http.StatusOK, map[string]string{
+			"status":  "success",
+			"message": fmt.Sprintf("Model %s unloaded successfully", modelName),
+		})
+	}
+}