Merge pull request #72 from lordmathis/refactor/handlers

refactor: Extract common helper functions in API handlers
This commit is contained in:
2025-10-26 12:07:42 +01:00
committed by GitHub
17 changed files with 403 additions and 405 deletions

View File

@@ -23,6 +23,7 @@ type backend interface {
SetPort(int) SetPort(int)
GetHost() string GetHost() string
Validate() error Validate() error
ParseCommand(string) (any, error)
} }
var backendConstructors = map[BackendType]func() backend{ var backendConstructors = map[BackendType]func() backend{

View File

@@ -9,7 +9,7 @@ import (
) )
// BuildCommandArgs converts a struct to command line arguments // BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string { func BuildCommandArgs(options any, multipleFlags map[string]struct{}) []string {
var args []string var args []string
v := reflect.ValueOf(options).Elem() v := reflect.ValueOf(options).Elem()
@@ -28,9 +28,10 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
continue continue
} }
// Get flag name from JSON tag // Get flag name from JSON tag (snake_case)
flagName := strings.Split(jsonTag, ",")[0] jsonFieldName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-") // Convert to kebab-case for CLI flags
flagName := strings.ReplaceAll(jsonFieldName, "_", "-")
switch field.Kind() { switch field.Kind() {
case reflect.Bool: case reflect.Bool:
@@ -51,7 +52,8 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
} }
case reflect.Slice: case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 { if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] { // Use jsonFieldName (snake_case) for multipleFlags lookup
if _, isMultiValue := multipleFlags[jsonFieldName]; isMultiValue {
// Multiple flags: --flag value1 --flag value2 // Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ { for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String()) args = append(args, "--"+flagName, field.Index(j).String())

View File

@@ -9,25 +9,16 @@ import (
) )
// llamaMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated // llamaMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes) // Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
var llamaMultiValuedFlags = map[string]bool{ var llamaMultiValuedFlags = map[string]struct{}{
// Parsing keys (with underscores) "override_tensor": {},
"override_tensor": true, "override_kv": {},
"override_kv": true, "lora": {},
"lora": true, "lora_scaled": {},
"lora_scaled": true, "control_vector": {},
"control_vector": true, "control_vector_scaled": {},
"control_vector_scaled": true, "dry_sequence_breaker": {},
"dry_sequence_breaker": true, "logit_bias": {},
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
} }
type LlamaServerOptions struct { type LlamaServerOptions struct {
@@ -378,19 +369,19 @@ func (o *LlamaServerOptions) BuildDockerArgs() []string {
return o.BuildCommandArgs() return o.BuildCommandArgs()
} }
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions // ParseCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats: // Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf" // 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf" // 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32" // 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes // 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) { func (o *LlamaServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"llama-server"} executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands var subcommandNames []string // Llama has no subcommands
// Use package-level llamaMultiValuedFlags variable // Use package-level llamaMultiValuedFlags variable
var llamaOptions LlamaServerOptions var llamaOptions LlamaServerOptions
if err := ParseCommand(command, executableNames, subcommandNames, llamaMultiValuedFlags, &llamaOptions); err != nil { if err := parseCommand(command, executableNames, subcommandNames, llamaMultiValuedFlags, &llamaOptions); err != nil {
return nil, err return nil, err
} }

View File

@@ -385,7 +385,9 @@ func TestParseLlamaCommand(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result, err := backends.ParseLlamaCommand(tt.command) var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if tt.expectErr { if tt.expectErr {
if err == nil { if err == nil {
@@ -413,7 +415,9 @@ func TestParseLlamaCommand(t *testing.T) {
func TestParseLlamaCommandArrays(t *testing.T) { func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin" command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := backends.ParseLlamaCommand(command) var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if err != nil { if err != nil {
t.Fatalf("unexpected error: %v", err) t.Fatalf("unexpected error: %v", err)

View File

@@ -62,7 +62,7 @@ func (o *MlxServerOptions) Validate() error {
// BuildCommandArgs converts to command line arguments // BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string { func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields multipleFlags := map[string]struct{}{} // MLX doesn't currently have []string fields
return BuildCommandArgs(o, multipleFlags) return BuildCommandArgs(o, multipleFlags)
} }
@@ -70,19 +70,19 @@ func (o *MlxServerOptions) BuildDockerArgs() []string {
return []string{} return []string{}
} }
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions // ParseCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats: // Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path" // 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path" // 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0" // 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes // 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) { func (o *MlxServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"mlx_lm.server"} executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags multiValuedFlags := map[string]struct{}{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions var mlxOptions MlxServerOptions
if err := ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil { if err := parseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err return nil, err
} }

View File

@@ -96,7 +96,9 @@ func TestParseMlxCommand(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result, err := backends.ParseMlxCommand(tt.command) var opts backends.MlxServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.MlxServerOptions)
if tt.expectErr { if tt.expectErr {
if err == nil { if err == nil {

View File

@@ -9,8 +9,8 @@ import (
"strings" "strings"
) )
// ParseCommand parses a command string into a target struct // parseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error { func parseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]struct{}, target any) error {
// Normalize multiline commands // Normalize multiline commands
command = normalizeCommand(command) command = normalizeCommand(command)
if command == "" { if command == "" {
@@ -125,7 +125,7 @@ func extractArgs(command string, executableNames []string, subcommandNames []str
} }
// parseFlags parses command line flags into a map // parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) { func parseFlags(args []string, multiValuedFlags map[string]struct{}) (map[string]any, error) {
options := make(map[string]any) options := make(map[string]any)
for i := 0; i < len(args); i++ { for i := 0; i < len(args); i++ {
@@ -163,7 +163,7 @@ func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any
if hasValue { if hasValue {
// Handle multi-valued flags // Handle multi-valued flags
if multiValuedFlags[flagName] { if _, isMultiValue := multiValuedFlags[flagName]; isMultiValue {
if existing, ok := options[flagName].([]string); ok { if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value) options[flagName] = append(existing, value)
} else { } else {

View File

@@ -6,12 +6,16 @@ import (
) )
// vllmMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated // vllmMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var vllmMultiValuedFlags = map[string]bool{ // Based on vLLM's CLI argument definitions with action='append' or List types
"api-key": true, // Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
"allowed-origins": true, var vllmMultiValuedFlags = map[string]struct{}{
"allowed-methods": true, "api_key": {}, // --api-key (action='append')
"allowed-headers": true, "allowed_origins": {}, // --allowed-origins (List type)
"middleware": true, "allowed_methods": {}, // --allowed-methods (List type)
"allowed_headers": {}, // --allowed-headers (List type)
"middleware": {}, // --middleware (action='append')
"lora_modules": {}, // --lora-modules (custom LoRAParserAction, accepts multiple)
"prompt_adapters": {}, // --prompt-adapters (similar to lora-modules, accepts multiple)
} }
type VllmServerOptions struct { type VllmServerOptions struct {
@@ -202,28 +206,19 @@ func (o *VllmServerOptions) BuildDockerArgs() []string {
return args return args
} }
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions // ParseCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats: // Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args" // 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME" // 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args" // 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args" // 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes // 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) { func (o *VllmServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"vllm"} executableNames := []string{"vllm"}
subcommandNames := []string{"serve"} subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions var vllmOptions VllmServerOptions
if err := ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil { if err := parseCommand(command, executableNames, subcommandNames, vllmMultiValuedFlags, &vllmOptions); err != nil {
return nil, err return nil, err
} }

View File

@@ -92,7 +92,9 @@ func TestParseVllmCommand(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result, err := backends.ParseVllmCommand(tt.command) var opts backends.VllmServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.VllmServerOptions)
if tt.expectErr { if tt.expectErr {
if err == nil { if err == nil {
@@ -118,6 +120,41 @@ func TestParseVllmCommand(t *testing.T) {
} }
} }
// TestParseVllmCommandArrays checks that vLLM flags given more than once are
// collected into slices, for both the "--flag value" and "--flag=value" forms.
func TestParseVllmCommandArrays(t *testing.T) {
	command := "vllm serve test-model --middleware auth.py --middleware=cors.py --api-key key1 --api-key key2"

	var opts backends.VllmServerOptions
	resultAny, err := opts.ParseCommand(command)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	result, ok := resultAny.(*backends.VllmServerOptions)
	if !ok {
		t.Fatalf("expected *VllmServerOptions, got %T", resultAny)
	}

	// assertValues compares got against want element by element. A missing
	// element is reported on its own instead of being indexed, so a result
	// that is too short fails the test cleanly rather than panicking with an
	// index-out-of-range while formatting the error message.
	assertValues := func(plural, field string, got, want []string) {
		if len(got) != len(want) {
			t.Errorf("expected %d %s, got %d", len(want), plural, len(got))
		}
		for i, v := range want {
			if i >= len(got) {
				t.Errorf("expected %s[%d]=%s but it is missing", field, i, v)
				continue
			}
			if got[i] != v {
				t.Errorf("expected %s[%d]=%s got %s", field, i, v, got[i])
			}
		}
	}

	assertValues("middleware items", "middleware", result.Middleware, []string{"auth.py", "cors.py"})
	assertValues("api keys", "api_key", result.APIKey, []string{"key1", "key2"})
}
func TestVllmBuildCommandArgs_BooleanFields(t *testing.T) { func TestVllmBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct { tests := []struct {
name string name string

View File

@@ -1,18 +1,60 @@
package server package server
import ( import (
"encoding/json"
"fmt"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager" "llamactl/pkg/manager"
"llamactl/pkg/validation"
"log"
"net/http" "net/http"
"time" "time"
"github.com/go-chi/chi/v5"
) )
// errorResponse is the JSON body sent to clients when a request fails.
// Error carries a short error code; Details is optional and is omitted from
// the encoded JSON when empty.
type errorResponse struct {
	Error   string `json:"error"`
	Details string `json:"details,omitempty"`
}

// writeError sends an errorResponse built from code and details as JSON,
// using status as the HTTP status code. A failure to encode the body is only
// logged, because the status line has already been sent by then.
func writeError(w http.ResponseWriter, status int, code, details string) {
	payload := errorResponse{
		Error:   code,
		Details: details,
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	encodeErr := json.NewEncoder(w).Encode(payload)
	if encodeErr == nil {
		return
	}
	log.Printf("Failed to encode error response: %v", encodeErr)
}
// writeJSON writes data as a JSON response with the specified HTTP status code.
//
// The value is serialized before any header is sent. If serialization fails,
// the failure is logged and the client receives a 500 response with an
// "encode_error" body, instead of the requested status with an empty body.
// On success the body is the JSON encoding of data followed by a newline.
func writeJSON(w http.ResponseWriter, status int, data any) {
	payload, err := json.Marshal(data)
	if err != nil {
		log.Printf("Failed to encode JSON response: %v", err)
		// Nothing has been written yet, so the status can still be changed.
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"encode_error"}`)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	// Keep the trailing newline that json.Encoder appends after each value.
	if _, writeErr := w.Write(append(payload, '\n')); writeErr != nil {
		log.Printf("Failed to write JSON response: %v", writeErr)
	}
}
// writeText sends data as a text/plain response body with the given HTTP
// status code. A failed write is logged; nothing is returned to the caller.
func writeText(w http.ResponseWriter, status int, data string) {
	header := w.Header()
	header.Set("Content-Type", "text/plain")
	w.WriteHeader(status)

	_, writeErr := w.Write([]byte(data))
	if writeErr == nil {
		return
	}
	log.Printf("Failed to write text response: %v", writeErr)
}
// Handler provides HTTP handlers for the llamactl server API
type Handler struct { type Handler struct {
InstanceManager manager.InstanceManager InstanceManager manager.InstanceManager
cfg config.AppConfig cfg config.AppConfig
httpClient *http.Client httpClient *http.Client
} }
// NewHandler creates a new Handler instance with the provided instance manager and configuration
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler { func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{ return &Handler{
InstanceManager: im, InstanceManager: im,
@@ -22,3 +64,52 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
}, },
} }
} }
// getInstance looks up the instance named by the "name" URL parameter of the
// request (read through chi.URLParam). The name is validated before the
// instance manager is consulted; a validation failure or a failed lookup is
// returned as a wrapped error.
func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
	validatedName, err := validation.ValidateInstanceName(chi.URLParam(r, "name"))
	if err != nil {
		return nil, fmt.Errorf("invalid instance name: %w", err)
	}

	found, lookupErr := h.InstanceManager.GetInstance(validatedName)
	if lookupErr != nil {
		return nil, fmt.Errorf("failed to get instance by name: %w", lookupErr)
	}
	return found, nil
}
// ensureInstanceRunning starts inst and waits for it to report healthy.
//
// It refuses to start the instance unless the instance's options enable
// on-demand start. When the manager reports that the maximum number of
// running instances is reached, the least recently used instance is evicted
// if LRU eviction is enabled in the configuration; otherwise an error is
// returned. The health wait is bounded by the configured on-demand start
// timeout.
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
	opts := inst.GetOptions()
	if opts == nil || opts.OnDemandStart == nil || !*opts.OnDemandStart {
		return fmt.Errorf("instance is not running and on-demand start is not enabled")
	}

	// Make room for the new instance before attempting to start it.
	if h.InstanceManager.IsMaxRunningInstancesReached() {
		if !h.cfg.Instances.EnableLRUEviction {
			return fmt.Errorf("cannot start instance, maximum number of instances reached")
		}
		if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
			return fmt.Errorf("cannot start instance, failed to evict instance: %w", evictErr)
		}
	}

	_, startErr := h.InstanceManager.StartInstance(inst.Name)
	if startErr != nil {
		return fmt.Errorf("failed to start instance: %w", startErr)
	}

	// Do not hand the instance back until it reports healthy.
	healthErr := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout)
	if healthErr != nil {
		return fmt.Errorf("instance failed to become healthy: %w", healthErr)
	}
	return nil
}

View File

@@ -5,99 +5,148 @@ import (
"fmt" "fmt"
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/validation"
"net/http" "net/http"
"os/exec" "os/exec"
"strings" "strings"
"github.com/go-chi/chi/v5"
) )
// ParseCommandRequest represents the request body for command parsing // ParseCommandRequest represents the request body for backend command parsing
type ParseCommandRequest struct { type ParseCommandRequest struct {
Command string `json:"command"` Command string `json:"command"`
} }
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc { // validateLlamaCppInstance validates that the instance specified in the request is a llama.cpp instance
return func(w http.ResponseWriter, r *http.Request) { func (h *Handler) validateLlamaCppInstance(r *http.Request) (*instance.Instance, error) {
inst, err := h.getInstance(r)
// Get the instance name from the URL parameter
name := chi.URLParam(r, "name")
// Validate instance name at the entry point
validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) return nil, fmt.Errorf("invalid instance: %w", err)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
} }
options := inst.GetOptions() options := inst.GetOptions()
if options == nil { if options == nil {
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError) return nil, fmt.Errorf("cannot obtain instance's options")
return
} }
if options.BackendOptions.BackendType != backends.BackendTypeLlamaCpp { if options.BackendOptions.BackendType != backends.BackendTypeLlamaCpp {
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest) return nil, fmt.Errorf("instance is not a llama.cpp server")
}
return inst, nil
}
// stripLlamaCppPrefix rewrites r.URL.Path in place, dropping a leading
// "/llama-cpp/<instName>" segment. A path that does not start with that
// prefix is left unchanged.
func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) {
	proxyPrefix := "/llama-cpp/" + instName
	trimmed := strings.TrimPrefix(r.URL.Path, proxyPrefix)
	r.URL.Path = trimmed
}
// LlamaCppUIProxy godoc
// @Summary Proxy requests to llama.cpp UI for the instance
// @Description Proxies requests to the llama.cpp UI for the specified instance
// @Tags backends
// @Security ApiKeyAuth
// @Produce html
// @Param name path string true "Instance Name"
// @Success 200 {string} string "Proxied HTML response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/ [get]
func (h *Handler) LlamaCppUIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return return
} }
if !inst.IsRemote() && !inst.IsRunning() { if !inst.IsRemote() && !inst.IsRunning() {
writeError(w, http.StatusBadRequest, "instance is not running", "Instance is not running")
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return return
} }
if h.InstanceManager.IsMaxRunningInstancesReached() { proxy, err := inst.GetProxy()
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil { if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error())
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(validatedName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return return
} }
// Wait for the instance to become healthy before proceeding if !inst.IsRemote() {
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout h.stripLlamaCppPrefix(r, inst.Name)
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) }
proxy.ServeHTTP(w, r)
}
}
// LlamaCppProxy godoc
// @Summary Proxy requests to llama.cpp server instance
// @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured
// @Tags backends
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} map[string]any "Proxied response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/* [post]
func (h *Handler) LlamaCppProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
return return
} }
} }
proxy, err := inst.GetProxy() proxy, err := inst.GetProxy()
if err != nil { if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error())
return return
} }
if !inst.IsRemote() { if !inst.IsRemote() {
// Strip the "/llama-cpp/<name>" prefix from the request URL h.stripLlamaCppPrefix(r, inst.Name)
prefix := fmt.Sprintf("/llama-cpp/%s", validatedName)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
} }
proxy.ServeHTTP(w, r) proxy.ServeHTTP(w, r)
} }
} }
// parseHelper decodes a ParseCommandRequest from the request body and hands
// its command string to backend.ParseCommand.
//
// On success it returns the parsed options and true. On any failure
// (malformed JSON body, blank command, or a parse error from the backend) it
// writes a 400 error response itself and returns (nil, false), so the caller
// only has to return.
func parseHelper(w http.ResponseWriter, r *http.Request, backend interface {
	ParseCommand(string) (any, error)
}) (any, bool) {
	var body ParseCommandRequest
	decodeErr := json.NewDecoder(r.Body).Decode(&body)

	switch {
	case decodeErr != nil:
		writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
		return nil, false
	case strings.TrimSpace(body.Command) == "":
		writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
		return nil, false
	}

	// Delegate the actual parsing to the backend-specific implementation.
	parsed, parseErr := backend.ParseCommand(body.Command)
	if parseErr != nil {
		writeError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
		return nil, false
	}
	return parsed, true
}
// ParseLlamaCommand godoc // ParseLlamaCommand godoc
// @Summary Parse llama-server command // @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options // @Description Parses a llama-server command string into instance options
@@ -111,40 +160,20 @@ func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
// @Failure 500 {object} map[string]string "Internal Server Error" // @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post] // @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc { func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest parsedOptions, ok := parseHelper(w, r, &backends.LlamaServerOptions{})
if err := json.NewDecoder(r.Body).Decode(&req); err != nil { if !ok {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := backends.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return return
} }
options := &instance.Options{ options := &instance.Options{
BackendOptions: backends.Options{ BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp, BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions, LlamaServerOptions: parsedOptions.(*backends.LlamaServerOptions),
}, },
} }
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil { writeJSON(w, http.StatusOK, options)
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
} }
} }
@@ -160,47 +189,20 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
// @Failure 400 {object} map[string]string "Invalid request or command" // @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post] // @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc { func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest parsedOptions, ok := parseHelper(w, r, &backends.MlxServerOptions{})
if err := json.NewDecoder(r.Body).Decode(&req); err != nil { if !ok {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return return
} }
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := backends.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.Options{ options := &instance.Options{
BackendOptions: backends.Options{ BackendOptions: backends.Options{
BackendType: backendType, BackendType: backends.BackendTypeMlxLm,
MlxServerOptions: mlxOptions, MlxServerOptions: parsedOptions.(*backends.MlxServerOptions),
}, },
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, options)
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
} }
} }
@@ -216,46 +218,33 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
// @Failure 400 {object} map[string]string "Invalid request or command" // @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post] // @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc { func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest parsedOptions, ok := parseHelper(w, r, &backends.VllmServerOptions{})
if err := json.NewDecoder(r.Body).Decode(&req); err != nil { if !ok {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return return
} }
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := backends.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.Options{ options := &instance.Options{
BackendOptions: backends.Options{ BackendOptions: backends.Options{
BackendType: backendType, BackendType: backends.BackendTypeVllm,
VllmServerOptions: vllmOptions, VllmServerOptions: parsedOptions.(*backends.VllmServerOptions),
}, },
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, options)
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
} }
}
// executeLlamaServerCommand executes a llama-server command with the specified flag and returns the output
func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
cmd := exec.Command("llama-server", flag)
output, err := cmd.CombinedOutput()
if err != nil {
writeError(w, http.StatusInternalServerError, "command failed", errorMsg+": "+err.Error())
return
}
writeText(w, http.StatusOK, string(output))
} }
} }
@@ -269,16 +258,7 @@ func (h *Handler) ParseVllmCommand() http.HandlerFunc {
// @Failure 500 {string} string "Internal Server Error" // @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get] // @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return h.executeLlamaServerCommand("--help", "Failed to get help")
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
} }
// LlamaServerVersionHandler godoc // LlamaServerVersionHandler godoc
@@ -291,16 +271,7 @@ func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
// @Failure 500 {string} string "Internal Server Error" // @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get] // @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return h.executeLlamaServerCommand("--version", "Failed to get version")
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
} }
// LlamaServerListDevicesHandler godoc // LlamaServerListDevicesHandler godoc
@@ -313,14 +284,5 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
// @Failure 500 {string} string "Internal Server Error" // @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get] // @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc { func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
} }

View File

@@ -26,15 +26,11 @@ func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances() instances, err := h.InstanceManager.ListInstances()
if err != nil { if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, instances)
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -54,31 +50,25 @@ func (h *Handler) ListInstances() http.HandlerFunc {
func (h *Handler) CreateInstance() http.HandlerFunc { func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
var options instance.Options var options instance.Options
if err := json.NewDecoder(r.Body).Decode(&options); err != nil { if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return return
} }
inst, err := h.InstanceManager.CreateInstance(validatedName, &options) inst, err := h.InstanceManager.CreateInstance(validatedName, &options)
if err != nil { if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "create_failed", "Failed to create instance: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusCreated, inst)
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -96,24 +86,19 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
func (h *Handler) GetInstance() http.HandlerFunc { func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
inst, err := h.InstanceManager.GetInstance(validatedName) inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil { if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, inst)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -133,30 +118,25 @@ func (h *Handler) GetInstance() http.HandlerFunc {
func (h *Handler) UpdateInstance() http.HandlerFunc { func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
var options instance.Options var options instance.Options
if err := json.NewDecoder(r.Body).Decode(&options); err != nil { if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return return
} }
inst, err := h.InstanceManager.UpdateInstance(validatedName, &options) inst, err := h.InstanceManager.UpdateInstance(validatedName, &options)
if err != nil { if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "update_failed", "Failed to update instance: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, inst)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -174,10 +154,9 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
func (h *Handler) StartInstance() http.HandlerFunc { func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
@@ -185,19 +164,15 @@ func (h *Handler) StartInstance() http.HandlerFunc {
if err != nil { if err != nil {
// Check if error is due to maximum running instances limit // Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok { if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict) writeError(w, http.StatusConflict, "max_instances_reached", err.Error())
return return
} }
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "start_failed", "Failed to start instance: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, inst)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -215,24 +190,19 @@ func (h *Handler) StartInstance() http.HandlerFunc {
func (h *Handler) StopInstance() http.HandlerFunc { func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
inst, err := h.InstanceManager.StopInstance(validatedName) inst, err := h.InstanceManager.StopInstance(validatedName)
if err != nil { if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "stop_failed", "Failed to stop instance: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, inst)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -250,24 +220,19 @@ func (h *Handler) StopInstance() http.HandlerFunc {
func (h *Handler) RestartInstance() http.HandlerFunc { func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
inst, err := h.InstanceManager.RestartInstance(validatedName) inst, err := h.InstanceManager.RestartInstance(validatedName)
if err != nil { if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "restart_failed", "Failed to restart instance: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, inst)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -284,15 +249,14 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
func (h *Handler) DeleteInstance() http.HandlerFunc { func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
if err := h.InstanceManager.DeleteInstance(validatedName); err != nil { if err := h.InstanceManager.DeleteInstance(validatedName); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "delete_failed", "Failed to delete instance: "+err.Error())
return return
} }
@@ -315,10 +279,9 @@ func (h *Handler) DeleteInstance() http.HandlerFunc {
func (h *Handler) GetInstanceLogs() http.HandlerFunc { func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name) validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
@@ -327,7 +290,7 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
if lines != "" { if lines != "" {
parsedLines, err := strconv.Atoi(lines) parsedLines, err := strconv.Atoi(lines)
if err != nil { if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_parameter", "Invalid lines parameter: "+err.Error())
return return
} }
numLines = parsedLines numLines = parsedLines
@@ -336,17 +299,16 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
// Use the instance manager which handles both local and remote instances // Use the instance manager which handles both local and remote instances
logs, err := h.InstanceManager.GetInstanceLogs(validatedName, numLines) logs, err := h.InstanceManager.GetInstanceLogs(validatedName, numLines)
if err != nil { if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "logs_failed", "Failed to get logs: "+err.Error())
return return
} }
w.Header().Set("Content-Type", "text/plain") writeText(w, http.StatusOK, logs)
w.Write([]byte(logs))
} }
} }
// ProxyToInstance godoc // InstanceProxy godoc
// @Summary Proxy requests to a specific instance // @Summary Proxy requests to a specific instance, does not autostart instance if stopped
// @Description Forwards HTTP requests to the llama-server instance running on a specific port // @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances // @Tags instances
// @Security ApiKeyAuth // @Security ApiKeyAuth
@@ -357,38 +319,28 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
// @Failure 503 {string} string "Instance is not running" // @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get] // @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post] // @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc { func (h *Handler) InstanceProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") inst, err := h.getInstance(r)
validatedName, err := validation.ValidateInstanceName(name)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return
}
inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return return
} }
if !inst.IsRunning() { if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable) writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
return return
} }
// Get the cached proxy for this instance
proxy, err := inst.GetProxy() proxy, err := inst.GetProxy()
if err != nil { if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "proxy_failed", "Failed to get proxy: "+err.Error())
return return
} }
// Check if this is a remote instance
if !inst.IsRemote() { if !inst.IsRemote() {
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL // Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", validatedName) prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", inst.Name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
} }
@@ -396,7 +348,6 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host")) r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http") r.Header.Set("X-Forwarded-Proto", "http")
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r) proxy.ServeHTTP(w, r)
} }
} }

View File

@@ -1,13 +1,12 @@
package server package server
import ( import (
"encoding/json"
"net/http" "net/http"
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
) )
// NodeResponse represents a sanitized node configuration for API responses // NodeResponse represents a node configuration in API responses
type NodeResponse struct { type NodeResponse struct {
Address string `json:"address"` Address string `json:"address"`
} }
@@ -31,11 +30,7 @@ func (h *Handler) ListNodes() http.HandlerFunc {
} }
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, nodeResponses)
if err := json.NewEncoder(w).Encode(nodeResponses); err != nil {
http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -55,13 +50,13 @@ func (h *Handler) GetNode() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name") name := chi.URLParam(r, "name")
if name == "" { if name == "" {
http.Error(w, "Node name cannot be empty", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Node name cannot be empty")
return return
} }
nodeConfig, exists := h.cfg.Nodes[name] nodeConfig, exists := h.cfg.Nodes[name]
if !exists { if !exists {
http.Error(w, "Node not found", http.StatusNotFound) writeError(w, http.StatusNotFound, "not_found", "Node not found")
return return
} }
@@ -70,10 +65,6 @@ func (h *Handler) GetNode() http.HandlerFunc {
Address: nodeConfig.Address, Address: nodeConfig.Address,
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, nodeResponse)
if err := json.NewEncoder(w).Encode(nodeResponse); err != nil {
http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }

View File

@@ -8,6 +8,20 @@ import (
"net/http" "net/http"
) )
// OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format.
// Both fields are always emitted when encoded as JSON (no omitempty tags).
type OpenAIListInstancesResponse struct {
// Object is serialized under the "object" key.
// NOTE(review): presumably the OpenAI list marker (e.g. "list"); the value is assigned outside this view, so confirm.
Object string `json:"object"`
// Data is the list of instance entries, serialized under the "data" key.
Data []OpenAIInstance `json:"data"`
}
// OpenAIInstance represents a single instance (model) in OpenAI-compatible format.
// All four fields are always emitted when encoded as JSON (no omitempty tags).
type OpenAIInstance struct {
// ID is serialized under the "id" key.
// NOTE(review): presumably the instance name; confirm where the entries are built.
ID string `json:"id"`
// Object is serialized under the "object" key.
// NOTE(review): presumably a fixed type marker such as "model"; confirm.
Object string `json:"object"`
// Created is serialized under the "created" key.
// NOTE(review): presumably a Unix timestamp in seconds; confirm.
Created int64 `json:"created"`
// OwnedBy is serialized under the snake_case "owned_by" key.
OwnedBy string `json:"owned_by"`
}
// OpenAIListInstances godoc // OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format // @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API // @Description Returns a list of instances in a format compatible with OpenAI API
@@ -21,7 +35,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances() instances, err := h.InstanceManager.ListInstances()
if err != nil { if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+err.Error())
return return
} }
@@ -40,11 +54,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
Data: openaiInstances, Data: openaiInstances,
} }
w.Header().Set("Content-Type", "application/json") writeJSON(w, http.StatusOK, openaiResponse)
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
} }
} }
@@ -64,7 +74,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
// Read the entire body first // Read the entire body first
bodyBytes, err := io.ReadAll(r.Body) bodyBytes, err := io.ReadAll(r.Body)
if err != nil { if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Failed to read request body")
return return
} }
r.Body.Close() r.Body.Close()
@@ -72,67 +82,41 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
// Parse the body to extract instance name // Parse the body to extract instance name
var requestBody map[string]any var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil { if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return return
} }
modelName, ok := requestBody["model"].(string) modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" { if !ok || modelName == "" {
http.Error(w, "Instance name is required", http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
return return
} }
// Validate instance name at the entry point // Validate instance name at the entry point
validatedName, err := validation.ValidateInstanceName(modelName) validatedName, err := validation.ValidateInstanceName(modelName)
if err != nil { if err != nil {
http.Error(w, "Invalid instance name: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return return
} }
// Route to the appropriate inst based on instance name // Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(validatedName) inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil { if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return return
} }
if !inst.IsRemote() && !inst.IsRunning() { if !inst.IsRemote() && !inst.IsRunning() {
options := inst.GetOptions() err := h.ensureInstanceRunning(inst)
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil { if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "instance_start_failed", err.Error())
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(validatedName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return return
} }
} }
proxy, err := inst.GetProxy() proxy, err := inst.GetProxy()
if err != nil { if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) writeError(w, http.StatusInternalServerError, "proxy_failed", err.Error())
return return
} }

View File

@@ -16,7 +16,7 @@ import (
// @Router /version [get] // @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc { func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain") versionInfo := fmt.Sprintf("Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime) writeText(w, http.StatusOK, versionInfo)
} }
} }

View File

@@ -1,13 +0,0 @@
package server
// OpenAIListInstancesResponse is the JSON envelope for a list of OpenAIInstance entries.
type OpenAIListInstancesResponse struct {
// Object is serialized under the "object" key.
Object string `json:"object"`
// Data is the list of instance entries, serialized under the "data" key.
Data []OpenAIInstance `json:"data"`
}
// OpenAIInstance is a single entry of OpenAIListInstancesResponse.Data.
type OpenAIInstance struct {
// ID is serialized under the "id" key.
ID string `json:"id"`
// Object is serialized under the "object" key.
Object string `json:"object"`
// Created is serialized under the "created" key.
// NOTE(review): presumably a Unix timestamp in seconds; confirm.
Created int64 `json:"created"`
// OwnedBy is serialized under the snake_case "owned_by" key.
OwnedBy string `json:"owned_by"`
}

View File

@@ -86,7 +86,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server) // Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
r.Route("/proxy", func(r chi.Router) { r.Route("/proxy", func(r chi.Router) {
r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests r.HandleFunc("/*", handler.InstanceProxy()) // Proxy all llama.cpp server requests
}) })
}) })
}) })
@@ -117,7 +117,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Public Routes // Public Routes
// Allow llama-cpp server to serve its own WebUI if it is running. // Allow llama-cpp server to serve its own WebUI if it is running.
// Don't auto start the server since it can be accessed without an API key // Don't auto start the server since it can be accessed without an API key
r.Get("/", handler.LlamaCppProxy(false)) r.Get("/", handler.LlamaCppUIProxy())
// Private Routes // Private Routes
r.Group(func(r chi.Router) { r.Group(func(r chi.Router) {
@@ -127,7 +127,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
} }
// This handler auto-starts the server if it's not running // This handler auto-starts the server if it's not running
llamaCppHandler := handler.LlamaCppProxy(true) llamaCppHandler := handler.LlamaCppProxy()
// llama.cpp server specific proxy endpoints // llama.cpp server specific proxy endpoints
r.Get("/props", llamaCppHandler) r.Get("/props", llamaCppHandler)