diff --git a/README.md b/README.md index a2a1e48..2a24520 100644 --- a/README.md +++ b/README.md @@ -2,30 +2,35 @@ ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) -**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.** +**Unified management and routing for llama.cpp and MLX models with web dashboard.** -## Why llamactl? +## Features -🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) -🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name -🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) -🔐 **API Key Authentication**: Separate keys for management vs inference access -📊 **Instance Monitoring**: Health checks, auto-restart, log management -⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits -💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests -💾 **State Persistence**: Ensure instances remain intact across server restarts +### 🚀 Easy Model Management +- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) +- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests +- **State Persistence**: Ensure instances remain intact across server restarts + +### 🔗 Universal Compatibility +- **OpenAI API Compatible**: Drop-in replacement - route requests by model name +- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized) + +### 🌐 User-Friendly Interface +- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) +- **API Key Authentication**: Separate keys for management vs inference access + +### ⚡ Smart Operations +- **Instance Monitoring**: Health checks, auto-restart, log management +- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits ![Dashboard Screenshot](docs/images/dashboard.png) -**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances -**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations -**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management - ## Quick Start ```bash -# 1. Install llama-server (one-time setup) -# See: https://github.com/ggml-org/llama.cpp#quick-start +# 1. Install backend (one-time setup) +# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start +# For MLX on macOS: pip install mlx-lm # 2. Download and run llamactl LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') @@ -42,15 +47,21 @@ llamactl ### Create and manage instances via web dashboard: 1. Open http://localhost:8080 2. Click "Create Instance" -3. Set model path and GPU layers -4. Start or stop the instance +3. Choose backend type (llama.cpp or MLX) +4. Set model path and backend-specific options +5. 
Start or stop the instance ### Or use the REST API: ```bash -# Create instance +# Create llama.cpp instance curl -X POST localhost:8080/api/v1/instances/my-7b-model \ -H "Authorization: Bearer your-key" \ - -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}' + -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}' + +# Create MLX instance (macOS) +curl -X POST localhost:8080/api/v1/instances/my-mlx-model \ + -H "Authorization: Bearer your-key" \ + -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' # Use with OpenAI SDK curl -X POST localhost:8080/v1/chat/completions \ @@ -85,16 +96,31 @@ go build -o llamactl ./cmd/server ## Prerequisites +### Backend Dependencies + +**For llama.cpp backend:** You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed: ```bash -# Quick install methods: # Homebrew (macOS) brew install llama.cpp # Or build from source - see llama.cpp docs ``` +**For MLX backend (macOS only):** +You need MLX-LM installed: + +```bash +# Install via pip (requires Python 3.8+) +pip install mlx-lm + +# Or in a virtual environment (recommended) +python -m venv mlx-env +source mlx-env/bin/activate +pip install mlx-lm +``` + ## Configuration llamactl works out of the box with sensible defaults. @@ -106,6 +132,10 @@ server: allowed_origins: ["*"] # Allowed CORS origins (default: all) enable_swagger: false # Enable Swagger UI for API docs +backends: + llama_executable: llama-server # Path to llama-server executable + mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable + instances: port_range: [8000, 9000] # Port range for instances data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below) @@ -115,7 +145,6 @@ instances: max_instances: -1 # Max instances (-1 = unlimited) max_running_instances: -1 # Max running instances (-1 = unlimited) enable_lru_eviction: true # Enable LRU eviction for idle instances - llama_executable: llama-server # Path to llama-server executable default_auto_restart: true # Auto-restart new instances by default default_max_restarts: 3 # Max restarts for new instances default_restart_delay: 5 # Restart delay (seconds) for new instances diff --git a/cmd/server/main.go b/cmd/server/main.go index 7433c78..e245ebf 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -58,7 +58,7 @@ func main() { } // Initialize the instance manager - instanceManager := manager.NewInstanceManager(cfg.Instances) + instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances) // Create a new handler with the instance manager handler := server.NewHandler(instanceManager, cfg) diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 64b097a..f8003ef 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -19,6 +19,10 @@ server: allowed_origins: ["*"] # Allowed CORS origins (default: all) enable_swagger: false # Enable Swagger UI for API docs +backends: + llama_executable: llama-server # Path to llama-server executable + mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable + instances: port_range: [8000, 9000] # Port range for instances data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below) @@ -28,7 +32,6 @@ instances: max_instances: -1 # Max instances (-1 = unlimited) max_running_instances: -1 # Max running instances (-1 = unlimited) enable_lru_eviction: true # Enable 
LRU eviction for idle instances - llama_executable: llama-server # Path to llama-server executable default_auto_restart: true # Auto-restart new instances by default default_max_restarts: 3 # Max restarts for new instances default_restart_delay: 5 # Restart delay (seconds) for new instances @@ -79,11 +82,23 @@ server: enable_swagger: false # Enable Swagger UI (default: false) ``` -**Environment Variables:** -- `LLAMACTL_HOST` - Server host -- `LLAMACTL_PORT` - Server port -- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins -- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false) +**Environment Variables:** +- `LLAMACTL_HOST` - Server host +- `LLAMACTL_PORT` - Server port +- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins +- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false) + +### Backend Configuration + +```yaml +backends: + llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server") + mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server") +``` + +**Environment Variables:** +- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable +- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable ### Instance Configuration @@ -97,7 +112,6 @@ instances: max_instances: -1 # Maximum instances (-1 = unlimited) max_running_instances: -1 # Maximum running instances (-1 = unlimited) enable_lru_eviction: true # Enable LRU eviction for idle instances - llama_executable: "llama-server" # Path to llama-server executable default_auto_restart: true # Default auto-restart setting default_max_restarts: 3 # Default maximum restart attempts default_restart_delay: 5 # Default restart delay in seconds @@ -113,9 +127,8 @@ instances: - `LLAMACTL_LOGS_DIR` - Log directory path - `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false) - `LLAMACTL_MAX_INSTANCES` - Maximum number of instances -- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances -- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances -- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable +- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances +- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 90f78a8..a3ceae6 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -4,11 +4,14 @@ This guide will walk you through installing Llamactl on your system. ## Prerequisites +### Backend Dependencies + +llamactl supports multiple backends. Install at least one: + +**For llama.cpp backend (all platforms):** + You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed: - -**Quick install methods:** - ```bash # Homebrew (macOS/Linux) brew install llama.cpp @@ -18,6 +21,22 @@ winget install llama.cpp Or build from source - see llama.cpp docs +**For MLX backend (macOS only):** + +MLX provides optimized inference on Apple Silicon. 
Install MLX-LM: + +```bash +# Install via pip (requires Python 3.8+) +pip install mlx-lm + +# Or in a virtual environment (recommended) +python -m venv mlx-env +source mlx-env/bin/activate +pip install mlx-lm +``` + +Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.) + ## Installation Methods ### Option 1: Download Binary (Recommended) diff --git a/docs/index.md b/docs/index.md index d3e7bb9..585363c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,22 +1,23 @@ # Llamactl Documentation -Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.** +Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.** ![Dashboard Screenshot](images/dashboard.png) ## What is Llamactl? -Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management. +Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support. ## Features -🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) -🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name -🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) -🔐 **API Key Authentication**: Separate keys for management vs inference access -📊 **Instance Monitoring**: Health checks, auto-restart, log management -⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits -💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests +🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) +🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name +🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized) +🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) +🔐 **API Key Authentication**: Separate keys for management vs inference access +📊 **Instance Monitoring**: Health checks, auto-restart, log management +⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits +💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests 💾 **State Persistence**: Ensure instances remain intact across server restarts ## Quick Links diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md index 90e4552..186670c 100644 --- a/docs/user-guide/managing-instances.md +++ b/docs/user-guide/managing-instances.md @@ -1,6 +1,6 @@ # Managing Instances -Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API. +Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API. ## Overview @@ -39,40 +39,55 @@ Each instance is displayed as a card showing: 1. Click the **"Create Instance"** button on the dashboard 2. Enter a unique **Name** for your instance (only required field) -3. 
Configure model source (choose one): - - **Model Path**: Full path to your downloaded GGUF model file - - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`) - - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified) -4. Configure optional instance management settings: +3. **Choose Backend Type**: + - **llama.cpp**: For GGUF models using llama-server + - **MLX**: For MLX-optimized models (macOS only) +4. Configure model source: + - **For llama.cpp**: GGUF model path or HuggingFace repo + - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) +5. Configure optional instance management settings: - **Auto Restart**: Automatically restart instance on failure - **Max Restarts**: Maximum number of restart attempts - **Restart Delay**: Delay in seconds between restart attempts - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable) -5. Configure optional llama-server backend options: - - **Threads**: Number of CPU threads to use - - **Context Size**: Context window size (ctx_size) - - **GPU Layers**: Number of layers to offload to GPU - - **Port**: Network port (auto-assigned by llamactl if not specified) - - **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)) -6. Click **"Create"** to save the instance +6. Configure backend-specific options: + - **llama.cpp**: Threads, context size, GPU layers, port, etc. + - **MLX**: Temperature, top-p, adapter path, Python environment, etc. +7. Click **"Create"** to save the instance ### Via API ```bash -# Create instance with local model file -curl -X POST http://localhost:8080/api/instances/my-instance \ +# Create llama.cpp instance with local model file +curl -X POST http://localhost:8080/api/instances/my-llama-instance \ -H "Content-Type: application/json" \ -d '{ "backend_type": "llama_cpp", "backend_options": { "model": "/path/to/model.gguf", "threads": 8, - "ctx_size": 4096 + "ctx_size": 4096, + "gpu_layers": 32 } }' -# Create instance with HuggingFace model +# Create MLX instance (macOS only) +curl -X POST http://localhost:8080/api/instances/my-mlx-instance \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "mlx_lm", + "backend_options": { + "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", + "temp": 0.7, + "top_p": 0.9, + "max_tokens": 2048 + }, + "auto_restart": true, + "max_restarts": 3 + }' + +# Create llama.cpp instance with HuggingFace model curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ -H "Content-Type: application/json" \ -d '{ @@ -81,9 +96,7 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ "hf_repo": "unsloth/gemma-3-27b-it-GGUF", "hf_file": "gemma-3-27b-it-GGUF.gguf", "gpu_layers": 32 - }, - "auto_restart": true, - "max_restarts": 3 + } }' ``` @@ -166,14 +179,16 @@ curl -X DELETE http://localhost:8080/api/instances/{name} ## Instance Proxy -Llamactl proxies all requests to the underlying llama-server instances. +Llamactl proxies all requests to the underlying backend instances (llama-server or MLX). ```bash # Get instance details curl http://localhost:8080/api/instances/{name}/proxy/ ``` -Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information. 
+Both backends provide OpenAI-compatible endpoints. Check the respective documentation: +- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) +- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md) ### Instance Health diff --git a/pkg/backends/backend.go b/pkg/backends/backend.go index c28a2cc..0270945 100644 --- a/pkg/backends/backend.go +++ b/pkg/backends/backend.go @@ -4,4 +4,6 @@ type BackendType string const ( BackendTypeLlamaCpp BackendType = "llama_cpp" + BackendTypeMlxLm BackendType = "mlx_lm" + // BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion ) diff --git a/pkg/backends/mlx/mlx.go b/pkg/backends/mlx/mlx.go new file mode 100644 index 0000000..c3324d2 --- /dev/null +++ b/pkg/backends/mlx/mlx.go @@ -0,0 +1,205 @@ +package mlx + +import ( + "encoding/json" + "reflect" + "strconv" +) + +type MlxServerOptions struct { + // Basic connection options + Model string `json:"model,omitempty"` + Host string `json:"host,omitempty"` + Port int `json:"port,omitempty"` + + // Model and adapter options + AdapterPath string `json:"adapter_path,omitempty"` + DraftModel string `json:"draft_model,omitempty"` + NumDraftTokens int `json:"num_draft_tokens,omitempty"` + TrustRemoteCode bool `json:"trust_remote_code,omitempty"` + + // Logging and templates + LogLevel string `json:"log_level,omitempty"` + ChatTemplate string `json:"chat_template,omitempty"` + UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"` + ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string + + // Sampling defaults + Temp float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature" + TopP float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + MinP float64 `json:"min_p,omitempty"` + MaxTokens int `json:"max_tokens,omitempty"` +} + +// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names +func (o *MlxServerOptions) UnmarshalJSON(data []byte) error { + // First unmarshal into a map to handle multiple field names + var raw map[string]any + if err := json.Unmarshal(data, &raw); err != nil { + return err + } + + // Create a temporary struct for standard unmarshaling + type tempOptions MlxServerOptions + temp := tempOptions{} + + // Standard unmarshal first + if err := json.Unmarshal(data, &temp); err != nil { + return err + } + + // Copy to our struct + *o = MlxServerOptions(temp) + + // Handle alternative field names + fieldMappings := map[string]string{ + // Basic connection options + "m": "model", + "host": "host", + "port": "port", +// "python_path": "python_path", // removed + + // Model and adapter options + "adapter-path": "adapter_path", + "draft-model": "draft_model", + "num-draft-tokens": "num_draft_tokens", + "trust-remote-code": "trust_remote_code", + + // Logging and templates + "log-level": "log_level", + "chat-template": "chat_template", + "use-default-chat-template": "use_default_chat_template", + "chat-template-args": "chat_template_args", + + // Sampling defaults + "temperature": "temp", // Support both temp and temperature + "top-p": "top_p", + "top-k": "top_k", + "min-p": "min_p", + "max-tokens": "max_tokens", + } + + // Process alternative field names + for altName, canonicalName := range fieldMappings { + if value, exists := raw[altName]; exists { + // Use reflection to set the field value + v := reflect.ValueOf(o).Elem() + field := v.FieldByNameFunc(func(fieldName string) bool { + field, _ := v.Type().FieldByName(fieldName) + 
jsonTag := field.Tag.Get("json") + return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName + }) + + if field.IsValid() && field.CanSet() { + switch field.Kind() { + case reflect.Int: + if intVal, ok := value.(float64); ok { + field.SetInt(int64(intVal)) + } else if strVal, ok := value.(string); ok { + if intVal, err := strconv.Atoi(strVal); err == nil { + field.SetInt(int64(intVal)) + } + } + case reflect.Float64: + if floatVal, ok := value.(float64); ok { + field.SetFloat(floatVal) + } else if strVal, ok := value.(string); ok { + if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil { + field.SetFloat(floatVal) + } + } + case reflect.String: + if strVal, ok := value.(string); ok { + field.SetString(strVal) + } + case reflect.Bool: + if boolVal, ok := value.(bool); ok { + field.SetBool(boolVal) + } + } + } + } + } + + return nil +} + +// NewMlxServerOptions creates MlxServerOptions with MLX defaults +func NewMlxServerOptions() *MlxServerOptions { + return &MlxServerOptions{ + Host: "127.0.0.1", // MLX default (different from llama-server) + Port: 8080, // MLX default + NumDraftTokens: 3, // MLX default for speculative decoding + LogLevel: "INFO", // MLX default + Temp: 0.0, // MLX default + TopP: 1.0, // MLX default + TopK: 0, // MLX default (disabled) + MinP: 0.0, // MLX default (disabled) + MaxTokens: 512, // MLX default + ChatTemplateArgs: "{}", // MLX default (empty JSON object) + } +} + +// BuildCommandArgs converts to command line arguments +func (o *MlxServerOptions) BuildCommandArgs() []string { + var args []string + + // Required and basic options + if o.Model != "" { + args = append(args, "--model", o.Model) + } + if o.Host != "" { + args = append(args, "--host", o.Host) + } + if o.Port != 0 { + args = append(args, "--port", strconv.Itoa(o.Port)) + } + + // Model and adapter options + if o.AdapterPath != "" { + args = append(args, "--adapter-path", o.AdapterPath) + } + if o.DraftModel != "" { + args = append(args, "--draft-model", o.DraftModel) + } + if o.NumDraftTokens != 0 { + args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens)) + } + if o.TrustRemoteCode { + args = append(args, "--trust-remote-code") + } + + // Logging and templates + if o.LogLevel != "" { + args = append(args, "--log-level", o.LogLevel) + } + if o.ChatTemplate != "" { + args = append(args, "--chat-template", o.ChatTemplate) + } + if o.UseDefaultChatTemplate { + args = append(args, "--use-default-chat-template") + } + if o.ChatTemplateArgs != "" { + args = append(args, "--chat-template-args", o.ChatTemplateArgs) + } + + // Sampling defaults + if o.Temp != 0 { + args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64)) + } + if o.TopP != 0 { + args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64)) + } + if o.TopK != 0 { + args = append(args, "--top-k", strconv.Itoa(o.TopK)) + } + if o.MinP != 0 { + args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64)) + } + if o.MaxTokens != 0 { + args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens)) + } + + return args +} \ No newline at end of file diff --git a/pkg/backends/mlx/parser.go b/pkg/backends/mlx/parser.go new file mode 100644 index 0000000..96b04a9 --- /dev/null +++ b/pkg/backends/mlx/parser.go @@ -0,0 +1,254 @@ +package mlx + +import ( + "encoding/json" + "fmt" + "path/filepath" + "regexp" + "strconv" + "strings" +) + +// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions +// Supports multiple formats: +// 1. 
Full command: "mlx_lm.server --model model/path" +// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path" +// 3. Args only: "--model model/path --host 0.0.0.0" +// 4. Multiline commands with backslashes +func ParseMlxCommand(command string) (*MlxServerOptions, error) { + // 1. Normalize the command - handle multiline with backslashes + trimmed := normalizeMultilineCommand(command) + if trimmed == "" { + return nil, fmt.Errorf("command cannot be empty") + } + + // 2. Extract arguments from command + args, err := extractArgumentsFromCommand(trimmed) + if err != nil { + return nil, err + } + + // 3. Parse arguments into map + options := make(map[string]any) + + i := 0 + for i < len(args) { + arg := args[i] + + if !strings.HasPrefix(arg, "-") { // skip positional / stray values + i++ + continue + } + + // Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes + if strings.HasPrefix(arg, "---") { + return nil, fmt.Errorf("malformed flag: %s", arg) + } + + // Unified parsing for --flag=value vs --flag value + var rawFlag, rawValue string + hasEquals := false + if strings.Contains(arg, "=") { + parts := strings.SplitN(arg, "=", 2) + rawFlag = parts[0] + rawValue = parts[1] // may be empty string + hasEquals = true + } else { + rawFlag = arg + } + + flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-") + flagName := strings.ReplaceAll(flagCore, "-", "_") + + // Detect value if not in equals form + valueProvided := hasEquals + if !hasEquals { + if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value + rawValue = args[i+1] + valueProvided = true + } + } + + if valueProvided { + // MLX-specific validation for certain flags + if flagName == "log_level" && !isValidLogLevel(rawValue) { + return nil, fmt.Errorf("invalid log level: %s", rawValue) + } + + options[flagName] = parseValue(rawValue) + + // Advance index: if we consumed a following token as value (non equals form), skip it + if !hasEquals && i+1 < len(args) && rawValue == args[i+1] { + i += 2 + } else { + i++ + } + continue + } + + // Boolean flag (no value) - MLX specific boolean flags + if flagName == "trust_remote_code" || flagName == "use_default_chat_template" { + options[flagName] = true + } else { + options[flagName] = true + } + i++ + } + + // 4. Convert to MlxServerOptions using existing UnmarshalJSON + jsonData, err := json.Marshal(options) + if err != nil { + return nil, fmt.Errorf("failed to marshal parsed options: %w", err) + } + + var mlxOptions MlxServerOptions + if err := json.Unmarshal(jsonData, &mlxOptions); err != nil { + return nil, fmt.Errorf("failed to parse command options: %w", err) + } + + // 5. 
Return MlxServerOptions + return &mlxOptions, nil +} + +// isValidLogLevel validates MLX log levels +func isValidLogLevel(level string) bool { + validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"} + for _, valid := range validLevels { + if level == valid { + return true + } + } + return false +} + +// parseValue attempts to parse a string value into the most appropriate type +func parseValue(value string) any { + // Surrounding matching quotes (single or double) + if l := len(value); l >= 2 { + if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') { + value = value[1 : l-1] + } + } + + lower := strings.ToLower(value) + if lower == "true" { + return true + } + if lower == "false" { + return false + } + + if intVal, err := strconv.Atoi(value); err == nil { + return intVal + } + if floatVal, err := strconv.ParseFloat(value, 64); err == nil { + return floatVal + } + return value +} + +// normalizeMultilineCommand handles multiline commands with backslashes +func normalizeMultilineCommand(command string) string { + // Handle escaped newlines (backslash followed by newline) + re := regexp.MustCompile(`\\\s*\n\s*`) + normalized := re.ReplaceAllString(command, " ") + + // Clean up extra whitespace + re = regexp.MustCompile(`\s+`) + normalized = re.ReplaceAllString(normalized, " ") + + return strings.TrimSpace(normalized) +} + +// extractArgumentsFromCommand extracts arguments from various command formats +func extractArgumentsFromCommand(command string) ([]string, error) { + // Split command into tokens respecting quotes + tokens, err := splitCommandTokens(command) + if err != nil { + return nil, err + } + + if len(tokens) == 0 { + return nil, fmt.Errorf("no command tokens found") + } + + // Check if first token looks like an executable + firstToken := tokens[0] + + // Case 1: Full path to executable (contains path separator or ends with mlx_lm.server) + if strings.Contains(firstToken, string(filepath.Separator)) || + strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") { + return tokens[1:], nil // Return everything except the executable + } + + // Case 2: Just "mlx_lm.server" command + if strings.ToLower(firstToken) == "mlx_lm.server" { + return tokens[1:], nil // Return everything except the command + } + + // Case 3: Arguments only (starts with a flag) + if strings.HasPrefix(firstToken, "-") { + return tokens, nil // Return all tokens as arguments + } + + // Case 4: Unknown format - might be a different executable name + // Be permissive and assume it's the executable + return tokens[1:], nil +} + +// splitCommandTokens splits a command string into tokens, respecting quotes +func splitCommandTokens(command string) ([]string, error) { + var tokens []string + var current strings.Builder + inQuotes := false + quoteChar := byte(0) + escaped := false + + for i := 0; i < len(command); i++ { + c := command[i] + + if escaped { + current.WriteByte(c) + escaped = false + continue + } + + if c == '\\' { + escaped = true + current.WriteByte(c) + continue + } + + if !inQuotes && (c == '"' || c == '\'') { + inQuotes = true + quoteChar = c + current.WriteByte(c) + } else if inQuotes && c == quoteChar { + inQuotes = false + quoteChar = 0 + current.WriteByte(c) + } else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') { + if current.Len() > 0 { + tokens = append(tokens, current.String()) + current.Reset() + } + } else { + current.WriteByte(c) + } + } + + if inQuotes { + return nil, fmt.Errorf("unclosed quote in command") + } + + if current.Len() 
> 0 { + tokens = append(tokens, current.String()) + } + + return tokens, nil +} + +// isFlag checks if a string looks like a command line flag +func isFlag(s string) bool { + return strings.HasPrefix(s, "-") +} \ No newline at end of file diff --git a/pkg/config/config.go b/pkg/config/config.go index 5017662..28087db 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -10,9 +10,19 @@ import ( "gopkg.in/yaml.v3" ) +// BackendConfig contains backend executable configurations +type BackendConfig struct { + // Path to llama-server executable (llama.cpp backend) + LlamaExecutable string `yaml:"llama_executable"` + + // Path to mlx_lm executable (MLX-LM backend) + MLXLMExecutable string `yaml:"mlx_lm_executable"` +} + // AppConfig represents the configuration for llamactl type AppConfig struct { Server ServerConfig `yaml:"server"` + Backends BackendConfig `yaml:"backends"` Instances InstancesConfig `yaml:"instances"` Auth AuthConfig `yaml:"auth"` Version string `yaml:"-"` @@ -61,9 +71,6 @@ type InstancesConfig struct { // Enable LRU eviction for instance logs EnableLRUEviction bool `yaml:"enable_lru_eviction"` - // Path to llama-server executable - LlamaExecutable string `yaml:"llama_executable"` - // Default auto-restart setting for new instances DefaultAutoRestart bool `yaml:"default_auto_restart"` @@ -112,6 +119,10 @@ func LoadConfig(configPath string) (AppConfig, error) { AllowedOrigins: []string{"*"}, // Default to allow all origins EnableSwagger: false, }, + Backends: BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + }, Instances: InstancesConfig{ PortRange: [2]int{8000, 9000}, DataDir: getDefaultDataDirectory(), @@ -121,7 +132,6 @@ func LoadConfig(configPath string) (AppConfig, error) { MaxInstances: -1, // -1 means unlimited MaxRunningInstances: -1, // -1 means unlimited EnableLRUEviction: true, - LlamaExecutable: "llama-server", DefaultAutoRestart: true, DefaultMaxRestarts: 3, DefaultRestartDelay: 5, @@ -229,8 +239,12 @@ func loadEnvVars(cfg *AppConfig) { cfg.Instances.EnableLRUEviction = b } } + // Backend config if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" { - cfg.Instances.LlamaExecutable = llamaExec + cfg.Backends.LlamaExecutable = llamaExec + } + if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" { + cfg.Backends.MLXLMExecutable = mlxLMExec } if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" { if b, err := strconv.ParseBool(autoRestart); err == nil { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 2596dac..ed95429 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) { if cfg.Instances.MaxInstances != -1 { t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances) } - if cfg.Instances.LlamaExecutable != "llama-server" { - t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable) - } if !cfg.Instances.DefaultAutoRestart { t.Error("Expected default auto restart to be true") } @@ -101,9 +98,6 @@ instances: if cfg.Instances.MaxInstances != 5 { t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances) } - if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" { - t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable) - } if cfg.Instances.DefaultAutoRestart { t.Error("Expected auto restart to be false") } @@ -156,8 +150,8 @@ func 
TestLoadConfig_EnvironmentOverrides(t *testing.T) { if cfg.Instances.MaxInstances != 20 { t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances) } - if cfg.Instances.LlamaExecutable != "/env/llama-server" { - t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Instances.LlamaExecutable) + if cfg.Backends.LlamaExecutable != "/env/llama-server" { + t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable) } if cfg.Instances.DefaultAutoRestart { t.Error("Expected auto restart to be false") diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index fc5089c..c0e5060 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -31,9 +31,10 @@ func (realTimeProvider) Now() time.Time { // Process represents a running instance of the llama server type Process struct { - Name string `json:"name"` - options *CreateInstanceOptions `json:"-"` - globalSettings *config.InstancesConfig + Name string `json:"name"` + options *CreateInstanceOptions `json:"-"` + globalInstanceSettings *config.InstancesConfig + globalBackendSettings *config.BackendConfig // Status Status InstanceStatus `json:"status"` @@ -65,22 +66,23 @@ type Process struct { } // NewInstance creates a new instance with the given name, log path, and options -func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process { +func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process { // Validate and copy options - options.ValidateAndApplyDefaults(name, globalSettings) + options.ValidateAndApplyDefaults(name, globalInstanceSettings) // Create the instance logger - logger := NewInstanceLogger(name, globalSettings.LogsDir) + logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir) return &Process{ - Name: name, - options: options, - globalSettings: globalSettings, - logger: logger, - timeProvider: realTimeProvider{}, - Created: time.Now().Unix(), - Status: Stopped, - onStatusChange: onStatusChange, + Name: name, + options: options, + globalInstanceSettings: globalInstanceSettings, + globalBackendSettings: globalBackendSettings, + logger: logger, + timeProvider: realTimeProvider{}, + Created: time.Now().Unix(), + Status: Stopped, + onStatusChange: onStatusChange, } } @@ -96,7 +98,13 @@ func (i *Process) GetPort() int { if i.options != nil { switch i.options.BackendType { case backends.BackendTypeLlamaCpp: - return i.options.LlamaServerOptions.Port + if i.options.LlamaServerOptions != nil { + return i.options.LlamaServerOptions.Port + } + case backends.BackendTypeMlxLm: + if i.options.MlxServerOptions != nil { + return i.options.MlxServerOptions.Port + } } } return 0 @@ -108,7 +116,13 @@ func (i *Process) GetHost() string { if i.options != nil { switch i.options.BackendType { case backends.BackendTypeLlamaCpp: - return i.options.LlamaServerOptions.Host + if i.options.LlamaServerOptions != nil { + return i.options.LlamaServerOptions.Host + } + case backends.BackendTypeMlxLm: + if i.options.MlxServerOptions != nil { + return i.options.MlxServerOptions.Host + } } } return "" @@ -124,7 +138,7 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) { } // Validate and copy options - options.ValidateAndApplyDefaults(i.Name, i.globalSettings) + options.ValidateAndApplyDefaults(i.Name, 
i.globalInstanceSettings) i.options = options // Clear the proxy so it gets recreated with new options @@ -153,8 +167,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) { var port int switch i.options.BackendType { case backends.BackendTypeLlamaCpp: - host = i.options.LlamaServerOptions.Host - port = i.options.LlamaServerOptions.Port + if i.options.LlamaServerOptions != nil { + host = i.options.LlamaServerOptions.Host + port = i.options.LlamaServerOptions.Port + } + case backends.BackendTypeMlxLm: + if i.options.MlxServerOptions != nil { + host = i.options.MlxServerOptions.Host + port = i.options.MlxServerOptions.Port + } } targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port)) @@ -215,7 +236,7 @@ func (i *Process) UnmarshalJSON(data []byte) error { // Handle options with validation and defaults if aux.Options != nil { - aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings) + aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings) i.options = aux.Options } diff --git a/pkg/instance/instance_test.go b/pkg/instance/instance_test.go index aa916b9..9ce2d61 100644 --- a/pkg/instance/instance_test.go +++ b/pkg/instance/instance_test.go @@ -11,6 +11,11 @@ import ( ) func TestNewInstance(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", DefaultAutoRestart: true, @@ -29,7 +34,7 @@ func TestNewInstance(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) if inst.Name != "test-instance" { t.Errorf("Expected name 'test-instance', got %q", inst.Name) @@ -60,6 +65,11 @@ func TestNewInstance(t *testing.T) { } func TestNewInstance_WithRestartOptions(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", DefaultAutoRestart: true, @@ -85,7 +95,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) opts := instance.GetOptions() // Check that explicit values override defaults @@ -101,6 +111,11 @@ func TestNewInstance_WithRestartOptions(t *testing.T) { } func TestSetOptions(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", DefaultAutoRestart: true, @@ -119,7 +134,7 @@ func TestSetOptions(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange) // Update options newOptions := &instance.CreateInstanceOptions{ @@ -147,6 +162,11 @@ func TestSetOptions(t *testing.T) 
{ } func TestGetProxy(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -162,7 +182,7 @@ func TestGetProxy(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) // Get proxy for the first time proxy1, err := inst.GetProxy() @@ -184,6 +204,11 @@ func TestGetProxy(t *testing.T) { } func TestMarshalJSON(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", DefaultAutoRestart: true, @@ -202,7 +227,7 @@ func TestMarshalJSON(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) data, err := json.Marshal(instance) if err != nil { @@ -338,6 +363,11 @@ func TestCreateInstanceOptionsValidation(t *testing.T) { }, } + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -356,7 +386,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange) + instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange) opts := instance.GetOptions() if opts.MaxRestarts == nil { diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 28a65b9..04c5fba 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -9,6 +9,8 @@ import ( "runtime" "syscall" "time" + + "llamactl/pkg/backends" ) // Start starts the llama server instance and returns an error if it fails. @@ -41,7 +43,20 @@ func (i *Process) Start() error { args := i.options.BuildCommandArgs() i.ctx, i.cancel = context.WithCancel(context.Background()) - i.cmd = exec.CommandContext(i.ctx, "llama-server", args...) + + var executable string + + // Get executable from global configuration + switch i.options.BackendType { + case backends.BackendTypeLlamaCpp: + executable = i.globalBackendSettings.LlamaExecutable + case backends.BackendTypeMlxLm: + executable = i.globalBackendSettings.MLXLMExecutable + default: + return fmt.Errorf("unsupported backend type: %s", i.options.BackendType) + } + + i.cmd = exec.CommandContext(i.ctx, executable, args...) 
if runtime.GOOS != "windows" { setProcAttrs(i.cmd) @@ -175,9 +190,16 @@ func (i *Process) WaitForHealthy(timeout int) error { var host string var port int switch opts.BackendType { - case "llama-cpp": - host = opts.LlamaServerOptions.Host - port = opts.LlamaServerOptions.Port + case backends.BackendTypeLlamaCpp: + if opts.LlamaServerOptions != nil { + host = opts.LlamaServerOptions.Host + port = opts.LlamaServerOptions.Port + } + case backends.BackendTypeMlxLm: + if opts.MlxServerOptions != nil { + host = opts.MlxServerOptions.Host + port = opts.MlxServerOptions.Port + } } if host == "" { host = "localhost" diff --git a/pkg/instance/options.go b/pkg/instance/options.go index b9a2cca..2b1437f 100644 --- a/pkg/instance/options.go +++ b/pkg/instance/options.go @@ -5,6 +5,7 @@ import ( "fmt" "llamactl/pkg/backends" "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/backends/mlx" "llamactl/pkg/config" "log" ) @@ -22,8 +23,9 @@ type CreateInstanceOptions struct { BackendType backends.BackendType `json:"backend_type"` BackendOptions map[string]any `json:"backend_options,omitempty"` - // LlamaServerOptions contains the options for the llama server + // Backend-specific options LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"` + MlxServerOptions *mlx.MlxServerOptions `json:"-"` } // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions @@ -55,6 +57,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error { return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err) } } + case backends.BackendTypeMlxLm: + if c.BackendOptions != nil { + optionsData, err := json.Marshal(c.BackendOptions) + if err != nil { + return fmt.Errorf("failed to marshal backend options: %w", err) + } + + c.MlxServerOptions = &mlx.MlxServerOptions{} + if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil { + return fmt.Errorf("failed to unmarshal MLX options: %w", err) + } + } default: return fmt.Errorf("unknown backend type: %s", c.BackendType) } @@ -72,19 +86,36 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) { Alias: (*Alias)(c), } - // Convert LlamaServerOptions back to BackendOptions map for JSON - if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil { - data, err := json.Marshal(c.LlamaServerOptions) - if err != nil { - return nil, fmt.Errorf("failed to marshal llama server options: %w", err) - } + // Convert backend-specific options back to BackendOptions map for JSON + switch c.BackendType { + case backends.BackendTypeLlamaCpp: + if c.LlamaServerOptions != nil { + data, err := json.Marshal(c.LlamaServerOptions) + if err != nil { + return nil, fmt.Errorf("failed to marshal llama server options: %w", err) + } - var backendOpts map[string]any - if err := json.Unmarshal(data, &backendOpts); err != nil { - return nil, fmt.Errorf("failed to unmarshal to map: %w", err) - } + var backendOpts map[string]any + if err := json.Unmarshal(data, &backendOpts); err != nil { + return nil, fmt.Errorf("failed to unmarshal to map: %w", err) + } - aux.BackendOptions = backendOpts + aux.BackendOptions = backendOpts + } + case backends.BackendTypeMlxLm: + if c.MlxServerOptions != nil { + data, err := json.Marshal(c.MlxServerOptions) + if err != nil { + return nil, fmt.Errorf("failed to marshal MLX server options: %w", err) + } + + var backendOpts map[string]any + if err := json.Unmarshal(data, &backendOpts); err != nil { + return nil, fmt.Errorf("failed to unmarshal to map: %w", err) + } + + aux.BackendOptions = backendOpts + } } 
return json.Marshal(aux) @@ -136,6 +167,10 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string { if c.LlamaServerOptions != nil { return c.LlamaServerOptions.BuildCommandArgs() } + case backends.BackendTypeMlxLm: + if c.MlxServerOptions != nil { + return c.MlxServerOptions.BuildCommandArgs() + } } return []string{} } diff --git a/pkg/instance/timeout_test.go b/pkg/instance/timeout_test.go index c791bfb..21e3584 100644 --- a/pkg/instance/timeout_test.go +++ b/pkg/instance/timeout_test.go @@ -33,6 +33,11 @@ func (m *MockTimeProvider) SetTime(t time.Time) { // Timeout-related tests func TestUpdateLastRequestTime(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -47,13 +52,18 @@ func TestUpdateLastRequestTime(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) // Test that UpdateLastRequestTime doesn't panic inst.UpdateLastRequestTime() } func TestShouldTimeout_NotRunning(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -70,7 +80,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) // Instance is not running, should not timeout regardless of configuration if inst.ShouldTimeout() { @@ -79,6 +89,11 @@ func TestShouldTimeout_NotRunning(t *testing.T) { } func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -105,7 +120,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { }, } - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) // Simulate running state inst.SetStatus(instance.Running) @@ -117,6 +132,11 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { } func TestShouldTimeout_WithinTimeLimit(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -133,7 +153,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst.SetStatus(instance.Running) // Update last request time to now @@ -146,6 +166,11 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) { } func TestShouldTimeout_ExceedsTimeLimit(t 
*testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -162,7 +187,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst.SetStatus(instance.Running) // Use MockTimeProvider to simulate old last request time @@ -181,6 +206,11 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { } func TestTimeoutConfiguration_Validation(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + globalSettings := &config.InstancesConfig{ LogsDir: "/tmp/test", } @@ -209,7 +239,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) { // Mock onStatusChange function mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} - inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange) + inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) opts := inst.GetOptions() if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout { diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 80652a8..6999643 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -35,6 +35,7 @@ type instanceManager struct { runningInstances map[string]struct{} ports map[int]bool instancesConfig config.InstancesConfig + backendsConfig config.BackendConfig // Timeout checker timeoutChecker *time.Ticker @@ -44,7 +45,7 @@ type instanceManager struct { } // NewInstanceManager creates a new instance of InstanceManager. 
-func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager { +func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager { if instancesConfig.TimeoutCheckInterval <= 0 { instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set } @@ -53,6 +54,7 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager runningInstances: make(map[string]struct{}), ports: make(map[int]bool), instancesConfig: instancesConfig, + backendsConfig: backendsConfig, timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), shutdownChan: make(chan struct{}), @@ -241,7 +243,7 @@ func (im *instanceManager) loadInstance(name, path string) error { } // Create new inst using NewInstance (handles validation, defaults, setup) - inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback) + inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback) // Restore persisted fields that NewInstance doesn't set inst.Created = persistedInstance.Created diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index c332739..e022c5f 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -15,18 +15,22 @@ import ( ) func TestNewInstanceManager(t *testing.T) { + backendConfig := config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, LogsDir: "/tmp/test", MaxInstances: 5, - LlamaExecutable: "llama-server", DefaultAutoRestart: true, DefaultMaxRestarts: 3, DefaultRestartDelay: 5, TimeoutCheckInterval: 5, } - mgr := manager.NewInstanceManager(cfg) + mgr := manager.NewInstanceManager(backendConfig, cfg) if mgr == nil { t.Fatal("NewInstanceManager returned nil") } @@ -44,6 +48,11 @@ func TestNewInstanceManager(t *testing.T) { func TestPersistence(t *testing.T) { tempDir := t.TempDir() + backendConfig := config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, InstancesDir: tempDir, @@ -52,7 +61,7 @@ func TestPersistence(t *testing.T) { } // Test instance persistence on creation - manager1 := manager.NewInstanceManager(cfg) + manager1 := manager.NewInstanceManager(backendConfig, cfg) options := &instance.CreateInstanceOptions{ BackendType: backends.BackendTypeLlamaCpp, LlamaServerOptions: &llamacpp.LlamaServerOptions{ @@ -73,7 +82,7 @@ func TestPersistence(t *testing.T) { } // Test loading instances from disk - manager2 := manager.NewInstanceManager(cfg) + manager2 := manager.NewInstanceManager(backendConfig, cfg) instances, err := manager2.ListInstances() if err != nil { t.Fatalf("ListInstances failed: %v", err) @@ -172,15 +181,19 @@ func TestShutdown(t *testing.T) { // Helper function to create a test manager with standard config func createTestManager() manager.InstanceManager { + backendConfig := config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } + cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, LogsDir: "/tmp/test", MaxInstances: 10, - LlamaExecutable: "llama-server", DefaultAutoRestart: true, DefaultMaxRestarts: 3, DefaultRestartDelay: 5, TimeoutCheckInterval: 5, } - return manager.NewInstanceManager(cfg) + return manager.NewInstanceManager(backendConfig, cfg) } diff 
--git a/pkg/manager/operations.go b/pkg/manager/operations.go index 6f65680..1354481 100644 --- a/pkg/manager/operations.go +++ b/pkg/manager/operations.go @@ -62,7 +62,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI im.onStatusChange(name, oldStatus, newStatus) } - inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback) + inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback) im.instances[inst.Name] = inst if err := im.persistInstance(inst); err != nil { @@ -260,6 +260,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp if options.LlamaServerOptions != nil { return options.LlamaServerOptions.Port } + case backends.BackendTypeMlxLm: + if options.MlxServerOptions != nil { + return options.MlxServerOptions.Port + } } return 0 } @@ -271,6 +275,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti if options.LlamaServerOptions != nil { options.LlamaServerOptions.Port = port } + case backends.BackendTypeMlxLm: + if options.MlxServerOptions != nil { + options.MlxServerOptions.Port = port + } } } diff --git a/pkg/manager/operations_test.go b/pkg/manager/operations_test.go index 7dd4889..87c37d4 100644 --- a/pkg/manager/operations_test.go +++ b/pkg/manager/operations_test.go @@ -62,12 +62,16 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) { } // Test max instances limit + backendConfig := config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, MaxInstances: 1, // Very low limit for testing TimeoutCheckInterval: 5, } - limitedManager := manager.NewInstanceManager(cfg) + limitedManager := manager.NewInstanceManager(backendConfig, cfg) _, err = limitedManager.CreateInstance("instance1", options) if err != nil { diff --git a/pkg/manager/timeout_test.go b/pkg/manager/timeout_test.go index 23143d2..91b3ad7 100644 --- a/pkg/manager/timeout_test.go +++ b/pkg/manager/timeout_test.go @@ -13,13 +13,17 @@ import ( func TestTimeoutFunctionality(t *testing.T) { // Test timeout checker initialization + backendConfig := config.BackendConfig{ + LlamaExecutable: "llama-server", + MLXLMExecutable: "mlx_lm.server", + } cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, TimeoutCheckInterval: 10, MaxInstances: 5, } - manager := manager.NewInstanceManager(cfg) + manager := manager.NewInstanceManager(backendConfig, cfg) if manager == nil { t.Fatal("Manager should be initialized with timeout checker") } diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index e5e2eb5..c4932b2 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -7,6 +7,7 @@ import ( "io" "llamactl/pkg/backends" "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/backends/mlx" "llamactl/pkg/config" "llamactl/pkg/instance" "llamactl/pkg/manager" @@ -684,3 +685,57 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc { } } } + +// ParseMlxCommand godoc +// @Summary Parse mlx_lm.server command +// @Description Parses MLX-LM server command string into instance options +// @Tags backends +// @Security ApiKeyAuth +// @Accept json +// @Produce json +// @Param request body ParseCommandRequest true "Command to parse" +// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" +// @Failure 400 {object} map[string]string "Invalid request or command" +// @Router /backends/mlx/parse-command [post] +func (h *Handler) 
ParseMlxCommand() http.HandlerFunc { + type errorResponse struct { + Error string `json:"error"` + Details string `json:"details,omitempty"` + } + writeError := func(w http.ResponseWriter, status int, code, details string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) + } + return func(w http.ResponseWriter, r *http.Request) { + var req ParseCommandRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") + return + } + + if strings.TrimSpace(req.Command) == "" { + writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") + return + } + + mlxOptions, err := mlx.ParseMlxCommand(req.Command) + if err != nil { + writeError(w, http.StatusBadRequest, "parse_error", err.Error()) + return + } + + // Currently only support mlx_lm backend type + backendType := backends.BackendTypeMlxLm + + options := &instance.CreateInstanceOptions{ + BackendType: backendType, + MlxServerOptions: mlxOptions, + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(options); err != nil { + writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) + } + } +} diff --git a/pkg/server/routes.go b/pkg/server/routes.go index 0f2568d..aa31e1f 100644 --- a/pkg/server/routes.go +++ b/pkg/server/routes.go @@ -55,6 +55,9 @@ func SetupRouter(handler *Handler) *chi.Mux { r.Route("/llama-cpp", func(r chi.Router) { r.Post("/parse-command", handler.ParseLlamaCommand()) }) + r.Route("/mlx", func(r chi.Router) { + r.Post("/parse-command", handler.ParseMlxCommand()) + }) }) // Instance management endpoints diff --git a/pkg/validation/validation.go b/pkg/validation/validation.go index 77873ca..eff1dd3 100644 --- a/pkg/validation/validation.go +++ b/pkg/validation/validation.go @@ -44,6 +44,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error { switch options.BackendType { case backends.BackendTypeLlamaCpp: return validateLlamaCppOptions(options) + case backends.BackendTypeMlxLm: + return validateMlxOptions(options) default: return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType)) } @@ -68,6 +70,24 @@ func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error { return nil } +// validateMlxOptions validates MLX backend specific options +func validateMlxOptions(options *instance.CreateInstanceOptions) error { + if options.MlxServerOptions == nil { + return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend")) + } + + if err := validateStructStrings(options.MlxServerOptions, ""); err != nil { + return err + } + + // Basic network validation for port + if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 { + return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port)) + } + + return nil +} + // validateStructStrings recursively validates all string fields in a struct func validateStructStrings(v any, fieldPath string) error { val := reflect.ValueOf(v) diff --git a/webui/src/components/BackendFormField.tsx b/webui/src/components/BackendFormField.tsx index a210626..3dd7af0 100644 --- a/webui/src/components/BackendFormField.tsx +++ b/webui/src/components/BackendFormField.tsx @@ -2,11 +2,10 @@ import React from 'react' import { Input } from '@/components/ui/input' import { Label } from '@/components/ui/label' 
import { Checkbox } from '@/components/ui/checkbox' -import type { BackendOptions } from '@/schemas/instanceOptions' import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils' interface BackendFormFieldProps { - fieldKey: keyof BackendOptions + fieldKey: string value: string | number | boolean | string[] | undefined onChange: (key: string, value: string | number | boolean | string[] | undefined) => void } diff --git a/webui/src/components/InstanceDialog.tsx b/webui/src/components/InstanceDialog.tsx index cc4b7e4..919ef52 100644 --- a/webui/src/components/InstanceDialog.tsx +++ b/webui/src/components/InstanceDialog.tsx @@ -41,8 +41,8 @@ const InstanceDialog: React.FC = ({ // Get field lists dynamically from the type const basicFields = getBasicFields(); const advancedFields = getAdvancedFields(); - const basicBackendFields = getBasicBackendFields(); - const advancedBackendFields = getAdvancedBackendFields(); + const basicBackendFields = getBasicBackendFields(formData.backend_type); + const advancedBackendFields = getAdvancedBackendFields(formData.backend_type); // Reset form when dialog opens/closes or when instance changes useEffect(() => { @@ -66,10 +66,21 @@ const InstanceDialog: React.FC = ({ }, [open, instance]); const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => { - setFormData((prev) => ({ - ...prev, - [key]: value, - })); + setFormData((prev) => { + // If backend_type is changing, clear backend_options + if (key === 'backend_type' && prev.backend_type !== value) { + return { + ...prev, + [key]: value, + backend_options: {}, // Clear backend options when backend type changes + }; + } + + return { + ...prev, + [key]: value, + }; + }); }; const handleBackendFieldChange = (key: string, value: any) => { @@ -78,7 +89,7 @@ const InstanceDialog: React.FC = ({ backend_options: { ...prev.backend_options, [key]: value, - }, + } as any, })); }; @@ -260,7 +271,7 @@ const InstanceDialog: React.FC = ({ ))} @@ -345,7 +356,7 @@ const InstanceDialog: React.FC = ({ ))} diff --git a/webui/src/components/ZodFormField.tsx b/webui/src/components/ZodFormField.tsx index f1ab226..64832b0 100644 --- a/webui/src/components/ZodFormField.tsx +++ b/webui/src/components/ZodFormField.tsx @@ -2,8 +2,7 @@ import React from 'react' import { Input } from '@/components/ui/input' import { Label } from '@/components/ui/label' import { Checkbox } from '@/components/ui/checkbox' -import type { CreateInstanceOptions } from '@/types/instance' -import { BackendType } from '@/types/instance' +import { BackendType, type CreateInstanceOptions } from '@/types/instance' import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils' interface ZodFormFieldProps { @@ -39,7 +38,7 @@ const ZodFormField: React.FC = ({ fieldKey, value, onChange } className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50" > - {/* Add more backend types here as they become available */} + {config.description && (

{config.description}
)}
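For context, here is a minimal TypeScript sketch of the dialog behaviour these webui changes introduce: switching `backend_type` clears `backend_options` so llama.cpp flags never leak into an MLX instance, and the visible field lists are derived from the selected backend. The `useInstanceFormSketch` hook below is illustrative only, not part of this diff.

```typescript
import { useState } from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'

// Illustrative hook mirroring InstanceDialog's state handling after this change.
export function useInstanceFormSketch() {
  const [formData, setFormData] = useState<CreateInstanceOptions>({
    backend_type: BackendType.LLAMA_CPP,
    backend_options: {},
  })

  const handleFieldChange = (key: keyof CreateInstanceOptions, value: unknown) => {
    setFormData((prev) => {
      // Changing the backend type invalidates any backend-specific options.
      if (key === 'backend_type' && prev.backend_type !== value) {
        return {
          ...prev,
          backend_type: value as CreateInstanceOptions['backend_type'],
          backend_options: {},
        }
      }
      return { ...prev, [key]: value } as CreateInstanceOptions
    })
  }

  // Field lists are now backend-aware instead of a single flat list.
  const basicBackendFields = getBasicBackendFields(formData.backend_type)
  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)

  return { formData, handleFieldChange, basicBackendFields, advancedBackendFields }
}
```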
diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts index 608769f..05c1f27 100644 --- a/webui/src/lib/api.ts +++ b/webui/src/lib/api.ts @@ -93,6 +93,14 @@ export const backendsApi = { body: JSON.stringify({ command }), }), }, + mlx: { + // POST /backends/mlx/parse-command + parseCommand: (command: string) => + apiCall('/backends/mlx/parse-command', { + method: 'POST', + body: JSON.stringify({ command }), + }), + }, }; // Instance API functions diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts index 6700e74..7bde5a9 100644 --- a/webui/src/lib/zodFormUtils.ts +++ b/webui/src/lib/zodFormUtils.ts @@ -1,4 +1,15 @@ -import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions' +import { + type CreateInstanceOptions, + type LlamaCppBackendOptions, + type MlxBackendOptions, + LlamaCppBackendOptionsSchema, + MlxBackendOptionsSchema, + getAllFieldKeys, + getAllLlamaCppFieldKeys, + getAllMlxFieldKeys, + getLlamaCppFieldType, + getMlxFieldType +} from '@/schemas/instanceOptions' // Instance-level basic fields (not backend-specific) export const basicFieldsConfig: Record = { + model: { + label: 'Model', + placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', + description: 'The path to the MLX model weights, tokenizer, and config', + required: true + }, + temp: { + label: 'Temperature', + placeholder: '0.0', + description: 'Default sampling temperature (default: 0.0)' + }, + top_p: { + label: 'Top-P', + placeholder: '1.0', + description: 'Default nucleus sampling top-p (default: 1.0)' + }, + top_k: { + label: 'Top-K', + placeholder: '0', + description: 'Default top-k sampling (default: 0, disables top-k)' + }, + min_p: { + label: 'Min-P', + placeholder: '0.0', + description: 'Default min-p sampling (default: 0.0, disables min-p)' + }, + max_tokens: { + label: 'Max Tokens', + placeholder: '512', + description: 'Default maximum number of tokens to generate (default: 512)' + } +} + +function isBasicField(key: keyof CreateInstanceOptions): boolean { return key in basicFieldsConfig } -export function isBasicBackendField(key: keyof BackendOptions): boolean { - return key in basicBackendFieldsConfig -} export function getBasicFields(): (keyof CreateInstanceOptions)[] { return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[] @@ -81,13 +130,61 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] { return getAllFieldKeys().filter(key => !isBasicField(key)) } -export function getBasicBackendFields(): (keyof BackendOptions)[] { - return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[] + +export function getBasicBackendFields(backendType?: string): string[] { + if (backendType === 'mlx_lm') { + return Object.keys(basicMlxFieldsConfig) + } else if (backendType === 'llama_cpp') { + return Object.keys(basicLlamaCppFieldsConfig) + } + // Default to LlamaCpp for backward compatibility + return Object.keys(basicLlamaCppFieldsConfig) } -export function getAdvancedBackendFields(): (keyof BackendOptions)[] { - return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key)) +export function getAdvancedBackendFields(backendType?: string): string[] { + if (backendType === 'mlx_lm') { + return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig)) + } else if (backendType === 'llama_cpp') { + return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig)) + } + // Default to LlamaCpp for backward compatibility + return 
getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig)) +} + +// Combined backend fields config for use in BackendFormField +export const basicBackendFieldsConfig: Record = { + ...basicLlamaCppFieldsConfig, + ...basicMlxFieldsConfig +} + +// Get field type for any backend option (union type) +export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' { + // Try to get type from LlamaCpp schema first + try { + if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) { + return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions) + } + } catch { + // Schema might not be available + } + + // Try MLX schema + try { + if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) { + return getMlxFieldType(key as keyof MlxBackendOptions) + } + } catch { + // Schema might not be available + } + + // Default fallback + return 'text' } // Re-export the Zod-based functions -export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions' \ No newline at end of file +export { getFieldType } from '@/schemas/instanceOptions' \ No newline at end of file diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts index cd422c5..7fa0dcc 100644 --- a/webui/src/schemas/instanceOptions.ts +++ b/webui/src/schemas/instanceOptions.ts @@ -1,8 +1,8 @@ import { BackendType } from '@/types/instance' import { z } from 'zod' -// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema) -export const BackendOptionsSchema = z.object({ +// Define the LlamaCpp backend options schema +export const LlamaCppBackendOptionsSchema = z.object({ // Common params verbose_prompt: z.boolean().optional(), threads: z.number().optional(), @@ -170,6 +170,39 @@ export const BackendOptionsSchema = z.object({ fim_qwen_14b_spec: z.boolean().optional(), }) +// Define the MLX backend options schema +export const MlxBackendOptionsSchema = z.object({ + // Basic connection options + model: z.string().optional(), + host: z.string().optional(), + port: z.number().optional(), + + // Model and adapter options + adapter_path: z.string().optional(), + draft_model: z.string().optional(), + num_draft_tokens: z.number().optional(), + trust_remote_code: z.boolean().optional(), + + // Logging and templates + log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(), + chat_template: z.string().optional(), + use_default_chat_template: z.boolean().optional(), + chat_template_args: z.string().optional(), // JSON string + + // Sampling defaults + temp: z.number().optional(), // Note: MLX uses "temp" not "temperature" + top_p: z.number().optional(), + top_k: z.number().optional(), + min_p: z.number().optional(), + max_tokens: z.number().optional(), +}) + +// Backend options union +export const BackendOptionsSchema = z.union([ + LlamaCppBackendOptionsSchema, + MlxBackendOptionsSchema, +]) + // Define the main create instance options schema export const CreateInstanceOptionsSchema = z.object({ // Restart options @@ -180,11 +213,13 @@ export const CreateInstanceOptionsSchema = z.object({ on_demand_start: z.boolean().optional(), // Backend configuration - backend_type: z.enum([BackendType.LLAMA_CPP]).optional(), + backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(), backend_options: BackendOptionsSchema.optional(), }) // Infer the TypeScript types from the schemas +export type LlamaCppBackendOptions = z.infer +export type MlxBackendOptions = z.infer 
export type BackendOptions = z.infer export type CreateInstanceOptions = z.infer @@ -193,9 +228,14 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] { return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[] } -// Helper to get all backend option field keys -export function getAllBackendFieldKeys(): (keyof BackendOptions)[] { - return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[] +// Helper to get all LlamaCpp backend option field keys +export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] { + return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[] +} + +// Helper to get all MLX backend option field keys +export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] { + return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[] } // Get field type from Zod schema @@ -213,9 +253,9 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number return 'text' // ZodString and others default to text } -// Get field type for backend options -export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' { - const fieldSchema = BackendOptionsSchema.shape[key] +// Get field type for LlamaCpp backend options +export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' { + const fieldSchema = LlamaCppBackendOptionsSchema.shape[key] if (!fieldSchema) return 'text' // Handle ZodOptional wrapper @@ -225,4 +265,19 @@ export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number if (innerSchema instanceof z.ZodNumber) return 'number' if (innerSchema instanceof z.ZodArray) return 'array' return 'text' // ZodString and others default to text +} + +// Get field type for MLX backend options +export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' { + const fieldSchema = MlxBackendOptionsSchema.shape[key] + if (!fieldSchema) return 'text' + + // Handle ZodOptional wrapper + const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema + + if (innerSchema instanceof z.ZodBoolean) return 'boolean' + if (innerSchema instanceof z.ZodNumber) return 'number' + if (innerSchema instanceof z.ZodArray) return 'array' + if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select + return 'text' // ZodString and others default to text } \ No newline at end of file diff --git a/webui/src/types/instance.ts b/webui/src/types/instance.ts index bffb321..869f835 100644 --- a/webui/src/types/instance.ts +++ b/webui/src/types/instance.ts @@ -3,7 +3,9 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions' export { type CreateInstanceOptions } from '@/schemas/instanceOptions' export const BackendType = { - LLAMA_CPP: 'llama_cpp' + LLAMA_CPP: 'llama_cpp', + MLX_LM: 'mlx_lm', + // MLX_VLM: 'mlx_vlm', // Future expansion } as const export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
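Taken together with the new `/backends/mlx/parse-command` endpoint and the `backendsApi.mlx.parseCommand` client added above, the webui can import a pasted `mlx_lm.server` command directly. A minimal sketch, assuming the exports shown in this diff (the `importMlxCommand` helper itself is illustrative, not part of the change):

```typescript
import { backendsApi } from '@/lib/api'
import { CreateInstanceOptionsSchema, type CreateInstanceOptions } from '@/schemas/instanceOptions'

// Parse a pasted `mlx_lm.server ...` command into validated instance options.
// The server replies with { backend_type: "mlx_lm", backend_options: { ... } },
// which is checked against the Zod union schema before being loaded into the form.
export async function importMlxCommand(command: string): Promise<CreateInstanceOptions> {
  const parsed = await backendsApi.mlx.parseCommand(command)
  return CreateInstanceOptionsSchema.parse(parsed)
}

// Example usage with a model already referenced in this PR:
// await importMlxCommand('mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081')
```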