Merge pull request #32 from lordmathis/feat/mlx-backend

feat: Implement mlx-lm backend
Committed by GitHub on 2025-09-18 20:34:04 +02:00
31 changed files with 1140 additions and 176 deletions


@@ -2,30 +2,35 @@
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
**Unified management and routing for llama.cpp and MLX models with web dashboard.**
## Why llamactl?
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Instance configurations are saved and restored across server restarts
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
![Dashboard Screenshot](docs/images/dashboard.png)
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
## Quick Start
```bash
# 1. Install llama-server (one-time setup)
# See: https://github.com/ggml-org/llama.cpp#quick-start
# 1. Install a backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -42,15 +47,21 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Set model path and GPU layers
4. Start or stop the instance
3. Choose backend type (llama.cpp or MLX)
4. Set model path and backend-specific options
5. Start or stop the instance
### Or use the REST API:
```bash
# Create instance
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
-H "Authorization: Bearer your-key" \
-d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
-d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
@@ -85,16 +96,31 @@ go build -o llamactl ./cmd/server
## Prerequisites
### Backend Dependencies
**For llama.cpp backend:**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Quick install methods:
# Homebrew (macOS)
brew install llama.cpp
# Or build from source - see llama.cpp docs
```
**For MLX backend (macOS only):**
You need MLX-LM installed:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
## Configuration
llamactl works out of the box with sensible defaults.
@@ -106,6 +132,10 @@ server:
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -115,7 +145,6 @@ instances:
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances


@@ -58,7 +58,7 @@ func main() {
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Instances)
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)


@@ -19,6 +19,10 @@ server:
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -28,7 +32,6 @@ instances:
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -85,6 +88,18 @@ server:
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
```
**Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
### Instance Configuration
```yaml
@@ -97,7 +112,6 @@ instances:
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
@@ -115,7 +129,6 @@ instances:
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds


@@ -4,11 +4,14 @@ This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
**Quick install methods:**
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
@@ -18,6 +21,22 @@ winget install llama.cpp
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
## Installation Methods
### Option 1: Download Binary (Recommended)


@@ -1,17 +1,18 @@
# Llamactl Documentation
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management


@@ -1,6 +1,6 @@
# Managing Instances
Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API.
## Overview
@@ -39,40 +39,55 @@ Each instance is displayed as a card showing:
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
- **Model Path**: Full path to your downloaded GGUF model file
- **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
- **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
- **Threads**: Number of CPU threads to use
- **Context Size**: Context window size (ctx_size)
- **GPU Layers**: Number of layers to offload to GPU
- **Port**: Network port (auto-assigned by llamactl if not specified)
- **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance
6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
7. Click **"Create"** to save the instance
### Via API
```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/instances/my-instance \
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096
"ctx_size": 4096,
"gpu_layers": 32
}
}'
# Create instance with HuggingFace model
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
@@ -81,9 +96,7 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
},
"auto_restart": true,
"max_restarts": 3
}
}'
```
@@ -166,14 +179,16 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy
Llamactl proxies all requests to the underlying llama-server instances.
Llamactl proxies all requests to the underlying backend instances (llama-server or MLX).
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```
Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
Both backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
### Instance Health


@@ -4,4 +4,6 @@ type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)

pkg/backends/mlx/mlx.go (new file, 205 lines)

@@ -0,0 +1,205 @@
package mlx
import (
"encoding/json"
"reflect"
"strconv"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to handle multiple field names
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions MlxServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = MlxServerOptions(temp)
// Handle alternative field names
fieldMappings := map[string]string{
// Basic connection options
"m": "model",
"host": "host",
"port": "port",
// "python_path": "python_path", // removed
// Model and adapter options
"adapter-path": "adapter_path",
"draft-model": "draft_model",
"num-draft-tokens": "num_draft_tokens",
"trust-remote-code": "trust_remote_code",
// Logging and templates
"log-level": "log_level",
"chat-template": "chat_template",
"use-default-chat-template": "use_default_chat_template",
"chat-template-args": "chat_template_args",
// Sampling defaults
"temperature": "temp", // Support both temp and temperature
"top-p": "top_p",
"top-k": "top_k",
"min-p": "min_p",
"max-tokens": "max_tokens",
}
// Process alternative field names
for altName, canonicalName := range fieldMappings {
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
field := v.FieldByNameFunc(func(fieldName string) bool {
field, _ := v.Type().FieldByName(fieldName)
jsonTag := field.Tag.Get("json")
return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
}
}
}
return nil
}
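
For illustration, a small point worth noting about the unmarshaler above: alias keys such as `temperature` or `top-p` are resolved by matching the canonical name against each field's `json` tag, so callers can submit either CLI-style or snake_case keys in `backend_options`. A combined usage sketch appears after `BuildCommandArgs` below.
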
// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
return &MlxServerOptions{
Host: "127.0.0.1", // MLX default (different from llama-server)
Port: 8080, // MLX default
NumDraftTokens: 3, // MLX default for speculative decoding
LogLevel: "INFO", // MLX default
Temp: 0.0, // MLX default
TopP: 1.0, // MLX default
TopK: 0, // MLX default (disabled)
MinP: 0.0, // MLX default (disabled)
MaxTokens: 512, // MLX default
ChatTemplateArgs: "{}", // MLX default (empty JSON object)
}
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
var args []string
// Required and basic options
if o.Model != "" {
args = append(args, "--model", o.Model)
}
if o.Host != "" {
args = append(args, "--host", o.Host)
}
if o.Port != 0 {
args = append(args, "--port", strconv.Itoa(o.Port))
}
// Model and adapter options
if o.AdapterPath != "" {
args = append(args, "--adapter-path", o.AdapterPath)
}
if o.DraftModel != "" {
args = append(args, "--draft-model", o.DraftModel)
}
if o.NumDraftTokens != 0 {
args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
}
if o.TrustRemoteCode {
args = append(args, "--trust-remote-code")
}
// Logging and templates
if o.LogLevel != "" {
args = append(args, "--log-level", o.LogLevel)
}
if o.ChatTemplate != "" {
args = append(args, "--chat-template", o.ChatTemplate)
}
if o.UseDefaultChatTemplate {
args = append(args, "--use-default-chat-template")
}
if o.ChatTemplateArgs != "" {
args = append(args, "--chat-template-args", o.ChatTemplateArgs)
}
// Sampling defaults
if o.Temp != 0 {
args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
}
if o.TopP != 0 {
args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
}
if o.TopK != 0 {
args = append(args, "--top-k", strconv.Itoa(o.TopK))
}
if o.MinP != 0 {
args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
}
if o.MaxTokens != 0 {
args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
}
return args
}
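
For illustration, a minimal sketch of how these options flow end to end, using the `llamactl/pkg/backends/mlx` import path from this PR: the custom unmarshaler accepts the `temperature` alias for `temp`, and `BuildCommandArgs` emits only the non-zero fields as `mlx_lm.server` flags.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Same backend_options payload as in the README example; "temperature" is
	// mapped to the canonical "temp" field by the custom UnmarshalJSON above.
	payload := []byte(`{"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "temperature": 0.7, "top_p": 0.9, "max_tokens": 2048}`)

	var opts mlx.MlxServerOptions
	if err := json.Unmarshal(payload, &opts); err != nil {
		log.Fatal(err)
	}

	// Zero-valued fields are skipped, so only the set flags appear:
	// [--model mlx-community/Mistral-7B-Instruct-v0.3-4bit --temp 0.7 --top-p 0.9 --max-tokens 2048]
	fmt.Println(opts.BuildCommandArgs())
}
```
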

pkg/backends/mlx/parser.go (new file, 254 lines)

@@ -0,0 +1,254 @@
package mlx
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
if valueProvided {
// MLX-specific validation for certain flags
if flagName == "log_level" && !isValidLogLevel(rawValue) {
return nil, fmt.Errorf("invalid log level: %s", rawValue)
}
options[flagName] = parseValue(rawValue)
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value), e.g. --trust-remote-code or --use-default-chat-template
options[flagName] = true
i++
}
// 4. Convert to MlxServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var mlxOptions MlxServerOptions
if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return MlxServerOptions
return &mlxOptions, nil
}
// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
for _, valid := range validLevels {
if level == valid {
return true
}
}
return false
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "mlx_lm.server" command
if strings.ToLower(firstToken) == "mlx_lm.server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, fmt.Errorf("unclosed quote in command")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
return strings.HasPrefix(s, "-")
}
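
For illustration, a short sketch of the parser in use (mirroring the `/backends/mlx/parse-command` handler added later in this PR), including the multiline form it normalizes:

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Multiline commands with trailing backslashes are normalized before parsing.
	cmd := `mlx_lm.server \
  --model mlx-community/Mistral-7B-Instruct-v0.3-4bit \
  --host 0.0.0.0 --port 8081 \
  --temp 0.7 --trust-remote-code`

	opts, err := mlx.ParseMlxCommand(cmd)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, opts.Host, opts.Port, opts.Temp, opts.TrustRemoteCode)
	// mlx-community/Mistral-7B-Instruct-v0.3-4bit 0.0.0.0 8081 0.7 true
}
```
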


@@ -10,9 +10,19 @@ import (
"gopkg.in/yaml.v3"
)
// BackendConfig contains backend executable configurations
type BackendConfig struct {
// Path to llama-server executable (llama.cpp backend)
LlamaExecutable string `yaml:"llama_executable"`
// Path to mlx_lm executable (MLX-LM backend)
MLXLMExecutable string `yaml:"mlx_lm_executable"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
@@ -61,9 +71,6 @@ type InstancesConfig struct {
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
@@ -112,6 +119,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Backends: BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
@@ -121,7 +132,6 @@ func LoadConfig(configPath string) (AppConfig, error) {
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
@@ -229,8 +239,12 @@ func loadEnvVars(cfg *AppConfig) {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
cfg.Backends.LlamaExecutable = llamaExec
}
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
cfg.Backends.MLXLMExecutable = mlxLMExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {

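For illustration, a sketch of the new environment override in use, assuming `LoadConfig` tolerates an empty config path (as the defaults test below suggests); the virtualenv path is hypothetical:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Hypothetical virtualenv install of mlx_lm.server; overrides the "mlx_lm.server" default.
	os.Setenv("LLAMACTL_MLX_LM_EXECUTABLE", "/opt/mlx-env/bin/mlx_lm.server")

	cfg, err := config.LoadConfig("") // defaults plus env overrides, no config file
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Backends.LlamaExecutable) // llama-server (default)
	fmt.Println(cfg.Backends.MLXLMExecutable) // /opt/mlx-env/bin/mlx_lm.server
}
```
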

@@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "llama-server" {
t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if !cfg.Instances.DefaultAutoRestart {
t.Error("Expected default auto restart to be true")
}
@@ -101,9 +98,6 @@ instances:
if cfg.Instances.MaxInstances != 5 {
t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" {
t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
}
@@ -156,8 +150,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "/env/llama-server" {
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Instances.LlamaExecutable)
if cfg.Backends.LlamaExecutable != "/env/llama-server" {
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")


@@ -31,9 +31,10 @@ func (realTimeProvider) Now() time.Time {
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *config.InstancesConfig
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig
// Status
Status InstanceStatus `json:"status"`
@@ -65,22 +66,23 @@ type Process struct {
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalSettings)
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogsDir)
logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
return &Process{
Name: name,
options: options,
globalSettings: globalSettings,
logger: logger,
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings,
logger: logger,
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
}
}
@@ -96,7 +98,13 @@ func (i *Process) GetPort() int {
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Port
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
}
}
return 0
@@ -108,7 +116,13 @@ func (i *Process) GetHost() string {
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Host
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Host
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
}
}
return ""
@@ -124,7 +138,7 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = options
// Clear the proxy so it gets recreated with new options
@@ -153,8 +167,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
if i.options.LlamaServerOptions != nil {
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
@@ -215,7 +236,7 @@ func (i *Process) UnmarshalJSON(data []byte) error {
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = aux.Options
}


@@ -11,6 +11,11 @@ import (
)
func TestNewInstance(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -29,7 +34,7 @@ func TestNewInstance(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -60,6 +65,11 @@ func TestNewInstance(t *testing.T) {
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -85,7 +95,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
@@ -101,6 +111,11 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
}
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -119,7 +134,7 @@ func TestSetOptions(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
@@ -147,6 +162,11 @@ func TestSetOptions(t *testing.T) {
}
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -162,7 +182,7 @@ func TestGetProxy(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
@@ -184,6 +204,11 @@ func TestGetProxy(t *testing.T) {
}
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -202,7 +227,7 @@ func TestMarshalJSON(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
if err != nil {
@@ -338,6 +363,11 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
},
}
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -356,7 +386,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {


@@ -9,6 +9,8 @@ import (
"runtime"
"syscall"
"time"
"llamactl/pkg/backends"
)
// Start starts the llama server instance and returns an error if it fails.
@@ -41,7 +43,20 @@ func (i *Process) Start() error {
args := i.options.BuildCommandArgs()
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
var executable string
// Get executable from global configuration
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
i.cmd = exec.CommandContext(i.ctx, executable, args...)
if runtime.GOOS != "windows" {
setProcAttrs(i.cmd)
@@ -175,9 +190,16 @@ func (i *Process) WaitForHealthy(timeout int) error {
var host string
var port int
switch opts.BackendType {
case "llama-cpp":
host = opts.LlamaServerOptions.Host
port = opts.LlamaServerOptions.Port
case backends.BackendTypeLlamaCpp:
if opts.LlamaServerOptions != nil {
host = opts.LlamaServerOptions.Host
port = opts.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if opts.MlxServerOptions != nil {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
}
if host == "" {
host = "localhost"


@@ -5,6 +5,7 @@ import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/config"
"log"
)
@@ -22,8 +23,9 @@ type CreateInstanceOptions struct {
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// LlamaServerOptions contains the options for the llama server
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -55,6 +57,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
case backends.BackendTypeMlxLm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.MlxServerOptions = &mlx.MlxServerOptions{}
if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
@@ -72,19 +86,36 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
Alias: (*Alias)(c),
}
// Convert LlamaServerOptions back to BackendOptions map for JSON
if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
// Convert backend-specific options back to BackendOptions map for JSON
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
aux.BackendOptions = backendOpts
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
data, err := json.Marshal(c.MlxServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
}
return json.Marshal(aux)
@@ -136,6 +167,10 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
if c.LlamaServerOptions != nil {
return c.LlamaServerOptions.BuildCommandArgs()
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
return c.MlxServerOptions.BuildCommandArgs()
}
}
return []string{}
}
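
For illustration, a sketch of the round trip these hooks enable, using the `llamactl/pkg/instance` import path from this PR: a request body with `backend_type: mlx_lm` is decoded into typed MLX options, and marshaling folds them back into a generic `backend_options` map.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	"llamactl/pkg/instance"
)

func main() {
	body := []byte(`{
		"backend_type": "mlx_lm",
		"backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "temp": 0.7}
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(body, &opts); err != nil {
		log.Fatal(err)
	}
	// The generic backend_options map has been decoded into typed MLX options.
	fmt.Println(opts.MlxServerOptions.Model, opts.MlxServerOptions.Temp)

	// MarshalJSON converts the typed options back into a backend_options map.
	out, err := json.Marshal(&opts)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(out))
}
```
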


@@ -33,6 +33,11 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -47,13 +52,18 @@ func TestUpdateLastRequestTime(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -70,7 +80,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
@@ -79,6 +89,11 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -105,7 +120,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
},
}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.SetStatus(instance.Running)
@@ -117,6 +132,11 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -133,7 +153,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
@@ -146,6 +166,11 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -162,7 +187,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
@@ -181,6 +206,11 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -209,7 +239,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {


@@ -35,6 +35,7 @@ type instanceManager struct {
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig
// Timeout checker
timeoutChecker *time.Ticker
@@ -44,7 +45,7 @@ type instanceManager struct {
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
@@ -53,6 +54,7 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
backendsConfig: backendsConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
@@ -241,7 +243,7 @@ func (im *instanceManager) loadInstance(name, path string) error {
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created


@@ -15,18 +15,22 @@ import (
)
func TestNewInstanceManager(t *testing.T) {
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
mgr := manager.NewInstanceManager(cfg)
mgr := manager.NewInstanceManager(backendConfig, cfg)
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
@@ -44,6 +48,11 @@ func TestNewInstanceManager(t *testing.T) {
func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
@@ -52,7 +61,7 @@ func TestPersistence(t *testing.T) {
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(cfg)
manager1 := manager.NewInstanceManager(backendConfig, cfg)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -73,7 +82,7 @@ func TestPersistence(t *testing.T) {
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(cfg)
manager2 := manager.NewInstanceManager(backendConfig, cfg)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -172,15 +181,19 @@ func TestShutdown(t *testing.T) {
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(cfg)
return manager.NewInstanceManager(backendConfig, cfg)
}


@@ -62,7 +62,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
if err := im.persistInstance(inst); err != nil {
@@ -260,6 +260,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
}
return 0
}
@@ -271,6 +275,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
}
}


@@ -62,12 +62,16 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
}
// Test max instances limit
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {


@@ -13,13 +13,17 @@ import (
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
manager := manager.NewInstanceManager(backendConfig, cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}


@@ -7,6 +7,7 @@ import (
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
@@ -684,3 +685,57 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
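
For illustration, a client-side sketch of calling the new endpoint; the `/api/v1` prefix and Bearer header mirror the instance endpoints shown in the README and are assumptions about the final route:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Assumed full path; the handler itself is documented as /backends/mlx/parse-command.
	url := "http://localhost:8080/api/v1/backends/mlx/parse-command"

	reqBody, _ := json.Marshal(map[string]string{
		"command": "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081",
	})
	req, err := http.NewRequest(http.MethodPost, url, bytes.NewReader(reqBody))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-management-key") // if management auth is enabled

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	respBody, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(respBody)) // CreateInstanceOptions JSON with backend_type "mlx_lm"
}
```
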


@@ -55,6 +55,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route("/llama-cpp", func(r chi.Router) {
r.Post("/parse-command", handler.ParseLlamaCommand())
})
r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand())
})
})
// Instance management endpoints


@@ -44,6 +44,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
@@ -68,6 +70,24 @@ func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
return nil
}
// validateMlxOptions validates MLX backend specific options
func validateMlxOptions(options *instance.CreateInstanceOptions) error {
if options.MlxServerOptions == nil {
return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
}
if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)

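For illustration, a sketch of the new MLX validation path via the exported entry point; the `llamactl/pkg/validation` import path is an assumption (only the function names appear in this diff):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/mlx"
	"llamactl/pkg/instance"
	"llamactl/pkg/validation" // assumed import path for the validation package
)

func main() {
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeMlxLm,
		MlxServerOptions: &mlx.MlxServerOptions{
			Model: "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
			Port:  70000, // deliberately out of range to trigger validation
		},
	}
	if err := validation.ValidateInstanceOptions(opts); err != nil {
		fmt.Println("rejected:", err) // invalid port range: 70000
	}
}
```
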

@@ -2,11 +2,10 @@ import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { BackendOptions } from '@/schemas/instanceOptions'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface BackendFormFieldProps {
fieldKey: keyof BackendOptions
fieldKey: string
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}

View File

@@ -41,8 +41,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields();
const advancedBackendFields = getAdvancedBackendFields();
const basicBackendFields = getBasicBackendFields(formData.backend_type);
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -66,10 +66,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
setFormData((prev) => ({
...prev,
[key]: value,
}));
setFormData((prev) => {
// If backend_type is changing, clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
return {
...prev,
[key]: value,
backend_options: {}, // Clear backend options when backend type changes
};
}
return {
...prev,
[key]: value,
};
});
};
const handleBackendFieldChange = (key: string, value: any) => {
@@ -78,7 +89,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
backend_options: {
...prev.backend_options,
[key]: value,
},
} as any,
}));
};
@@ -260,7 +271,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
@@ -345,7 +356,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
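
In plain terms, the updated handleFieldChange keeps one backend's options from leaking into another's form. A small hedged sketch of the resulting state transition (option values are illustrative):

```typescript
// Illustrative state transition; option values are assumptions.
const before = {
  backend_type: 'llama_cpp',
  backend_options: { model: '/models/llama-7b.gguf', threads: 8 },
};

// After handleFieldChange('backend_type', 'mlx_lm') the form state becomes:
const after = {
  backend_type: 'mlx_lm',
  backend_options: {}, // cleared so llama.cpp-only fields don't carry over to the MLX form
};
```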

View File

@@ -2,8 +2,7 @@ import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
@@ -39,7 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
{/* Add more backend types here as they become available */}
<option value={BackendType.MLX_LM}>MLX LM</option>
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>

View File

@@ -93,6 +93,14 @@ export const backendsApi = {
body: JSON.stringify({ command }),
}),
},
mlx: {
// POST /backends/mlx/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
};
// Instance API functions
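
For context, a hedged sketch of how the new mlx.parseCommand helper might be consumed; the '@/lib/api' import path and the wrapper function are assumptions, not part of this diff.

```typescript
// Illustrative consumer; the import path for backendsApi is assumed.
import { backendsApi } from '@/lib/api';
import type { CreateInstanceOptions } from '@/schemas/instanceOptions';

async function importFromMlxCommand(command: string): Promise<CreateInstanceOptions> {
  // POSTs { command } to /backends/mlx/parse-command and returns the parsed
  // CreateInstanceOptions (backend_type "mlx_lm") for pre-filling the instance dialog.
  return backendsApi.mlx.parseCommand(command);
}
```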

View File

@@ -1,4 +1,15 @@
import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'
import {
type CreateInstanceOptions,
type LlamaCppBackendOptions,
type MlxBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
getAllFieldKeys,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getLlamaCppFieldType,
getMlxFieldType
} from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific)
export const basicFieldsConfig: Record<string, {
@@ -36,8 +47,8 @@ export const basicFieldsConfig: Record<string, {
}
}
// Backend-specific basic fields (these go in backend_options)
export const basicBackendFieldsConfig: Record<string, {
// LlamaCpp backend-specific basic fields
const basicLlamaCppFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
@@ -46,7 +57,8 @@ export const basicBackendFieldsConfig: Record<string, {
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
description: 'Path to the model file'
description: 'Path to the model file',
required: true
},
hf_repo: {
label: 'Hugging Face Repository',
@@ -65,13 +77,50 @@ export const basicBackendFieldsConfig: Record<string, {
}
}
export function isBasicField(key: keyof CreateInstanceOptions): boolean {
// MLX backend-specific basic fields
const basicMlxFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
model: {
label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config',
required: true
},
temp: {
label: 'Temperature',
placeholder: '0.0',
description: 'Default sampling temperature (default: 0.0)'
},
top_p: {
label: 'Top-P',
placeholder: '1.0',
description: 'Default nucleus sampling top-p (default: 1.0)'
},
top_k: {
label: 'Top-K',
placeholder: '0',
description: 'Default top-k sampling (default: 0, disables top-k)'
},
min_p: {
label: 'Min-P',
placeholder: '0.0',
description: 'Default min-p sampling (default: 0.0, disables min-p)'
},
max_tokens: {
label: 'Max Tokens',
placeholder: '512',
description: 'Default maximum number of tokens to generate (default: 512)'
}
}
function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
}
export function isBasicBackendField(key: keyof BackendOptions): boolean {
return key in basicBackendFieldsConfig
}
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
@@ -81,13 +130,61 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
}
export function getBasicBackendFields(): (keyof BackendOptions)[] {
return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
export function getBasicBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') {
return Object.keys(basicMlxFieldsConfig)
} else if (backendType === 'llama_cpp') {
return Object.keys(basicLlamaCppFieldsConfig)
}
// Default to LlamaCpp for backward compatibility
return Object.keys(basicLlamaCppFieldsConfig)
}
export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
export function getAdvancedBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') {
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
} else if (backendType === 'llama_cpp') {
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
}
// Default to LlamaCpp for backward compatibility
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
}
// Combined backend fields config for use in BackendFormField
export const basicBackendFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig
}
// Get field type for any backend option (union type)
export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
// Try to get type from LlamaCpp schema first
try {
if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
}
} catch {
// Schema might not be available
}
// Try MLX schema
try {
if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
return getMlxFieldType(key as keyof MlxBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback
return 'text'
}
// Re-export the Zod-based functions
export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
export { getFieldType } from '@/schemas/instanceOptions'
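
A brief sketch of consuming the backend-aware helpers above; the MLX list follows directly from basicMlxFieldsConfig, while the llama.cpp list is abbreviated.

```typescript
import {
  getBasicBackendFields,
  getAdvancedBackendFields,
  getBackendFieldType,
} from '@/lib/zodFormUtils';

getBasicBackendFields('mlx_lm');
// -> ['model', 'temp', 'top_p', 'top_k', 'min_p', 'max_tokens']

getBasicBackendFields('llama_cpp');
// -> ['model', 'hf_repo', ...] (also the fallback when no backend type is given)

getAdvancedBackendFields('mlx_lm');
// -> every MLX schema key not listed in basicMlxFieldsConfig

getBackendFieldType('temp');              // 'number'
getBackendFieldType('trust_remote_code'); // 'boolean'
getBackendFieldType('no_such_field');     // 'text' fallback
```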

View File

@@ -1,8 +1,8 @@
import { BackendType } from '@/types/instance'
import { z } from 'zod'
// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
export const BackendOptionsSchema = z.object({
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
@@ -170,6 +170,39 @@ export const BackendOptionsSchema = z.object({
fim_qwen_14b_spec: z.boolean().optional(),
})
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Backend options union
export const BackendOptionsSchema = z.union([
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
])
// Define the main create instance options schema
export const CreateInstanceOptionsSchema = z.object({
// Restart options
@@ -180,11 +213,13 @@ export const CreateInstanceOptionsSchema = z.object({
on_demand_start: z.boolean().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
backend_options: BackendOptionsSchema.optional(),
})
// Infer the TypeScript types from the schemas
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
@@ -193,9 +228,14 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
// Helper to get all backend option field keys
export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type from Zod schema
@@ -213,9 +253,9 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
return 'text' // ZodString and others default to text
}
// Get field type for backend options
export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = BackendOptionsSchema.shape[key]
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
@@ -226,3 +266,18 @@ export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}
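
To illustrate the split schemas and the per-backend type helpers, a small hedged example; the option values are illustrative.

```typescript
import {
  MlxBackendOptionsSchema,
  CreateInstanceOptionsSchema,
  getMlxFieldType,
} from '@/schemas/instanceOptions';

// Validate MLX options on their own
const mlx = MlxBackendOptionsSchema.safeParse({
  model: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
  temp: 0.7,
  log_level: 'INFO',
});
// mlx.success === true

// Or as part of a full instance definition; backend_type now accepts "mlx_lm"
const inst = CreateInstanceOptionsSchema.safeParse({
  backend_type: 'mlx_lm',
  backend_options: { model: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', max_tokens: 256 },
});
// inst.success === true

getMlxFieldType('temp');      // 'number'
getMlxFieldType('log_level'); // 'text' (enums are rendered as text/select)
```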

View File

@@ -3,7 +3,9 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
export const BackendType = {
LLAMA_CPP: 'llama_cpp'
LLAMA_CPP: 'llama_cpp',
MLX_LM: 'mlx_lm',
// MLX_VLM: 'mlx_vlm', // Future expansion
} as const
export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
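
Finally, a short hedged sketch of narrowing over the extended BackendType constant; the helper function itself is illustrative, and the labels mirror the backend select options added in ZodFormField.

```typescript
import { BackendType, type BackendTypeValue } from '@/types/instance';

// Illustrative helper; the labels mirror the backend <select> options above.
function backendLabel(type: BackendTypeValue): string {
  switch (type) {
    case BackendType.LLAMA_CPP:
      return 'Llama Server';
    case BackendType.MLX_LM:
      return 'MLX LM';
    default:
      return type; // unreachable today; keeps the switch future-proof (e.g. MLX_VLM)
  }
}
```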