Mirror of https://github.com/lordmathis/llamactl.git
Merge pull request #32 from lordmathis/feat/mlx-backend
feat: Implement mlx-lm backend
README.md (73 lines changed)
@@ -2,30 +2,35 @@
-**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
+**Unified management and routing for llama.cpp and MLX models with web dashboard.**
 
-## Why llamactl?
+## Features
 
-🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
-🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
-🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
-🔐 **API Key Authentication**: Separate keys for management vs inference access
-📊 **Instance Monitoring**: Health checks, auto-restart, log management
-⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
-💾 **State Persistence**: Ensure instances remain intact across server restarts
+### 🚀 Easy Model Management
+- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
+- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
+- **State Persistence**: Ensure instances remain intact across server restarts
+
+### 🔗 Universal Compatibility
+- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
+
+### 🌐 User-Friendly Interface
+- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
+- **API Key Authentication**: Separate keys for management vs inference access
+
+### ⚡ Smart Operations
+- **Instance Monitoring**: Health checks, auto-restart, log management
+- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
 
 
 
 **Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
 **Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
 **Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
 
 ## Quick Start
 
 ```bash
-# 1. Install llama-server (one-time setup)
-# See: https://github.com/ggml-org/llama.cpp#quick-start
+# 1. Install backend (one-time setup)
+# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
+# For MLX on macOS: pip install mlx-lm
 
 # 2. Download and run llamactl
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -42,15 +47,21 @@ llamactl
 ### Create and manage instances via web dashboard:
 1. Open http://localhost:8080
 2. Click "Create Instance"
-3. Set model path and GPU layers
-4. Start or stop the instance
+3. Choose backend type (llama.cpp or MLX)
+4. Set model path and backend-specific options
+5. Start or stop the instance
 
 ### Or use the REST API:
 ```bash
-# Create instance
+# Create llama.cpp instance
 curl -X POST localhost:8080/api/v1/instances/my-7b-model \
   -H "Authorization: Bearer your-key" \
-  -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
+  -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
+
+# Create MLX instance (macOS)
+curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
+  -H "Authorization: Bearer your-key" \
+  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
 
 # Use with OpenAI SDK
 curl -X POST localhost:8080/v1/chat/completions \
@@ -85,16 +96,31 @@ go build -o llamactl ./cmd/server
 
 ## Prerequisites
 
+### Backend Dependencies
+
+**For llama.cpp backend:**
 You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
 
 ```bash
-# Quick install methods:
 # Homebrew (macOS)
 brew install llama.cpp
 
 # Or build from source - see llama.cpp docs
 ```
 
+**For MLX backend (macOS only):**
+You need MLX-LM installed:
+
+```bash
+# Install via pip (requires Python 3.8+)
+pip install mlx-lm
+
+# Or in a virtual environment (recommended)
+python -m venv mlx-env
+source mlx-env/bin/activate
+pip install mlx-lm
+```
+
 ## Configuration
 
 llamactl works out of the box with sensible defaults.
@@ -106,6 +132,10 @@ server:
   allowed_origins: ["*"] # Allowed CORS origins (default: all)
   enable_swagger: false # Enable Swagger UI for API docs
 
+backends:
+  llama_executable: llama-server # Path to llama-server executable
+  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
+
 instances:
   port_range: [8000, 9000] # Port range for instances
   data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -115,7 +145,6 @@ instances:
   max_instances: -1 # Max instances (-1 = unlimited)
   max_running_instances: -1 # Max running instances (-1 = unlimited)
   enable_lru_eviction: true # Enable LRU eviction for idle instances
-  llama_executable: llama-server # Path to llama-server executable
   default_auto_restart: true # Auto-restart new instances by default
   default_max_restarts: 3 # Max restarts for new instances
   default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -58,7 +58,7 @@ func main() {
 	}
 
 	// Initialize the instance manager
-	instanceManager := manager.NewInstanceManager(cfg.Instances)
+	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
 
 	// Create a new handler with the instance manager
 	handler := server.NewHandler(instanceManager, cfg)
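A small wiring sketch (not from this PR; import paths are assumed from the repo layout) showing the new two-argument constructor with explicitly built config sections, using field names introduced later in this diff:

```go
package main

import (
	"llamactl/pkg/config"  // assumed import path
	"llamactl/pkg/manager" // assumed import path
)

func main() {
	// Field names come from the BackendConfig/InstancesConfig changes in this PR.
	backends := config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}
	instances := config.InstancesConfig{
		PortRange: [2]int{8000, 9000},
		LogsDir:   "/tmp/llamactl-logs",
	}

	// The manager now receives backend executables separately from instance settings.
	instanceManager := manager.NewInstanceManager(backends, instances)
	_ = instanceManager
}
```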
@@ -19,6 +19,10 @@ server:
   allowed_origins: ["*"] # Allowed CORS origins (default: all)
   enable_swagger: false # Enable Swagger UI for API docs
 
+backends:
+  llama_executable: llama-server # Path to llama-server executable
+  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
+
 instances:
   port_range: [8000, 9000] # Port range for instances
   data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -28,7 +32,6 @@ instances:
   max_instances: -1 # Max instances (-1 = unlimited)
   max_running_instances: -1 # Max running instances (-1 = unlimited)
   enable_lru_eviction: true # Enable LRU eviction for idle instances
-  llama_executable: llama-server # Path to llama-server executable
   default_auto_restart: true # Auto-restart new instances by default
   default_max_restarts: 3 # Max restarts for new instances
   default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -79,11 +82,23 @@ server:
   enable_swagger: false # Enable Swagger UI (default: false)
 ```
 
 **Environment Variables:**
 - `LLAMACTL_HOST` - Server host
 - `LLAMACTL_PORT` - Server port
 - `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
 - `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
 
+### Backend Configuration
+
+```yaml
+backends:
+  llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
+  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
+```
+
+**Environment Variables:**
+- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
+- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
+
 ### Instance Configuration
 
@@ -97,7 +112,6 @@ instances:
   max_instances: -1 # Maximum instances (-1 = unlimited)
   max_running_instances: -1 # Maximum running instances (-1 = unlimited)
   enable_lru_eviction: true # Enable LRU eviction for idle instances
-  llama_executable: "llama-server" # Path to llama-server executable
   default_auto_restart: true # Default auto-restart setting
   default_max_restarts: 3 # Default maximum restart attempts
   default_restart_delay: 5 # Default restart delay in seconds
@@ -113,9 +127,8 @@ instances:
 - `LLAMACTL_LOGS_DIR` - Log directory path
 - `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
 - `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
 - `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
 - `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
@@ -4,11 +4,14 @@ This guide will walk you through installing Llamactl on your system.
 
 ## Prerequisites
 
+### Backend Dependencies
+
+llamactl supports multiple backends. Install at least one:
+
+**For llama.cpp backend (all platforms):**
+
 You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
 
-**Quick install methods:**
-
 ```bash
 # Homebrew (macOS/Linux)
 brew install llama.cpp
@@ -18,6 +21,22 @@ winget install llama.cpp
 
 Or build from source - see llama.cpp docs
 
+**For MLX backend (macOS only):**
+
+MLX provides optimized inference on Apple Silicon. Install MLX-LM:
+
+```bash
+# Install via pip (requires Python 3.8+)
+pip install mlx-lm
+
+# Or in a virtual environment (recommended)
+python -m venv mlx-env
+source mlx-env/bin/activate
+pip install mlx-lm
+```
+
+Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
+
 ## Installation Methods
 
 ### Option 1: Download Binary (Recommended)
@@ -1,22 +1,23 @@
 # Llamactl Documentation
 
-Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
+Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**
 
 
 
 ## What is Llamactl?
 
-Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
+Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.
 
 ## Features
 
 🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
 🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
 🔐 **API Key Authentication**: Separate keys for management vs inference access
 📊 **Instance Monitoring**: Health checks, auto-restart, log management
 ⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
 💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
 💾 **State Persistence**: Ensure instances remain intact across server restarts
 
 ## Quick Links
@@ -1,6 +1,6 @@
 # Managing Instances
 
-Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
+Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API.
 
 ## Overview
 
@@ -39,40 +39,55 @@ Each instance is displayed as a card showing:
 
 1. Click the **"Create Instance"** button on the dashboard
 2. Enter a unique **Name** for your instance (only required field)
-3. Configure model source (choose one):
-   - **Model Path**: Full path to your downloaded GGUF model file
-   - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
-   - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
-4. Configure optional instance management settings:
+3. **Choose Backend Type**:
+   - **llama.cpp**: For GGUF models using llama-server
+   - **MLX**: For MLX-optimized models (macOS only)
+4. Configure model source:
+   - **For llama.cpp**: GGUF model path or HuggingFace repo
+   - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
+5. Configure optional instance management settings:
    - **Auto Restart**: Automatically restart instance on failure
   - **Max Restarts**: Maximum number of restart attempts
   - **Restart Delay**: Delay in seconds between restart attempts
  - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
  - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
-5. Configure optional llama-server backend options:
-   - **Threads**: Number of CPU threads to use
-   - **Context Size**: Context window size (ctx_size)
-   - **GPU Layers**: Number of layers to offload to GPU
-   - **Port**: Network port (auto-assigned by llamactl if not specified)
-   - **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
-6. Click **"Create"** to save the instance
+6. Configure backend-specific options:
+   - **llama.cpp**: Threads, context size, GPU layers, port, etc.
+   - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
+7. Click **"Create"** to save the instance
 
 ### Via API
 
 ```bash
-# Create instance with local model file
-curl -X POST http://localhost:8080/api/instances/my-instance \
+# Create llama.cpp instance with local model file
+curl -X POST http://localhost:8080/api/instances/my-llama-instance \
   -H "Content-Type: application/json" \
   -d '{
     "backend_type": "llama_cpp",
     "backend_options": {
       "model": "/path/to/model.gguf",
       "threads": 8,
-      "ctx_size": 4096
+      "ctx_size": 4096,
+      "gpu_layers": 32
     }
   }'
 
-# Create instance with HuggingFace model
+# Create MLX instance (macOS only)
+curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_type": "mlx_lm",
+    "backend_options": {
+      "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
+      "temp": 0.7,
+      "top_p": 0.9,
+      "max_tokens": 2048
+    },
+    "auto_restart": true,
+    "max_restarts": 3
+  }'
+
+# Create llama.cpp instance with HuggingFace model
 curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
   -H "Content-Type: application/json" \
   -d '{
@@ -81,9 +96,7 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
     "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
     "hf_file": "gemma-3-27b-it-GGUF.gguf",
     "gpu_layers": 32
-  },
-  "auto_restart": true,
-  "max_restarts": 3
+  }
 }'
 ```
 
@@ -166,14 +179,16 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
 
 ## Instance Proxy
 
-Llamactl proxies all requests to the underlying llama-server instances.
+Llamactl proxies all requests to the underlying backend instances (llama-server or MLX).
 
 ```bash
 # Get instance details
 curl http://localhost:8080/api/instances/{name}/proxy/
 ```
 
-Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
+Both backends provide OpenAI-compatible endpoints. Check the respective documentation:
+- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
+- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
 
 ### Instance Health
@@ -4,4 +4,6 @@ type BackendType string
 
 const (
 	BackendTypeLlamaCpp BackendType = "llama_cpp"
+	BackendTypeMlxLm    BackendType = "mlx_lm"
+	// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
 )
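The new constant is what instance options carry to select the MLX backend. As an illustration only (not code from this PR; the helper name `executableFor` and the import paths are invented for the sketch), a switch over `BackendType` could map to the executables configured in the new `BackendConfig` introduced further down in this diff:

```go
// Hypothetical helper, not part of the commit: pick the configured executable
// for a backend type. Import paths are assumed from the repo layout.
package main

import (
	"fmt"

	"llamactl/pkg/backends" // assumed import path
	"llamactl/pkg/config"   // assumed import path
)

func executableFor(t backends.BackendType, cfg config.BackendConfig) (string, error) {
	switch t {
	case backends.BackendTypeLlamaCpp:
		return cfg.LlamaExecutable, nil
	case backends.BackendTypeMlxLm:
		return cfg.MLXLMExecutable, nil
	default:
		return "", fmt.Errorf("unsupported backend type: %s", t)
	}
}

func main() {
	cfg := config.BackendConfig{LlamaExecutable: "llama-server", MLXLMExecutable: "mlx_lm.server"}
	exe, _ := executableFor(backends.BackendTypeMlxLm, cfg)
	fmt.Println(exe) // mlx_lm.server
}
```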
pkg/backends/mlx/mlx.go (new file, 205 lines)
@@ -0,0 +1,205 @@
package mlx

import (
	"encoding/json"
	"reflect"
	"strconv"
)

type MlxServerOptions struct {
	// Basic connection options
	Model string `json:"model,omitempty"`
	Host  string `json:"host,omitempty"`
	Port  int    `json:"port,omitempty"`

	// Model and adapter options
	AdapterPath     string `json:"adapter_path,omitempty"`
	DraftModel      string `json:"draft_model,omitempty"`
	NumDraftTokens  int    `json:"num_draft_tokens,omitempty"`
	TrustRemoteCode bool   `json:"trust_remote_code,omitempty"`

	// Logging and templates
	LogLevel               string `json:"log_level,omitempty"`
	ChatTemplate           string `json:"chat_template,omitempty"`
	UseDefaultChatTemplate bool   `json:"use_default_chat_template,omitempty"`
	ChatTemplateArgs       string `json:"chat_template_args,omitempty"` // JSON string

	// Sampling defaults
	Temp      float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
	TopP      float64 `json:"top_p,omitempty"`
	TopK      int     `json:"top_k,omitempty"`
	MinP      float64 `json:"min_p,omitempty"`
	MaxTokens int     `json:"max_tokens,omitempty"`
}

// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
	// First unmarshal into a map to handle multiple field names
	var raw map[string]any
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Create a temporary struct for standard unmarshaling
	type tempOptions MlxServerOptions
	temp := tempOptions{}

	// Standard unmarshal first
	if err := json.Unmarshal(data, &temp); err != nil {
		return err
	}

	// Copy to our struct
	*o = MlxServerOptions(temp)

	// Handle alternative field names
	fieldMappings := map[string]string{
		// Basic connection options
		"m":    "model",
		"host": "host",
		"port": "port",
		// "python_path": "python_path", // removed

		// Model and adapter options
		"adapter-path":      "adapter_path",
		"draft-model":       "draft_model",
		"num-draft-tokens":  "num_draft_tokens",
		"trust-remote-code": "trust_remote_code",

		// Logging and templates
		"log-level":                 "log_level",
		"chat-template":             "chat_template",
		"use-default-chat-template": "use_default_chat_template",
		"chat-template-args":        "chat_template_args",

		// Sampling defaults
		"temperature": "temp", // Support both temp and temperature
		"top-p":       "top_p",
		"top-k":       "top_k",
		"min-p":       "min_p",
		"max-tokens":  "max_tokens",
	}

	// Process alternative field names
	for altName, canonicalName := range fieldMappings {
		if value, exists := raw[altName]; exists {
			// Use reflection to set the field value
			v := reflect.ValueOf(o).Elem()
			field := v.FieldByNameFunc(func(fieldName string) bool {
				field, _ := v.Type().FieldByName(fieldName)
				jsonTag := field.Tag.Get("json")
				return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
			})

			if field.IsValid() && field.CanSet() {
				switch field.Kind() {
				case reflect.Int:
					if intVal, ok := value.(float64); ok {
						field.SetInt(int64(intVal))
					} else if strVal, ok := value.(string); ok {
						if intVal, err := strconv.Atoi(strVal); err == nil {
							field.SetInt(int64(intVal))
						}
					}
				case reflect.Float64:
					if floatVal, ok := value.(float64); ok {
						field.SetFloat(floatVal)
					} else if strVal, ok := value.(string); ok {
						if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
							field.SetFloat(floatVal)
						}
					}
				case reflect.String:
					if strVal, ok := value.(string); ok {
						field.SetString(strVal)
					}
				case reflect.Bool:
					if boolVal, ok := value.(bool); ok {
						field.SetBool(boolVal)
					}
				}
			}
		}
	}

	return nil
}

// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
	return &MlxServerOptions{
		Host:             "127.0.0.1", // MLX default (different from llama-server)
		Port:             8080,        // MLX default
		NumDraftTokens:   3,           // MLX default for speculative decoding
		LogLevel:         "INFO",      // MLX default
		Temp:             0.0,         // MLX default
		TopP:             1.0,         // MLX default
		TopK:             0,           // MLX default (disabled)
		MinP:             0.0,         // MLX default (disabled)
		MaxTokens:        512,         // MLX default
		ChatTemplateArgs: "{}",        // MLX default (empty JSON object)
	}
}

// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
	var args []string

	// Required and basic options
	if o.Model != "" {
		args = append(args, "--model", o.Model)
	}
	if o.Host != "" {
		args = append(args, "--host", o.Host)
	}
	if o.Port != 0 {
		args = append(args, "--port", strconv.Itoa(o.Port))
	}

	// Model and adapter options
	if o.AdapterPath != "" {
		args = append(args, "--adapter-path", o.AdapterPath)
	}
	if o.DraftModel != "" {
		args = append(args, "--draft-model", o.DraftModel)
	}
	if o.NumDraftTokens != 0 {
		args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
	}
	if o.TrustRemoteCode {
		args = append(args, "--trust-remote-code")
	}

	// Logging and templates
	if o.LogLevel != "" {
		args = append(args, "--log-level", o.LogLevel)
	}
	if o.ChatTemplate != "" {
		args = append(args, "--chat-template", o.ChatTemplate)
	}
	if o.UseDefaultChatTemplate {
		args = append(args, "--use-default-chat-template")
	}
	if o.ChatTemplateArgs != "" {
		args = append(args, "--chat-template-args", o.ChatTemplateArgs)
	}

	// Sampling defaults
	if o.Temp != 0 {
		args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
	}
	if o.TopP != 0 {
		args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
	}
	if o.TopK != 0 {
		args = append(args, "--top-k", strconv.Itoa(o.TopK))
	}
	if o.MinP != 0 {
		args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
	}
	if o.MaxTokens != 0 {
		args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
	}

	return args
}
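A minimal usage sketch (not part of the commit; the import path is assumed from the repo layout) showing how the alternative field names are resolved by `UnmarshalJSON` and how the resulting options become `mlx_lm.server` flags via `BuildCommandArgs`:

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/backends/mlx" // assumed import path
)

func main() {
	// "temperature" and "top-p" are alternative names; UnmarshalJSON maps them
	// onto the canonical "temp" and "top_p" fields.
	raw := []byte(`{"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "temperature": 0.7, "top-p": 0.9}`)

	var opts mlx.MlxServerOptions
	if err := json.Unmarshal(raw, &opts); err != nil {
		panic(err)
	}

	// Prints something like:
	// [--model mlx-community/Mistral-7B-Instruct-v0.3-4bit --temp 0.7 --top-p 0.9]
	fmt.Println(opts.BuildCommandArgs())
}
```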
pkg/backends/mlx/parser.go (new file, 254 lines)
@@ -0,0 +1,254 @@
package mlx

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
	// 1. Normalize the command - handle multiline with backslashes
	trimmed := normalizeMultilineCommand(command)
	if trimmed == "" {
		return nil, fmt.Errorf("command cannot be empty")
	}

	// 2. Extract arguments from command
	args, err := extractArgumentsFromCommand(trimmed)
	if err != nil {
		return nil, err
	}

	// 3. Parse arguments into map
	options := make(map[string]any)

	i := 0
	for i < len(args) {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") { // skip positional / stray values
			i++
			continue
		}

		// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Unified parsing for --flag=value vs --flag value
		var rawFlag, rawValue string
		hasEquals := false
		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			rawFlag = parts[0]
			rawValue = parts[1] // may be empty string
			hasEquals = true
		} else {
			rawFlag = arg
		}

		flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
		flagName := strings.ReplaceAll(flagCore, "-", "_")

		// Detect value if not in equals form
		valueProvided := hasEquals
		if !hasEquals {
			if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
				rawValue = args[i+1]
				valueProvided = true
			}
		}

		if valueProvided {
			// MLX-specific validation for certain flags
			if flagName == "log_level" && !isValidLogLevel(rawValue) {
				return nil, fmt.Errorf("invalid log level: %s", rawValue)
			}

			options[flagName] = parseValue(rawValue)

			// Advance index: if we consumed a following token as value (non equals form), skip it
			if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
				i += 2
			} else {
				i++
			}
			continue
		}

		// Boolean flag (no value) - MLX specific boolean flags
		if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
			options[flagName] = true
		} else {
			options[flagName] = true
		}
		i++
	}

	// 4. Convert to MlxServerOptions using existing UnmarshalJSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
	}

	var mlxOptions MlxServerOptions
	if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
		return nil, fmt.Errorf("failed to parse command options: %w", err)
	}

	// 5. Return MlxServerOptions
	return &mlxOptions, nil
}

// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
	validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
	for _, valid := range validLevels {
		if level == valid {
			return true
		}
	}
	return false
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
	// Surrounding matching quotes (single or double)
	if l := len(value); l >= 2 {
		if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
			value = value[1 : l-1]
		}
	}

	lower := strings.ToLower(value)
	if lower == "true" {
		return true
	}
	if lower == "false" {
		return false
	}

	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}
	return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
	// Handle escaped newlines (backslash followed by newline)
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")

	// Clean up extra whitespace
	re = regexp.MustCompile(`\s+`)
	normalized = re.ReplaceAllString(normalized, " ")

	return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
	// Split command into tokens respecting quotes
	tokens, err := splitCommandTokens(command)
	if err != nil {
		return nil, err
	}

	if len(tokens) == 0 {
		return nil, fmt.Errorf("no command tokens found")
	}

	// Check if first token looks like an executable
	firstToken := tokens[0]

	// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
	if strings.Contains(firstToken, string(filepath.Separator)) ||
		strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
		return tokens[1:], nil // Return everything except the executable
	}

	// Case 2: Just "mlx_lm.server" command
	if strings.ToLower(firstToken) == "mlx_lm.server" {
		return tokens[1:], nil // Return everything except the command
	}

	// Case 3: Arguments only (starts with a flag)
	if strings.HasPrefix(firstToken, "-") {
		return tokens, nil // Return all tokens as arguments
	}

	// Case 4: Unknown format - might be a different executable name
	// Be permissive and assume it's the executable
	return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
	var tokens []string
	var current strings.Builder
	inQuotes := false
	quoteChar := byte(0)
	escaped := false

	for i := 0; i < len(command); i++ {
		c := command[i]

		if escaped {
			current.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' {
			escaped = true
			current.WriteByte(c)
			continue
		}

		if !inQuotes && (c == '"' || c == '\'') {
			inQuotes = true
			quoteChar = c
			current.WriteByte(c)
		} else if inQuotes && c == quoteChar {
			inQuotes = false
			quoteChar = 0
			current.WriteByte(c)
		} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
			if current.Len() > 0 {
				tokens = append(tokens, current.String())
				current.Reset()
			}
		} else {
			current.WriteByte(c)
		}
	}

	if inQuotes {
		return nil, fmt.Errorf("unclosed quote in command")
	}

	if current.Len() > 0 {
		tokens = append(tokens, current.String())
	}

	return tokens, nil
}

// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
	return strings.HasPrefix(s, "-")
}
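A usage sketch for the parser (not part of the commit; import path assumed) with a multiline command, demonstrating backslash normalization, executable stripping, and both `--flag value` and `--flag=value` forms:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/mlx" // assumed import path
)

func main() {
	// A command as a user might paste it; the parser joins the backslash-continued
	// lines and drops the leading executable before reading flags.
	cmd := `mlx_lm.server \
	  --model mlx-community/Mistral-7B-Instruct-v0.3-4bit \
	  --host 0.0.0.0 --port 8081 --trust-remote-code --temp=0.7`

	opts, err := mlx.ParseMlxCommand(cmd)
	if err != nil {
		panic(err)
	}
	fmt.Println(opts.Model, opts.Host, opts.Port, opts.TrustRemoteCode, opts.Temp)
	// mlx-community/Mistral-7B-Instruct-v0.3-4bit 0.0.0.0 8081 true 0.7
}
```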
@@ -10,9 +10,19 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
+// BackendConfig contains backend executable configurations
+type BackendConfig struct {
+	// Path to llama-server executable (llama.cpp backend)
+	LlamaExecutable string `yaml:"llama_executable"`
+
+	// Path to mlx_lm executable (MLX-LM backend)
+	MLXLMExecutable string `yaml:"mlx_lm_executable"`
+}
+
 // AppConfig represents the configuration for llamactl
 type AppConfig struct {
 	Server    ServerConfig    `yaml:"server"`
+	Backends  BackendConfig   `yaml:"backends"`
 	Instances InstancesConfig `yaml:"instances"`
 	Auth      AuthConfig      `yaml:"auth"`
 	Version   string          `yaml:"-"`
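For reference, a standalone sketch (not from the PR) of how the new `backends:` block maps onto these struct tags, using the same yaml.v3 library imported above; the struct is duplicated locally so the snippet runs on its own:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Local copy of the new struct for the sketch; the real one lives in pkg/config.
type BackendConfig struct {
	LlamaExecutable string `yaml:"llama_executable"`
	MLXLMExecutable string `yaml:"mlx_lm_executable"`
}

func main() {
	data := []byte("llama_executable: /usr/local/bin/llama-server\nmlx_lm_executable: mlx_lm.server\n")

	var cfg BackendConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.LlamaExecutable, cfg.MLXLMExecutable)
	// /usr/local/bin/llama-server mlx_lm.server
}
```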
@@ -61,9 +71,6 @@ type InstancesConfig struct {
 	// Enable LRU eviction for instance logs
 	EnableLRUEviction bool `yaml:"enable_lru_eviction"`
 
-	// Path to llama-server executable
-	LlamaExecutable string `yaml:"llama_executable"`
-
 	// Default auto-restart setting for new instances
 	DefaultAutoRestart bool `yaml:"default_auto_restart"`
 
@@ -112,6 +119,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			AllowedOrigins: []string{"*"}, // Default to allow all origins
 			EnableSwagger:  false,
 		},
+		Backends: BackendConfig{
+			LlamaExecutable: "llama-server",
+			MLXLMExecutable: "mlx_lm.server",
+		},
 		Instances: InstancesConfig{
 			PortRange: [2]int{8000, 9000},
 			DataDir:   getDefaultDataDirectory(),
@@ -121,7 +132,6 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			MaxInstances:        -1, // -1 means unlimited
 			MaxRunningInstances: -1, // -1 means unlimited
 			EnableLRUEviction:   true,
-			LlamaExecutable:     "llama-server",
 			DefaultAutoRestart:  true,
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
@@ -229,8 +239,12 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.EnableLRUEviction = b
 		}
 	}
+	// Backend config
 	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-		cfg.Instances.LlamaExecutable = llamaExec
+		cfg.Backends.LlamaExecutable = llamaExec
+	}
+	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
+		cfg.Backends.MLXLMExecutable = mlxLMExec
 	}
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
@@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
 	if cfg.Instances.MaxInstances != -1 {
 		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Instances.LlamaExecutable != "llama-server" {
-		t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable)
-	}
 	if !cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected default auto restart to be true")
 	}
@@ -101,9 +98,6 @@ instances:
 	if cfg.Instances.MaxInstances != 5 {
 		t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" {
-		t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable)
-	}
 	if cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected auto restart to be false")
 	}
@@ -156,8 +150,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 	if cfg.Instances.MaxInstances != 20 {
 		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Instances.LlamaExecutable != "/env/llama-server" {
-		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Instances.LlamaExecutable)
+	if cfg.Backends.LlamaExecutable != "/env/llama-server" {
+		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
 	}
 	if cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected auto restart to be false")
@@ -31,9 +31,10 @@ func (realTimeProvider) Now() time.Time {
 
 // Process represents a running instance of the llama server
 type Process struct {
 	Name    string                 `json:"name"`
 	options *CreateInstanceOptions `json:"-"`
-	globalSettings *config.InstancesConfig
+	globalInstanceSettings *config.InstancesConfig
+	globalBackendSettings  *config.BackendConfig
 
 	// Status
 	Status InstanceStatus `json:"status"`
@@ -65,22 +66,23 @@ type Process struct {
 }
 
 // NewInstance creates a new instance with the given name, log path, and options
-func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
+func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
 	// Validate and copy options
-	options.ValidateAndApplyDefaults(name, globalSettings)
+	options.ValidateAndApplyDefaults(name, globalInstanceSettings)
 
 	// Create the instance logger
-	logger := NewInstanceLogger(name, globalSettings.LogsDir)
+	logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
 
 	return &Process{
 		Name:    name,
 		options: options,
-		globalSettings: globalSettings,
-		logger:         logger,
-		timeProvider:   realTimeProvider{},
-		Created:        time.Now().Unix(),
-		Status:         Stopped,
-		onStatusChange: onStatusChange,
+		globalInstanceSettings: globalInstanceSettings,
+		globalBackendSettings:  globalBackendSettings,
+		logger:                 logger,
+		timeProvider:           realTimeProvider{},
+		Created:                time.Now().Unix(),
+		Status:                 Stopped,
+		onStatusChange:         onStatusChange,
 	}
 }
 
@@ -96,7 +98,13 @@ func (i *Process) GetPort() int {
 	if i.options != nil {
 		switch i.options.BackendType {
 		case backends.BackendTypeLlamaCpp:
-			return i.options.LlamaServerOptions.Port
+			if i.options.LlamaServerOptions != nil {
+				return i.options.LlamaServerOptions.Port
+			}
+		case backends.BackendTypeMlxLm:
+			if i.options.MlxServerOptions != nil {
+				return i.options.MlxServerOptions.Port
+			}
 		}
 	}
 	return 0
@@ -108,7 +116,13 @@ func (i *Process) GetHost() string {
 	if i.options != nil {
 		switch i.options.BackendType {
 		case backends.BackendTypeLlamaCpp:
-			return i.options.LlamaServerOptions.Host
+			if i.options.LlamaServerOptions != nil {
+				return i.options.LlamaServerOptions.Host
+			}
+		case backends.BackendTypeMlxLm:
+			if i.options.MlxServerOptions != nil {
+				return i.options.MlxServerOptions.Host
+			}
 		}
 	}
 	return ""
@@ -124,7 +138,7 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
 	}
 
 	// Validate and copy options
-	options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
+	options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
 
 	i.options = options
 	// Clear the proxy so it gets recreated with new options
@@ -153,8 +167,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	var port int
 	switch i.options.BackendType {
 	case backends.BackendTypeLlamaCpp:
-		host = i.options.LlamaServerOptions.Host
-		port = i.options.LlamaServerOptions.Port
+		if i.options.LlamaServerOptions != nil {
+			host = i.options.LlamaServerOptions.Host
+			port = i.options.LlamaServerOptions.Port
+		}
+	case backends.BackendTypeMlxLm:
+		if i.options.MlxServerOptions != nil {
+			host = i.options.MlxServerOptions.Host
+			port = i.options.MlxServerOptions.Port
+		}
 	}
 
 	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
@@ -215,7 +236,7 @@ func (i *Process) UnmarshalJSON(data []byte) error {
 
 	// Handle options with validation and defaults
 	if aux.Options != nil {
-		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
+		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
 		i.options = aux.Options
 	}
@@ -11,6 +11,11 @@ import (
 )
 
 func TestNewInstance(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir:            "/tmp/test",
 		DefaultAutoRestart: true,
@@ -29,7 +34,7 @@ func TestNewInstance(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -60,6 +65,11 @@ func TestNewInstance(t *testing.T) {
 }
 
 func TestNewInstance_WithRestartOptions(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir:            "/tmp/test",
 		DefaultAutoRestart: true,
@@ -85,7 +95,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	opts := instance.GetOptions()
 
 	// Check that explicit values override defaults
@@ -101,6 +111,11 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 }
 
 func TestSetOptions(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir:            "/tmp/test",
 		DefaultAutoRestart: true,
@@ -119,7 +134,7 @@ func TestSetOptions(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
 
 	// Update options
 	newOptions := &instance.CreateInstanceOptions{
@@ -147,6 +162,11 @@ func TestSetOptions(t *testing.T) {
 }
 
 func TestGetProxy(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -162,7 +182,7 @@ func TestGetProxy(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 
 	// Get proxy for the first time
 	proxy1, err := inst.GetProxy()
@@ -184,6 +204,11 @@ func TestGetProxy(t *testing.T) {
 }
 
 func TestMarshalJSON(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir:            "/tmp/test",
 		DefaultAutoRestart: true,
@@ -202,7 +227,7 @@ func TestMarshalJSON(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 
 	data, err := json.Marshal(instance)
 	if err != nil {
@@ -338,6 +363,11 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		},
 	}
 
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -356,7 +386,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
+	instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
 	opts := instance.GetOptions()
 
 	if opts.MaxRestarts == nil {
@@ -9,6 +9,8 @@ import (
 	"runtime"
 	"syscall"
 	"time"
+
+	"llamactl/pkg/backends"
 )
 
 // Start starts the llama server instance and returns an error if it fails.
@@ -41,7 +43,20 @@ func (i *Process) Start() error {
 
 	args := i.options.BuildCommandArgs()
 	i.ctx, i.cancel = context.WithCancel(context.Background())
-	i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
+
+	var executable string
+
+	// Get executable from global configuration
+	switch i.options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		executable = i.globalBackendSettings.LlamaExecutable
+	case backends.BackendTypeMlxLm:
+		executable = i.globalBackendSettings.MLXLMExecutable
+	default:
+		return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
+	}
+
+	i.cmd = exec.CommandContext(i.ctx, executable, args...)
 
 	if runtime.GOOS != "windows" {
 		setProcAttrs(i.cmd)
@@ -175,9 +190,16 @@ func (i *Process) WaitForHealthy(timeout int) error {
 	var host string
 	var port int
 	switch opts.BackendType {
-	case "llama-cpp":
-		host = opts.LlamaServerOptions.Host
-		port = opts.LlamaServerOptions.Port
+	case backends.BackendTypeLlamaCpp:
+		if opts.LlamaServerOptions != nil {
+			host = opts.LlamaServerOptions.Host
+			port = opts.LlamaServerOptions.Port
+		}
+	case backends.BackendTypeMlxLm:
+		if opts.MlxServerOptions != nil {
+			host = opts.MlxServerOptions.Host
+			port = opts.MlxServerOptions.Port
+		}
 	}
 	if host == "" {
 		host = "localhost"
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends/mlx"
 	"llamactl/pkg/config"
 	"log"
 )
@@ -22,8 +23,9 @@ type CreateInstanceOptions struct {
 	BackendType    backends.BackendType `json:"backend_type"`
 	BackendOptions map[string]any       `json:"backend_options,omitempty"`
 
-	// LlamaServerOptions contains the options for the llama server
+	// Backend-specific options
 	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
+	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
 }
 
 // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -55,6 +57,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 				return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
 			}
 		}
+	case backends.BackendTypeMlxLm:
+		if c.BackendOptions != nil {
+			optionsData, err := json.Marshal(c.BackendOptions)
+			if err != nil {
+				return fmt.Errorf("failed to marshal backend options: %w", err)
+			}
+
+			c.MlxServerOptions = &mlx.MlxServerOptions{}
+			if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
+				return fmt.Errorf("failed to unmarshal MLX options: %w", err)
+			}
+		}
 	default:
 		return fmt.Errorf("unknown backend type: %s", c.BackendType)
 	}
@@ -72,19 +86,36 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
 		Alias: (*Alias)(c),
 	}
 
-	// Convert LlamaServerOptions back to BackendOptions map for JSON
-	if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
-		data, err := json.Marshal(c.LlamaServerOptions)
-		if err != nil {
-			return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
-		}
-
-		var backendOpts map[string]any
-		if err := json.Unmarshal(data, &backendOpts); err != nil {
-			return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
-		}
-
-		aux.BackendOptions = backendOpts
-	}
+	// Convert backend-specific options back to BackendOptions map for JSON
+	switch c.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		if c.LlamaServerOptions != nil {
+			data, err := json.Marshal(c.LlamaServerOptions)
+			if err != nil {
+				return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
+			}
+
+			var backendOpts map[string]any
+			if err := json.Unmarshal(data, &backendOpts); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+			}
+
+			aux.BackendOptions = backendOpts
+		}
+	case backends.BackendTypeMlxLm:
+		if c.MlxServerOptions != nil {
+			data, err := json.Marshal(c.MlxServerOptions)
+			if err != nil {
+				return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
+			}
+
+			var backendOpts map[string]any
+			if err := json.Unmarshal(data, &backendOpts); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+			}
+
+			aux.BackendOptions = backendOpts
+		}
+	}
 
 	return json.Marshal(aux)
@@ -136,6 +167,10 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
 		if c.LlamaServerOptions != nil {
 			return c.LlamaServerOptions.BuildCommandArgs()
 		}
+	case backends.BackendTypeMlxLm:
+		if c.MlxServerOptions != nil {
+			return c.MlxServerOptions.BuildCommandArgs()
+		}
 	}
 	return []string{}
 }
@@ -33,6 +33,11 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
 // Timeout-related tests
 
 func TestUpdateLastRequestTime(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -47,13 +52,18 @@ func TestUpdateLastRequestTime(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 
 	// Test that UpdateLastRequestTime doesn't panic
 	inst.UpdateLastRequestTime()
 }
 
 func TestShouldTimeout_NotRunning(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -70,7 +80,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 
 	// Instance is not running, should not timeout regardless of configuration
 	if inst.ShouldTimeout() {
@@ -79,6 +89,11 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
 }
 
 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -105,7 +120,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 		},
 	}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	// Simulate running state
 	inst.SetStatus(instance.Running)
 
@@ -117,6 +132,11 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 }
 
 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -133,7 +153,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	inst.SetStatus(instance.Running)
 
 	// Update last request time to now
@@ -146,6 +166,11 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 }
 
 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -162,7 +187,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	inst.SetStatus(instance.Running)
 
 	// Use MockTimeProvider to simulate old last request time
@@ -181,6 +206,11 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 }
 
 func TestTimeoutConfiguration_Validation(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -209,7 +239,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 
-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	opts := inst.GetOptions()
 
 	if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
@@ -35,6 +35,7 @@ type instanceManager struct {
 	runningInstances map[string]struct{}
 	ports            map[int]bool
 	instancesConfig  config.InstancesConfig
+	backendsConfig   config.BackendConfig
 
 	// Timeout checker
 	timeoutChecker *time.Ticker
@@ -44,7 +45,7 @@ type instanceManager struct {
 }
 
 // NewInstanceManager creates a new instance of InstanceManager.
-func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
@@ -53,6 +54,7 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager
 		runningInstances: make(map[string]struct{}),
 		ports:            make(map[int]bool),
 		instancesConfig:  instancesConfig,
+		backendsConfig:   backendsConfig,
 
 		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
 		shutdownChan:   make(chan struct{}),
@@ -241,7 +243,7 @@ func (im *instanceManager) loadInstance(name, path string) error {
 	}
 
 	// Create new inst using NewInstance (handles validation, defaults, setup)
-	inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
 
 	// Restore persisted fields that NewInstance doesn't set
 	inst.Created = persistedInstance.Created
@@ -15,18 +15,22 @@ import (
 )
 
 func TestNewInstanceManager(t *testing.T) {
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:    [2]int{8000, 9000},
 		LogsDir:      "/tmp/test",
 		MaxInstances: 5,
-		LlamaExecutable:      "llama-server",
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
 
-	mgr := manager.NewInstanceManager(cfg)
+	mgr := manager.NewInstanceManager(backendConfig, cfg)
 	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
@@ -44,6 +48,11 @@ func TestNewInstanceManager(t *testing.T) {
 func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()
 
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:    [2]int{8000, 9000},
 		InstancesDir: tempDir,
@@ -52,7 +61,7 @@ func TestPersistence(t *testing.T) {
 	}
 
 	// Test instance persistence on creation
-	manager1 := manager.NewInstanceManager(cfg)
+	manager1 := manager.NewInstanceManager(backendConfig, cfg)
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -73,7 +82,7 @@ func TestPersistence(t *testing.T) {
 	}
 
 	// Test loading instances from disk
-	manager2 := manager.NewInstanceManager(cfg)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg)
 	instances, err := manager2.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
@@ -172,15 +181,19 @@ func TestShutdown(t *testing.T) {
 
 // Helper function to create a test manager with standard config
 func createTestManager() manager.InstanceManager {
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:    [2]int{8000, 9000},
 		LogsDir:      "/tmp/test",
 		MaxInstances: 10,
-		LlamaExecutable:      "llama-server",
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
-	return manager.NewInstanceManager(cfg)
+	return manager.NewInstanceManager(backendConfig, cfg)
 }
@@ -62,7 +62,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
 		im.onStatusChange(name, oldStatus, newStatus)
 	}
 
-	inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
 	im.instances[inst.Name] = inst
 
 	if err := im.persistInstance(inst); err != nil {
@@ -260,6 +260,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
 		if options.LlamaServerOptions != nil {
 			return options.LlamaServerOptions.Port
 		}
+	case backends.BackendTypeMlxLm:
+		if options.MlxServerOptions != nil {
+			return options.MlxServerOptions.Port
+		}
 	}
 	return 0
 }
@@ -271,6 +275,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
 		if options.LlamaServerOptions != nil {
 			options.LlamaServerOptions.Port = port
 		}
+	case backends.BackendTypeMlxLm:
+		if options.MlxServerOptions != nil {
+			options.MlxServerOptions.Port = port
+		}
 	}
 }
 
@@ -62,12 +62,16 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	}
 
 	// Test max instances limit
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
-	limitedManager := manager.NewInstanceManager(cfg)
+	limitedManager := manager.NewInstanceManager(backendConfig, cfg)
 
 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {
@@ -13,13 +13,17 @@ import (
 
 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		TimeoutCheckInterval: 10,
 		MaxInstances:         5,
 	}
 
-	manager := manager.NewInstanceManager(cfg)
+	manager := manager.NewInstanceManager(backendConfig, cfg)
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
@@ -7,6 +7,7 @@ import (
 	"io"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends/mlx"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
@@ -684,3 +685,57 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
 		}
 	}
 }
+
+// ParseMlxCommand godoc
+// @Summary Parse mlx_lm.server command
+// @Description Parses MLX-LM server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Router /backends/mlx/parse-command [post]
+func (h *Handler) ParseMlxCommand() http.HandlerFunc {
+	type errorResponse struct {
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" {
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		mlxOptions, err := mlx.ParseMlxCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		// Currently only support mlx_lm backend type
+		backendType := backends.BackendTypeMlxLm
+
+		options := &instance.CreateInstanceOptions{
+			BackendType:      backendType,
+			MlxServerOptions: mlxOptions,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
+		}
+	}
+}
@@ -55,6 +55,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			r.Route("/llama-cpp", func(r chi.Router) {
 				r.Post("/parse-command", handler.ParseLlamaCommand())
 			})
+			r.Route("/mlx", func(r chi.Router) {
+				r.Post("/parse-command", handler.ParseMlxCommand())
+			})
 		})
 
 		// Instance management endpoints
@@ -44,6 +44,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 	switch options.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		return validateLlamaCppOptions(options)
+	case backends.BackendTypeMlxLm:
+		return validateMlxOptions(options)
 	default:
 		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
 	}
@@ -68,6 +70,24 @@ func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
 	return nil
 }
 
+// validateMlxOptions validates MLX backend specific options
+func validateMlxOptions(options *instance.CreateInstanceOptions) error {
+	if options.MlxServerOptions == nil {
+		return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
+	}
+
+	if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
+		return err
+	}
+
+	// Basic network validation for port
+	if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
+	}
+
+	return nil
+}
+
 // validateStructStrings recursively validates all string fields in a struct
 func validateStructStrings(v any, fieldPath string) error {
 	val := reflect.ValueOf(v)
@@ -2,11 +2,10 @@ import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
-import type { BackendOptions } from '@/schemas/instanceOptions'
 import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
 
 interface BackendFormFieldProps {
-  fieldKey: keyof BackendOptions
+  fieldKey: string
   value: string | number | boolean | string[] | undefined
   onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
 }
@@ -41,8 +41,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   // Get field lists dynamically from the type
   const basicFields = getBasicFields();
   const advancedFields = getAdvancedFields();
-  const basicBackendFields = getBasicBackendFields();
-  const advancedBackendFields = getAdvancedBackendFields();
+  const basicBackendFields = getBasicBackendFields(formData.backend_type);
+  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);
 
   // Reset form when dialog opens/closes or when instance changes
   useEffect(() => {
@@ -66,10 +66,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   }, [open, instance]);
 
   const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
-    setFormData((prev) => ({
-      ...prev,
-      [key]: value,
-    }));
+    setFormData((prev) => {
+      // If backend_type is changing, clear backend_options
+      if (key === 'backend_type' && prev.backend_type !== value) {
+        return {
+          ...prev,
+          [key]: value,
+          backend_options: {}, // Clear backend options when backend type changes
+        };
+      }
+
+      return {
+        ...prev,
+        [key]: value,
+      };
+    });
   };
 
   const handleBackendFieldChange = (key: string, value: any) => {
@@ -78,7 +89,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
       backend_options: {
         ...prev.backend_options,
         [key]: value,
-      },
+      } as any,
     }));
   };
 
@@ -260,7 +271,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
                   <BackendFormField
                     key={fieldKey}
                     fieldKey={fieldKey}
-                    value={formData.backend_options?.[fieldKey]}
+                    value={(formData.backend_options as any)?.[fieldKey]}
                     onChange={handleBackendFieldChange}
                   />
                 ))}
@@ -345,7 +356,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
                   <BackendFormField
                     key={fieldKey}
                     fieldKey={fieldKey}
-                    value={formData.backend_options?.[fieldKey]}
+                    value={(formData.backend_options as any)?.[fieldKey]}
                     onChange={handleBackendFieldChange}
                   />
                 ))}
@@ -2,8 +2,7 @@ import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
-import type { CreateInstanceOptions } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import { BackendType, type CreateInstanceOptions } from '@/types/instance'
 import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
 
 interface ZodFormFieldProps {
@@ -39,7 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
             className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
           >
             <option value={BackendType.LLAMA_CPP}>Llama Server</option>
-            {/* Add more backend types here as they become available */}
+            <option value={BackendType.MLX_LM}>MLX LM</option>
           </select>
           {config.description && (
             <p className="text-sm text-muted-foreground">{config.description}</p>
@@ -93,6 +93,14 @@ export const backendsApi = {
         body: JSON.stringify({ command }),
       }),
   },
+  mlx: {
+    // POST /backends/mlx/parse-command
+    parseCommand: (command: string) =>
+      apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
+        method: 'POST',
+        body: JSON.stringify({ command }),
+      }),
+  },
 };
 
 // Instance API functions
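The new `backendsApi.mlx.parseCommand` helper above can be wired into the create-instance dialog; a minimal sketch, assuming the module paths (`@/lib/api`, `@/types/instance`) used elsewhere in the frontend and an illustrative command string:

```typescript
// Sketch only: prefill create-instance options from a pasted mlx_lm.server command.
// backendsApi.mlx.parseCommand and CreateInstanceOptions come from the changes in this
// commit; the import paths and the example command are assumptions for illustration.
import { backendsApi } from '@/lib/api'
import type { CreateInstanceOptions } from '@/types/instance'

export async function prefillFromMlxCommand(command: string): Promise<CreateInstanceOptions> {
  // e.g. "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081"
  return backendsApi.mlx.parseCommand(command)
}
```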
@@ -1,4 +1,15 @@
-import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'
+import {
+  type CreateInstanceOptions,
+  type LlamaCppBackendOptions,
+  type MlxBackendOptions,
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  getAllFieldKeys,
+  getAllLlamaCppFieldKeys,
+  getAllMlxFieldKeys,
+  getLlamaCppFieldType,
+  getMlxFieldType
+} from '@/schemas/instanceOptions'
 
 // Instance-level basic fields (not backend-specific)
 export const basicFieldsConfig: Record<string, {
@@ -36,8 +47,8 @@ export const basicFieldsConfig: Record<string, {
   }
 }
 
-// Backend-specific basic fields (these go in backend_options)
-export const basicBackendFieldsConfig: Record<string, {
+// LlamaCpp backend-specific basic fields
+const basicLlamaCppFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
@@ -46,7 +57,8 @@ export const basicBackendFieldsConfig: Record<string, {
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
-    description: 'Path to the model file'
+    description: 'Path to the model file',
+    required: true
   },
   hf_repo: {
     label: 'Hugging Face Repository',
@@ -65,13 +77,50 @@ export const basicBackendFieldsConfig: Record<string, {
   }
 }
 
-export function isBasicField(key: keyof CreateInstanceOptions): boolean {
+// MLX backend-specific basic fields
+const basicMlxFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+  required?: boolean
+}> = {
+  model: {
+    label: 'Model',
+    placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
+    description: 'The path to the MLX model weights, tokenizer, and config',
+    required: true
+  },
+  temp: {
+    label: 'Temperature',
+    placeholder: '0.0',
+    description: 'Default sampling temperature (default: 0.0)'
+  },
+  top_p: {
+    label: 'Top-P',
+    placeholder: '1.0',
+    description: 'Default nucleus sampling top-p (default: 1.0)'
+  },
+  top_k: {
+    label: 'Top-K',
+    placeholder: '0',
+    description: 'Default top-k sampling (default: 0, disables top-k)'
+  },
+  min_p: {
+    label: 'Min-P',
+    placeholder: '0.0',
+    description: 'Default min-p sampling (default: 0.0, disables min-p)'
+  },
+  max_tokens: {
+    label: 'Max Tokens',
+    placeholder: '512',
+    description: 'Default maximum number of tokens to generate (default: 512)'
+  }
+}
+
+function isBasicField(key: keyof CreateInstanceOptions): boolean {
   return key in basicFieldsConfig
 }
 
-export function isBasicBackendField(key: keyof BackendOptions): boolean {
-  return key in basicBackendFieldsConfig
-}
-
 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
   return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
@@ -81,13 +130,61 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
   return getAllFieldKeys().filter(key => !isBasicField(key))
 }
 
-export function getBasicBackendFields(): (keyof BackendOptions)[] {
-  return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
+export function getBasicBackendFields(backendType?: string): string[] {
+  if (backendType === 'mlx_lm') {
+    return Object.keys(basicMlxFieldsConfig)
+  } else if (backendType === 'llama_cpp') {
+    return Object.keys(basicLlamaCppFieldsConfig)
+  }
+  // Default to LlamaCpp for backward compatibility
+  return Object.keys(basicLlamaCppFieldsConfig)
 }
 
-export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
-  return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
+export function getAdvancedBackendFields(backendType?: string): string[] {
+  if (backendType === 'mlx_lm') {
+    return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
+  } else if (backendType === 'llama_cpp') {
+    return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
+  }
+  // Default to LlamaCpp for backward compatibility
+  return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
+}
+
+// Combined backend fields config for use in BackendFormField
+export const basicBackendFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+  required?: boolean
+}> = {
+  ...basicLlamaCppFieldsConfig,
+  ...basicMlxFieldsConfig
+}
+
+// Get field type for any backend option (union type)
+export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
+  // Try to get type from LlamaCpp schema first
+  try {
+    if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
+      return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
+  // Try MLX schema
+  try {
+    if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
+      return getMlxFieldType(key as keyof MlxBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
+  // Default fallback
+  return 'text'
 }
 
 // Re-export the Zod-based functions
-export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
+export { getFieldType } from '@/schemas/instanceOptions'
@@ -1,8 +1,8 @@
 import { BackendType } from '@/types/instance'
 import { z } from 'zod'
 
-// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
-export const BackendOptionsSchema = z.object({
+// Define the LlamaCpp backend options schema
+export const LlamaCppBackendOptionsSchema = z.object({
   // Common params
   verbose_prompt: z.boolean().optional(),
   threads: z.number().optional(),
@@ -170,6 +170,39 @@ export const BackendOptionsSchema = z.object({
   fim_qwen_14b_spec: z.boolean().optional(),
 })
 
+// Define the MLX backend options schema
+export const MlxBackendOptionsSchema = z.object({
+  // Basic connection options
+  model: z.string().optional(),
+  host: z.string().optional(),
+  port: z.number().optional(),
+
+  // Model and adapter options
+  adapter_path: z.string().optional(),
+  draft_model: z.string().optional(),
+  num_draft_tokens: z.number().optional(),
+  trust_remote_code: z.boolean().optional(),
+
+  // Logging and templates
+  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
+  chat_template: z.string().optional(),
+  use_default_chat_template: z.boolean().optional(),
+  chat_template_args: z.string().optional(), // JSON string
+
+  // Sampling defaults
+  temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
+  top_p: z.number().optional(),
+  top_k: z.number().optional(),
+  min_p: z.number().optional(),
+  max_tokens: z.number().optional(),
+})
+
+// Backend options union
+export const BackendOptionsSchema = z.union([
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+])
+
 // Define the main create instance options schema
 export const CreateInstanceOptionsSchema = z.object({
   // Restart options
@@ -180,11 +213,13 @@ export const CreateInstanceOptionsSchema = z.object({
   on_demand_start: z.boolean().optional(),
 
   // Backend configuration
-  backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
+  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
 })
 
 // Infer the TypeScript types from the schemas
+export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
+export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
 export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
 
@@ -193,9 +228,14 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }
 
-// Helper to get all backend option field keys
-export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
-  return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
+// Helper to get all LlamaCpp backend option field keys
+export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
+  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
+}
+
+// Helper to get all MLX backend option field keys
+export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
+  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
 }
 
 // Get field type from Zod schema
@@ -213,9 +253,9 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' {
   return 'text' // ZodString and others default to text
 }
 
-// Get field type for backend options
-export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = BackendOptionsSchema.shape[key]
+// Get field type for LlamaCpp backend options
+export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
   if (!fieldSchema) return 'text'
 
   // Handle ZodOptional wrapper
@@ -225,4 +265,19 @@ export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
   return 'text' // ZodString and others default to text
+}
+
+// Get field type for MLX backend options
+export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+  const fieldSchema = MlxBackendOptionsSchema.shape[key]
+  if (!fieldSchema) return 'text'
+
+  // Handle ZodOptional wrapper
+  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
+
+  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
+  if (innerSchema instanceof z.ZodNumber) return 'number'
+  if (innerSchema instanceof z.ZodArray) return 'array'
+  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
+  return 'text' // ZodString and others default to text
 }
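For reference, a minimal sketch of how the new schemas compose (import path assumed from the diff above; the model id and sampling values are illustrative only):

```typescript
// Parse MLX backend options and a full instance payload with the schemas added above.
import { MlxBackendOptionsSchema, CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'

const mlxOptions = MlxBackendOptionsSchema.parse({
  model: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', // illustrative model id
  temp: 0.7,
  max_tokens: 1024,
})

const createOptions = CreateInstanceOptionsSchema.parse({
  backend_type: 'mlx_lm', // BackendType.MLX_LM
  backend_options: mlxOptions,
})
```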
@@ -3,7 +3,9 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
 export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
 
 export const BackendType = {
-  LLAMA_CPP: 'llama_cpp'
+  LLAMA_CPP: 'llama_cpp',
+  MLX_LM: 'mlx_lm',
+  // MLX_VLM: 'mlx_vlm', // Future expansion
 } as const
 
 export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]