Merge pull request #32 from lordmathis/feat/mlx-backend
feat: Implement mlx-lm backend
README.md
@@ -2,30 +2,35 @@
**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
**Unified management and routing for llama.cpp and MLX models with web dashboard.**

## Why llamactl?

## Features

🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts

### 🚀 Easy Model Management

- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts

### 🔗 Universal Compatibility

- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)

### 🌐 User-Friendly Interface

- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access

### ⚡ Smart Operations

- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits

![Dashboard Screenshot](docs/images/dashboard.png)
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management

## Quick Start

```bash
# 1. Install llama-server (one-time setup)
# See: https://github.com/ggml-org/llama.cpp#quick-start
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm

# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -42,15 +47,21 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Set model path and GPU layers
4. Start or stop the instance
3. Choose backend type (llama.cpp or MLX)
4. Set model path and backend-specific options
5. Start or stop the instance

### Or use the REST API:
```bash
# Create instance
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
  -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'

# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'

# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
@@ -85,16 +96,31 @@ go build -o llamactl ./cmd/server
## Prerequisites

### Backend Dependencies

**For llama.cpp backend:**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:

```bash
# Quick install methods:
# Homebrew (macOS)
brew install llama.cpp

# Or build from source - see llama.cpp docs
```

**For MLX backend (macOS only):**
You need MLX-LM installed:

```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm

# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```

## Configuration

llamactl works out of the box with sensible defaults.
@@ -106,6 +132,10 @@ server:
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable

instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -115,7 +145,6 @@ instances:
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -58,7 +58,7 @@ func main() {
|
||||
}
|
||||
|
||||
// Initialize the instance manager
|
||||
instanceManager := manager.NewInstanceManager(cfg.Instances)
|
||||
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
|
||||
|
||||
// Create a new handler with the instance manager
|
||||
handler := server.NewHandler(instanceManager, cfg)
|
||||
|
||||
@@ -19,6 +19,10 @@ server:
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable

instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -28,7 +32,6 @@ instances:
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -79,11 +82,23 @@ server:
  enable_swagger: false # Enable Swagger UI (default: false)
```

**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

### Backend Configuration

```yaml
backends:
  llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
```

**Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
### Instance Configuration
@@ -97,7 +112,6 @@ instances:
  max_instances: -1 # Maximum instances (-1 = unlimited)
  max_running_instances: -1 # Maximum running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: "llama-server" # Path to llama-server executable
  default_auto_restart: true # Default auto-restart setting
  default_max_restarts: 3 # Default maximum restart attempts
  default_restart_delay: 5 # Default restart delay in seconds
@@ -113,9 +127,8 @@ instances:
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
@@ -4,11 +4,14 @@ This guide will walk you through installing Llamactl on your system.
## Prerequisites

### Backend Dependencies

llamactl supports multiple backends. Install at least one:

**For llama.cpp backend (all platforms):**

You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:

**Quick install methods:**

```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
@@ -18,6 +21,22 @@ winget install llama.cpp

Or build from source - see llama.cpp docs

**For MLX backend (macOS only):**

MLX provides optimized inference on Apple Silicon. Install MLX-LM:

```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm

# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```

Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)

## Installation Methods

### Option 1: Download Binary (Recommended)
@@ -1,22 +1,23 @@
# Llamactl Documentation

Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**

![Dashboard Screenshot](images/dashboard.png)

## What is Llamactl?

Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.

## Features

🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts

## Quick Links
@@ -1,6 +1,6 @@
# Managing Instances

Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API.

## Overview

@@ -39,40 +39,55 @@ Each instance is displayed as a card showing:

1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
   - **Model Path**: Full path to your downloaded GGUF model file
   - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
   - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
3. **Choose Backend Type**:
   - **llama.cpp**: For GGUF models using llama-server
   - **MLX**: For MLX-optimized models (macOS only)
4. Configure model source:
   - **For llama.cpp**: GGUF model path or HuggingFace repo
   - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
5. Configure optional instance management settings:
   - **Auto Restart**: Automatically restart instance on failure
   - **Max Restarts**: Maximum number of restart attempts
   - **Restart Delay**: Delay in seconds between restart attempts
   - **On Demand Start**: Start instance when receiving a request to the OpenAI-compatible endpoint
   - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
   - **Threads**: Number of CPU threads to use
   - **Context Size**: Context window size (ctx_size)
   - **GPU Layers**: Number of layers to offload to GPU
   - **Port**: Network port (auto-assigned by llamactl if not specified)
   - **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance
6. Configure backend-specific options:
   - **llama.cpp**: Threads, context size, GPU layers, port, etc.
   - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
7. Click **"Create"** to save the instance

### Via API
```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/instances/my-instance \
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf",
      "threads": 8,
      "ctx_size": 4096
      "ctx_size": 4096,
      "gpu_layers": 32
    }
  }'

# Create instance with HuggingFace model
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "mlx_lm",
    "backend_options": {
      "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
      "temp": 0.7,
      "top_p": 0.9,
      "max_tokens": 2048
    },
    "auto_restart": true,
    "max_restarts": 3
  }'

# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
  -H "Content-Type: application/json" \
  -d '{
@@ -81,9 +96,7 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
      "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
      "hf_file": "gemma-3-27b-it-GGUF.gguf",
      "gpu_layers": 32
    },
    "auto_restart": true,
    "max_restarts": 3
    }
  }'
```
@@ -166,14 +179,16 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy

Llamactl proxies all requests to the underlying llama-server instances.
Llamactl proxies all requests to the underlying backend instances (llama-server or MLX).

```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```

Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
Both backends provide OpenAI-compatible endpoints; a sketch of a client follows this list. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
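For example, a minimal Go client against the llamactl proxy. The payload follows the OpenAI chat-completions convention; the `model` field carrying the instance name is an assumption based on the README's "route requests by model name", and `my-mlx-instance` is the example instance created above:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes a running instance named "my-mlx-instance"; llamactl is
	// expected to route the request to it based on the "model" field.
	body := []byte(`{"model": "my-mlx-instance", "messages": [{"role": "user", "content": "Hello"}]}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```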

### Instance Health
@@ -4,4 +4,6 @@ type BackendType string
const (
	BackendTypeLlamaCpp BackendType = "llama_cpp"
	BackendTypeMlxLm    BackendType = "mlx_lm"
	// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
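For orientation, a minimal sketch of how these constants get matched when dispatching per backend. `pickExecutable` is a hypothetical helper for illustration only; the real lookup in this commit lives in `Process.Start()` further below and reads the executables from `BackendConfig`:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// pickExecutable mirrors the switch used in Process.Start(): it maps a
// backend type to its default server executable name.
func pickExecutable(t backends.BackendType) (string, error) {
	switch t {
	case backends.BackendTypeLlamaCpp:
		return "llama-server", nil
	case backends.BackendTypeMlxLm:
		return "mlx_lm.server", nil
	default:
		return "", fmt.Errorf("unsupported backend type: %s", t)
	}
}

func main() {
	exe, _ := pickExecutable(backends.BackendTypeMlxLm)
	fmt.Println(exe) // mlx_lm.server
}
```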
pkg/backends/mlx/mlx.go (new file, 205 lines)
@@ -0,0 +1,205 @@
package mlx

import (
	"encoding/json"
	"reflect"
	"strconv"
)

type MlxServerOptions struct {
	// Basic connection options
	Model string `json:"model,omitempty"`
	Host  string `json:"host,omitempty"`
	Port  int    `json:"port,omitempty"`

	// Model and adapter options
	AdapterPath     string `json:"adapter_path,omitempty"`
	DraftModel      string `json:"draft_model,omitempty"`
	NumDraftTokens  int    `json:"num_draft_tokens,omitempty"`
	TrustRemoteCode bool   `json:"trust_remote_code,omitempty"`

	// Logging and templates
	LogLevel               string `json:"log_level,omitempty"`
	ChatTemplate           string `json:"chat_template,omitempty"`
	UseDefaultChatTemplate bool   `json:"use_default_chat_template,omitempty"`
	ChatTemplateArgs       string `json:"chat_template_args,omitempty"` // JSON string

	// Sampling defaults
	Temp      float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
	TopP      float64 `json:"top_p,omitempty"`
	TopK      int     `json:"top_k,omitempty"`
	MinP      float64 `json:"min_p,omitempty"`
	MaxTokens int     `json:"max_tokens,omitempty"`
}

// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
	// First unmarshal into a map to handle multiple field names
	var raw map[string]any
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Create a temporary struct for standard unmarshaling
	type tempOptions MlxServerOptions
	temp := tempOptions{}

	// Standard unmarshal first
	if err := json.Unmarshal(data, &temp); err != nil {
		return err
	}

	// Copy to our struct
	*o = MlxServerOptions(temp)

	// Handle alternative field names
	fieldMappings := map[string]string{
		// Basic connection options
		"m":    "model",
		"host": "host",
		"port": "port",
		// "python_path": "python_path", // removed

		// Model and adapter options
		"adapter-path":      "adapter_path",
		"draft-model":       "draft_model",
		"num-draft-tokens":  "num_draft_tokens",
		"trust-remote-code": "trust_remote_code",

		// Logging and templates
		"log-level":                 "log_level",
		"chat-template":             "chat_template",
		"use-default-chat-template": "use_default_chat_template",
		"chat-template-args":        "chat_template_args",

		// Sampling defaults
		"temperature": "temp", // Support both temp and temperature
		"top-p":       "top_p",
		"top-k":       "top_k",
		"min-p":       "min_p",
		"max-tokens":  "max_tokens",
	}

	// Process alternative field names
	for altName, canonicalName := range fieldMappings {
		if value, exists := raw[altName]; exists {
			// Use reflection to set the field value
			v := reflect.ValueOf(o).Elem()
			field := v.FieldByNameFunc(func(fieldName string) bool {
				field, _ := v.Type().FieldByName(fieldName)
				jsonTag := field.Tag.Get("json")
				return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
			})

			if field.IsValid() && field.CanSet() {
				switch field.Kind() {
				case reflect.Int:
					if intVal, ok := value.(float64); ok {
						field.SetInt(int64(intVal))
					} else if strVal, ok := value.(string); ok {
						if intVal, err := strconv.Atoi(strVal); err == nil {
							field.SetInt(int64(intVal))
						}
					}
				case reflect.Float64:
					if floatVal, ok := value.(float64); ok {
						field.SetFloat(floatVal)
					} else if strVal, ok := value.(string); ok {
						if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
							field.SetFloat(floatVal)
						}
					}
				case reflect.String:
					if strVal, ok := value.(string); ok {
						field.SetString(strVal)
					}
				case reflect.Bool:
					if boolVal, ok := value.(bool); ok {
						field.SetBool(boolVal)
					}
				}
			}
		}
	}

	return nil
}
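A quick test-style sketch of what this alias handling buys: the same payload decodes whether a client sends MLX's canonical `temp` or the more common `temperature` spelling. The import path is the one this commit introduces; the specific field values are illustrative:

```go
package mlx_test

import (
	"encoding/json"
	"testing"

	"llamactl/pkg/backends/mlx"
)

func TestUnmarshalAliases(t *testing.T) {
	// "temperature", "adapter-path" and "max-tokens" are alternative
	// spellings; UnmarshalJSON maps them onto Temp, AdapterPath and MaxTokens.
	data := []byte(`{"temperature": 0.7, "adapter-path": "/tmp/adapter", "max-tokens": 256}`)

	var opts mlx.MlxServerOptions
	if err := json.Unmarshal(data, &opts); err != nil {
		t.Fatal(err)
	}
	if opts.Temp != 0.7 || opts.AdapterPath != "/tmp/adapter" || opts.MaxTokens != 256 {
		t.Errorf("aliases not applied: %+v", opts)
	}
}
```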
// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
	return &MlxServerOptions{
		Host:             "127.0.0.1", // MLX default (different from llama-server)
		Port:             8080,        // MLX default
		NumDraftTokens:   3,           // MLX default for speculative decoding
		LogLevel:         "INFO",      // MLX default
		Temp:             0.0,         // MLX default
		TopP:             1.0,         // MLX default
		TopK:             0,           // MLX default (disabled)
		MinP:             0.0,         // MLX default (disabled)
		MaxTokens:        512,         // MLX default
		ChatTemplateArgs: "{}",        // MLX default (empty JSON object)
	}
}

// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
	var args []string

	// Required and basic options
	if o.Model != "" {
		args = append(args, "--model", o.Model)
	}
	if o.Host != "" {
		args = append(args, "--host", o.Host)
	}
	if o.Port != 0 {
		args = append(args, "--port", strconv.Itoa(o.Port))
	}

	// Model and adapter options
	if o.AdapterPath != "" {
		args = append(args, "--adapter-path", o.AdapterPath)
	}
	if o.DraftModel != "" {
		args = append(args, "--draft-model", o.DraftModel)
	}
	if o.NumDraftTokens != 0 {
		args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
	}
	if o.TrustRemoteCode {
		args = append(args, "--trust-remote-code")
	}

	// Logging and templates
	if o.LogLevel != "" {
		args = append(args, "--log-level", o.LogLevel)
	}
	if o.ChatTemplate != "" {
		args = append(args, "--chat-template", o.ChatTemplate)
	}
	if o.UseDefaultChatTemplate {
		args = append(args, "--use-default-chat-template")
	}
	if o.ChatTemplateArgs != "" {
		args = append(args, "--chat-template-args", o.ChatTemplateArgs)
	}

	// Sampling defaults
	if o.Temp != 0 {
		args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
	}
	if o.TopP != 0 {
		args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
	}
	if o.TopK != 0 {
		args = append(args, "--top-k", strconv.Itoa(o.TopK))
	}
	if o.MinP != 0 {
		args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
	}
	if o.MaxTokens != 0 {
		args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
	}

	return args
}
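A small usage sketch for the constructor and `BuildCommandArgs` above. Note that zero values are skipped, so the "disabled" defaults (`Temp` 0.0, `TopK` 0, `MinP` 0.0) never reach the command line, while the non-zero defaults (`TopP` 1.0, `MaxTokens` 512, `NumDraftTokens` 3) do:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/mlx"
)

func main() {
	opts := mlx.NewMlxServerOptions()
	opts.Model = "mlx-community/Mistral-7B-Instruct-v0.3-4bit"

	// Emits --model, --host, --port, --num-draft-tokens, --log-level,
	// --chat-template-args, --top-p and --max-tokens; zero-valued fields
	// such as Temp, TopK and MinP are omitted.
	fmt.Println(opts.BuildCommandArgs())
}
```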
pkg/backends/mlx/parser.go (new file, 254 lines)
@@ -0,0 +1,254 @@
package mlx

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
	// 1. Normalize the command - handle multiline with backslashes
	trimmed := normalizeMultilineCommand(command)
	if trimmed == "" {
		return nil, fmt.Errorf("command cannot be empty")
	}

	// 2. Extract arguments from command
	args, err := extractArgumentsFromCommand(trimmed)
	if err != nil {
		return nil, err
	}

	// 3. Parse arguments into map
	options := make(map[string]any)

	i := 0
	for i < len(args) {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") { // skip positional / stray values
			i++
			continue
		}

		// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Unified parsing for --flag=value vs --flag value
		var rawFlag, rawValue string
		hasEquals := false
		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			rawFlag = parts[0]
			rawValue = parts[1] // may be empty string
			hasEquals = true
		} else {
			rawFlag = arg
		}

		flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
		flagName := strings.ReplaceAll(flagCore, "-", "_")

		// Detect value if not in equals form
		valueProvided := hasEquals
		if !hasEquals {
			if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
				rawValue = args[i+1]
				valueProvided = true
			}
		}

		if valueProvided {
			// MLX-specific validation for certain flags
			if flagName == "log_level" && !isValidLogLevel(rawValue) {
				return nil, fmt.Errorf("invalid log level: %s", rawValue)
			}

			options[flagName] = parseValue(rawValue)

			// Advance index: if we consumed a following token as value (non equals form), skip it
			if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
				i += 2
			} else {
				i++
			}
			continue
		}
		// Boolean flag (no value); this covers MLX's boolean switches such as
		// trust_remote_code and use_default_chat_template.
		options[flagName] = true
		i++
	}

	// 4. Convert to MlxServerOptions using existing UnmarshalJSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
	}

	var mlxOptions MlxServerOptions
	if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
		return nil, fmt.Errorf("failed to parse command options: %w", err)
	}

	// 5. Return MlxServerOptions
	return &mlxOptions, nil
}

// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
	validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
	for _, valid := range validLevels {
		if level == valid {
			return true
		}
	}
	return false
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
	// Surrounding matching quotes (single or double)
	if l := len(value); l >= 2 {
		if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
			value = value[1 : l-1]
		}
	}

	lower := strings.ToLower(value)
	if lower == "true" {
		return true
	}
	if lower == "false" {
		return false
	}

	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}
	return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
	// Handle escaped newlines (backslash followed by newline)
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")

	// Clean up extra whitespace
	re = regexp.MustCompile(`\s+`)
	normalized = re.ReplaceAllString(normalized, " ")

	return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
	// Split command into tokens respecting quotes
	tokens, err := splitCommandTokens(command)
	if err != nil {
		return nil, err
	}

	if len(tokens) == 0 {
		return nil, fmt.Errorf("no command tokens found")
	}

	// Check if first token looks like an executable
	firstToken := tokens[0]

	// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
	if strings.Contains(firstToken, string(filepath.Separator)) ||
		strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
		return tokens[1:], nil // Return everything except the executable
	}

	// Case 2: Just "mlx_lm.server" command
	if strings.ToLower(firstToken) == "mlx_lm.server" {
		return tokens[1:], nil // Return everything except the command
	}

	// Case 3: Arguments only (starts with a flag)
	if strings.HasPrefix(firstToken, "-") {
		return tokens, nil // Return all tokens as arguments
	}

	// Case 4: Unknown format - might be a different executable name
	// Be permissive and assume it's the executable
	return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
	var tokens []string
	var current strings.Builder
	inQuotes := false
	quoteChar := byte(0)
	escaped := false

	for i := 0; i < len(command); i++ {
		c := command[i]

		if escaped {
			current.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' {
			escaped = true
			current.WriteByte(c)
			continue
		}

		if !inQuotes && (c == '"' || c == '\'') {
			inQuotes = true
			quoteChar = c
			current.WriteByte(c)
		} else if inQuotes && c == quoteChar {
			inQuotes = false
			quoteChar = 0
			current.WriteByte(c)
		} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
			if current.Len() > 0 {
				tokens = append(tokens, current.String())
				current.Reset()
			}
		} else {
			current.WriteByte(c)
		}
	}

	if inQuotes {
		return nil, fmt.Errorf("unclosed quote in command")
	}

	if current.Len() > 0 {
		tokens = append(tokens, current.String())
	}

	return tokens, nil
}

// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
	return strings.HasPrefix(s, "-")
}
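Putting the parser and the options type together, a hedged usage sketch (the command string is illustrative):

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Multiline commands with trailing backslashes are normalized first.
	cmd := `mlx_lm.server \
  --model mlx-community/Mistral-7B-Instruct-v0.3-4bit \
  --temp 0.7 --trust-remote-code`

	opts, err := mlx.ParseMlxCommand(cmd)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, opts.Temp, opts.TrustRemoteCode) // model path, 0.7, true

	// Round-trip back into argv form:
	fmt.Println(opts.BuildCommandArgs())
}
```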
@@ -10,9 +10,19 @@ import (
	"gopkg.in/yaml.v3"
)

// BackendConfig contains backend executable configurations
type BackendConfig struct {
	// Path to llama-server executable (llama.cpp backend)
	LlamaExecutable string `yaml:"llama_executable"`

	// Path to mlx_lm executable (MLX-LM backend)
	MLXLMExecutable string `yaml:"mlx_lm_executable"`
}

// AppConfig represents the configuration for llamactl
type AppConfig struct {
	Server    ServerConfig    `yaml:"server"`
	Backends  BackendConfig   `yaml:"backends"`
	Instances InstancesConfig `yaml:"instances"`
	Auth      AuthConfig      `yaml:"auth"`
	Version   string          `yaml:"-"`
@@ -61,9 +71,6 @@ type InstancesConfig struct {
	// Enable LRU eviction for idle instances
	EnableLRUEviction bool `yaml:"enable_lru_eviction"`

	// Path to llama-server executable
	LlamaExecutable string `yaml:"llama_executable"`

	// Default auto-restart setting for new instances
	DefaultAutoRestart bool `yaml:"default_auto_restart"`
@@ -112,6 +119,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
			AllowedOrigins: []string{"*"}, // Default to allow all origins
			EnableSwagger:  false,
		},
		Backends: BackendConfig{
			LlamaExecutable: "llama-server",
			MLXLMExecutable: "mlx_lm.server",
		},
		Instances: InstancesConfig{
			PortRange: [2]int{8000, 9000},
			DataDir:   getDefaultDataDirectory(),
@@ -121,7 +132,6 @@ func LoadConfig(configPath string) (AppConfig, error) {
			MaxInstances:        -1, // -1 means unlimited
			MaxRunningInstances: -1, // -1 means unlimited
			EnableLRUEviction:   true,
			LlamaExecutable:     "llama-server",
			DefaultAutoRestart:  true,
			DefaultMaxRestarts:  3,
			DefaultRestartDelay: 5,
@@ -229,8 +239,12 @@ func loadEnvVars(cfg *AppConfig) {
			cfg.Instances.EnableLRUEviction = b
		}
	}
	// Backend config
	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
		cfg.Instances.LlamaExecutable = llamaExec
		cfg.Backends.LlamaExecutable = llamaExec
	}
	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
		cfg.Backends.MLXLMExecutable = mlxLMExec
	}
	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
		if b, err := strconv.ParseBool(autoRestart); err == nil {
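The hunk above ends mid-function, but the override behavior it adds is worth spelling out: the legacy `LLAMACTL_LLAMA_EXECUTABLE` variable now feeds both the old `Instances` field and the new `Backends` section, matching the expectations in the test changes below. A sketch, assuming `LoadConfig` applies `loadEnvVars` and that an empty config path falls back to defaults:

```go
package main

import (
	"fmt"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Legacy variable: still honored, now mirrored into cfg.Backends.
	os.Setenv("LLAMACTL_LLAMA_EXECUTABLE", "/env/llama-server")
	os.Setenv("LLAMACTL_MLX_LM_EXECUTABLE", "/env/mlx_lm.server")

	cfg, err := config.LoadConfig("") // "" assumed to mean defaults + env overrides
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Backends.LlamaExecutable) // /env/llama-server
	fmt.Println(cfg.Backends.MLXLMExecutable) // /env/mlx_lm.server
}
```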
@@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
	if cfg.Instances.MaxInstances != -1 {
		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
	}
	if cfg.Instances.LlamaExecutable != "llama-server" {
		t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable)
	}
	if !cfg.Instances.DefaultAutoRestart {
		t.Error("Expected default auto restart to be true")
	}
@@ -101,9 +98,6 @@ instances:
	if cfg.Instances.MaxInstances != 5 {
		t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
	}
	if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" {
		t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable)
	}
	if cfg.Instances.DefaultAutoRestart {
		t.Error("Expected auto restart to be false")
	}
@@ -156,8 +150,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
	if cfg.Instances.MaxInstances != 20 {
		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
	}
	if cfg.Instances.LlamaExecutable != "/env/llama-server" {
		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Instances.LlamaExecutable)
	if cfg.Backends.LlamaExecutable != "/env/llama-server" {
		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
	}
	if cfg.Instances.DefaultAutoRestart {
		t.Error("Expected auto restart to be false")
@@ -31,9 +31,10 @@ func (realTimeProvider) Now() time.Time {
// Process represents a running instance of the llama server
type Process struct {
	Name           string                 `json:"name"`
	options        *CreateInstanceOptions `json:"-"`
	globalSettings *config.InstancesConfig
	Name                   string                 `json:"name"`
	options                *CreateInstanceOptions `json:"-"`
	globalInstanceSettings *config.InstancesConfig
	globalBackendSettings  *config.BackendConfig

	// Status
	Status InstanceStatus `json:"status"`
@@ -65,22 +66,23 @@ type Process struct {
}

// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
	// Validate and copy options
	options.ValidateAndApplyDefaults(name, globalSettings)
	options.ValidateAndApplyDefaults(name, globalInstanceSettings)

	// Create the instance logger
	logger := NewInstanceLogger(name, globalSettings.LogsDir)
	logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)

	return &Process{
		Name:           name,
		options:        options,
		globalSettings: globalSettings,
		logger:         logger,
		timeProvider:   realTimeProvider{},
		Created:        time.Now().Unix(),
		Status:         Stopped,
		onStatusChange: onStatusChange,
		Name:                   name,
		options:                options,
		globalInstanceSettings: globalInstanceSettings,
		globalBackendSettings:  globalBackendSettings,
		logger:                 logger,
		timeProvider:           realTimeProvider{},
		Created:                time.Now().Unix(),
		Status:                 Stopped,
		onStatusChange:         onStatusChange,
	}
}

@@ -96,7 +98,13 @@ func (i *Process) GetPort() int {
	if i.options != nil {
		switch i.options.BackendType {
		case backends.BackendTypeLlamaCpp:
			return i.options.LlamaServerOptions.Port
			if i.options.LlamaServerOptions != nil {
				return i.options.LlamaServerOptions.Port
			}
		case backends.BackendTypeMlxLm:
			if i.options.MlxServerOptions != nil {
				return i.options.MlxServerOptions.Port
			}
		}
	}
	return 0
@@ -108,7 +116,13 @@ func (i *Process) GetHost() string {
	if i.options != nil {
		switch i.options.BackendType {
		case backends.BackendTypeLlamaCpp:
			return i.options.LlamaServerOptions.Host
			if i.options.LlamaServerOptions != nil {
				return i.options.LlamaServerOptions.Host
			}
		case backends.BackendTypeMlxLm:
			if i.options.MlxServerOptions != nil {
				return i.options.MlxServerOptions.Host
			}
		}
	}
	return ""
@@ -124,7 +138,7 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
	}

	// Validate and copy options
	options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
	options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)

	i.options = options
	// Clear the proxy so it gets recreated with new options
@@ -153,8 +167,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
	var port int
	switch i.options.BackendType {
	case backends.BackendTypeLlamaCpp:
		host = i.options.LlamaServerOptions.Host
		port = i.options.LlamaServerOptions.Port
		if i.options.LlamaServerOptions != nil {
			host = i.options.LlamaServerOptions.Host
			port = i.options.LlamaServerOptions.Port
		}
	case backends.BackendTypeMlxLm:
		if i.options.MlxServerOptions != nil {
			host = i.options.MlxServerOptions.Host
			port = i.options.MlxServerOptions.Port
		}
	}

	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
@@ -215,7 +236,7 @@ func (i *Process) UnmarshalJSON(data []byte) error {

	// Handle options with validation and defaults
	if aux.Options != nil {
		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
		i.options = aux.Options
	}
@@ -11,6 +11,11 @@ import (
)

func TestNewInstance(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir:            "/tmp/test",
		DefaultAutoRestart: true,
@@ -29,7 +34,7 @@ func TestNewInstance(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)

	if inst.Name != "test-instance" {
		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -60,6 +65,11 @@ func TestNewInstance(t *testing.T) {
}

func TestNewInstance_WithRestartOptions(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir:            "/tmp/test",
		DefaultAutoRestart: true,
@@ -85,7 +95,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
	opts := instance.GetOptions()

	// Check that explicit values override defaults
@@ -101,6 +111,11 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
}

func TestSetOptions(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir:            "/tmp/test",
		DefaultAutoRestart: true,
@@ -119,7 +134,7 @@ func TestSetOptions(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)

	// Update options
	newOptions := &instance.CreateInstanceOptions{
@@ -147,6 +162,11 @@ func TestSetOptions(t *testing.T) {
}

func TestGetProxy(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir: "/tmp/test",
	}
@@ -162,7 +182,7 @@ func TestGetProxy(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)

	// Get proxy for the first time
	proxy1, err := inst.GetProxy()
@@ -184,6 +204,11 @@ func TestGetProxy(t *testing.T) {
}

func TestMarshalJSON(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir:            "/tmp/test",
		DefaultAutoRestart: true,
@@ -202,7 +227,7 @@ func TestMarshalJSON(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)

	data, err := json.Marshal(instance)
	if err != nil {
@@ -338,6 +363,11 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
		},
	}

	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}

	globalSettings := &config.InstancesConfig{
		LogsDir: "/tmp/test",
	}
@@ -356,7 +386,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
	// Mock onStatusChange function
	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

	instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
	instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
	opts := instance.GetOptions()

	if opts.MaxRestarts == nil {
@@ -9,6 +9,8 @@ import (
	"runtime"
	"syscall"
	"time"

	"llamactl/pkg/backends"
)

// Start starts the llama server instance and returns an error if it fails.
@@ -41,7 +43,20 @@ func (i *Process) Start() error {

	args := i.options.BuildCommandArgs()
	i.ctx, i.cancel = context.WithCancel(context.Background())
	i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)

	var executable string

	// Get executable from global configuration
	switch i.options.BackendType {
	case backends.BackendTypeLlamaCpp:
		executable = i.globalBackendSettings.LlamaExecutable
	case backends.BackendTypeMlxLm:
		executable = i.globalBackendSettings.MLXLMExecutable
	default:
		return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
	}

	i.cmd = exec.CommandContext(i.ctx, executable, args...)

	if runtime.GOOS != "windows" {
		setProcAttrs(i.cmd)
@@ -175,9 +190,16 @@ func (i *Process) WaitForHealthy(timeout int) error {
	var host string
	var port int
	switch opts.BackendType {
	case "llama-cpp":
		host = opts.LlamaServerOptions.Host
		port = opts.LlamaServerOptions.Port
	case backends.BackendTypeLlamaCpp:
		if opts.LlamaServerOptions != nil {
			host = opts.LlamaServerOptions.Host
			port = opts.LlamaServerOptions.Port
		}
	case backends.BackendTypeMlxLm:
		if opts.MlxServerOptions != nil {
			host = opts.MlxServerOptions.Host
			port = opts.MlxServerOptions.Port
		}
	}
	if host == "" {
		host = "localhost"
@@ -5,6 +5,7 @@ import (
	"fmt"
	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/backends/mlx"
	"llamactl/pkg/config"
	"log"
)
@@ -22,8 +23,9 @@ type CreateInstanceOptions struct {
	BackendType    backends.BackendType `json:"backend_type"`
	BackendOptions map[string]any       `json:"backend_options,omitempty"`

	// LlamaServerOptions contains the options for the llama server
	// Backend-specific options
	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
}

// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -55,6 +57,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
			return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
		}
	}
	case backends.BackendTypeMlxLm:
		if c.BackendOptions != nil {
			optionsData, err := json.Marshal(c.BackendOptions)
			if err != nil {
				return fmt.Errorf("failed to marshal backend options: %w", err)
			}

			c.MlxServerOptions = &mlx.MlxServerOptions{}
			if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
				return fmt.Errorf("failed to unmarshal MLX options: %w", err)
			}
		}
	default:
		return fmt.Errorf("unknown backend type: %s", c.BackendType)
	}
@@ -72,19 +86,36 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
		Alias: (*Alias)(c),
	}

	// Convert LlamaServerOptions back to BackendOptions map for JSON
	if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
		data, err := json.Marshal(c.LlamaServerOptions)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
		}
	// Convert backend-specific options back to BackendOptions map for JSON
	switch c.BackendType {
	case backends.BackendTypeLlamaCpp:
		if c.LlamaServerOptions != nil {
			data, err := json.Marshal(c.LlamaServerOptions)
			if err != nil {
				return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
			}

		var backendOpts map[string]any
		if err := json.Unmarshal(data, &backendOpts); err != nil {
			return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
		}
			var backendOpts map[string]any
			if err := json.Unmarshal(data, &backendOpts); err != nil {
				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
			}

		aux.BackendOptions = backendOpts
			aux.BackendOptions = backendOpts
		}
	case backends.BackendTypeMlxLm:
		if c.MlxServerOptions != nil {
			data, err := json.Marshal(c.MlxServerOptions)
			if err != nil {
				return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
			}

			var backendOpts map[string]any
			if err := json.Unmarshal(data, &backendOpts); err != nil {
				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
			}

			aux.BackendOptions = backendOpts
		}
	}

	return json.Marshal(aux)
@@ -136,6 +167,10 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
	if c.LlamaServerOptions != nil {
		return c.LlamaServerOptions.BuildCommandArgs()
	}
	case backends.BackendTypeMlxLm:
		if c.MlxServerOptions != nil {
			return c.MlxServerOptions.BuildCommandArgs()
		}
	}
	return []string{}
}
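A sketch of the full decode path these methods enable: the generic `backend_options` map from a create request is re-marshaled into the backend-specific struct, and `BuildCommandArgs` then dispatches on the backend type (package paths as in this commit; the payload is illustrative):

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance"
)

func main() {
	payload := []byte(`{
		"backend_type": "mlx_lm",
		"backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "temp": 0.7}
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(payload, &opts); err != nil {
		panic(err)
	}

	// BackendOptions was re-marshaled into MlxServerOptions.
	fmt.Println(opts.MlxServerOptions.Model, opts.MlxServerOptions.Temp)
	fmt.Println(opts.BuildCommandArgs()) // dispatches to MlxServerOptions.BuildCommandArgs
}
```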
|
||||
|
||||
@@ -33,6 +33,11 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
 // Timeout-related tests

 func TestUpdateLastRequestTime(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -47,13 +52,18 @@ func TestUpdateLastRequestTime(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)

 	// Test that UpdateLastRequestTime doesn't panic
 	inst.UpdateLastRequestTime()
 }

 func TestShouldTimeout_NotRunning(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -70,7 +80,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)

 	// Instance is not running, should not timeout regardless of configuration
 	if inst.ShouldTimeout() {
@@ -79,6 +89,11 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
 }

 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -105,7 +120,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 		},
 	}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	// Simulate running state
 	inst.SetStatus(instance.Running)

@@ -117,6 +132,11 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 }

 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -133,7 +153,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	inst.SetStatus(instance.Running)

 	// Update last request time to now
@@ -146,6 +166,11 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 }

 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -162,7 +187,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	inst.SetStatus(instance.Running)

 	// Use MockTimeProvider to simulate old last request time
@@ -181,6 +206,11 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 }

 func TestTimeoutConfiguration_Validation(t *testing.T) {
+	backendConfig := &config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}
@@ -209,7 +239,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
 	// Mock onStatusChange function
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
 	opts := inst.GetOptions()

 	if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {

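Every test above now repeats the same `backendConfig`/`globalSettings` fixture. A shared helper would cut the duplication; a minimal sketch (the helper name and the `*instance.Process` return type are assumptions, not part of this change):

```go
// Hypothetical helper bundling the fixture setup repeated in the tests above.
func newTestInstance(t *testing.T, options *instance.CreateInstanceOptions) *instance.Process {
	t.Helper()
	backendConfig := &config.BackendConfig{
		LlamaExecutable: "llama-server",
		MLXLMExecutable: "mlx_lm.server",
	}
	globalSettings := &config.InstancesConfig{LogsDir: "/tmp/test"}
	noop := func(oldStatus, newStatus instance.InstanceStatus) {}
	return instance.NewInstance("test-instance", backendConfig, globalSettings, options, noop)
}
```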
@@ -35,6 +35,7 @@ type instanceManager struct {
 	runningInstances map[string]struct{}
 	ports            map[int]bool
 	instancesConfig  config.InstancesConfig
+	backendsConfig   config.BackendConfig

 	// Timeout checker
 	timeoutChecker *time.Ticker
@@ -44,7 +45,7 @@ type instanceManager struct {
 }

 // NewInstanceManager creates a new instance of InstanceManager.
-func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
@@ -53,6 +54,7 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager
 		runningInstances: make(map[string]struct{}),
 		ports:            make(map[int]bool),
 		instancesConfig:  instancesConfig,
+		backendsConfig:   backendsConfig,

 		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
 		shutdownChan:   make(chan struct{}),
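Call sites must now pass both config blocks. A quick usage sketch, grounded in the hunks above; note the guard that coerces a non-positive `TimeoutCheckInterval` to the 5-minute default:

```go
backendCfg := config.BackendConfig{
	LlamaExecutable: "llama-server",
	MLXLMExecutable: "mlx_lm.server",
}
instancesCfg := config.InstancesConfig{
	PortRange:            [2]int{8000, 9000},
	TimeoutCheckInterval: 0, // <= 0 falls back to 5 minutes (see guard above)
}
mgr := manager.NewInstanceManager(backendCfg, instancesCfg)
```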
@@ -241,7 +243,7 @@ func (im *instanceManager) loadInstance(name, path string) error {
 	}

 	// Create new inst using NewInstance (handles validation, defaults, setup)
-	inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)

 	// Restore persisted fields that NewInstance doesn't set
 	inst.Created = persistedInstance.Created

@@ -15,18 +15,22 @@ import (
 )

 func TestNewInstanceManager(t *testing.T) {
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		LogsDir:              "/tmp/test",
 		MaxInstances:         5,
 		LlamaExecutable:      "llama-server",
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}

-	mgr := manager.NewInstanceManager(cfg)
+	mgr := manager.NewInstanceManager(backendConfig, cfg)
 	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
@@ -44,6 +48,11 @@ func TestNewInstanceManager(t *testing.T) {
 func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()

+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:    [2]int{8000, 9000},
 		InstancesDir: tempDir,
@@ -52,7 +61,7 @@ func TestPersistence(t *testing.T) {
 	}

 	// Test instance persistence on creation
-	manager1 := manager.NewInstanceManager(cfg)
+	manager1 := manager.NewInstanceManager(backendConfig, cfg)
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -73,7 +82,7 @@ func TestPersistence(t *testing.T) {
 	}

 	// Test loading instances from disk
-	manager2 := manager.NewInstanceManager(cfg)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg)
 	instances, err := manager2.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
@@ -172,15 +181,19 @@ func TestShutdown(t *testing.T) {

 // Helper function to create a test manager with standard config
 func createTestManager() manager.InstanceManager {
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
+
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		LogsDir:              "/tmp/test",
 		MaxInstances:         10,
 		LlamaExecutable:      "llama-server",
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
-	return manager.NewInstanceManager(cfg)
+	return manager.NewInstanceManager(backendConfig, cfg)
 }

@@ -62,7 +62,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 		im.onStatusChange(name, oldStatus, newStatus)
 	}

-	inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
 	im.instances[inst.Name] = inst

 	if err := im.persistInstance(inst); err != nil {
@@ -260,6 +260,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
 		if options.LlamaServerOptions != nil {
 			return options.LlamaServerOptions.Port
 		}
+	case backends.BackendTypeMlxLm:
+		if options.MlxServerOptions != nil {
+			return options.MlxServerOptions.Port
+		}
 	}
 	return 0
 }
@@ -271,6 +275,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
 		if options.LlamaServerOptions != nil {
 			options.LlamaServerOptions.Port = port
 		}
+	case backends.BackendTypeMlxLm:
+		if options.MlxServerOptions != nil {
+			options.MlxServerOptions.Port = port
+		}
 	}
 }

@@ -62,12 +62,16 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	}

 	// Test max instances limit
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
-	limitedManager := manager.NewInstanceManager(cfg)
+	limitedManager := manager.NewInstanceManager(backendConfig, cfg)

 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {

@@ -13,13 +13,17 @@ import (

 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
+	backendConfig := config.BackendConfig{
+		LlamaExecutable: "llama-server",
+		MLXLMExecutable: "mlx_lm.server",
+	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		TimeoutCheckInterval: 10,
 		MaxInstances:         5,
 	}

-	manager := manager.NewInstanceManager(cfg)
+	manager := manager.NewInstanceManager(backendConfig, cfg)
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}

@@ -7,6 +7,7 @@ import (
 	"io"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends/mlx"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
@@ -684,3 +685,57 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
 		}
 	}
 }
+
+// ParseMlxCommand godoc
+// @Summary Parse mlx_lm.server command
+// @Description Parses MLX-LM server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Router /backends/mlx/parse-command [post]
+func (h *Handler) ParseMlxCommand() http.HandlerFunc {
+	type errorResponse struct {
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" {
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		mlxOptions, err := mlx.ParseMlxCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		// Currently only support mlx_lm backend type
+		backendType := backends.BackendTypeMlxLm
+
+		options := &instance.CreateInstanceOptions{
+			BackendType:      backendType,
+			MlxServerOptions: mlxOptions,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
+		}
+	}
+}

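To see the new handler end to end, here is a minimal client that POSTs a raw `mlx_lm.server` command and prints the parsed `CreateInstanceOptions` JSON. The base URL and route prefix are assumptions; only the `/backends/mlx/parse-command` suffix and the `{"command": ...}` body shape come from this diff:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// The command string exactly as you would launch mlx_lm.server by hand.
	payload := bytes.NewBufferString(`{"command": "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081"}`)

	// Base URL and /api/v1 prefix are assumed for illustration.
	req, err := http.NewRequest(http.MethodPost, "http://localhost:8080/api/v1/backends/mlx/parse-command", payload)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body)) // expect backend_type "mlx_lm" plus the parsed options
}
```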
@@ -55,6 +55,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			r.Route("/llama-cpp", func(r chi.Router) {
 				r.Post("/parse-command", handler.ParseLlamaCommand())
 			})
+			r.Route("/mlx", func(r chi.Router) {
+				r.Post("/parse-command", handler.ParseMlxCommand())
+			})
 		})

 		// Instance management endpoints

@@ -44,6 +44,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 	switch options.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		return validateLlamaCppOptions(options)
+	case backends.BackendTypeMlxLm:
+		return validateMlxOptions(options)
 	default:
 		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
 	}
@@ -68,6 +70,24 @@ func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
 	return nil
 }

+// validateMlxOptions validates MLX backend specific options
+func validateMlxOptions(options *instance.CreateInstanceOptions) error {
+	if options.MlxServerOptions == nil {
+		return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
+	}
+
+	if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
+		return err
+	}
+
+	// Basic network validation for port
+	if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
+	}
+
+	return nil
+}
+
 // validateStructStrings recursively validates all string fields in a struct
 func validateStructStrings(v any, fieldPath string) error {
 	val := reflect.ValueOf(v)

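A short sketch of what `validateMlxOptions` rejects; the `validation` package name is an assumption for illustration:

```go
// Sketch: an MLX instance with an out-of-range port fails validation.
opts := &instance.CreateInstanceOptions{
	BackendType:      backends.BackendTypeMlxLm,
	MlxServerOptions: &mlx.MlxServerOptions{Port: 70000},
}
if err := validation.ValidateInstanceOptions(opts); err != nil {
	fmt.Println(err) // invalid port range: 70000
}
```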
@@ -2,11 +2,10 @@ import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
-import type { BackendOptions } from '@/schemas/instanceOptions'
 import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'

 interface BackendFormFieldProps {
-  fieldKey: keyof BackendOptions
+  fieldKey: string
   value: string | number | boolean | string[] | undefined
   onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
 }

@@ -41,8 +41,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   // Get field lists dynamically from the type
   const basicFields = getBasicFields();
   const advancedFields = getAdvancedFields();
-  const basicBackendFields = getBasicBackendFields();
-  const advancedBackendFields = getAdvancedBackendFields();
+  const basicBackendFields = getBasicBackendFields(formData.backend_type);
+  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);

   // Reset form when dialog opens/closes or when instance changes
   useEffect(() => {
@@ -66,10 +66,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   }, [open, instance]);

   const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
-    setFormData((prev) => ({
-      ...prev,
-      [key]: value,
-    }));
+    setFormData((prev) => {
+      // If backend_type is changing, clear backend_options
+      if (key === 'backend_type' && prev.backend_type !== value) {
+        return {
+          ...prev,
+          [key]: value,
+          backend_options: {}, // Clear backend options when backend type changes
+        };
+      }
+
+      return {
+        ...prev,
+        [key]: value,
+      };
+    });
   };

   const handleBackendFieldChange = (key: string, value: any) => {
@@ -78,7 +89,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
       backend_options: {
         ...prev.backend_options,
         [key]: value,
-      },
+      } as any,
     }));
   };

@@ -260,7 +271,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
               <BackendFormField
                 key={fieldKey}
                 fieldKey={fieldKey}
-                value={formData.backend_options?.[fieldKey]}
+                value={(formData.backend_options as any)?.[fieldKey]}
                 onChange={handleBackendFieldChange}
               />
             ))}
@@ -345,7 +356,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
               <BackendFormField
                 key={fieldKey}
                 fieldKey={fieldKey}
-                value={formData.backend_options?.[fieldKey]}
+                value={(formData.backend_options as any)?.[fieldKey]}
                 onChange={handleBackendFieldChange}
               />
             ))}

@@ -2,8 +2,7 @@ import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
-import type { CreateInstanceOptions } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import { BackendType, type CreateInstanceOptions } from '@/types/instance'
 import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'

 interface ZodFormFieldProps {
@@ -39,7 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
           className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
         >
           <option value={BackendType.LLAMA_CPP}>Llama Server</option>
-          {/* Add more backend types here as they become available */}
+          <option value={BackendType.MLX_LM}>MLX LM</option>
         </select>
         {config.description && (
           <p className="text-sm text-muted-foreground">{config.description}</p>

@@ -93,6 +93,14 @@ export const backendsApi = {
       body: JSON.stringify({ command }),
     }),
   },
+  mlx: {
+    // POST /backends/mlx/parse-command
+    parseCommand: (command: string) =>
+      apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
+        method: 'POST',
+        body: JSON.stringify({ command }),
+      }),
+  },
 };

 // Instance API functions

@@ -1,4 +1,15 @@
-import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'
+import {
+  type CreateInstanceOptions,
+  type LlamaCppBackendOptions,
+  type MlxBackendOptions,
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  getAllFieldKeys,
+  getAllLlamaCppFieldKeys,
+  getAllMlxFieldKeys,
+  getLlamaCppFieldType,
+  getMlxFieldType
+} from '@/schemas/instanceOptions'

 // Instance-level basic fields (not backend-specific)
 export const basicFieldsConfig: Record<string, {
@@ -36,8 +47,8 @@ export const basicFieldsConfig: Record<string, {
   }
 }

-// Backend-specific basic fields (these go in backend_options)
-export const basicBackendFieldsConfig: Record<string, {
+// LlamaCpp backend-specific basic fields
+const basicLlamaCppFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
@@ -46,7 +57,8 @@ export const basicBackendFieldsConfig: Record<string, {
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
-    description: 'Path to the model file'
+    description: 'Path to the model file',
+    required: true
   },
   hf_repo: {
     label: 'Hugging Face Repository',
@@ -65,13 +77,50 @@ export const basicBackendFieldsConfig: Record<string, {
   }
 }

-export function isBasicField(key: keyof CreateInstanceOptions): boolean {
+// MLX backend-specific basic fields
+const basicMlxFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+  required?: boolean
+}> = {
+  model: {
+    label: 'Model',
+    placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
+    description: 'The path to the MLX model weights, tokenizer, and config',
+    required: true
+  },
+  temp: {
+    label: 'Temperature',
+    placeholder: '0.0',
+    description: 'Default sampling temperature (default: 0.0)'
+  },
+  top_p: {
+    label: 'Top-P',
+    placeholder: '1.0',
+    description: 'Default nucleus sampling top-p (default: 1.0)'
+  },
+  top_k: {
+    label: 'Top-K',
+    placeholder: '0',
+    description: 'Default top-k sampling (default: 0, disables top-k)'
+  },
+  min_p: {
+    label: 'Min-P',
+    placeholder: '0.0',
+    description: 'Default min-p sampling (default: 0.0, disables min-p)'
+  },
+  max_tokens: {
+    label: 'Max Tokens',
+    placeholder: '512',
+    description: 'Default maximum number of tokens to generate (default: 512)'
+  }
+}
+
+function isBasicField(key: keyof CreateInstanceOptions): boolean {
   return key in basicFieldsConfig
 }

-export function isBasicBackendField(key: keyof BackendOptions): boolean {
-  return key in basicBackendFieldsConfig
-}
-
 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
   return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
@@ -81,13 +130,61 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
   return getAllFieldKeys().filter(key => !isBasicField(key))
 }

-export function getBasicBackendFields(): (keyof BackendOptions)[] {
-  return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
-
+export function getBasicBackendFields(backendType?: string): string[] {
+  if (backendType === 'mlx_lm') {
+    return Object.keys(basicMlxFieldsConfig)
+  } else if (backendType === 'llama_cpp') {
+    return Object.keys(basicLlamaCppFieldsConfig)
+  }
+  // Default to LlamaCpp for backward compatibility
+  return Object.keys(basicLlamaCppFieldsConfig)
 }

-export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
-  return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
+export function getAdvancedBackendFields(backendType?: string): string[] {
+  if (backendType === 'mlx_lm') {
+    return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
+  } else if (backendType === 'llama_cpp') {
+    return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
+  }
+  // Default to LlamaCpp for backward compatibility
+  return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
 }

+// Combined backend fields config for use in BackendFormField
+export const basicBackendFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+  required?: boolean
+}> = {
+  ...basicLlamaCppFieldsConfig,
+  ...basicMlxFieldsConfig
+}
+
+// Get field type for any backend option (union type)
+export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
+  // Try to get type from LlamaCpp schema first
+  try {
+    if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
+      return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
+  // Try MLX schema
+  try {
+    if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
+      return getMlxFieldType(key as keyof MlxBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
+  // Default fallback
+  return 'text'
+}
+
 // Re-export the Zod-based functions
-export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
+export { getFieldType } from '@/schemas/instanceOptions'
@@ -1,8 +1,8 @@
 import { BackendType } from '@/types/instance'
 import { z } from 'zod'

-// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
-export const BackendOptionsSchema = z.object({
+// Define the LlamaCpp backend options schema
+export const LlamaCppBackendOptionsSchema = z.object({
   // Common params
   verbose_prompt: z.boolean().optional(),
   threads: z.number().optional(),
@@ -170,6 +170,39 @@ export const BackendOptionsSchema = z.object({
   fim_qwen_14b_spec: z.boolean().optional(),
 })

+// Define the MLX backend options schema
+export const MlxBackendOptionsSchema = z.object({
+  // Basic connection options
+  model: z.string().optional(),
+  host: z.string().optional(),
+  port: z.number().optional(),
+
+  // Model and adapter options
+  adapter_path: z.string().optional(),
+  draft_model: z.string().optional(),
+  num_draft_tokens: z.number().optional(),
+  trust_remote_code: z.boolean().optional(),
+
+  // Logging and templates
+  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
+  chat_template: z.string().optional(),
+  use_default_chat_template: z.boolean().optional(),
+  chat_template_args: z.string().optional(), // JSON string
+
+  // Sampling defaults
+  temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
+  top_p: z.number().optional(),
+  top_k: z.number().optional(),
+  min_p: z.number().optional(),
+  max_tokens: z.number().optional(),
+})
+
+// Backend options union
+export const BackendOptionsSchema = z.union([
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+])
+
 // Define the main create instance options schema
 export const CreateInstanceOptionsSchema = z.object({
   // Restart options
@@ -180,11 +213,13 @@ export const CreateInstanceOptionsSchema = z.object({
   on_demand_start: z.boolean().optional(),

   // Backend configuration
-  backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
+  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
 })

 // Infer the TypeScript types from the schemas
+export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
+export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
 export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>

@@ -193,9 +228,14 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }

-// Helper to get all backend option field keys
-export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
-  return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
+// Helper to get all LlamaCpp backend option field keys
+export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
+  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
+}
+
+// Helper to get all MLX backend option field keys
+export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
+  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
 }

 // Get field type from Zod schema
@@ -213,9 +253,9 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
   return 'text' // ZodString and others default to text
 }

-// Get field type for backend options
-export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = BackendOptionsSchema.shape[key]
+// Get field type for LlamaCpp backend options
+export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
   if (!fieldSchema) return 'text'

   // Handle ZodOptional wrapper
@@ -225,4 +265,19 @@ export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
   return 'text' // ZodString and others default to text
 }
+
+// Get field type for MLX backend options
+export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+  const fieldSchema = MlxBackendOptionsSchema.shape[key]
+  if (!fieldSchema) return 'text'
+
+  // Handle ZodOptional wrapper
+  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
+
+  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
+  if (innerSchema instanceof z.ZodNumber) return 'number'
+  if (innerSchema instanceof z.ZodArray) return 'array'
+  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
+  return 'text' // ZodString and others default to text
+}
@@ -3,7 +3,9 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
 export { type CreateInstanceOptions } from '@/schemas/instanceOptions'

 export const BackendType = {
-  LLAMA_CPP: 'llama_cpp'
+  LLAMA_CPP: 'llama_cpp',
+  MLX_LM: 'mlx_lm',
+  // MLX_VLM: 'mlx_vlm', // Future expansion
 } as const

 export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]