Merge pull request #34 from lordmathis/feat/vllm-backend

feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
committed by GitHub
53 changed files with 3078 additions and 2968 deletions

View File

@@ -13,7 +13,7 @@
### 🔗 Universal Compatibility ### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by model name - **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized) - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
### 🌐 User-Friendly Interface ### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -31,6 +31,7 @@
# 1. Install backend (one-time setup) # 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start # For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm # For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# 2. Download and run llamactl # 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -47,7 +48,7 @@ llamactl
### Create and manage instances via web dashboard: ### Create and manage instances via web dashboard:
1. Open http://localhost:8080 1. Open http://localhost:8080
2. Click "Create Instance" 2. Click "Create Instance"
3. Choose backend type (llama.cpp or MLX) 3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options 4. Set model path and backend-specific options
5. Start or stop the instance 5. Start or stop the instance
@@ -63,6 +64,11 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
# Use with OpenAI SDK # Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \ curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
@@ -121,6 +127,21 @@ source mlx-env/bin/activate
pip install mlx-lm pip install mlx-lm
``` ```
**For vLLM backend:**
You need vLLM installed:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Configuration ## Configuration
llamactl works out of the box with sensible defaults. llamactl works out of the box with sensible defaults.
@@ -135,6 +156,7 @@ server:
backends: backends:
llama_executable: llama-server # Path to llama-server executable llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances

View File

@@ -19,6 +19,159 @@ const docTemplate = `{
"host": "{{.Host}}", "host": "{{.Host}}",
"basePath": "{{.BasePath}}", "basePath": "{{.BasePath}}",
"paths": { "paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -681,522 +834,46 @@ const docTemplate = `{
} }
}, },
"definitions": { "definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
}, },
"idle_timeout": { "idle_timeout": {
"description": "Idle timeout", "description": "Idle timeout",
"type": "integer" "type": "integer"
}, },
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1264,6 +941,14 @@ const docTemplate = `{
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
} }
} }
}` }`

View File

@@ -12,6 +12,159 @@
}, },
"basePath": "/api/v1", "basePath": "/api/v1",
"paths": { "paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -674,522 +827,46 @@
} }
}, },
"definitions": { "definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
}, },
"idle_timeout": { "idle_timeout": {
"description": "Idle timeout", "description": "Idle timeout",
"type": "integer" "type": "integer"
}, },
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1257,6 +934,14 @@
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
} }
} }
} }

View File

@@ -1,352 +1,35 @@
basePath: /api/v1 basePath: /api/v1
definitions: definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions: instance.CreateInstanceOptions:
properties: properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart: auto_restart:
description: Auto restart description: Auto restart
type: boolean type: boolean
batch_size: backend_options:
type: integer additionalProperties: {}
cache_reuse: type: object
type: integer backend_type:
cache_type_k: $ref: '#/definitions/backends.BackendType'
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
idle_timeout: idle_timeout:
description: Idle timeout description: Idle timeout
type: integer type: integer
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
type: integer
max_restarts: max_restarts:
type: integer type: integer
metrics:
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
on_demand_start: on_demand_start:
description: On demand start description: On demand start
type: boolean type: boolean
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
prio:
type: integer
prio_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay: restart_delay:
type: integer description: seconds
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temp:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
type: integer type: integer
type: object type: object
instance.InstanceStatus: instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
object: object:
type: string type: string
type: object type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info: info:
contact: {} contact: {}
description: llamactl is a control server for managing Llama Server instances. description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,102 @@ info:
title: llamactl API title: llamactl API
version: "1.0" version: "1.0"
paths: paths:
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances: /instances:
get: get:
description: Returns a list of all instances managed by the server description: Returns a list of all instances managed by the server

View File

@@ -22,6 +22,7 @@ server:
backends: backends:
llama_executable: llama-server # Path to llama-server executable llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances
@@ -94,11 +95,13 @@ server:
backends: backends:
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server") llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server") mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
``` ```
**Environment Variables:** **Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable - `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
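The executable paths above can also be overridden per run. A minimal sketch (the virtual-environment path is illustrative, not from this PR):

```bash
# Point llamactl at a vllm binary inside a dedicated virtual environment (illustrative path)
LLAMACTL_VLLM_EXECUTABLE=/opt/vllm-env/bin/vllm llamactl
```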
### Instance Configuration ### Instance Configuration

View File

@@ -37,6 +37,22 @@ pip install mlx-lm
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.) Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods ## Installation Methods
### Option 1: Download Binary (Recommended) ### Option 1: Download Binary (Recommended)

View File

@@ -29,8 +29,9 @@ You should see the Llamactl web interface.
1. Click the "Add Instance" button 1. Click the "Add Instance" button
2. Fill in the instance configuration: 2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name - **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Additional Options**: Any extra Llama.cpp parameters - **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance" 3. Click "Create Instance"
@@ -43,17 +44,46 @@ Once created, you can:
- **View logs** by clicking the logs button - **View logs** by clicking the logs button
- **Stop** the instance when needed - **Stop** the instance when needed
## Example Configuration ## Example Configurations
Here's a basic example configuration for a Llama 2 model: Here are basic example configurations for each backend:
**llama.cpp backend:**
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"model_path": "/path/to/llama-2-7b-chat.gguf", "backend_type": "llama_cpp",
"options": { "backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4, "threads": 4,
"context_size": 2048 "ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
} }
} }
``` ```
@@ -66,12 +96,14 @@ You can also manage instances via the REST API:
# List all instances # List all instances
curl http://localhost:8080/api/instances curl http://localhost:8080/api/instances
# Create a new instance # Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances \ curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"name": "my-model", "backend_type": "llama_cpp",
"model_path": "/path/to/model.gguf", "backend_options": {
"model": "/path/to/model.gguf"
}
}' }'
# Start an instance # Start an instance

View File

@@ -170,7 +170,7 @@ POST /api/v1/instances/{name}/start
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "starting", "status": "running",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -191,7 +191,7 @@ POST /api/v1/instances/{name}/stop
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "stopping", "status": "stopped",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -208,7 +208,7 @@ POST /api/v1/instances/{name}/restart
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "restarting", "status": "running",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -401,6 +401,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
}' }'
``` ```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
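For example, a sketch of calling the llama.cpp parse endpoint with curl (assuming the default port and an API key; adjust both for your setup):

```bash
# Parse a llama-server command string into instance options
curl -X POST http://localhost:8080/api/v1/backends/llama-cpp/parse-command \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"}'
```

The response is the parsed options object shown above.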
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation ## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at: If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:

View File

@@ -1,6 +1,6 @@
# Managing Instances # Managing Instances
Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API. Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview ## Overview
@@ -42,9 +42,11 @@ Each instance is displayed as a card showing:
3. **Choose Backend Type**: 3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server - **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only) - **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source: 4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo - **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings: 5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure - **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts - **Max Restarts**: Maximum number of restart attempts
@@ -54,6 +56,7 @@ Each instance is displayed as a card showing:
6. Configure backend-specific options: 6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc. - **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance 7. Click **"Create"** to save the instance
### Via API ### Via API
@@ -87,6 +90,20 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
"max_restarts": 3 "max_restarts": 3
}' }'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true
}'
# Create llama.cpp instance with HuggingFace model # Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
@@ -179,16 +196,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy ## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server or MLX). Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash ```bash
# Get instance details # Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/ curl http://localhost:8080/api/instances/{name}/proxy/
``` ```
Both backends provide OpenAI-compatible endpoints. Check the respective documentation: All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) - [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md) - [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
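Because every backend speaks the OpenAI API, requests can also go through the shared OpenAI-compatible route, which routes by model name. A sketch assuming the default port, an API key, and an instance named `my-model`:

```bash
# Chat completion routed to the instance whose name matches the "model" field
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}'
```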
### Instance Health ### Instance Health

View File

@@ -5,5 +5,6 @@ type BackendType string
const ( const (
BackendTypeLlamaCpp BackendType = "llama_cpp" BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm" BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion // BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
) )

pkg/backends/builder.go (new file, 70 lines)
View File

@@ -0,0 +1,70 @@
package backends
import (
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
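To illustrate the builder, a minimal sketch of how BuildCommandArgs maps JSON-tagged fields to flags; the options struct here is hypothetical and only stands in for a real backend options type:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// demoOptions is a hypothetical options struct used only for this example.
type demoOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &demoOptions{
		Model:     "/path/to/model.gguf",
		GPULayers: 32,
		Verbose:   true,
		Lora:      []string{"a.bin", "b.bin"},
	}
	// "lora" is declared as a repeated flag, so it expands to --lora a.bin --lora b.bin;
	// any other string slice would be emitted as a single comma-separated value.
	args := backends.BuildCommandArgs(opts, map[string]bool{"lora": true})
	fmt.Println(args)
	// [--model /path/to/model.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]
}
```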

View File

@@ -2,9 +2,9 @@ package llamacpp
import ( import (
"encoding/json" "encoding/json"
"llamactl/pkg/backends"
"reflect" "reflect"
"strconv" "strconv"
"strings"
) )
type LlamaServerOptions struct { type LlamaServerOptions struct {
@@ -315,62 +315,44 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments // BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string { func (o *LlamaServerOptions) BuildCommandArgs() []string {
var args []string // Llama uses multiple flags for arrays by default (not comma-separated)
multipleFlags := map[string]bool{
v := reflect.ValueOf(o).Elem() "override-tensor": true,
t := v.Type() "override-kv": true,
"lora": true,
for i := 0; i < v.NumField(); i++ { "lora-scaled": true,
field := v.Field(i) "control-vector": true,
fieldType := t.Field(i) "control-vector-scaled": true,
"dry-sequence-breaker": true,
// Skip unexported fields "logit-bias": true,
if !field.CanInterface() { }
continue return backends.BuildCommandArgs(o, multipleFlags)
} }
// Get the JSON tag to determine the flag name // ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
jsonTag := fieldType.Tag.Get("json") // Supports multiple formats:
if jsonTag == "" || jsonTag == "-" { // 1. Full command: "llama-server --model file.gguf"
continue // 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
multiValuedFlags := map[string]bool{
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
} }
// Remove ",omitempty" from the tag var llamaOptions LlamaServerOptions
flagName := jsonTag if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 { return nil, err
flagName = jsonTag[:commaIndex]
} }
// Convert snake_case to kebab-case for CLI flags return &llamaOptions, nil
flagName = strings.ReplaceAll(flagName, "_", "-")
// Add the appropriate arguments based on field type and value
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
// Handle []string fields
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
}
}
}
return args
} }

View File

@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
} }
} }
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions // Helper functions
func contains(slice []string, item string) bool { func contains(slice []string, item string) bool {
return slices.Contains(slice, item) return slices.Contains(slice, item)

View File

@@ -1,286 +0,0 @@
package llamacpp
import (
"encoding/json"
"errors"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
// Known multi-valued flags (snake_case form)
multiValued := map[string]struct{}{
"override_tensor": {},
"override_kv": {},
"lora": {},
"lora_scaled": {},
"control_vector": {},
"control_vector_scaled": {},
"dry_sequence_breaker": {},
"logit_bias": {},
}
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
// Determine if multi-valued flag
_, isMulti := multiValued[flagName]
// Normalization helper: ensure slice for multi-valued flags
appendValue := func(valStr string) {
if existing, ok := options[flagName]; ok {
// Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
if slice, ok := existing.([]string); ok {
options[flagName] = append(slice, valStr)
return
}
// Convert scalar to slice
options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
return
}
// First value
if isMulti {
options[flagName] = []string{valStr}
} else {
// We'll parse type below for single-valued flags
options[flagName] = valStr
}
}
if valueProvided {
// Use raw token for multi-valued flags; else allow typed parsing
appendValue(rawValue)
if !isMulti { // convert to typed value if scalar
if strVal, ok := options[flagName].(string); ok { // still scalar
options[flagName] = parseValue(strVal)
}
}
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value)
options[flagName] = true
i++
}
// 4. Convert to LlamaServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var llamaOptions LlamaServerOptions
if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return LlamaServerOptions
return &llamaOptions, nil
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with llama-server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "llama-server" command
if strings.ToLower(firstToken) == "llama-server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, errors.New("unterminated quoted string")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
if !strings.HasPrefix(arg, "-") {
return false
}
// Special case: if it's a negative number, treat it as a value
if _, err := strconv.ParseFloat(arg, 64); err == nil {
return false
}
return true
}

View File

@@ -1,413 +0,0 @@
package llamacpp
import (
"testing"
)
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command with model",
command: "llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "command with multiple flags",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
expectErr: false,
},
{
name: "command with short flags",
command: "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
expectErr: false,
},
{
name: "command with equals format",
command: "llama-server --model=/path/to/model.gguf --gpu-layers=32",
expectErr: false,
},
{
name: "command with boolean flags",
command: "llama-server --model /path/to/model.gguf --verbose --no-mmap",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "case insensitive command",
command: "LLAMA-SERVER --model /path/to/model.gguf",
expectErr: false,
},
// New test cases for improved functionality
{
name: "args only without llama-server",
command: "--model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "full path to executable",
command: "/usr/local/bin/llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "negative number handling",
command: "llama-server --gpu-layers -1 --model test.gguf",
expectErr: false,
},
{
name: "multiline command with backslashes",
command: "llama-server --model /path/to/model.gguf \\\n --ctx-size 4096 \\\n --batch-size 512",
expectErr: false,
},
{
name: "quoted string with special characters",
command: `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
expectErr: false,
},
{
name: "unterminated quoted string",
command: `llama-server --model test.gguf --chat-template "unterminated quote`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
})
}
}
func TestParseLlamaCommandSpecificValues(t *testing.T) {
// Test specific value parsing
command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandArrayFlags(t *testing.T) {
// Test array flag handling (critical for lora, override-tensor, etc.)
command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
}
}
func TestParseLlamaCommandMixedFormats(t *testing.T) {
// Test mixing --flag=value and --flag value formats
command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/model.gguf" {
t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandTypeConversion(t *testing.T) {
// Test that values are converted to appropriate types
command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
}
}
func TestParseLlamaCommandArgsOnly(t *testing.T) {
// Test parsing arguments without llama-server command
command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
}
func TestParseLlamaCommandFullPath(t *testing.T) {
// Test full path to executable
command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
// Test negative number parsing
command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Seed != -12345 {
t.Errorf("expected seed -12345, got %d", result.Seed)
}
}
func TestParseLlamaCommandMultiline(t *testing.T) {
// Test multiline command with backslashes
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers 32`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandQuotedStrings(t *testing.T) {
// Test quoted strings with special characters
command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
expectedTemplate := "User: {user}\\nAssistant: "
if result.ChatTemplate != expectedTemplate {
t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
}
}
func TestParseLlamaCommandUnslothExample(t *testing.T) {
// Test with realistic unsloth-style command
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers -1 \
--temp 0.7 \
--repeat-penalty 1.1 \
--top-k 40 \
--top-p 0.95 \
--host 0.0.0.0 \
--port 8000 \
--api-key "sk-1234567890abcdef"`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Verify key fields
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.RepeatPenalty != 1.1 {
t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if result.TopP != 0.95 {
t.Errorf("expected top_p 0.95, got %f", result.TopP)
}
if result.Host != "0.0.0.0" {
t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
}
if result.Port != 8000 {
t.Errorf("expected port 8000, got %d", result.Port)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
}
// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "my model.gguf" {
t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
}
if result.Alias != "Test Alias" {
t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
}
}
func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
// Same multi-value flag using --flag value and --flag=value forms
cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 3 {
t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
}
expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
func TestParseLlamaCommandMalformedFlag(t *testing.T) {
cmd := "llama-server ---model test.gguf"
_, err := ParseLlamaCommand(cmd)
if err == nil {
t.Fatalf("expected error for malformed flag but got none")
}
}

View File

@@ -1,9 +1,7 @@
package mlx
import (
- "encoding/json"
- "reflect"
- "strconv"
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
@@ -25,181 +23,34 @@ type MlxServerOptions struct {
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
- Temp float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to handle multiple field names
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions MlxServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = MlxServerOptions(temp)
// Handle alternative field names
fieldMappings := map[string]string{
// Basic connection options
"m": "model",
"host": "host",
"port": "port",
// "python_path": "python_path", // removed
// Model and adapter options
"adapter-path": "adapter_path",
"draft-model": "draft_model",
"num-draft-tokens": "num_draft_tokens",
"trust-remote-code": "trust_remote_code",
// Logging and templates
"log-level": "log_level",
"chat-template": "chat_template",
"use-default-chat-template": "use_default_chat_template",
"chat-template-args": "chat_template_args",
// Sampling defaults
"temperature": "temp", // Support both temp and temperature
"top-p": "top_p",
"top-k": "top_k",
"min-p": "min_p",
"max-tokens": "max_tokens",
}
// Process alternative field names
for altName, canonicalName := range fieldMappings {
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
field := v.FieldByNameFunc(func(fieldName string) bool {
field, _ := v.Type().FieldByName(fieldName)
jsonTag := field.Tag.Get("json")
return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
}
}
}
return nil
}
// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
return &MlxServerOptions{
Host: "127.0.0.1", // MLX default (different from llama-server)
Port: 8080, // MLX default
NumDraftTokens: 3, // MLX default for speculative decoding
LogLevel: "INFO", // MLX default
Temp: 0.0, // MLX default
TopP: 1.0, // MLX default
TopK: 0, // MLX default (disabled)
MinP: 0.0, // MLX default (disabled)
MaxTokens: 512, // MLX default
ChatTemplateArgs: "{}", // MLX default (empty JSON object)
}
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
- var args []string
- // Required and basic options
- if o.Model != "" {
- args = append(args, "--model", o.Model)
- }
- if o.Host != "" {
- args = append(args, "--host", o.Host)
- }
- if o.Port != 0 {
- args = append(args, "--port", strconv.Itoa(o.Port))
- }
- // Model and adapter options
- if o.AdapterPath != "" {
- args = append(args, "--adapter-path", o.AdapterPath)
- }
- if o.DraftModel != "" {
- args = append(args, "--draft-model", o.DraftModel)
- }
- if o.NumDraftTokens != 0 {
- args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
- }
- if o.TrustRemoteCode {
- args = append(args, "--trust-remote-code")
- }
- // Logging and templates
- if o.LogLevel != "" {
- args = append(args, "--log-level", o.LogLevel)
- }
- if o.ChatTemplate != "" {
- args = append(args, "--chat-template", o.ChatTemplate)
- }
- if o.UseDefaultChatTemplate {
- args = append(args, "--use-default-chat-template")
- }
- if o.ChatTemplateArgs != "" {
- args = append(args, "--chat-template-args", o.ChatTemplateArgs)
- }
- // Sampling defaults
- if o.Temp != 0 {
- args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
- }
- if o.TopP != 0 {
- args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
- }
- if o.TopK != 0 {
- args = append(args, "--top-k", strconv.Itoa(o.TopK))
- }
- if o.MinP != 0 {
- args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
- }
- if o.MaxTokens != 0 {
- args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
- }
- return args
- }
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}
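For orientation, a minimal usage sketch of the refactored MLX API (illustrative only; the model name is a placeholder):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Parse a command string into typed options...
	opts, err := mlx.ParseMlxCommand("mlx_lm.server --model my-org/my-mlx-model --temp 0.7 --max-tokens 2048")
	if err != nil {
		panic(err)
	}

	// ...and turn them back into the flag list passed to mlx_lm.server.
	fmt.Println(opts.BuildCommandArgs())
}
```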

View File

@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

View File

@@ -1,254 +0,0 @@
package mlx
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
if valueProvided {
// MLX-specific validation for certain flags
if flagName == "log_level" && !isValidLogLevel(rawValue) {
return nil, fmt.Errorf("invalid log level: %s", rawValue)
}
options[flagName] = parseValue(rawValue)
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value) - MLX specific boolean flags
if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
options[flagName] = true
} else {
options[flagName] = true
}
i++
}
// 4. Convert to MlxServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var mlxOptions MlxServerOptions
if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return MlxServerOptions
return &mlxOptions, nil
}
// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
for _, valid := range validLevels {
if level == valid {
return true
}
}
return false
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "mlx_lm.server" command
if strings.ToLower(firstToken) == "mlx_lm.server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, fmt.Errorf("unclosed quote in command")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
return strings.HasPrefix(s, "-")
}

213
pkg/backends/parser.go Normal file
View File

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
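To make the contract of ParseCommand concrete, here is a small illustrative sketch with a hypothetical target struct (not code from this PR); the JSON tags on the struct drive the flag-to-field mapping:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// toyOptions is a hypothetical target struct for illustration only.
type toyOptions struct {
	Model   string   `json:"model,omitempty"`
	Port    int      `json:"port,omitempty"`
	Verbose bool     `json:"verbose,omitempty"`
	Lora    []string `json:"lora,omitempty"`
}

func main() {
	var opts toyOptions
	multi := map[string]bool{"lora": true} // flags that may repeat

	err := backends.ParseCommand(
		"my-server --model /m.gguf --port 8001 --verbose --lora a.bin --lora b.bin",
		[]string{"my-server"}, // executable names to strip
		nil,                   // no subcommands
		multi,
		&opts,
	)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", opts)
}
```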

189
pkg/backends/vllm/vllm.go Normal file
View File

@@ -0,0 +1,189 @@
package vllm
import (
"llamactl/pkg/backends"
)
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand; that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy of the options without the Model field to avoid including it as --model flag
optionsCopy := *o
optionsCopy.Model = "" // Clear model field so it won't be included as a flag
multipleFlags := map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
// Build the rest of the arguments as flags
flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}
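A usage sketch of the two functions above (illustrative only; the model name is a placeholder): ParseVllmCommand accepts the multiline form, and BuildCommandArgs emits the model as the leading positional argument, leaving "serve" to the instance-level BuildCommandArgs:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm"
)

func main() {
	opts, err := vllm.ParseVllmCommand(`vllm serve my-org/my-model \
		--tensor-parallel-size 2 \
		--gpu-memory-utilization 0.9`)
	if err != nil {
		panic(err)
	}

	// Positional model first, then the remaining flags; the "serve"
	// subcommand is prepended later at the instance level, not here.
	fmt.Println(opts.BuildCommandArgs())
}
```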

View File

@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

View File

@@ -17,6 +17,9 @@ type BackendConfig struct {
// Path to mlx_lm executable (MLX-LM backend)
MLXLMExecutable string `yaml:"mlx_lm_executable"`
// Path to vllm executable (vLLM backend)
VllmExecutable string `yaml:"vllm_executable"`
}
// AppConfig represents the configuration for llamactl
@@ -122,6 +125,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Backends: BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
VllmExecutable: "vllm",
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
@@ -246,6 +250,9 @@ func loadEnvVars(cfg *AppConfig) {
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
cfg.Backends.MLXLMExecutable = mlxLMExec
}
if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
cfg.Backends.VllmExecutable = vllmExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b

View File

@@ -105,6 +105,10 @@ func (i *Process) GetPort() int {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
}
return 0
@@ -123,6 +127,10 @@ func (i *Process) GetHost() string {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
@@ -176,6 +184,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))

View File

@@ -52,6 +52,8 @@ func (i *Process) Start() error {
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
case backends.BackendTypeVllm:
executable = i.globalBackendSettings.VllmExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
@@ -200,6 +202,11 @@ func (i *Process) WaitForHealthy(timeout int) error {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"

View File

@@ -6,6 +6,7 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx" "llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
) )
@@ -26,6 +27,7 @@ type CreateInstanceOptions struct {
// Backend-specific options // Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"` LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"` MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
} }
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -69,6 +71,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return fmt.Errorf("failed to unmarshal MLX options: %w", err) return fmt.Errorf("failed to unmarshal MLX options: %w", err)
} }
} }
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default: default:
return fmt.Errorf("unknown backend type: %s", c.BackendType) return fmt.Errorf("unknown backend type: %s", c.BackendType)
} }
@@ -114,6 +128,20 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err) return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
} }
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts aux.BackendOptions = backendOpts
} }
} }
@@ -171,6 +199,13 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
if c.MlxServerOptions != nil { if c.MlxServerOptions != nil {
return c.MlxServerOptions.BuildCommandArgs() return c.MlxServerOptions.BuildCommandArgs()
} }
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
// Prepend "serve" as first argument
args := []string{"serve"}
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
return args
}
} }
return []string{} return []string{}
} }
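Putting the config and options pieces together, a simplified sketch (not the actual lifecycle code) of how a vLLM instance's process command is assembled:

```go
package main

import (
	"fmt"
	"os/exec"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
)

// buildCmd is a simplified sketch, not the real process-management code: the
// executable comes from the backend config and the args begin with "serve"
// followed by the positional model and the remaining flags.
func buildCmd(backendCfg config.BackendConfig, opts *instance.CreateInstanceOptions) *exec.Cmd {
	return exec.Command(backendCfg.VllmExecutable, opts.BuildCommandArgs()...)
}

func main() {
	cfg := config.BackendConfig{VllmExecutable: "vllm"}
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeVllm,
		VllmServerOptions: &vllm.VllmServerOptions{
			Model:              "my-org/my-model", // placeholder model name
			TensorParallelSize: 2,
		},
	}
	fmt.Println(buildCmd(cfg, opts).String())
}
```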

View File

@@ -264,6 +264,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
}
@@ -279,6 +283,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
}

View File

@@ -8,6 +8,7 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx" "llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/manager" "llamactl/pkg/manager"
@@ -739,3 +740,56 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
} }
} }
} }
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
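For completeness, a hypothetical client call against the new endpoint (a sketch; the "command" field name and the /api/v1 prefix follow the existing handlers and routes, and the API key header is only needed when auth is enabled):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical request body; the model name is a placeholder.
	body := []byte(`{"command": "vllm serve my-org/my-model --tensor-parallel-size 2"}`)

	req, err := http.NewRequest("POST",
		"http://localhost:8080/api/v1/backends/vllm/parse-command",
		bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// On success the response is the CreateInstanceOptions JSON produced above.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```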

View File

@@ -58,6 +58,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route("/mlx", func(r chi.Router) { r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand()) r.Post("/parse-command", handler.ParseMlxCommand())
}) })
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
}) })
// Instance management endpoints // Instance management endpoints

View File

@@ -46,6 +46,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
@@ -88,6 +90,25 @@ func validateMlxOptions(options *instance.CreateInstanceOptions) error {
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)

View File

@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Cpu, Zap, Server } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
if (!backend) {
return null;
}
const getIcon = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return <Cpu className="h-3 w-3" />;
case BackendType.MLX_LM:
return <Zap className="h-3 w-3" />;
case BackendType.VLLM:
return <Server className="h-3 w-3" />;
default:
return <Server className="h-3 w-3" />;
}
};
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getVariant = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "secondary";
case BackendType.MLX_LM:
return "outline";
case BackendType.VLLM:
return "default";
default:
return "secondary";
}
};
return (
<Badge
variant={getVariant()}
className="flex items-center gap-1.5"
>
{getIcon()}
<span className="text-xs">{getText()}</span>
</Badge>
);
};
export default BackendBadge;
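
A brief usage sketch (hypothetical, mirroring how InstanceCard uses it below): the badge resolves its icon, label, and variant from the switches above and renders nothing when no backend is set.

```tsx
import React from "react";
import BackendBadge from "@/components/BackendBadge";
import { BackendType } from "@/types/instance";

// Illustrative only:
const badges = (
  <>
    <BackendBadge backend={BackendType.LLAMA_CPP} /> {/* Cpu icon, "llama.cpp", secondary */}
    <BackendBadge backend={BackendType.VLLM} />      {/* Server icon, "vLLM", default */}
    <BackendBadge />                                  {/* no backend -> renders null */}
  </>
);
```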

View File

@@ -45,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -72,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -99,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}

View File

@@ -5,6 +5,7 @@ import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react"; import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import LogsDialog from "@/components/LogDialog"; import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge"; import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react"; import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth"; import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -58,7 +59,10 @@ function InstanceCard({
<CardHeader className="pb-3"> <CardHeader className="pb-3">
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle> <CardTitle className="text-lg">{instance.name}</CardTitle>
<div className="flex flex-col items-end gap-2">
{running && <HealthBadge health={health} />} {running && <HealthBadge health={health} />}
<BackendBadge backend={instance.options?.backend_type} />
</div>
</div> </div>
</CardHeader> </CardHeader>

View File

@@ -11,11 +11,13 @@ import {
DialogTitle, DialogTitle,
} from "@/components/ui/dialog"; } from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance"; import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils"; import { getAdvancedFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight, Terminal } from "lucide-react"; import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
import ParseCommandDialog from "@/components/ParseCommandDialog"; import ParseCommandDialog from "@/components/ParseCommandDialog";
import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";
import BasicInstanceFields from "@/components/instance/BasicInstanceFields";
import BackendConfiguration from "@/components/instance/BackendConfiguration";
import AdvancedInstanceFields from "@/components/instance/AdvancedInstanceFields";
interface InstanceDialogProps { interface InstanceDialogProps {
open: boolean; open: boolean;
@@ -39,9 +41,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
const [showParseDialog, setShowParseDialog] = useState(false); const [showParseDialog, setShowParseDialog] = useState(false);
// Get field lists dynamically from the type // Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields(); const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields(formData.backend_type);
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type); const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);
// Reset form when dialog opens/closes or when instance changes // Reset form when dialog opens/closes or when instance changes
@@ -163,8 +163,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setShowParseDialog(false); setShowParseDialog(false);
}; };
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic // Save button label logic
let saveButtonLabel = "Create Instance"; let saveButtonLabel = "Create Instance";
@@ -212,70 +210,23 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div> </div>
{/* Auto Restart Configuration Section */} {/* Auto Restart Configuration Section */}
<div className="space-y-4"> <AutoRestartConfiguration
<h3 className="text-lg font-medium"> formData={formData}
Auto Restart Configuration
</h3>
{/* Auto Restart Toggle */}
<ZodFormField
fieldKey="auto_restart"
value={formData.auto_restart}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
{/* Show restart options only when auto restart is enabled */} {/* Basic Fields */}
{isAutoRestartEnabled && ( <BasicInstanceFields
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4"> formData={formData}
<ZodFormField
fieldKey="max_restarts"
value={formData.max_restarts}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
<ZodFormField
fieldKey="restart_delay"
value={formData.restart_delay}
onChange={handleFieldChange}
/>
</div>
)}
</div>
{/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{basicFields
.filter(
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay" &&
fieldKey !== "backend_options" // backend_options is handled separately
)
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
{/* Backend Configuration Section */} {/* Backend Configuration Section */}
<div className="space-y-4"> <BackendConfiguration
<h3 className="text-lg font-medium">Backend Configuration</h3> formData={formData}
onBackendFieldChange={handleBackendFieldChange}
{/* Basic backend fields */} showAdvanced={showAdvanced}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/> />
))}
</div>
{/* Advanced Fields Toggle */} {/* Advanced Fields Toggle */}
<div className="border-t pt-4"> <div className="border-t pt-4">
@@ -314,54 +265,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div> </div>
</div> </div>
{/* Advanced Fields - Automatically generated from type (excluding restart options) */} {/* Advanced Fields */}
{showAdvanced && ( {showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted"> <div className="space-y-4 pl-6 border-l-2 border-muted">
{/* Advanced instance fields */} <AdvancedInstanceFields
{advancedFields formData={formData}
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
).length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
)
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
))}
</div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
</div> </div>
)} )}
</div> </div>

View File

@@ -9,7 +9,7 @@ import {
DialogHeader, DialogHeader,
DialogTitle, DialogTitle,
} from "@/components/ui/dialog"; } from "@/components/ui/dialog";
import { type CreateInstanceOptions } from "@/types/instance"; import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api"; import { backendsApi } from "@/lib/api";
import { toast } from "sonner"; import { toast } from "sonner";
@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
onParsed, onParsed,
}) => { }) => {
const [command, setCommand] = useState(''); const [command, setCommand] = useState('');
const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
setError(null); setError(null);
try { try {
const options = await backendsApi.llamaCpp.parseCommand(command); let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options); onParsed(options);
onOpenChange(false); onOpenChange(false);
// Reset form
setCommand(''); setCommand('');
setError(null); setError(null);
// Show success toast
toast.success('Command parsed successfully'); toast.success('Command parsed successfully');
} catch (err) { } catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command'; const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage); setError(errorMessage);
// Show error toast
toast.error('Failed to parse command', { toast.error('Failed to parse command', {
description: errorMessage description: errorMessage
}); });
@@ -60,31 +74,55 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
const handleOpenChange = (open: boolean) => { const handleOpenChange = (open: boolean) => {
if (!open) { if (!open) {
// Reset form when closing
setCommand(''); setCommand('');
setBackendType(BackendType.LLAMA_CPP);
setError(null); setError(null);
} }
onOpenChange(open); onOpenChange(open);
}; };
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return ( return (
<Dialog open={open} onOpenChange={handleOpenChange}> <Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]"> <DialogContent className="sm:max-w-[600px]">
<DialogHeader> <DialogHeader>
<DialogTitle>Parse Llama Server Command</DialogTitle> <DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription> <DialogDescription>
Paste your llama-server command to automatically populate the form fields Select your backend type and paste the command to automatically populate the form fields
</DialogDescription> </DialogDescription>
</DialogHeader> </DialogHeader>
<div className="space-y-4"> <div className="space-y-4">
<div>
<Label htmlFor="backend-type">Backend Type</Label>
<select
id="backend-type"
value={backendType}
onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
</div>
<div> <div>
<Label htmlFor="command">Command</Label> <Label htmlFor="command">Command</Label>
<textarea <textarea
id="command" id="command"
value={command} value={command}
onChange={(e) => setCommand(e.target.value)} onChange={(e) => setCommand(e.target.value)}
placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096" placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2" className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/> />
</div> </div>

View File

@@ -29,7 +29,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<select <select
id={fieldKey} id={fieldKey}
@@ -39,6 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
> >
<option value={BackendType.LLAMA_CPP}>Llama Server</option> <option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option> <option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select> </select>
{config.description && ( {config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p> <p className="text-sm text-muted-foreground">{config.description}</p>
@@ -70,7 +70,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -97,7 +96,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -124,7 +122,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput
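
A hedged usage sketch: the component keeps form state as `string[] | undefined`, so clearing the field drops the option from the payload instead of sending an empty array. The field name and wrapper component below are hypothetical.

```tsx
import React, { useState } from "react";
import ArrayInput from "@/components/form/ArrayInput";

// Illustrative only: typing "GET, POST" stores ["GET", "POST"]; clearing the
// input stores undefined so the option is omitted entirely.
function AllowedMethodsExample() {
  const [methods, setMethods] = useState<string[] | undefined>(undefined);
  return (
    <ArrayInput
      id="allowed_methods"
      label="Allowed Methods"
      value={methods}
      onChange={setMethods}
      placeholder="GET, POST"
    />
  );
}

export default AllowedMethodsExample;
```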

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,98 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getAdvancedFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import ArrayInput from '@/components/form/ArrayInput'
interface AdvancedInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AdvancedInstanceFields: React.FC<AdvancedInstanceFieldsProps> = ({
formData,
onChange
}) => {
const advancedFields = getAdvancedFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
case 'array':
return (
<ArrayInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string[] | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out restart options and backend_options (handled separately)
const fieldsToRender = advancedFields.filter(
fieldKey => !['max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
if (fieldsToRender.length === 0) {
return null
}
return (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{fieldsToRender
.sort()
.map(renderField)}
</div>
)
}
export default AdvancedInstanceFields

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
const isAutoRestartEnabled = formData.auto_restart === true
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,99 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
formData,
onChange
}) => {
const basicFields = getBasicFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
// Special handling for backend_type field
if (fieldKey === 'backend_type') {
return (
<SelectInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] || BackendType.LLAMA_CPP}
onChange={(value) => onChange(fieldKey, value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description={config.description}
/>
)
}
// Render based on field type
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out auto restart fields and backend_options (handled separately)
const fieldsToRender = basicFields.filter(
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{fieldsToRender.map(renderField)}
</div>
)
}
export default BasicInstanceFields

View File

@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance"; import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1"; const API_BASE = "/api/v1";
@@ -30,25 +31,8 @@ async function apiCall<T>(
headers, headers,
}); });
// Handle authentication errors // Handle errors using centralized error handler
if (response.status === 401) { await handleApiError(response);
throw new Error('Authentication required');
}
if (!response.ok) {
// Try to get error message from response
let errorMessage = `HTTP ${response.status}`;
try {
const errorText = await response.text();
if (errorText) {
errorMessage += `: ${errorText}`;
}
} catch {
// If we can't read the error, just use status
}
throw new Error(errorMessage);
}
// Handle empty responses (like DELETE) // Handle empty responses (like DELETE)
if (response.status === 204) { if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
const text = await response.text(); const text = await response.text();
return text as T; return text as T;
} else { } else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
}
const data = await response.json() as T; const data = await response.json() as T;
return data; return data;
} }
@@ -101,6 +93,14 @@ export const backendsApi = {
body: JSON.stringify({ command }), body: JSON.stringify({ command }),
}), }),
}, },
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
}; };
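
A short sketch of the new client call (values hypothetical); it mirrors the existing llamaCpp and mlx parse helpers:

```typescript
import { backendsApi } from "@/lib/api";

// Illustrative only: parse a pasted vLLM command into CreateInstanceOptions.
async function prefillFromVllmCommand() {
  const options = await backendsApi.vllm.parseCommand(
    "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9"
  );
  // options.backend_type is "vllm"; options.backend_options carries the parsed flags.
  return options;
}
```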
// Instance API functions // Instance API functions

View File

@@ -0,0 +1,32 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
throw new Error(errorMessage)
}
}
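
A minimal sketch of the pattern `apiCall` adopts above: run the helper before touching the body so 401s and other non-2xx statuses become thrown Errors in one place. The `@/lib/errorUtils` alias is assumed to match the project's `@/lib` convention.

```typescript
import { handleApiError } from "@/lib/errorUtils";

// Illustrative only:
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  // Throws "Authentication required" on 401, "HTTP <status>: <body>" otherwise.
  await handleApiError(response);
  return (await response.json()) as T;
}
```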

View File

@@ -2,13 +2,17 @@ import {
type CreateInstanceOptions, type CreateInstanceOptions,
type LlamaCppBackendOptions, type LlamaCppBackendOptions,
type MlxBackendOptions, type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema, LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema, MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllFieldKeys, getAllFieldKeys,
getAllLlamaCppFieldKeys, getAllLlamaCppFieldKeys,
getAllMlxFieldKeys, getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType, getLlamaCppFieldType,
getMlxFieldType getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions' } from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific) // Instance-level basic fields (not backend-specific)
@@ -16,7 +20,6 @@ export const basicFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
auto_restart: { auto_restart: {
label: 'Auto Restart', label: 'Auto Restart',
@@ -52,13 +55,11 @@ const basicLlamaCppFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
model: { model: {
label: 'Model Path', label: 'Model Path',
placeholder: '/path/to/model.gguf', placeholder: '/path/to/model.gguf',
description: 'Path to the model file', description: 'Path to the model file'
required: true
}, },
hf_repo: { hf_repo: {
label: 'Hugging Face Repository', label: 'Hugging Face Repository',
@@ -82,13 +83,11 @@ const basicMlxFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
model: { model: {
label: 'Model', label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config', description: 'The path to the MLX model weights, tokenizer, and config'
required: true
}, },
temp: { temp: {
label: 'Temperature', label: 'Temperature',
@@ -117,11 +116,46 @@ const basicMlxFieldsConfig: Record<string, {
} }
} }
// vLLM backend-specific basic fields
const basicVllmFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'microsoft/DialoGPT-medium',
description: 'The name or path of the Hugging Face model to use'
},
tensor_parallel_size: {
label: 'Tensor Parallel Size',
placeholder: '1',
description: 'Number of GPUs to use for distributed serving'
},
gpu_memory_utilization: {
label: 'GPU Memory Utilization',
placeholder: '0.9',
description: 'The fraction of GPU memory to be used for the model executor'
}
}
// Backend field configuration lookup
const backendFieldConfigs = {
mlx_lm: basicMlxFieldsConfig,
vllm: basicVllmFieldsConfig,
llama_cpp: basicLlamaCppFieldsConfig,
} as const
const backendFieldGetters = {
mlx_lm: getAllMlxFieldKeys,
vllm: getAllVllmFieldKeys,
llama_cpp: getAllLlamaCppFieldKeys,
} as const
function isBasicField(key: keyof CreateInstanceOptions): boolean { function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig return key in basicFieldsConfig
} }
export function getBasicFields(): (keyof CreateInstanceOptions)[] { export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[] return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
} }
@@ -130,25 +164,18 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key)) return getAllFieldKeys().filter(key => !isBasicField(key))
} }
export function getBasicBackendFields(backendType?: string): string[] { export function getBasicBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') { const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
return Object.keys(basicMlxFieldsConfig) const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
} else if (backendType === 'llama_cpp') { return Object.keys(config)
return Object.keys(basicLlamaCppFieldsConfig)
}
// Default to LlamaCpp for backward compatibility
return Object.keys(basicLlamaCppFieldsConfig)
} }
export function getAdvancedBackendFields(backendType?: string): string[] { export function getAdvancedBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') { const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig)) const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
} else if (backendType === 'llama_cpp') { const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
} return fieldGetter().filter(key => !(key in basicConfig))
// Default to LlamaCpp for backward compatibility
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
} }
// Combined backend fields config for use in BackendFormField // Combined backend fields config for use in BackendFormField
@@ -156,10 +183,10 @@ export const basicBackendFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
...basicLlamaCppFieldsConfig, ...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig ...basicMlxFieldsConfig,
...basicVllmFieldsConfig
} }
// Get field type for any backend option (union type) // Get field type for any backend option (union type)
@@ -182,6 +209,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
// Schema might not be available // Schema might not be available
} }
// Try vLLM schema
try {
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
return getVllmFieldType(key as keyof VllmBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback // Default fallback
return 'text' return 'text'
} }
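
To illustrate the lookup-table refactor and the new vLLM wiring, a few calls with their results noted as comments (they follow directly from the configs and schemas above):

```typescript
import {
  getBasicBackendFields,
  getAdvancedBackendFields,
  getBackendFieldType,
} from "@/lib/zodFormUtils";

// Basic vLLM fields come straight from basicVllmFieldsConfig:
getBasicBackendFields("vllm");
// -> ["model", "tensor_parallel_size", "gpu_memory_utilization"]

// Everything else in the vLLM schema counts as advanced:
getAdvancedBackendFields("vllm").includes("gpu_memory_utilization"); // false
getAdvancedBackendFields("vllm").includes("max_model_len");          // true

// Field types resolve through the schemas, vLLM included:
getBackendFieldType("tensor_parallel_size"); // "number"

// Unknown backend types fall back to the llama.cpp field set:
getBasicBackendFields("something-else")[0];  // "model"
```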

View File

@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'

View File

@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,51 @@
import { z } from 'zod'
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,150 @@
import { z } from 'zod'
// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
// Basic connection options (auto-assigned by llamactl)
host: z.string().optional(),
port: z.number().optional(),
// Model and engine configuration
model: z.string().optional(),
tokenizer: z.string().optional(),
skip_tokenizer_init: z.boolean().optional(),
revision: z.string().optional(),
code_revision: z.string().optional(),
tokenizer_revision: z.string().optional(),
tokenizer_mode: z.string().optional(),
trust_remote_code: z.boolean().optional(),
download_dir: z.string().optional(),
load_format: z.string().optional(),
config_format: z.string().optional(),
dtype: z.string().optional(),
kv_cache_dtype: z.string().optional(),
quantization_param_path: z.string().optional(),
seed: z.number().optional(),
max_model_len: z.number().optional(),
guided_decoding_backend: z.string().optional(),
distributed_executor_backend: z.string().optional(),
worker_use_ray: z.boolean().optional(),
ray_workers_use_nsight: z.boolean().optional(),
// Performance and serving configuration
block_size: z.number().optional(),
enable_prefix_caching: z.boolean().optional(),
disable_sliding_window: z.boolean().optional(),
use_v2_block_manager: z.boolean().optional(),
num_lookahead_slots: z.number().optional(),
swap_space: z.number().optional(),
cpu_offload_gb: z.number().optional(),
gpu_memory_utilization: z.number().optional(),
num_gpu_blocks_override: z.number().optional(),
max_num_batched_tokens: z.number().optional(),
max_num_seqs: z.number().optional(),
max_logprobs: z.number().optional(),
disable_log_stats: z.boolean().optional(),
quantization: z.string().optional(),
rope_scaling: z.string().optional(),
rope_theta: z.number().optional(),
enforce_eager: z.boolean().optional(),
max_context_len_to_capture: z.number().optional(),
max_seq_len_to_capture: z.number().optional(),
disable_custom_all_reduce: z.boolean().optional(),
tokenizer_pool_size: z.number().optional(),
tokenizer_pool_type: z.string().optional(),
tokenizer_pool_extra_config: z.string().optional(),
enable_lora_bias: z.boolean().optional(),
lora_extra_vocab_size: z.number().optional(),
lora_rank: z.number().optional(),
prompt_lookback_distance: z.number().optional(),
preemption_mode: z.string().optional(),
// Distributed and parallel processing
tensor_parallel_size: z.number().optional(),
pipeline_parallel_size: z.number().optional(),
max_parallel_loading_workers: z.number().optional(),
disable_async_output_proc: z.boolean().optional(),
worker_class: z.string().optional(),
enabled_lora_modules: z.string().optional(),
max_lora_rank: z.number().optional(),
fully_sharded_loras: z.boolean().optional(),
lora_modules: z.string().optional(),
prompt_adapters: z.string().optional(),
max_prompt_adapter_token: z.number().optional(),
device: z.string().optional(),
scheduler_delay: z.number().optional(),
enable_chunked_prefill: z.boolean().optional(),
speculative_model: z.string().optional(),
speculative_model_quantization: z.string().optional(),
speculative_revision: z.string().optional(),
speculative_max_model_len: z.number().optional(),
speculative_disable_by_batch_size: z.number().optional(),
ngpt_speculative_length: z.number().optional(),
speculative_disable_mqa: z.boolean().optional(),
model_loader_extra_config: z.string().optional(),
ignore_patterns: z.string().optional(),
preloaded_lora_modules: z.string().optional(),
// OpenAI server specific options
uds: z.string().optional(),
uvicorn_log_level: z.string().optional(),
response_role: z.string().optional(),
ssl_keyfile: z.string().optional(),
ssl_certfile: z.string().optional(),
ssl_ca_certs: z.string().optional(),
ssl_cert_reqs: z.number().optional(),
root_path: z.string().optional(),
middleware: z.array(z.string()).optional(),
return_tokens_as_token_ids: z.boolean().optional(),
disable_frontend_multiprocessing: z.boolean().optional(),
enable_auto_tool_choice: z.boolean().optional(),
tool_call_parser: z.string().optional(),
tool_server: z.string().optional(),
chat_template: z.string().optional(),
chat_template_content_format: z.string().optional(),
allow_credentials: z.boolean().optional(),
allowed_origins: z.array(z.string()).optional(),
allowed_methods: z.array(z.string()).optional(),
allowed_headers: z.array(z.string()).optional(),
api_key: z.array(z.string()).optional(),
enable_log_outputs: z.boolean().optional(),
enable_token_usage: z.boolean().optional(),
enable_async_engine_debug: z.boolean().optional(),
engine_use_ray: z.boolean().optional(),
disable_log_requests: z.boolean().optional(),
max_log_len: z.number().optional(),
// Additional engine configuration
task: z.string().optional(),
multi_modal_config: z.string().optional(),
limit_mm_per_prompt: z.string().optional(),
enable_sleep_mode: z.boolean().optional(),
enable_chunking_request: z.boolean().optional(),
compilation_config: z.string().optional(),
disable_sliding_window_mask: z.boolean().optional(),
enable_trtllm_engine_latency: z.boolean().optional(),
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}
// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = VllmBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
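
A few illustrative calls (results follow from the schema above; the `@/schemas/backends` import path assumes the re-export index shown earlier):

```typescript
import { getAllVllmFieldKeys, getVllmFieldType } from "@/schemas/backends";

// Illustrative only:
getVllmFieldType("gpu_memory_utilization"); // "number"
getVllmFieldType("trust_remote_code");      // "boolean"
getVllmFieldType("allowed_origins");        // "array"
getVllmFieldType("dtype");                  // "text"

// One key per property declared in VllmBackendOptionsSchema:
getAllVllmFieldKeys().includes("tensor_parallel_size"); // true
```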

View File

@@ -1,206 +1,27 @@
import { BackendType } from '@/types/instance' import { BackendType } from '@/types/instance'
import { z } from 'zod' import { z } from 'zod'
// Define the LlamaCpp backend options schema // Import backend schemas from separate files
export const LlamaCppBackendOptionsSchema = z.object({ import {
// Common params LlamaCppBackendOptionsSchema,
verbose_prompt: z.boolean().optional(), type LlamaCppBackendOptions,
threads: z.number().optional(), getAllLlamaCppFieldKeys,
threads_batch: z.number().optional(), getLlamaCppFieldType,
cpu_mask: z.string().optional(), MlxBackendOptionsSchema,
cpu_range: z.string().optional(), type MlxBackendOptions,
cpu_strict: z.number().optional(), getAllMlxFieldKeys,
prio: z.number().optional(), getMlxFieldType,
poll: z.number().optional(), VllmBackendOptionsSchema,
cpu_mask_batch: z.string().optional(), type VllmBackendOptions,
cpu_range_batch: z.string().optional(), getAllVllmFieldKeys,
cpu_strict_batch: z.number().optional(), getVllmFieldType
prio_batch: z.number().optional(), } from './backends'
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Backend options union
export const BackendOptionsSchema = z.union([
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
])
// Define the main create instance options schema
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
on_demand_start: z.boolean().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
})
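
As a rough sketch of how this schema might be used on the client side (the payload values below are examples, not part of this change), a create-instance request body can be checked with `safeParse` before it is sent to the API:

```typescript
// Illustrative only: validate the shape of a create-instance payload.
// Import paths follow the aliases used elsewhere in the webui code.
import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'
import { BackendType } from '@/types/instance'

const payload = {
  on_demand_start: true,
  backend_type: BackendType.VLLM,
  backend_options: {
    task: 'generate',        // example vLLM option from the schema above
    enable_sleep_mode: true, // example vLLM option from the schema above
  },
}

const result = CreateInstanceOptionsSchema.safeParse(payload)
if (!result.success) {
  console.error(result.error.issues)
}
```

Note that because every field in the union members is optional and Zod objects strip unknown keys by default, this check validates field types rather than backend-specific completeness; a payload with misspelled option names can still pass.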
// Re-export types and schemas from backend files
export {
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
}
// Infer the TypeScript types from the schemas
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type from Zod schema
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
if (innerSchema instanceof z.ZodObject) return 'object'
return 'text' // ZodString and others default to text
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
export const BackendType = {
LLAMA_CPP: 'llama_cpp',
MLX_LM: 'mlx_lm',
VLLM: 'vllm',
// MLX_VLM: 'mlx_vlm', // Future expansion
} as const
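
A small sketch of how the new `VLLM` backend type could be wired to the per-backend field-type helpers re-exported above. The `fieldTypeFor` function is hypothetical and only shown to illustrate the intended dispatch; the imports reference exports that appear in this diff.

```typescript
import { BackendType } from '@/types/instance'
import {
  getLlamaCppFieldType,
  getMlxFieldType,
  getVllmFieldType,
  type LlamaCppBackendOptions,
  type MlxBackendOptions,
  type VllmBackendOptions,
} from '@/schemas/instanceOptions'

type Backend = (typeof BackendType)[keyof typeof BackendType]

// Resolve the UI input type for a backend-specific option field
function fieldTypeFor(backend: Backend, key: string): 'text' | 'number' | 'boolean' | 'array' {
  switch (backend) {
    case BackendType.LLAMA_CPP:
      return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
    case BackendType.MLX_LM:
      return getMlxFieldType(key as keyof MlxBackendOptions)
    case BackendType.VLLM:
      return getVllmFieldType(key as keyof VllmBackendOptions)
    default:
      return 'text'
  }
}

console.log(fieldTypeFor(BackendType.VLLM, 'enable_sleep_mode')) // 'boolean'
```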