Merge pull request #34 from lordmathis/feat/vllm-backend
feat: Implement vLLM backend

README.md (26 changed lines)
@@ -13,7 +13,7 @@

### 🔗 Universal Compatibility

- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM

### 🌐 User-Friendly Interface

- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -31,6 +31,7 @@
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm

# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -47,7 +48,7 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp or MLX)
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Start or stop the instance

@@ -63,6 +64,11 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'

# Create vLLM instance
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'

# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
@@ -121,6 +127,21 @@ source mlx-env/bin/activate
pip install mlx-lm
```

**For vLLM backend:**
You need vLLM installed:

```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm

# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm

# For production deployments, consider container-based installation
```

## Configuration

llamactl works out of the box with sensible defaults.
@@ -135,6 +156,7 @@ server:
backends:
  llama_executable: llama-server    # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server  # Path to mlx_lm.server executable
  vllm_executable: vllm             # Path to vllm executable

instances:
  port_range: [8000, 9000]  # Port range for instances

apidocs/docs.go (675 changed lines)
@@ -19,6 +19,159 @@ const docTemplate = `{
    "host": "{{.Host}}",
    "basePath": "{{.BasePath}}",
    "paths": {
        "/backends/llama-cpp/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses a llama-server command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse llama-server command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/backends/mlx/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses MLX-LM server command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse mlx_lm.server command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/backends/vllm/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses a vLLM serve command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse vllm serve command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/instances": {
            "get": {
                "security": [
@@ -681,522 +834,46 @@ const docTemplate = `{
        }
    },
    "definitions": {
        "backends.BackendType": {
            "type": "string",
            "enum": [
                "llama_cpp",
                "mlx_lm",
                "vllm"
            ],
            "x-enum-varnames": [
                "BackendTypeLlamaCpp",
                "BackendTypeMlxLm",
                "BackendTypeVllm"
            ]
        },
        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
                    "type": "string"
                },
                "api_key": {
                    "type": "string"
                },
                "api_key_file": {
                    "type": "string"
                },
                "auto_restart": {
                    "description": "Auto restart",
                    "type": "boolean"
                },
                "batch_size": {
                    "type": "integer"
                "backend_options": {
                    "type": "object",
                    "additionalProperties": {}
                },
                "cache_reuse": {
                    "type": "integer"
                },
                "cache_type_k": {
                    "type": "string"
                },
                "cache_type_k_draft": {
                    "type": "string"
                },
                "cache_type_v": {
                    "type": "string"
                },
                "cache_type_v_draft": {
                    "type": "string"
                },
                "chat_template": {
                    "type": "string"
                },
                "chat_template_file": {
                    "type": "string"
                },
                "chat_template_kwargs": {
                    "type": "string"
                },
                "check_tensors": {
                    "type": "boolean"
                },
                "cont_batching": {
                    "type": "boolean"
                },
                "control_vector": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "control_vector_layer_range": {
                    "type": "string"
                },
                "control_vector_scaled": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "cpu_mask": {
                    "type": "string"
                },
                "cpu_mask_batch": {
                    "type": "string"
                },
                "cpu_range": {
                    "type": "string"
                },
                "cpu_range_batch": {
                    "type": "string"
                },
                "cpu_strict": {
                    "type": "integer"
                },
                "cpu_strict_batch": {
                    "type": "integer"
                },
                "ctx_size": {
                    "type": "integer"
                },
                "ctx_size_draft": {
                    "type": "integer"
                },
                "defrag_thold": {
                    "type": "number"
                },
                "device": {
                    "type": "string"
                },
                "device_draft": {
                    "type": "string"
                },
                "draft_max": {
                    "type": "integer"
                },
                "draft_min": {
                    "type": "integer"
                },
                "draft_p_min": {
                    "type": "number"
                },
                "dry_allowed_length": {
                    "type": "integer"
                },
                "dry_base": {
                    "type": "number"
                },
                "dry_multiplier": {
                    "type": "number"
                },
                "dry_penalty_last_n": {
                    "type": "integer"
                },
                "dry_sequence_breaker": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "dump_kv_cache": {
                    "type": "boolean"
                },
                "dynatemp_exp": {
                    "type": "number"
                },
                "dynatemp_range": {
                    "type": "number"
                },
                "embd_bge_small_en_default": {
                    "description": "Default model params",
                    "type": "boolean"
                },
                "embd_e5_small_en_default": {
                    "type": "boolean"
                },
                "embd_gte_small_default": {
                    "type": "boolean"
                },
                "embedding": {
                    "type": "boolean"
                },
                "escape": {
                    "type": "boolean"
                },
                "fim_qwen_14b_spec": {
                    "type": "boolean"
                },
                "fim_qwen_1_5b_default": {
                    "type": "boolean"
                },
                "fim_qwen_3b_default": {
                    "type": "boolean"
                },
                "fim_qwen_7b_default": {
                    "type": "boolean"
                },
                "fim_qwen_7b_spec": {
                    "type": "boolean"
                },
                "flash_attn": {
                    "type": "boolean"
                },
                "frequency_penalty": {
                    "type": "number"
                },
                "gpu_layers": {
                    "type": "integer"
                },
                "gpu_layers_draft": {
                    "type": "integer"
                },
                "grammar": {
                    "type": "string"
                },
                "grammar_file": {
                    "type": "string"
                },
                "hf_file": {
                    "type": "string"
                },
                "hf_file_v": {
                    "type": "string"
                },
                "hf_repo": {
                    "type": "string"
                },
                "hf_repo_draft": {
                    "type": "string"
                },
                "hf_repo_v": {
                    "type": "string"
                },
                "hf_token": {
                    "type": "string"
                },
                "host": {
                    "type": "string"
                "backend_type": {
                    "$ref": "#/definitions/backends.BackendType"
                },
                "idle_timeout": {
                    "description": "Idle timeout",
                    "type": "integer"
                },
                "ignore_eos": {
                    "type": "boolean"
                },
                "jinja": {
                    "type": "boolean"
                },
                "json_schema": {
                    "type": "string"
                },
                "json_schema_file": {
                    "type": "string"
                },
                "keep": {
                    "type": "integer"
                },
                "log_colors": {
                    "type": "boolean"
                },
                "log_disable": {
                    "type": "boolean"
                },
                "log_file": {
                    "type": "string"
                },
                "log_prefix": {
                    "type": "boolean"
                },
                "log_timestamps": {
                    "type": "boolean"
                },
                "logit_bias": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "lora": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "lora_init_without_apply": {
                    "type": "boolean"
                },
                "lora_scaled": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "main_gpu": {
                    "type": "integer"
                },
                "max_restarts": {
                    "type": "integer"
                },
                "metrics": {
                    "type": "boolean"
                },
                "min_p": {
                    "type": "number"
                },
                "mirostat": {
                    "type": "integer"
                },
                "mirostat_ent": {
                    "type": "number"
                },
                "mirostat_lr": {
                    "type": "number"
                },
                "mlock": {
                    "type": "boolean"
                },
                "mmproj": {
                    "type": "string"
                },
                "mmproj_url": {
                    "type": "string"
                },
                "model": {
                    "type": "string"
                },
                "model_draft": {
                    "type": "string"
                },
                "model_url": {
                    "type": "string"
                },
                "model_vocoder": {
                    "description": "Audio/TTS params",
                    "type": "string"
                },
                "no_cont_batching": {
                    "type": "boolean"
                },
                "no_context_shift": {
                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
                    "type": "boolean"
                },
                "no_kv_offload": {
                    "type": "boolean"
                },
                "no_mmap": {
                    "type": "boolean"
                },
                "no_mmproj": {
                    "type": "boolean"
                },
                "no_mmproj_offload": {
                    "type": "boolean"
                },
                "no_perf": {
                    "type": "boolean"
                },
                "no_prefill_assistant": {
                    "type": "boolean"
                },
                "no_slots": {
                    "type": "boolean"
                },
                "no_warmup": {
                    "type": "boolean"
                },
                "no_webui": {
                    "type": "boolean"
                },
                "numa": {
                    "type": "string"
                },
                "on_demand_start": {
                    "description": "On demand start",
                    "type": "boolean"
                },
                "override_kv": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "override_tensor": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "parallel": {
                    "type": "integer"
                },
                "path": {
                    "type": "string"
                },
                "poll": {
                    "type": "integer"
                },
                "poll_batch": {
                    "type": "integer"
                },
                "pooling": {
                    "type": "string"
                },
                "port": {
                    "type": "integer"
                },
                "predict": {
                    "type": "integer"
                },
                "presence_penalty": {
                    "type": "number"
                },
                "prio": {
                    "type": "integer"
                },
                "prio_batch": {
                    "type": "integer"
                },
                "props": {
                    "type": "boolean"
                },
                "reasoning_budget": {
                    "type": "integer"
                },
                "reasoning_format": {
                    "type": "string"
                },
                "repeat_last_n": {
                    "type": "integer"
                },
                "repeat_penalty": {
                    "type": "number"
                },
                "reranking": {
                    "type": "boolean"
                },
                "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
                    "type": "number"
                },
                "rope_freq_scale": {
                    "type": "number"
                },
                "rope_scale": {
                    "type": "number"
                },
                "rope_scaling": {
                    "type": "string"
                },
                "samplers": {
                    "description": "Sampling params",
                    "type": "string"
                },
                "sampling_seq": {
                    "type": "string"
                },
                "seed": {
                    "type": "integer"
                },
                "slot_prompt_similarity": {
                    "type": "number"
                },
                "slot_save_path": {
                    "type": "string"
                },
                "slots": {
                    "type": "boolean"
                },
                "special": {
                    "type": "boolean"
                },
                "split_mode": {
                    "type": "string"
                },
                "spm_infill": {
                    "type": "boolean"
                },
                "ssl_cert_file": {
                    "type": "string"
                },
                "ssl_key_file": {
                    "type": "string"
                },
                "temp": {
                    "type": "number"
                },
                "tensor_split": {
                    "type": "string"
                },
                "threads": {
                    "type": "integer"
                },
                "threads_batch": {
                    "type": "integer"
                },
                "threads_http": {
                    "type": "integer"
                },
                "timeout": {
                    "type": "integer"
                },
                "top_k": {
                    "type": "integer"
                },
                "top_p": {
                    "type": "number"
                },
                "tts_use_guide_tokens": {
                    "type": "boolean"
                },
                "typical": {
                    "type": "number"
                },
                "ubatch_size": {
                    "type": "integer"
                },
                "verbose": {
                    "type": "boolean"
                },
                "verbose_prompt": {
                    "description": "Common params",
                    "type": "boolean"
                },
                "verbosity": {
                    "type": "integer"
                },
                "xtc_probability": {
                    "type": "number"
                },
                "xtc_threshold": {
                    "type": "number"
                },
                "yarn_attn_factor": {
                    "type": "number"
                },
                "yarn_beta_fast": {
                    "type": "number"
                },
                "yarn_beta_slow": {
                    "type": "number"
                },
                "yarn_ext_factor": {
                    "type": "number"
                },
                "yarn_orig_ctx": {
                    "description": "seconds",
                    "type": "integer"
                }
            }
@@ -1264,6 +941,14 @@ const docTemplate = `{
                    "type": "string"
                }
            }
        },
        "server.ParseCommandRequest": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string"
                }
            }
        }
    }
}`

apidocs/swagger.json

@@ -12,6 +12,159 @@
    },
    "basePath": "/api/v1",
    "paths": {
        "/backends/llama-cpp/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses a llama-server command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse llama-server command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/backends/mlx/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses MLX-LM server command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse mlx_lm.server command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/backends/vllm/parse-command": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Parses a vLLM serve command string into instance options",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "backends"
                ],
                "summary": "Parse vllm serve command",
                "parameters": [
                    {
                        "description": "Command to parse",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/server.ParseCommandRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Parsed options",
                        "schema": {
                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    },
                    "400": {
                        "description": "Invalid request or command",
                        "schema": {
                            "type": "object",
                            "additionalProperties": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "/instances": {
            "get": {
                "security": [
@@ -674,522 +827,46 @@
        }
    },
    "definitions": {
        "backends.BackendType": {
            "type": "string",
            "enum": [
                "llama_cpp",
                "mlx_lm",
                "vllm"
            ],
            "x-enum-varnames": [
                "BackendTypeLlamaCpp",
                "BackendTypeMlxLm",
                "BackendTypeVllm"
            ]
        },
        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
                    "type": "string"
                },
                "api_key": {
                    "type": "string"
                },
                "api_key_file": {
                    "type": "string"
                },
                "auto_restart": {
                    "description": "Auto restart",
                    "type": "boolean"
                },
                "batch_size": {
                    "type": "integer"
                "backend_options": {
                    "type": "object",
                    "additionalProperties": {}
                },
                "cache_reuse": {
                    "type": "integer"
                },
                "cache_type_k": {
                    "type": "string"
                },
                "cache_type_k_draft": {
                    "type": "string"
                },
                "cache_type_v": {
                    "type": "string"
                },
                "cache_type_v_draft": {
                    "type": "string"
                },
                "chat_template": {
                    "type": "string"
                },
                "chat_template_file": {
                    "type": "string"
                },
                "chat_template_kwargs": {
                    "type": "string"
                },
                "check_tensors": {
                    "type": "boolean"
                },
                "cont_batching": {
                    "type": "boolean"
                },
                "control_vector": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "control_vector_layer_range": {
                    "type": "string"
                },
                "control_vector_scaled": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "cpu_mask": {
                    "type": "string"
                },
                "cpu_mask_batch": {
                    "type": "string"
                },
                "cpu_range": {
                    "type": "string"
                },
                "cpu_range_batch": {
                    "type": "string"
                },
                "cpu_strict": {
                    "type": "integer"
                },
                "cpu_strict_batch": {
                    "type": "integer"
                },
                "ctx_size": {
                    "type": "integer"
                },
                "ctx_size_draft": {
                    "type": "integer"
                },
                "defrag_thold": {
                    "type": "number"
                },
                "device": {
                    "type": "string"
                },
                "device_draft": {
                    "type": "string"
                },
                "draft_max": {
                    "type": "integer"
                },
                "draft_min": {
                    "type": "integer"
                },
                "draft_p_min": {
                    "type": "number"
                },
                "dry_allowed_length": {
                    "type": "integer"
                },
                "dry_base": {
                    "type": "number"
                },
                "dry_multiplier": {
                    "type": "number"
                },
                "dry_penalty_last_n": {
                    "type": "integer"
                },
                "dry_sequence_breaker": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "dump_kv_cache": {
                    "type": "boolean"
                },
                "dynatemp_exp": {
                    "type": "number"
                },
                "dynatemp_range": {
                    "type": "number"
                },
                "embd_bge_small_en_default": {
                    "description": "Default model params",
                    "type": "boolean"
                },
                "embd_e5_small_en_default": {
                    "type": "boolean"
                },
                "embd_gte_small_default": {
                    "type": "boolean"
                },
                "embedding": {
                    "type": "boolean"
                },
                "escape": {
                    "type": "boolean"
                },
                "fim_qwen_14b_spec": {
                    "type": "boolean"
                },
                "fim_qwen_1_5b_default": {
                    "type": "boolean"
                },
                "fim_qwen_3b_default": {
                    "type": "boolean"
                },
                "fim_qwen_7b_default": {
                    "type": "boolean"
                },
                "fim_qwen_7b_spec": {
                    "type": "boolean"
                },
                "flash_attn": {
                    "type": "boolean"
                },
                "frequency_penalty": {
                    "type": "number"
                },
                "gpu_layers": {
                    "type": "integer"
                },
                "gpu_layers_draft": {
                    "type": "integer"
                },
                "grammar": {
                    "type": "string"
                },
                "grammar_file": {
                    "type": "string"
                },
                "hf_file": {
                    "type": "string"
                },
                "hf_file_v": {
                    "type": "string"
                },
                "hf_repo": {
                    "type": "string"
                },
                "hf_repo_draft": {
                    "type": "string"
                },
                "hf_repo_v": {
                    "type": "string"
                },
                "hf_token": {
                    "type": "string"
                },
                "host": {
                    "type": "string"
                "backend_type": {
                    "$ref": "#/definitions/backends.BackendType"
                },
                "idle_timeout": {
                    "description": "Idle timeout",
                    "type": "integer"
                },
                "ignore_eos": {
                    "type": "boolean"
                },
                "jinja": {
                    "type": "boolean"
                },
                "json_schema": {
                    "type": "string"
                },
                "json_schema_file": {
                    "type": "string"
                },
                "keep": {
                    "type": "integer"
                },
                "log_colors": {
                    "type": "boolean"
                },
                "log_disable": {
                    "type": "boolean"
                },
                "log_file": {
                    "type": "string"
                },
                "log_prefix": {
                    "type": "boolean"
                },
                "log_timestamps": {
                    "type": "boolean"
                },
                "logit_bias": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "lora": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "lora_init_without_apply": {
                    "type": "boolean"
                },
                "lora_scaled": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "main_gpu": {
                    "type": "integer"
                },
                "max_restarts": {
                    "type": "integer"
                },
                "metrics": {
                    "type": "boolean"
                },
                "min_p": {
                    "type": "number"
                },
                "mirostat": {
                    "type": "integer"
                },
                "mirostat_ent": {
                    "type": "number"
                },
                "mirostat_lr": {
                    "type": "number"
                },
                "mlock": {
                    "type": "boolean"
                },
                "mmproj": {
                    "type": "string"
                },
                "mmproj_url": {
                    "type": "string"
                },
                "model": {
                    "type": "string"
                },
                "model_draft": {
                    "type": "string"
                },
                "model_url": {
                    "type": "string"
                },
                "model_vocoder": {
                    "description": "Audio/TTS params",
                    "type": "string"
                },
                "no_cont_batching": {
                    "type": "boolean"
                },
                "no_context_shift": {
                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
                    "type": "boolean"
                },
                "no_kv_offload": {
                    "type": "boolean"
                },
                "no_mmap": {
                    "type": "boolean"
                },
                "no_mmproj": {
                    "type": "boolean"
                },
                "no_mmproj_offload": {
                    "type": "boolean"
                },
                "no_perf": {
                    "type": "boolean"
                },
                "no_prefill_assistant": {
                    "type": "boolean"
                },
                "no_slots": {
                    "type": "boolean"
                },
                "no_warmup": {
                    "type": "boolean"
                },
                "no_webui": {
                    "type": "boolean"
                },
                "numa": {
                    "type": "string"
                },
                "on_demand_start": {
                    "description": "On demand start",
                    "type": "boolean"
                },
                "override_kv": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "override_tensor": {
                    "type": "array",
                    "items": {
                        "type": "string"
                    }
                },
                "parallel": {
                    "type": "integer"
                },
                "path": {
                    "type": "string"
                },
                "poll": {
                    "type": "integer"
                },
                "poll_batch": {
                    "type": "integer"
                },
                "pooling": {
                    "type": "string"
                },
                "port": {
                    "type": "integer"
                },
                "predict": {
                    "type": "integer"
                },
                "presence_penalty": {
                    "type": "number"
                },
                "prio": {
                    "type": "integer"
                },
                "prio_batch": {
                    "type": "integer"
                },
                "props": {
                    "type": "boolean"
                },
                "reasoning_budget": {
                    "type": "integer"
                },
                "reasoning_format": {
                    "type": "string"
                },
                "repeat_last_n": {
                    "type": "integer"
                },
                "repeat_penalty": {
                    "type": "number"
                },
                "reranking": {
                    "type": "boolean"
                },
                "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
                    "type": "number"
                },
                "rope_freq_scale": {
                    "type": "number"
                },
                "rope_scale": {
                    "type": "number"
                },
                "rope_scaling": {
                    "type": "string"
                },
                "samplers": {
                    "description": "Sampling params",
                    "type": "string"
                },
                "sampling_seq": {
                    "type": "string"
                },
                "seed": {
                    "type": "integer"
                },
                "slot_prompt_similarity": {
                    "type": "number"
                },
                "slot_save_path": {
                    "type": "string"
                },
                "slots": {
                    "type": "boolean"
                },
                "special": {
                    "type": "boolean"
                },
                "split_mode": {
                    "type": "string"
                },
                "spm_infill": {
                    "type": "boolean"
                },
                "ssl_cert_file": {
                    "type": "string"
                },
                "ssl_key_file": {
                    "type": "string"
                },
                "temp": {
                    "type": "number"
                },
                "tensor_split": {
                    "type": "string"
                },
                "threads": {
                    "type": "integer"
                },
                "threads_batch": {
                    "type": "integer"
                },
                "threads_http": {
                    "type": "integer"
                },
                "timeout": {
                    "type": "integer"
                },
                "top_k": {
                    "type": "integer"
                },
                "top_p": {
                    "type": "number"
                },
                "tts_use_guide_tokens": {
                    "type": "boolean"
                },
                "typical": {
                    "type": "number"
                },
                "ubatch_size": {
                    "type": "integer"
                },
                "verbose": {
                    "type": "boolean"
                },
                "verbose_prompt": {
                    "description": "Common params",
                    "type": "boolean"
                },
                "verbosity": {
                    "type": "integer"
                },
                "xtc_probability": {
                    "type": "number"
                },
                "xtc_threshold": {
                    "type": "number"
                },
                "yarn_attn_factor": {
                    "type": "number"
                },
                "yarn_beta_fast": {
                    "type": "number"
                },
                "yarn_beta_slow": {
                    "type": "number"
                },
                "yarn_ext_factor": {
                    "type": "number"
                },
                "yarn_orig_ctx": {
                    "description": "seconds",
                    "type": "integer"
                }
            }
@@ -1257,6 +934,14 @@
                    "type": "string"
                }
            }
        },
        "server.ParseCommandRequest": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string"
                }
            }
        }
    }
}

apidocs/swagger.yaml

@@ -1,352 +1,35 @@
basePath: /api/v1
definitions:
  backends.BackendType:
    enum:
    - llama_cpp
    - mlx_lm
    - vllm
    type: string
    x-enum-varnames:
    - BackendTypeLlamaCpp
    - BackendTypeMlxLm
    - BackendTypeVllm
  instance.CreateInstanceOptions:
    properties:
      alias:
        type: string
      api_key:
        type: string
      api_key_file:
        type: string
      auto_restart:
        description: Auto restart
        type: boolean
      batch_size:
        type: integer
      cache_reuse:
        type: integer
      cache_type_k:
        type: string
      cache_type_k_draft:
        type: string
      cache_type_v:
        type: string
      cache_type_v_draft:
        type: string
      chat_template:
        type: string
      chat_template_file:
        type: string
      chat_template_kwargs:
        type: string
      check_tensors:
        type: boolean
      cont_batching:
        type: boolean
      control_vector:
        items:
          type: string
        type: array
      control_vector_layer_range:
        type: string
      control_vector_scaled:
        items:
          type: string
        type: array
      cpu_mask:
        type: string
      cpu_mask_batch:
        type: string
      cpu_range:
        type: string
      cpu_range_batch:
        type: string
      cpu_strict:
        type: integer
      cpu_strict_batch:
        type: integer
      ctx_size:
        type: integer
      ctx_size_draft:
        type: integer
      defrag_thold:
        type: number
      device:
        type: string
      device_draft:
        type: string
      draft_max:
        type: integer
      draft_min:
        type: integer
      draft_p_min:
        type: number
      dry_allowed_length:
        type: integer
      dry_base:
        type: number
      dry_multiplier:
        type: number
      dry_penalty_last_n:
        type: integer
      dry_sequence_breaker:
        items:
          type: string
        type: array
      dump_kv_cache:
        type: boolean
      dynatemp_exp:
        type: number
      dynatemp_range:
        type: number
      embd_bge_small_en_default:
        description: Default model params
        type: boolean
      embd_e5_small_en_default:
        type: boolean
      embd_gte_small_default:
        type: boolean
      embedding:
        type: boolean
      escape:
        type: boolean
      fim_qwen_1_5b_default:
        type: boolean
      fim_qwen_3b_default:
        type: boolean
      fim_qwen_7b_default:
        type: boolean
      fim_qwen_7b_spec:
        type: boolean
      fim_qwen_14b_spec:
        type: boolean
      flash_attn:
        type: boolean
      frequency_penalty:
        type: number
      gpu_layers:
        type: integer
      gpu_layers_draft:
        type: integer
      grammar:
        type: string
      grammar_file:
        type: string
      hf_file:
        type: string
      hf_file_v:
        type: string
      hf_repo:
        type: string
      hf_repo_draft:
        type: string
      hf_repo_v:
        type: string
      hf_token:
        type: string
      host:
        type: string
      backend_options:
        additionalProperties: {}
        type: object
      backend_type:
        $ref: '#/definitions/backends.BackendType'
      idle_timeout:
        description: Idle timeout
        type: integer
      ignore_eos:
        type: boolean
      jinja:
        type: boolean
      json_schema:
        type: string
      json_schema_file:
        type: string
      keep:
        type: integer
      log_colors:
        type: boolean
      log_disable:
        type: boolean
      log_file:
        type: string
      log_prefix:
        type: boolean
      log_timestamps:
        type: boolean
      logit_bias:
        items:
          type: string
        type: array
      lora:
        items:
          type: string
        type: array
      lora_init_without_apply:
        type: boolean
      lora_scaled:
        items:
          type: string
        type: array
      main_gpu:
        type: integer
      max_restarts:
        type: integer
      metrics:
        type: boolean
      min_p:
        type: number
      mirostat:
        type: integer
      mirostat_ent:
        type: number
      mirostat_lr:
        type: number
      mlock:
        type: boolean
      mmproj:
        type: string
      mmproj_url:
        type: string
      model:
        type: string
      model_draft:
        type: string
      model_url:
        type: string
      model_vocoder:
        description: Audio/TTS params
        type: string
      no_cont_batching:
        type: boolean
      no_context_shift:
        description: Example-specific params
        type: boolean
      no_escape:
        type: boolean
      no_kv_offload:
        type: boolean
      no_mmap:
        type: boolean
      no_mmproj:
        type: boolean
      no_mmproj_offload:
        type: boolean
      no_perf:
        type: boolean
      no_prefill_assistant:
        type: boolean
      no_slots:
        type: boolean
      no_warmup:
        type: boolean
      no_webui:
        type: boolean
      numa:
        type: string
      on_demand_start:
        description: On demand start
        type: boolean
      override_kv:
        items:
          type: string
        type: array
      override_tensor:
        items:
          type: string
        type: array
      parallel:
        type: integer
      path:
        type: string
      poll:
        type: integer
      poll_batch:
        type: integer
      pooling:
        type: string
      port:
        type: integer
      predict:
        type: integer
      presence_penalty:
        type: number
      prio:
        type: integer
      prio_batch:
        type: integer
      props:
        type: boolean
      reasoning_budget:
        type: integer
      reasoning_format:
        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
        type: number
      reranking:
        type: boolean
      restart_delay:
        type: integer
      rope_freq_base:
        type: number
      rope_freq_scale:
        type: number
      rope_scale:
        type: number
      rope_scaling:
        type: string
      samplers:
        description: Sampling params
        type: string
      sampling_seq:
        type: string
      seed:
        type: integer
      slot_prompt_similarity:
        type: number
      slot_save_path:
        type: string
      slots:
        type: boolean
      special:
        type: boolean
      split_mode:
        type: string
      spm_infill:
        type: boolean
      ssl_cert_file:
        type: string
      ssl_key_file:
        type: string
      temp:
        type: number
      tensor_split:
        type: string
      threads:
        type: integer
      threads_batch:
        type: integer
      threads_http:
        type: integer
      timeout:
        type: integer
      top_k:
        type: integer
      top_p:
        type: number
      tts_use_guide_tokens:
        type: boolean
      typical:
        type: number
      ubatch_size:
        type: integer
      verbose:
        type: boolean
      verbose_prompt:
        description: Common params
        type: boolean
      verbosity:
        type: integer
      xtc_probability:
        type: number
      xtc_threshold:
        type: number
      yarn_attn_factor:
        type: number
      yarn_beta_fast:
        type: number
      yarn_beta_slow:
        type: number
      yarn_ext_factor:
        type: number
      yarn_orig_ctx:
        description: seconds
        type: integer
    type: object
  instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
      object:
        type: string
    type: object
  server.ParseCommandRequest:
    properties:
      command:
        type: string
    type: object
info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,102 @@ info:
  title: llamactl API
  version: "1.0"
paths:
  /backends/llama-cpp/parse-command:
    post:
      consumes:
      - application/json
      description: Parses a llama-server command string into instance options
      parameters:
      - description: Command to parse
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/server.ParseCommandRequest'
      produces:
      - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
        "500":
          description: Internal Server Error
          schema:
            additionalProperties:
              type: string
            type: object
      security:
      - ApiKeyAuth: []
      summary: Parse llama-server command
      tags:
      - backends
  /backends/mlx/parse-command:
    post:
      consumes:
      - application/json
      description: Parses MLX-LM server command string into instance options
      parameters:
      - description: Command to parse
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/server.ParseCommandRequest'
      produces:
      - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
      - ApiKeyAuth: []
      summary: Parse mlx_lm.server command
      tags:
      - backends
  /backends/vllm/parse-command:
    post:
      consumes:
      - application/json
      description: Parses a vLLM serve command string into instance options
      parameters:
      - description: Command to parse
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/server.ParseCommandRequest'
      produces:
      - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
      - ApiKeyAuth: []
      summary: Parse vllm serve command
      tags:
      - backends
  /instances:
    get:
      description: Returns a list of all instances managed by the server

@@ -22,6 +22,7 @@ server:
backends:
  llama_executable: llama-server    # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server  # Path to mlx_lm.server executable
  vllm_executable: vllm             # Path to vllm executable

instances:
  port_range: [8000, 9000]  # Port range for instances
@@ -94,11 +95,13 @@ server:
backends:
  llama_executable: "llama-server"    # Path to llama-server executable (default: "llama-server")
  mlx_lm_executable: "mlx_lm.server"  # Path to mlx_lm.server executable (default: "mlx_lm.server")
  vllm_executable: "vllm"             # Path to vllm executable (default: "vllm")
```

**Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable

### Instance Configuration

@@ -37,6 +37,22 @@ pip install mlx-lm

Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)

**For vLLM backend:**

vLLM provides high-throughput distributed serving for LLMs. Install vLLM:

```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm

# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm

# For production deployments, consider container-based installation
```

## Installation Methods

### Option 1: Download Binary (Recommended)

@@ -29,8 +29,9 @@ You should see the Llamactl web interface.
1. Click the "Add Instance" button
2. Fill in the instance configuration:
   - **Name**: Give your instance a descriptive name
   - **Model Path**: Path to your Llama.cpp model file
   - **Additional Options**: Any extra Llama.cpp parameters
   - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
   - **Model**: Model path or identifier for your chosen backend
   - **Additional Options**: Backend-specific parameters

3. Click "Create Instance"

@@ -43,17 +44,46 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed

## Example Configuration
## Example Configurations

Here's a basic example configuration for a Llama 2 model:
Here are basic example configurations for each backend:

**llama.cpp backend:**
```json
{
  "name": "llama2-7b",
  "model_path": "/path/to/llama-2-7b-chat.gguf",
  "options": {
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/llama-2-7b-chat.gguf",
    "threads": 4,
    "context_size": 2048
    "ctx_size": 2048,
    "gpu_layers": 32
  }
}
```

**MLX backend (macOS only):**
```json
{
  "name": "mistral-mlx",
  "backend_type": "mlx_lm",
  "backend_options": {
    "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
    "temp": 0.7,
    "max_tokens": 2048
  }
}
```

**vLLM backend:**
```json
{
  "name": "dialogpt-vllm",
  "backend_type": "vllm",
  "backend_options": {
    "model": "microsoft/DialoGPT-medium",
    "tensor_parallel_size": 2,
    "gpu_memory_utilization": 0.9
  }
}
```
@@ -66,12 +96,14 @@ You can also manage instances via the REST API:
# List all instances
curl http://localhost:8080/api/instances

# Create a new instance
curl -X POST http://localhost:8080/api/instances \
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{
    "name": "my-model",
    "model_path": "/path/to/model.gguf",
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'

# Start an instance

@@ -170,7 +170,7 @@ POST /api/v1/instances/{name}/start
```json
{
  "name": "llama2-7b",
  "status": "starting",
  "status": "running",
  "created": 1705312200
}
```
@@ -191,7 +191,7 @@ POST /api/v1/instances/{name}/stop
```json
{
  "name": "llama2-7b",
  "status": "stopping",
  "status": "stopped",
  "created": 1705312200
}
```
@@ -208,7 +208,7 @@ POST /api/v1/instances/{name}/restart
```json
{
  "name": "llama2-7b",
  "status": "restarting",
  "status": "running",
  "created": 1705312200
}
```
@@ -401,6 +401,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
  }'
```

## Backend-Specific Endpoints

### Parse Commands

Llamactl provides endpoints to parse command strings from different backends into instance configuration options.

#### Parse Llama.cpp Command

Parse a llama-server command string into instance options.

```http
POST /api/v1/backends/llama-cpp/parse-command
```

**Request Body:**
```json
{
  "command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "llama_cpp",
  "llama_server_options": {
    "model": "/path/to/model.gguf",
    "ctx_size": 2048,
    "port": 8080
  }
}
```

#### Parse MLX-LM Command

Parse an MLX-LM server command string into instance options.

```http
POST /api/v1/backends/mlx/parse-command
```

**Request Body:**
```json
{
  "command": "mlx_lm.server --model /path/to/model --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "mlx_lm",
  "mlx_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
}
```

#### Parse vLLM Command

Parse a vLLM serve command string into instance options.

```http
POST /api/v1/backends/vllm/parse-command
```

**Request Body:**
```json
{
  "command": "vllm serve /path/to/model --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "vllm",
  "vllm_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
}
```

**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
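
For clients that would rather not shell out to curl, these parse endpoints are plain JSON POSTs that work from any HTTP library. A minimal Go sketch of the vLLM variant; the server address and the `LLAMACTL_API_KEY` environment variable are illustrative assumptions, not fixed by the API:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	// Build the server.ParseCommandRequest body defined in the swagger schema.
	body, _ := json.Marshal(map[string]string{
		"command": "vllm serve /path/to/model --port 8080",
	})

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/v1/backends/vllm/parse-command", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// Management API key; variable name and value are deployment-specific assumptions.
	req.Header.Set("Authorization", "Bearer "+os.Getenv("LLAMACTL_API_KEY"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// 200 returns instance.CreateInstanceOptions; 400/500 return a string map.
	var parsed map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
		panic(err)
	}
	fmt.Printf("HTTP %d: %v\n", resp.StatusCode, parsed)
}
```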

## Auto-Generated Documentation

The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:

1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`

## Swagger Documentation

If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:

@@ -1,6 +1,6 @@
# Managing Instances

Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API.
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.

## Overview

@@ -42,9 +42,11 @@ Each instance is displayed as a card showing:
3. **Choose Backend Type**:
   - **llama.cpp**: For GGUF models using llama-server
   - **MLX**: For MLX-optimized models (macOS only)
   - **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
   - **For llama.cpp**: GGUF model path or HuggingFace repo
   - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
   - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
   - **Auto Restart**: Automatically restart instance on failure
   - **Max Restarts**: Maximum number of restart attempts
@@ -54,6 +56,7 @@ Each instance is displayed as a card showing:
6. Configure backend-specific options:
   - **llama.cpp**: Threads, context size, GPU layers, port, etc.
   - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
   - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance

### Via API
@@ -87,6 +90,20 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
    "max_restarts": 3
  }'

# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "vllm",
    "backend_options": {
      "model": "microsoft/DialoGPT-medium",
      "tensor_parallel_size": 2,
      "gpu_memory_utilization": 0.9
    },
    "auto_restart": true,
    "on_demand_start": true
  }'

# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
  -H "Content-Type: application/json" \
@@ -179,16 +196,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}

## Instance Proxy

Llamactl proxies all requests to the underlying backend instances (llama-server or MLX).
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).

```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```

Both backends provide OpenAI-compatible endpoints. Check the respective documentation:
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)

### Instance Health

@@ -5,5 +5,6 @@ type BackendType string
const (
	BackendTypeLlamaCpp BackendType = "llama_cpp"
	BackendTypeMlxLm    BackendType = "mlx_lm"
	BackendTypeVllm     BackendType = "vllm"
	// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
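
A sketch of how a caller might dispatch on these constants. The helper below is hypothetical (it is not part of pkg/backends); the executable names simply mirror the configuration defaults documented above:

```go
// defaultExecutable is an illustrative helper, not part of the package:
// it maps a BackendType to the default executable from the config docs.
func defaultExecutable(t BackendType) (string, bool) {
	switch t {
	case BackendTypeLlamaCpp:
		return "llama-server", true
	case BackendTypeMlxLm:
		return "mlx_lm.server", true
	case BackendTypeVllm:
		return "vllm", true
	default:
		return "", false
	}
}
```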

pkg/backends/builder.go (new file, 70 lines)
@@ -0,0 +1,70 @@
package backends

import (
	"reflect"
	"strconv"
	"strings"
)

// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
	var args []string

	v := reflect.ValueOf(options).Elem()
	t := v.Type()

	for i := 0; i < v.NumField(); i++ {
		field := v.Field(i)
		fieldType := t.Field(i)

		if !field.CanInterface() {
			continue
		}

		jsonTag := fieldType.Tag.Get("json")
		if jsonTag == "" || jsonTag == "-" {
			continue
		}

		// Get flag name from JSON tag
		flagName := strings.Split(jsonTag, ",")[0]
		flagName = strings.ReplaceAll(flagName, "_", "-")

		switch field.Kind() {
		case reflect.Bool:
			if field.Bool() {
				args = append(args, "--"+flagName)
			}
		case reflect.Int:
			if field.Int() != 0 {
				args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
			}
		case reflect.Float64:
			if field.Float() != 0 {
				args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
			}
		case reflect.String:
			if field.String() != "" {
				args = append(args, "--"+flagName, field.String())
			}
		case reflect.Slice:
			if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
				if multipleFlags[flagName] {
					// Multiple flags: --flag value1 --flag value2
					for j := 0; j < field.Len(); j++ {
						args = append(args, "--"+flagName, field.Index(j).String())
					}
				} else {
					// Comma-separated: --flag value1,value2
					var values []string
					for j := 0; j < field.Len(); j++ {
						values = append(values, field.Index(j).String())
					}
					args = append(args, "--"+flagName, strings.Join(values, ","))
				}
			}
		}
	}

	return args
}
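
A usage sketch for the new helper. The options struct here is hypothetical (real callers pass backend option structs such as LlamaServerOptions); it shows the three behaviors the reflection loop implements: zero values are skipped, json tag names become kebab-case flags, and slices are either repeated or comma-joined depending on multipleFlags:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// exampleOptions is illustrative only; any struct with json tags works.
type exampleOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &exampleOptions{
		Model:     "/models/llama-2-7b.gguf",
		GPULayers: 32,
		Verbose:   true,
		Lora:      []string{"a.bin", "b.bin"},
	}
	// Marking "lora" as multi-valued repeats the flag instead of joining values.
	args := backends.BuildCommandArgs(opts, map[string]bool{"lora": true})
	fmt.Println(args)
	// [--model /models/llama-2-7b.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]
}
```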
@@ -2,9 +2,9 @@ package llamacpp

import (
    "encoding/json"
    "llamactl/pkg/backends"
    "reflect"
    "strconv"
    "strings"
)

type LlamaServerOptions struct {
@@ -315,62 +315,44 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {

// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    var args []string
    // Llama uses multiple flags for arrays by default (not comma-separated)
    multipleFlags := map[string]bool{
        "override-tensor":       true,
        "override-kv":           true,
        "lora":                  true,
        "lora-scaled":           true,
        "control-vector":        true,
        "control-vector-scaled": true,
        "dry-sequence-breaker":  true,
        "logit-bias":            true,
    }
    return backends.BuildCommandArgs(o, multipleFlags)
}

    v := reflect.ValueOf(o).Elem()
    t := v.Type()

    for i := 0; i < v.NumField(); i++ {
        field := v.Field(i)
        fieldType := t.Field(i)

        // Skip unexported fields
        if !field.CanInterface() {
            continue
        }

        // Get the JSON tag to determine the flag name
        jsonTag := fieldType.Tag.Get("json")
        if jsonTag == "" || jsonTag == "-" {
            continue
        }

        // Remove ",omitempty" from the tag
        flagName := jsonTag
        if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
            flagName = jsonTag[:commaIndex]
        }

        // Convert snake_case to kebab-case for CLI flags
        flagName = strings.ReplaceAll(flagName, "_", "-")

        // Add the appropriate arguments based on field type and value
        switch field.Kind() {
        case reflect.Bool:
            if field.Bool() {
                args = append(args, "--"+flagName)
            }
        case reflect.Int:
            if field.Int() != 0 {
                args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
            }
        case reflect.Float64:
            if field.Float() != 0 {
                args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
            }
        case reflect.String:
            if field.String() != "" {
                args = append(args, "--"+flagName, field.String())
            }
        case reflect.Slice:
            if field.Type().Elem().Kind() == reflect.String {
                // Handle []string fields
                for j := 0; j < field.Len(); j++ {
                    args = append(args, "--"+flagName, field.Index(j).String())
                }
            }
        }
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    executableNames := []string{"llama-server"}
    var subcommandNames []string // Llama has no subcommands
    multiValuedFlags := map[string]bool{
        "override_tensor":       true,
        "override_kv":           true,
        "lora":                  true,
        "lora_scaled":           true,
        "control_vector":        true,
        "control_vector_scaled": true,
        "dry_sequence_breaker":  true,
        "logit_bias":            true,
    }

    return args
    var llamaOptions LlamaServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
        return nil, err
    }

    return &llamaOptions, nil
}

@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
    }
}

func TestParseLlamaCommand(t *testing.T) {
    tests := []struct {
        name      string
        command   string
        expectErr bool
    }{
        {
            name:      "basic command",
            command:   "llama-server --model /path/to/model.gguf --gpu-layers 32",
            expectErr: false,
        },
        {
            name:      "args only",
            command:   "--model /path/to/model.gguf --ctx-size 4096",
            expectErr: false,
        },
        {
            name:      "mixed flag formats",
            command:   "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
            expectErr: false,
        },
        {
            name:      "quoted strings",
            command:   `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
            expectErr: false,
        },
        {
            name:      "empty command",
            command:   "",
            expectErr: true,
        },
        {
            name:      "unterminated quote",
            command:   `llama-server --model test.gguf --api-key "unterminated`,
            expectErr: true,
        },
        {
            name:      "malformed flag",
            command:   "llama-server ---model test.gguf",
            expectErr: true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := llamacpp.ParseLlamaCommand(tt.command)

            if tt.expectErr {
                if err == nil {
                    t.Errorf("expected error but got none")
                }
                return
            }

            if err != nil {
                t.Errorf("unexpected error: %v", err)
                return
            }

            if result == nil {
                t.Errorf("expected result but got nil")
            }
        })
    }
}

func TestParseLlamaCommandValues(t *testing.T) {
    command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
    result, err := llamacpp.ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/test/model.gguf" {
        t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
    }

    if result.GPULayers != 32 {
        t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
    }

    if result.Temperature != 0.7 {
        t.Errorf("expected temperature 0.7, got %f", result.Temperature)
    }

    if !result.Verbose {
        t.Errorf("expected verbose to be true")
    }

    if !result.NoMmap {
        t.Errorf("expected no_mmap to be true")
    }
}

func TestParseLlamaCommandArrays(t *testing.T) {
    command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
    result, err := llamacpp.ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if len(result.Lora) != 2 {
        t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
    }

    expected := []string{"adapter1.bin", "adapter2.bin"}
    for i, v := range expected {
        if result.Lora[i] != v {
            t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
        }
    }
}

// Helper functions
func contains(slice []string, item string) bool {
    return slices.Contains(slice, item)

@@ -1,286 +0,0 @@
package llamacpp

import (
    "encoding/json"
    "errors"
    "fmt"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
)

// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    // 1. Normalize the command - handle multiline with backslashes
    trimmed := normalizeMultilineCommand(command)
    if trimmed == "" {
        return nil, fmt.Errorf("command cannot be empty")
    }

    // 2. Extract arguments from command
    args, err := extractArgumentsFromCommand(trimmed)
    if err != nil {
        return nil, err
    }

    // 3. Parse arguments into map
    options := make(map[string]any)

    // Known multi-valued flags (snake_case form)
    multiValued := map[string]struct{}{
        "override_tensor":       {},
        "override_kv":           {},
        "lora":                  {},
        "lora_scaled":           {},
        "control_vector":        {},
        "control_vector_scaled": {},
        "dry_sequence_breaker":  {},
        "logit_bias":            {},
    }

    i := 0
    for i < len(args) {
        arg := args[i]

        if !strings.HasPrefix(arg, "-") { // skip positional / stray values
            i++
            continue
        }

        // Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
        if strings.HasPrefix(arg, "---") {
            return nil, fmt.Errorf("malformed flag: %s", arg)
        }

        // Unified parsing for --flag=value vs --flag value
        var rawFlag, rawValue string
        hasEquals := false
        if strings.Contains(arg, "=") {
            parts := strings.SplitN(arg, "=", 2)
            rawFlag = parts[0]
            rawValue = parts[1] // may be empty string
            hasEquals = true
        } else {
            rawFlag = arg
        }

        flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
        flagName := strings.ReplaceAll(flagCore, "-", "_")

        // Detect value if not in equals form
        valueProvided := hasEquals
        if !hasEquals {
            if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
                rawValue = args[i+1]
                valueProvided = true
            }
        }

        // Determine if multi-valued flag
        _, isMulti := multiValued[flagName]

        // Normalization helper: ensure slice for multi-valued flags
        appendValue := func(valStr string) {
            if existing, ok := options[flagName]; ok {
                // Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
                if slice, ok := existing.([]string); ok {
                    options[flagName] = append(slice, valStr)
                    return
                }
                // Convert scalar to slice
                options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
                return
            }
            // First value
            if isMulti {
                options[flagName] = []string{valStr}
            } else {
                // We'll parse type below for single-valued flags
                options[flagName] = valStr
            }
        }

        if valueProvided {
            // Use raw token for multi-valued flags; else allow typed parsing
            appendValue(rawValue)
            if !isMulti { // convert to typed value if scalar
                if strVal, ok := options[flagName].(string); ok { // still scalar
                    options[flagName] = parseValue(strVal)
                }
            }
            // Advance index: if we consumed a following token as value (non equals form), skip it
            if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
                i += 2
            } else {
                i++
            }
            continue
        }

        // Boolean flag (no value)
        options[flagName] = true
        i++
    }

    // 4. Convert to LlamaServerOptions using existing UnmarshalJSON
    jsonData, err := json.Marshal(options)
    if err != nil {
        return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
    }

    var llamaOptions LlamaServerOptions
    if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
        return nil, fmt.Errorf("failed to parse command options: %w", err)
    }

    // 5. Return LlamaServerOptions
    return &llamaOptions, nil
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
    // Surrounding matching quotes (single or double)
    if l := len(value); l >= 2 {
        if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
            value = value[1 : l-1]
        }
    }

    lower := strings.ToLower(value)
    if lower == "true" {
        return true
    }
    if lower == "false" {
        return false
    }

    if intVal, err := strconv.Atoi(value); err == nil {
        return intVal
    }
    if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
        return floatVal
    }
    return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
    // Handle escaped newlines (backslash followed by newline)
    re := regexp.MustCompile(`\\\s*\n\s*`)
    normalized := re.ReplaceAllString(command, " ")

    // Clean up extra whitespace
    re = regexp.MustCompile(`\s+`)
    normalized = re.ReplaceAllString(normalized, " ")

    return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
    // Split command into tokens respecting quotes
    tokens, err := splitCommandTokens(command)
    if err != nil {
        return nil, err
    }

    if len(tokens) == 0 {
        return nil, fmt.Errorf("no command tokens found")
    }

    // Check if first token looks like an executable
    firstToken := tokens[0]

    // Case 1: Full path to executable (contains path separator or ends with llama-server)
    if strings.Contains(firstToken, string(filepath.Separator)) ||
        strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
        return tokens[1:], nil // Return everything except the executable
    }

    // Case 2: Just "llama-server" command
    if strings.ToLower(firstToken) == "llama-server" {
        return tokens[1:], nil // Return everything except the command
    }

    // Case 3: Arguments only (starts with a flag)
    if strings.HasPrefix(firstToken, "-") {
        return tokens, nil // Return all tokens as arguments
    }

    // Case 4: Unknown format - might be a different executable name
    // Be permissive and assume it's the executable
    return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
    var tokens []string
    var current strings.Builder
    inQuotes := false
    quoteChar := byte(0)
    escaped := false

    for i := 0; i < len(command); i++ {
        c := command[i]

        if escaped {
            current.WriteByte(c)
            escaped = false
            continue
        }

        if c == '\\' {
            escaped = true
            current.WriteByte(c)
            continue
        }

        if !inQuotes && (c == '"' || c == '\'') {
            inQuotes = true
            quoteChar = c
            current.WriteByte(c)
        } else if inQuotes && c == quoteChar {
            inQuotes = false
            quoteChar = 0
            current.WriteByte(c)
        } else if !inQuotes && (c == ' ' || c == '\t') {
            if current.Len() > 0 {
                tokens = append(tokens, current.String())
                current.Reset()
            }
        } else {
            current.WriteByte(c)
        }
    }

    if inQuotes {
        return nil, errors.New("unterminated quoted string")
    }

    if current.Len() > 0 {
        tokens = append(tokens, current.String())
    }

    return tokens, nil
}

// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
    if !strings.HasPrefix(arg, "-") {
        return false
    }

    // Special case: if it's a negative number, treat it as a value
    if _, err := strconv.ParseFloat(arg, 64); err == nil {
        return false
    }

    return true
}
@@ -1,413 +0,0 @@
package llamacpp

import (
    "testing"
)

func TestParseLlamaCommand(t *testing.T) {
    tests := []struct {
        name      string
        command   string
        expectErr bool
    }{
        {
            name:      "basic command with model",
            command:   "llama-server --model /path/to/model.gguf",
            expectErr: false,
        },
        {
            name:      "command with multiple flags",
            command:   "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
            expectErr: false,
        },
        {
            name:      "command with short flags",
            command:   "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
            expectErr: false,
        },
        {
            name:      "command with equals format",
            command:   "llama-server --model=/path/to/model.gguf --gpu-layers=32",
            expectErr: false,
        },
        {
            name:      "command with boolean flags",
            command:   "llama-server --model /path/to/model.gguf --verbose --no-mmap",
            expectErr: false,
        },
        {
            name:      "empty command",
            command:   "",
            expectErr: true,
        },
        {
            name:      "case insensitive command",
            command:   "LLAMA-SERVER --model /path/to/model.gguf",
            expectErr: false,
        },
        // New test cases for improved functionality
        {
            name:      "args only without llama-server",
            command:   "--model /path/to/model.gguf --gpu-layers 32",
            expectErr: false,
        },
        {
            name:      "full path to executable",
            command:   "/usr/local/bin/llama-server --model /path/to/model.gguf",
            expectErr: false,
        },
        {
            name:      "negative number handling",
            command:   "llama-server --gpu-layers -1 --model test.gguf",
            expectErr: false,
        },
        {
            name:      "multiline command with backslashes",
            command:   "llama-server --model /path/to/model.gguf \\\n --ctx-size 4096 \\\n --batch-size 512",
            expectErr: false,
        },
        {
            name:      "quoted string with special characters",
            command:   `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
            expectErr: false,
        },
        {
            name:      "unterminated quoted string",
            command:   `llama-server --model test.gguf --chat-template "unterminated quote`,
            expectErr: true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := ParseLlamaCommand(tt.command)

            if tt.expectErr {
                if err == nil {
                    t.Errorf("expected error but got none")
                }
                return
            }

            if err != nil {
                t.Errorf("unexpected error: %v", err)
                return
            }

            if result == nil {
                t.Errorf("expected result but got nil")
                return
            }
        })
    }
}

func TestParseLlamaCommandSpecificValues(t *testing.T) {
    // Test specific value parsing
    command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/test/model.gguf" {
        t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
    }

    if result.GPULayers != 32 {
        t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
    }

    if result.CtxSize != 4096 {
        t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
    }

    if !result.Verbose {
        t.Errorf("expected verbose to be true, got %v", result.Verbose)
    }
}

func TestParseLlamaCommandArrayFlags(t *testing.T) {
    // Test array flag handling (critical for lora, override-tensor, etc.)
    command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if len(result.Lora) != 2 {
        t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
    }

    if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
        t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
    }
}

func TestParseLlamaCommandMixedFormats(t *testing.T) {
    // Test mixing --flag=value and --flag value formats
    command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/path/model.gguf" {
        t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
    }

    if result.GPULayers != 16 {
        t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
    }

    if result.BatchSize != 512 {
        t.Errorf("expected batch_size 512, got %d", result.BatchSize)
    }

    if !result.Verbose {
        t.Errorf("expected verbose to be true, got %v", result.Verbose)
    }
}

func TestParseLlamaCommandTypeConversion(t *testing.T) {
    // Test that values are converted to appropriate types
    command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Temperature != 0.7 {
        t.Errorf("expected temperature 0.7, got %f", result.Temperature)
    }

    if result.TopK != 40 {
        t.Errorf("expected top_k 40, got %d", result.TopK)
    }

    if !result.NoMmap {
        t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
    }
}

func TestParseLlamaCommandArgsOnly(t *testing.T) {
    // Test parsing arguments without llama-server command
    command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/path/to/model.gguf" {
        t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
    }

    if result.GPULayers != 32 {
        t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
    }

    if result.CtxSize != 4096 {
        t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
    }
}

func TestParseLlamaCommandFullPath(t *testing.T) {
    // Test full path to executable
    command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "test.gguf" {
        t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
    }

    if result.GPULayers != 16 {
        t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
    }
}

func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
    // Test negative number parsing
    command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.GPULayers != -1 {
        t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
    }

    if result.Seed != -12345 {
        t.Errorf("expected seed -12345, got %d", result.Seed)
    }
}

func TestParseLlamaCommandMultiline(t *testing.T) {
    // Test multiline command with backslashes
    command := `llama-server --model /path/to/model.gguf \
        --ctx-size 4096 \
        --batch-size 512 \
        --gpu-layers 32`

    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/path/to/model.gguf" {
        t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
    }

    if result.CtxSize != 4096 {
        t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
    }

    if result.BatchSize != 512 {
        t.Errorf("expected batch_size 512, got %d", result.BatchSize)
    }

    if result.GPULayers != 32 {
        t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
    }
}

func TestParseLlamaCommandQuotedStrings(t *testing.T) {
    // Test quoted strings with special characters
    command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "test.gguf" {
        t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
    }

    if result.APIKey != "sk-1234567890abcdef" {
        t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
    }

    expectedTemplate := "User: {user}\\nAssistant: "
    if result.ChatTemplate != expectedTemplate {
        t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
    }
}

func TestParseLlamaCommandUnslothExample(t *testing.T) {
    // Test with realistic unsloth-style command
    command := `llama-server --model /path/to/model.gguf \
        --ctx-size 4096 \
        --batch-size 512 \
        --gpu-layers -1 \
        --temp 0.7 \
        --repeat-penalty 1.1 \
        --top-k 40 \
        --top-p 0.95 \
        --host 0.0.0.0 \
        --port 8000 \
        --api-key "sk-1234567890abcdef"`

    result, err := ParseLlamaCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    // Verify key fields
    if result.Model != "/path/to/model.gguf" {
        t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
    }

    if result.CtxSize != 4096 {
        t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
    }

    if result.BatchSize != 512 {
        t.Errorf("expected batch_size 512, got %d", result.BatchSize)
    }

    if result.GPULayers != -1 {
        t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
    }

    if result.Temperature != 0.7 {
        t.Errorf("expected temperature 0.7, got %f", result.Temperature)
    }

    if result.RepeatPenalty != 1.1 {
        t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
    }

    if result.TopK != 40 {
        t.Errorf("expected top_k 40, got %d", result.TopK)
    }

    if result.TopP != 0.95 {
        t.Errorf("expected top_p 0.95, got %f", result.TopP)
    }

    if result.Host != "0.0.0.0" {
        t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
    }

    if result.Port != 8000 {
        t.Errorf("expected port 8000, got %d", result.Port)
    }

    if result.APIKey != "sk-1234567890abcdef" {
        t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
    }
}

// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
    cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
    result, err := ParseLlamaCommand(cmd)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    if result.Model != "my model.gguf" {
        t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
    }
    if result.Alias != "Test Alias" {
        t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
    }
}

func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
    // Same multi-value flag using --flag value and --flag=value forms
    cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
    result, err := ParseLlamaCommand(cmd)
    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }
    if len(result.Lora) != 3 {
        t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
    }
    expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
    for i, v := range expected {
        if result.Lora[i] != v {
            t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
        }
    }
}

func TestParseLlamaCommandMalformedFlag(t *testing.T) {
    cmd := "llama-server ---model test.gguf"
    _, err := ParseLlamaCommand(cmd)
    if err == nil {
        t.Fatalf("expected error for malformed flag but got none")
    }
}
@@ -1,16 +1,14 @@
package mlx

import (
    "encoding/json"
    "reflect"
    "strconv"
    "llamactl/pkg/backends"
)

type MlxServerOptions struct {
    // Basic connection options
    Model string `json:"model,omitempty"`
    Host  string `json:"host,omitempty"`
    Port  int    `json:"port,omitempty"`
    Model string `json:"model,omitempty"`
    Host  string `json:"host,omitempty"`
    Port  int    `json:"port,omitempty"`

    // Model and adapter options
    AdapterPath string `json:"adapter_path,omitempty"`
@@ -19,187 +17,40 @@ type MlxServerOptions struct {
    TrustRemoteCode bool `json:"trust_remote_code,omitempty"`

    // Logging and templates
    LogLevel               string `json:"log_level,omitempty"`
    ChatTemplate           string `json:"chat_template,omitempty"`
    UseDefaultChatTemplate bool   `json:"use_default_chat_template,omitempty"`
    ChatTemplateArgs       string `json:"chat_template_args,omitempty"` // JSON string
    LogLevel               string `json:"log_level,omitempty"`
    ChatTemplate           string `json:"chat_template,omitempty"`
    UseDefaultChatTemplate bool   `json:"use_default_chat_template,omitempty"`
    ChatTemplateArgs       string `json:"chat_template_args,omitempty"` // JSON string

    // Sampling defaults
    Temp      float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
    TopP      float64 `json:"top_p,omitempty"`
    TopK      int     `json:"top_k,omitempty"`
    MinP      float64 `json:"min_p,omitempty"`
    MaxTokens int     `json:"max_tokens,omitempty"`
}

// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
    // First unmarshal into a map to handle multiple field names
    var raw map[string]any
    if err := json.Unmarshal(data, &raw); err != nil {
        return err
    }

    // Create a temporary struct for standard unmarshaling
    type tempOptions MlxServerOptions
    temp := tempOptions{}

    // Standard unmarshal first
    if err := json.Unmarshal(data, &temp); err != nil {
        return err
    }

    // Copy to our struct
    *o = MlxServerOptions(temp)

    // Handle alternative field names
    fieldMappings := map[string]string{
        // Basic connection options
        "m":    "model",
        "host": "host",
        "port": "port",
        // "python_path": "python_path", // removed

        // Model and adapter options
        "adapter-path":      "adapter_path",
        "draft-model":       "draft_model",
        "num-draft-tokens":  "num_draft_tokens",
        "trust-remote-code": "trust_remote_code",

        // Logging and templates
        "log-level":                 "log_level",
        "chat-template":             "chat_template",
        "use-default-chat-template": "use_default_chat_template",
        "chat-template-args":        "chat_template_args",

        // Sampling defaults
        "temperature": "temp", // Support both temp and temperature
        "top-p":       "top_p",
        "top-k":       "top_k",
        "min-p":       "min_p",
        "max-tokens":  "max_tokens",
    }

    // Process alternative field names
    for altName, canonicalName := range fieldMappings {
        if value, exists := raw[altName]; exists {
            // Use reflection to set the field value
            v := reflect.ValueOf(o).Elem()
            field := v.FieldByNameFunc(func(fieldName string) bool {
                field, _ := v.Type().FieldByName(fieldName)
                jsonTag := field.Tag.Get("json")
                return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
            })

            if field.IsValid() && field.CanSet() {
                switch field.Kind() {
                case reflect.Int:
                    if intVal, ok := value.(float64); ok {
                        field.SetInt(int64(intVal))
                    } else if strVal, ok := value.(string); ok {
                        if intVal, err := strconv.Atoi(strVal); err == nil {
                            field.SetInt(int64(intVal))
                        }
                    }
                case reflect.Float64:
                    if floatVal, ok := value.(float64); ok {
                        field.SetFloat(floatVal)
                    } else if strVal, ok := value.(string); ok {
                        if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
                            field.SetFloat(floatVal)
                        }
                    }
                case reflect.String:
                    if strVal, ok := value.(string); ok {
                        field.SetString(strVal)
                    }
                case reflect.Bool:
                    if boolVal, ok := value.(bool); ok {
                        field.SetBool(boolVal)
                    }
                }
            }
        }
    }

    return nil
}

// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
    return &MlxServerOptions{
        Host:             "127.0.0.1", // MLX default (different from llama-server)
        Port:             8080,        // MLX default
        NumDraftTokens:   3,           // MLX default for speculative decoding
        LogLevel:         "INFO",      // MLX default
        Temp:             0.0,         // MLX default
        TopP:             1.0,         // MLX default
        TopK:             0,           // MLX default (disabled)
        MinP:             0.0,         // MLX default (disabled)
        MaxTokens:        512,         // MLX default
        ChatTemplateArgs: "{}",        // MLX default (empty JSON object)
    }
    Temp      float64 `json:"temp,omitempty"`
    TopP      float64 `json:"top_p,omitempty"`
    TopK      int     `json:"top_k,omitempty"`
    MinP      float64 `json:"min_p,omitempty"`
    MaxTokens int     `json:"max_tokens,omitempty"`
}

// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
    var args []string

    // Required and basic options
    if o.Model != "" {
        args = append(args, "--model", o.Model)
    }
    if o.Host != "" {
        args = append(args, "--host", o.Host)
    }
    if o.Port != 0 {
        args = append(args, "--port", strconv.Itoa(o.Port))
    }

    // Model and adapter options
    if o.AdapterPath != "" {
        args = append(args, "--adapter-path", o.AdapterPath)
    }
    if o.DraftModel != "" {
        args = append(args, "--draft-model", o.DraftModel)
    }
    if o.NumDraftTokens != 0 {
        args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
    }
    if o.TrustRemoteCode {
        args = append(args, "--trust-remote-code")
    }

    // Logging and templates
    if o.LogLevel != "" {
        args = append(args, "--log-level", o.LogLevel)
    }
    if o.ChatTemplate != "" {
        args = append(args, "--chat-template", o.ChatTemplate)
    }
    if o.UseDefaultChatTemplate {
        args = append(args, "--use-default-chat-template")
    }
    if o.ChatTemplateArgs != "" {
        args = append(args, "--chat-template-args", o.ChatTemplateArgs)
    }

    // Sampling defaults
    if o.Temp != 0 {
        args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
    }
    if o.TopP != 0 {
        args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
    }
    if o.TopK != 0 {
        args = append(args, "--top-k", strconv.Itoa(o.TopK))
    }
    if o.MinP != 0 {
        args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
    }
    if o.MaxTokens != 0 {
        args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
    }

    return args
    multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
    return backends.BuildCommandArgs(o, multipleFlags)
}

// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
    executableNames := []string{"mlx_lm.server"}
    var subcommandNames []string              // MLX has no subcommands
    multiValuedFlags := map[string]bool{}     // MLX has no multi-valued flags

    var mlxOptions MlxServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
        return nil, err
    }

    return &mlxOptions, nil
}
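
A small usage sketch of the parse-then-rebuild round trip these helpers enable; the model path below is hypothetical, and the printed output is what the field-order-driven builder would be expected to produce:

```go
package main

import (
    "fmt"
    "log"

    "llamactl/pkg/backends/mlx"
)

func main() {
    // Hypothetical command string, for illustration only
    opts, err := mlx.ParseMlxCommand("mlx_lm.server --model /tmp/model --temp 0.7")
    if err != nil {
        log.Fatal(err)
    }
    // Rebuild CLI args from the parsed options
    fmt.Println(opts.BuildCommandArgs()) // [--model /tmp/model --temp 0.7]
}
```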
157  pkg/backends/mlx/mlx_test.go  Normal file
@@ -0,0 +1,157 @@
package mlx_test

import (
    "llamactl/pkg/backends/mlx"
    "testing"
)

func TestParseMlxCommand(t *testing.T) {
    tests := []struct {
        name      string
        command   string
        expectErr bool
    }{
        {
            name:      "basic command",
            command:   "mlx_lm.server --model /path/to/model --host 0.0.0.0",
            expectErr: false,
        },
        {
            name:      "args only",
            command:   "--model /path/to/model --port 8080",
            expectErr: false,
        },
        {
            name:      "mixed flag formats",
            command:   "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
            expectErr: false,
        },
        {
            name:      "quoted strings",
            command:   `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
            expectErr: false,
        },
        {
            name:      "empty command",
            command:   "",
            expectErr: true,
        },
        {
            name:      "unterminated quote",
            command:   `mlx_lm.server --model test.mlx --chat-template "unterminated`,
            expectErr: true,
        },
        {
            name:      "malformed flag",
            command:   "mlx_lm.server ---model test.mlx",
            expectErr: true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := mlx.ParseMlxCommand(tt.command)

            if tt.expectErr {
                if err == nil {
                    t.Errorf("expected error but got none")
                }
                return
            }

            if err != nil {
                t.Errorf("unexpected error: %v", err)
                return
            }

            if result == nil {
                t.Errorf("expected result but got nil")
            }
        })
    }
}

func TestParseMlxCommandValues(t *testing.T) {
    command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
    result, err := mlx.ParseMlxCommand(command)

    if err != nil {
        t.Fatalf("unexpected error: %v", err)
    }

    if result.Model != "/test/model.mlx" {
        t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
    }

    if result.Port != 8080 {
        t.Errorf("expected port 8080, got %d", result.Port)
    }

    if result.Temp != 0.7 {
        t.Errorf("expected temp 0.7, got %f", result.Temp)
    }

    if !result.TrustRemoteCode {
        t.Errorf("expected trust_remote_code to be true")
    }

    if result.LogLevel != "DEBUG" {
        t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
    }
}

func TestBuildCommandArgs(t *testing.T) {
    options := &mlx.MlxServerOptions{
        Model:           "/test/model.mlx",
        Host:            "127.0.0.1",
        Port:            8080,
        Temp:            0.7,
        TopP:            0.9,
        TopK:            40,
        MaxTokens:       2048,
        TrustRemoteCode: true,
        LogLevel:        "DEBUG",
        ChatTemplate:    "custom template",
    }

    args := options.BuildCommandArgs()

    // Check that all expected flags are present
    expectedFlags := map[string]string{
        "--model":         "/test/model.mlx",
        "--host":          "127.0.0.1",
        "--port":          "8080",
        "--log-level":     "DEBUG",
        "--chat-template": "custom template",
        "--temp":          "0.7",
        "--top-p":         "0.9",
        "--top-k":         "40",
        "--max-tokens":    "2048",
    }

    for i := 0; i < len(args); i++ {
        if args[i] == "--trust-remote-code" {
            continue // Boolean flag with no value
        }
        if args[i] == "--use-default-chat-template" {
            continue // Boolean flag with no value
        }

        if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
            if args[i+1] != expectedValue {
                t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
            }
        }
    }

    // Check boolean flags
    foundTrustRemoteCode := false
    for _, arg := range args {
        if arg == "--trust-remote-code" {
            foundTrustRemoteCode = true
        }
    }
    if !foundTrustRemoteCode {
        t.Errorf("expected --trust-remote-code flag to be present")
    }
}
@@ -1,254 +0,0 @@
package mlx

import (
    "encoding/json"
    "fmt"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
)

// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
    // 1. Normalize the command - handle multiline with backslashes
    trimmed := normalizeMultilineCommand(command)
    if trimmed == "" {
        return nil, fmt.Errorf("command cannot be empty")
    }

    // 2. Extract arguments from command
    args, err := extractArgumentsFromCommand(trimmed)
    if err != nil {
        return nil, err
    }

    // 3. Parse arguments into map
    options := make(map[string]any)

    i := 0
    for i < len(args) {
        arg := args[i]

        if !strings.HasPrefix(arg, "-") { // skip positional / stray values
            i++
            continue
        }

        // Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
        if strings.HasPrefix(arg, "---") {
            return nil, fmt.Errorf("malformed flag: %s", arg)
        }

        // Unified parsing for --flag=value vs --flag value
        var rawFlag, rawValue string
        hasEquals := false
        if strings.Contains(arg, "=") {
            parts := strings.SplitN(arg, "=", 2)
            rawFlag = parts[0]
            rawValue = parts[1] // may be empty string
            hasEquals = true
        } else {
            rawFlag = arg
        }

        flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
        flagName := strings.ReplaceAll(flagCore, "-", "_")

        // Detect value if not in equals form
        valueProvided := hasEquals
        if !hasEquals {
            if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
                rawValue = args[i+1]
                valueProvided = true
            }
        }

        if valueProvided {
            // MLX-specific validation for certain flags
            if flagName == "log_level" && !isValidLogLevel(rawValue) {
                return nil, fmt.Errorf("invalid log level: %s", rawValue)
            }

            options[flagName] = parseValue(rawValue)

            // Advance index: if we consumed a following token as value (non equals form), skip it
            if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
                i += 2
            } else {
                i++
            }
            continue
        }

        // Boolean flag (no value) - MLX specific boolean flags
        if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
            options[flagName] = true
        } else {
            options[flagName] = true
        }
        i++
    }

    // 4. Convert to MlxServerOptions using existing UnmarshalJSON
    jsonData, err := json.Marshal(options)
    if err != nil {
        return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
    }

    var mlxOptions MlxServerOptions
    if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
        return nil, fmt.Errorf("failed to parse command options: %w", err)
    }

    // 5. Return MlxServerOptions
    return &mlxOptions, nil
}

// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
    validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
    for _, valid := range validLevels {
        if level == valid {
            return true
        }
    }
    return false
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
    // Surrounding matching quotes (single or double)
    if l := len(value); l >= 2 {
        if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
            value = value[1 : l-1]
        }
    }

    lower := strings.ToLower(value)
    if lower == "true" {
        return true
    }
    if lower == "false" {
        return false
    }

    if intVal, err := strconv.Atoi(value); err == nil {
        return intVal
    }
    if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
        return floatVal
    }
    return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
    // Handle escaped newlines (backslash followed by newline)
    re := regexp.MustCompile(`\\\s*\n\s*`)
    normalized := re.ReplaceAllString(command, " ")

    // Clean up extra whitespace
    re = regexp.MustCompile(`\s+`)
    normalized = re.ReplaceAllString(normalized, " ")

    return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
    // Split command into tokens respecting quotes
    tokens, err := splitCommandTokens(command)
    if err != nil {
        return nil, err
    }

    if len(tokens) == 0 {
        return nil, fmt.Errorf("no command tokens found")
    }

    // Check if first token looks like an executable
    firstToken := tokens[0]

    // Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
    if strings.Contains(firstToken, string(filepath.Separator)) ||
        strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
        return tokens[1:], nil // Return everything except the executable
    }

    // Case 2: Just "mlx_lm.server" command
    if strings.ToLower(firstToken) == "mlx_lm.server" {
        return tokens[1:], nil // Return everything except the command
    }

    // Case 3: Arguments only (starts with a flag)
    if strings.HasPrefix(firstToken, "-") {
        return tokens, nil // Return all tokens as arguments
    }

    // Case 4: Unknown format - might be a different executable name
    // Be permissive and assume it's the executable
    return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
    var tokens []string
    var current strings.Builder
    inQuotes := false
    quoteChar := byte(0)
    escaped := false

    for i := 0; i < len(command); i++ {
        c := command[i]

        if escaped {
            current.WriteByte(c)
            escaped = false
            continue
        }

        if c == '\\' {
            escaped = true
            current.WriteByte(c)
            continue
        }

        if !inQuotes && (c == '"' || c == '\'') {
            inQuotes = true
            quoteChar = c
            current.WriteByte(c)
        } else if inQuotes && c == quoteChar {
            inQuotes = false
            quoteChar = 0
            current.WriteByte(c)
        } else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
            if current.Len() > 0 {
                tokens = append(tokens, current.String())
                current.Reset()
            }
        } else {
            current.WriteByte(c)
        }
    }

    if inQuotes {
        return nil, fmt.Errorf("unclosed quote in command")
    }

    if current.Len() > 0 {
        tokens = append(tokens, current.String())
    }

    return tokens, nil
}

// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
    return strings.HasPrefix(s, "-")
}
213  pkg/backends/parser.go  Normal file
@@ -0,0 +1,213 @@
package backends

import (
    "encoding/json"
    "fmt"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
)

// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
    // Normalize multiline commands
    command = normalizeCommand(command)
    if command == "" {
        return fmt.Errorf("command cannot be empty")
    }

    // Extract arguments and positional model
    args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
    if err != nil {
        return err
    }

    // Parse flags into map
    options, err := parseFlags(args, multiValuedFlags)
    if err != nil {
        return err
    }

    // If we found a positional model and no --model flag was provided, set the model
    if modelFromPositional != "" {
        if _, hasModelFlag := options["model"]; !hasModelFlag {
            options["model"] = modelFromPositional
        }
    }

    // Convert to target struct via JSON
    jsonData, err := json.Marshal(options)
    if err != nil {
        return fmt.Errorf("failed to marshal options: %w", err)
    }

    if err := json.Unmarshal(jsonData, target); err != nil {
        return fmt.Errorf("failed to unmarshal to target: %w", err)
    }

    return nil
}

// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
    re := regexp.MustCompile(`\\\s*\n\s*`)
    normalized := re.ReplaceAllString(command, " ")
    re = regexp.MustCompile(`\s+`)
    return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}

// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
    // Check for unterminated quotes
    if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
        return nil, "", fmt.Errorf("unterminated quoted string")
    }

    tokens := strings.Fields(command)
    if len(tokens) == 0 {
        return nil, "", fmt.Errorf("no tokens found")
    }

    // Skip executable
    start := 0
    firstToken := tokens[0]

    // Check for executable name (with or without path)
    if strings.Contains(firstToken, string(filepath.Separator)) {
        baseName := filepath.Base(firstToken)
        for _, execName := range executableNames {
            if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
                start = 1
                break
            }
        }
    } else {
        for _, execName := range executableNames {
            if strings.EqualFold(firstToken, execName) {
                start = 1
                break
            }
        }
    }

    // Skip subcommand if present
    if start < len(tokens) {
        for _, subCmd := range subcommandNames {
            if strings.EqualFold(tokens[start], subCmd) {
                start++
                break
            }
        }
    }

    // Handle case where command starts with subcommand (no executable)
    if start == 0 {
        for _, subCmd := range subcommandNames {
            if strings.EqualFold(firstToken, subCmd) {
                start = 1
                break
            }
        }
    }

    args := tokens[start:]

    // Extract first positional argument (model) if present and not a flag
    var modelFromPositional string
    if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
        modelFromPositional = args[0]
        args = args[1:] // Remove the model from args to process remaining flags
    }

    return args, modelFromPositional, nil
}

// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
    options := make(map[string]any)

    for i := 0; i < len(args); i++ {
        arg := args[i]

        if !strings.HasPrefix(arg, "-") {
            continue
        }

        // Check for malformed flags (more than two leading dashes)
        if strings.HasPrefix(arg, "---") {
            return nil, fmt.Errorf("malformed flag: %s", arg)
        }

        // Get flag name and value
        var flagName, value string
        var hasValue bool

        if strings.Contains(arg, "=") {
            parts := strings.SplitN(arg, "=", 2)
            flagName = strings.TrimLeft(parts[0], "-")
            value = parts[1]
            hasValue = true
        } else {
            flagName = strings.TrimLeft(arg, "-")
            if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
                value = args[i+1]
                hasValue = true
                i++ // Skip next arg since we consumed it
            }
        }

        // Convert kebab-case to snake_case for JSON
        flagName = strings.ReplaceAll(flagName, "-", "_")

        if hasValue {
            // Handle multi-valued flags
            if multiValuedFlags[flagName] {
                if existing, ok := options[flagName].([]string); ok {
                    options[flagName] = append(existing, value)
                } else {
                    options[flagName] = []string{value}
                }
            } else {
                options[flagName] = parseValue(value)
            }
        } else {
            // Boolean flag
            options[flagName] = true
        }
    }

    return options, nil
}

// parseValue converts string to appropriate type
func parseValue(value string) any {
    // Remove quotes
    if len(value) >= 2 {
        if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
            value = value[1 : len(value)-1]
        }
    }

    // Try boolean
    switch strings.ToLower(value) {
    case "true":
        return true
    case "false":
        return false
    }

    // Try integer
    if intVal, err := strconv.Atoi(value); err == nil {
        return intVal
    }

    // Try float
    if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
        return floatVal
    }

    // Return as string
    return value
}
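
To see the generic parser end to end, here is a minimal sketch; the `toy-server` executable and the `toyOptions` struct are invented purely for illustration, while `backends.ParseCommand` is the function added above:

```go
package main

import (
    "fmt"
    "log"

    "llamactl/pkg/backends"
)

// toyOptions is a hypothetical target struct for illustration only.
type toyOptions struct {
    Model string `json:"model,omitempty"`
    Port  int    `json:"port,omitempty"`
    Debug bool   `json:"debug,omitempty"`
}

func main() {
    var opts toyOptions
    err := backends.ParseCommand(
        "toy-server --model=/tmp/m --port 9000 --debug",
        []string{"toy-server"}, // executable names to strip
        nil,                    // no subcommands
        map[string]bool{},      // no multi-valued flags
        &opts,
    )
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("%+v\n", opts) // {Model:/tmp/m Port:9000 Debug:true}
}
```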
189  pkg/backends/vllm/vllm.go  Normal file
@@ -0,0 +1,189 @@
|
||||
package vllm
|
||||
|
||||
import (
|
||||
"llamactl/pkg/backends"
|
||||
)
|
||||
|
||||
type VllmServerOptions struct {
|
||||
// Basic connection options (auto-assigned by llamactl)
|
||||
Host string `json:"host,omitempty"`
|
||||
Port int `json:"port,omitempty"`
|
||||
|
||||
// Model and engine configuration
|
||||
Model string `json:"model,omitempty"`
|
||||
Tokenizer string `json:"tokenizer,omitempty"`
|
||||
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
|
||||
Revision string `json:"revision,omitempty"`
|
||||
CodeRevision string `json:"code_revision,omitempty"`
|
||||
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
|
||||
TokenizerMode string `json:"tokenizer_mode,omitempty"`
|
||||
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
|
||||
DownloadDir string `json:"download_dir,omitempty"`
|
||||
LoadFormat string `json:"load_format,omitempty"`
|
||||
ConfigFormat string `json:"config_format,omitempty"`
|
||||
Dtype string `json:"dtype,omitempty"`
|
||||
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
|
||||
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
|
||||
Seed int `json:"seed,omitempty"`
|
||||
MaxModelLen int `json:"max_model_len,omitempty"`
|
||||
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
|
||||
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
|
||||
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
|
||||
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
|
||||
|
||||
// Performance and serving configuration
|
||||
BlockSize int `json:"block_size,omitempty"`
|
||||
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
|
||||
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
|
||||
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
|
||||
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
|
||||
SwapSpace int `json:"swap_space,omitempty"`
|
||||
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
|
||||
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
|
||||
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
|
||||
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
|
||||
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
|
||||
MaxLogprobs int `json:"max_logprobs,omitempty"`
|
||||
DisableLogStats bool `json:"disable_log_stats,omitempty"`
|
||||
Quantization string `json:"quantization,omitempty"`
|
||||
RopeScaling string `json:"rope_scaling,omitempty"`
|
||||
RopeTheta float64 `json:"rope_theta,omitempty"`
|
||||
EnforceEager bool `json:"enforce_eager,omitempty"`
|
||||
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
|
||||
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
|
||||
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
|
||||
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
|
||||
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
|
||||
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
|
||||
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
|
||||
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
|
||||
LoraRank int `json:"lora_rank,omitempty"`
|
||||
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
|
||||
PreemptionMode string `json:"preemption_mode,omitempty"`
|
||||
|
||||
// Distributed and parallel processing
|
||||
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
|
||||
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
|
||||
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
|
||||
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
|
||||
WorkerClass string `json:"worker_class,omitempty"`
|
||||
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
|
||||
MaxLoraRank int `json:"max_lora_rank,omitempty"`
|
||||
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
|
||||
LoraModules string `json:"lora_modules,omitempty"`
|
||||
PromptAdapters string `json:"prompt_adapters,omitempty"`
|
||||
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
|
||||
Device string `json:"device,omitempty"`
|
||||
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
|
||||
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
|
||||
SpeculativeModel string `json:"speculative_model,omitempty"`
|
||||
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
|
||||
SpeculativeRevision string `json:"speculative_revision,omitempty"`
|
||||
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
|
||||
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
|
||||
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
|
||||
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
|
||||
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
|
||||
IgnorePatterns string `json:"ignore_patterns,omitempty"`
|
||||
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
|
||||
|
||||
// OpenAI server specific options
|
||||
UDS string `json:"uds,omitempty"`
|
||||
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
|
||||
ResponseRole string `json:"response_role,omitempty"`
|
||||
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
|
||||
SSLCertfile string `json:"ssl_certfile,omitempty"`
|
||||
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
|
||||
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
|
||||
RootPath string `json:"root_path,omitempty"`
|
||||
Middleware []string `json:"middleware,omitempty"`
|
||||
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
|
||||
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
|
||||
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
|
||||
ToolCallParser string `json:"tool_call_parser,omitempty"`
|
||||
ToolServer string `json:"tool_server,omitempty"`
|
||||
ChatTemplate string `json:"chat_template,omitempty"`
|
||||
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
|
||||
AllowCredentials bool `json:"allow_credentials,omitempty"`
|
||||
AllowedOrigins []string `json:"allowed_origins,omitempty"`
|
||||
AllowedMethods []string `json:"allowed_methods,omitempty"`
|
||||
AllowedHeaders []string `json:"allowed_headers,omitempty"`
|
||||
APIKey []string `json:"api_key,omitempty"`
|
||||
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
|
||||
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
|
||||
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
|
||||
EngineUseRay bool `json:"engine_use_ray,omitempty"`
|
||||
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
|
||||
MaxLogLen int `json:"max_log_len,omitempty"`
|
||||
|
||||
// Additional engine configuration
|
||||
Task string `json:"task,omitempty"`
|
||||
MultiModalConfig string `json:"multi_modal_config,omitempty"`
|
||||
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
|
||||
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
|
||||
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
|
||||
CompilationConfig string `json:"compilation_config,omitempty"`
|
||||
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
|
||||
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
|
||||
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
|
||||
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
|
||||
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
|
||||
}
|
||||
|
||||
// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
func (o *VllmServerOptions) BuildCommandArgs() []string {
	var args []string

	// Add model as positional argument if specified
	if o.Model != "" {
		args = append(args, o.Model)
	}

	// Create a copy of the options without the Model field to avoid including it as --model flag
	optionsCopy := *o
	optionsCopy.Model = "" // Clear model field so it won't be included as a flag

	multipleFlags := map[string]bool{
		"api-key":         true,
		"allowed-origins": true,
		"allowed-methods": true,
		"allowed-headers": true,
		"middleware":      true,
	}

	// Build the rest of the arguments as flags
	flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
	args = append(args, flagArgs...)

	return args
}

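A quick sketch of what `BuildCommandArgs` produces; the model lands first as a positional argument, and the exact ordering of the remaining flags depends on `backends.BuildCommandArgs`, so treat the expected output comment as illustrative:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm"
)

func main() {
	opts := vllm.VllmServerOptions{
		Model:                "microsoft/DialoGPT-medium",
		TensorParallelSize:   2,
		GPUMemoryUtilization: 0.9,
	}
	// Expected shape (flag order illustrative):
	// [microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9]
	fmt.Println(opts.BuildCommandArgs())
}
```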
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
	executableNames := []string{"vllm"}
	subcommandNames := []string{"serve"}
	multiValuedFlags := map[string]bool{
		"middleware":      true,
		"api_key":         true,
		"allowed_origins": true,
		"allowed_methods": true,
		"allowed_headers": true,
		"lora_modules":    true,
		"prompt_adapters": true,
	}

	var vllmOptions VllmServerOptions
	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
		return nil, err
	}

	return &vllmOptions, nil
}
pkg/backends/vllm/vllm_test.go (new file, 153 lines)
@@ -0,0 +1,153 @@

package vllm_test

import (
	"llamactl/pkg/backends/vllm"
	"slices"
	"testing"
)

func TestParseVllmCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic vllm serve command",
			command:   "vllm serve microsoft/DialoGPT-medium",
			expectErr: false,
		},
		{
			name:      "serve only command",
			command:   "serve microsoft/DialoGPT-medium",
			expectErr: false,
		},
		{
			name:      "positional model with flags",
			command:   "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
			expectErr: false,
		},
		{
			name:      "model with path",
			command:   "vllm serve /path/to/model --gpu-memory-utilization 0.8",
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "unterminated quote",
			command:   `vllm serve "unterminated`,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := vllm.ParseVllmCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
			}
		})
	}
}

func TestParseVllmCommandValues(t *testing.T) {
	command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
	result, err := vllm.ParseVllmCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test-model" {
		t.Errorf("expected model 'test-model', got '%s'", result.Model)
	}
	if result.TensorParallelSize != 4 {
		t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
	}
	if result.GPUMemoryUtilization != 0.8 {
		t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
	}
	if !result.EnableLogOutputs {
		t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
	}
}

func TestBuildCommandArgs(t *testing.T) {
	options := vllm.VllmServerOptions{
		Model:                "microsoft/DialoGPT-medium",
		Port:                 8080,
		Host:                 "localhost",
		TensorParallelSize:   2,
		GPUMemoryUtilization: 0.8,
		EnableLogOutputs:     true,
		AllowedOrigins:       []string{"http://localhost:3000", "https://example.com"},
	}

	args := options.BuildCommandArgs()

	// Check that model is the first positional argument (not a --model flag)
	if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
	}

	// Check that --model flag is NOT present (since model should be positional)
	if contains(args, "--model") {
		t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
	}

	// Check other flags
	if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
	}
	if !contains(args, "--enable-log-outputs") {
		t.Errorf("Expected --enable-log-outputs not found in %v", args)
	}
	if !contains(args, "--host") {
		t.Errorf("Expected --host not found in %v", args)
	}
	if !contains(args, "--port") {
		t.Errorf("Expected --port not found in %v", args)
	}

	// Check array handling (multiple flags)
	allowedOriginsCount := 0
	for i := range args {
		if args[i] == "--allowed-origins" {
			allowedOriginsCount++
		}
	}
	if allowedOriginsCount != 2 {
		t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
	}
}

// Helper functions
func contains(slice []string, item string) bool {
	return slices.Contains(slice, item)
}

func containsFlagWithValue(args []string, flag, value string) bool {
	for i, arg := range args {
		if arg == flag && i+1 < len(args) && args[i+1] == value {
			return true
		}
	}
	return false
}
@@ -17,6 +17,9 @@ type BackendConfig struct {

	// Path to mlx_lm executable (MLX-LM backend)
	MLXLMExecutable string `yaml:"mlx_lm_executable"`

	// Path to vllm executable (vLLM backend)
	VllmExecutable string `yaml:"vllm_executable"`
}

// AppConfig represents the configuration for llamactl
@@ -122,6 +125,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
		Backends: BackendConfig{
			LlamaExecutable: "llama-server",
			MLXLMExecutable: "mlx_lm.server",
			VllmExecutable:  "vllm",
		},
		Instances: InstancesConfig{
			PortRange: [2]int{8000, 9000},
@@ -246,6 +250,9 @@ func loadEnvVars(cfg *AppConfig) {
	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
		cfg.Backends.MLXLMExecutable = mlxLMExec
	}
	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
		cfg.Backends.VllmExecutable = vllmExec
	}
	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
		if b, err := strconv.ParseBool(autoRestart); err == nil {
			cfg.Instances.DefaultAutoRestart = b
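
A minimal sketch of exercising the new environment override; it assumes an empty config path makes LoadConfig fall back to the defaults shown above before applying env vars, which is an assumption, not confirmed by this diff:

```go
package main

import (
	"fmt"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Point llamactl at a vllm binary inside a virtual environment.
	os.Setenv("LLAMACTL_VLLM_EXECUTABLE", "/opt/vllm-env/bin/vllm")

	// Empty path: defaults plus env overrides (assumption for this sketch).
	cfg, err := config.LoadConfig("")
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Backends.VllmExecutable) // /opt/vllm-env/bin/vllm
}
```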
@@ -105,6 +105,10 @@ func (i *Process) GetPort() int {
		if i.options.MlxServerOptions != nil {
			return i.options.MlxServerOptions.Port
		}
	case backends.BackendTypeVllm:
		if i.options.VllmServerOptions != nil {
			return i.options.VllmServerOptions.Port
		}
	}
	}
	return 0
@@ -123,6 +127,10 @@ func (i *Process) GetHost() string {
		if i.options.MlxServerOptions != nil {
			return i.options.MlxServerOptions.Host
		}
	case backends.BackendTypeVllm:
		if i.options.VllmServerOptions != nil {
			return i.options.VllmServerOptions.Host
		}
	}
	}
	return ""
@@ -176,6 +184,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
			host = i.options.MlxServerOptions.Host
			port = i.options.MlxServerOptions.Port
		}
	case backends.BackendTypeVllm:
		if i.options.VllmServerOptions != nil {
			host = i.options.VllmServerOptions.Host
			port = i.options.VllmServerOptions.Port
		}
	}

	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))

@@ -52,6 +52,8 @@ func (i *Process) Start() error {
		executable = i.globalBackendSettings.LlamaExecutable
	case backends.BackendTypeMlxLm:
		executable = i.globalBackendSettings.MLXLMExecutable
	case backends.BackendTypeVllm:
		executable = i.globalBackendSettings.VllmExecutable
	default:
		return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
	}
@@ -200,6 +202,11 @@ func (i *Process) WaitForHealthy(timeout int) error {
		host = opts.MlxServerOptions.Host
		port = opts.MlxServerOptions.Port
	}
	case backends.BackendTypeVllm:
		if opts.VllmServerOptions != nil {
			host = opts.VllmServerOptions.Host
			port = opts.VllmServerOptions.Port
		}
	}
	if host == "" {
		host = "localhost"
@@ -6,6 +6,7 @@ import (
	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/backends/mlx"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/config"
	"log"
)
@@ -26,6 +27,7 @@ type CreateInstanceOptions struct {
	// Backend-specific options
	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
	VllmServerOptions  *vllm.VllmServerOptions      `json:"-"`
}

// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -69,6 +71,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
			return fmt.Errorf("failed to unmarshal MLX options: %w", err)
		}
	}
	case backends.BackendTypeVllm:
		if c.BackendOptions != nil {
			optionsData, err := json.Marshal(c.BackendOptions)
			if err != nil {
				return fmt.Errorf("failed to marshal backend options: %w", err)
			}

			c.VllmServerOptions = &vllm.VllmServerOptions{}
			if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
				return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
			}
		}
	default:
		return fmt.Errorf("unknown backend type: %s", c.BackendType)
	}
@@ -114,6 +128,20 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
			return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
		}

		aux.BackendOptions = backendOpts
	}
	case backends.BackendTypeVllm:
		if c.VllmServerOptions != nil {
			data, err := json.Marshal(c.VllmServerOptions)
			if err != nil {
				return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
			}

			var backendOpts map[string]any
			if err := json.Unmarshal(data, &backendOpts); err != nil {
				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
			}

			aux.BackendOptions = backendOpts
		}
	}
@@ -171,6 +199,13 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
	if c.MlxServerOptions != nil {
		return c.MlxServerOptions.BuildCommandArgs()
	}
	case backends.BackendTypeVllm:
		if c.VllmServerOptions != nil {
			// Prepend "serve" as first argument
			args := []string{"serve"}
			args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
			return args
		}
	}
	return []string{}
}

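For illustration, the custom unmarshaling above means a create-instance payload like the README's vLLM example round-trips into typed options, and the instance-level BuildCommandArgs then prepends the serve subcommand (a sketch; field names follow the JSON tags in vllm.go):

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance"
)

func main() {
	payload := []byte(`{
		"backend_type": "vllm",
		"backend_options": {
			"model": "microsoft/DialoGPT-medium",
			"tensor_parallel_size": 2
		}
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(payload, &opts); err != nil {
		panic(err)
	}
	// Expected shape: [serve microsoft/DialoGPT-medium --tensor-parallel-size 2]
	fmt.Println(opts.BuildCommandArgs())
}
```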
@@ -264,6 +264,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
	if options.MlxServerOptions != nil {
		return options.MlxServerOptions.Port
	}
	case backends.BackendTypeVllm:
		if options.VllmServerOptions != nil {
			return options.VllmServerOptions.Port
		}
	}
	return 0
}
@@ -279,6 +283,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
	if options.MlxServerOptions != nil {
		options.MlxServerOptions.Port = port
	}
	case backends.BackendTypeVllm:
		if options.VllmServerOptions != nil {
			options.VllmServerOptions.Port = port
		}
	}
}

@@ -8,6 +8,7 @@ import (
	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/backends/mlx"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
	"llamactl/pkg/manager"
@@ -739,3 +740,56 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
	}
}

// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
	type errorResponse struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}
	writeError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
	}
	return func(w http.ResponseWriter, r *http.Request) {
		var req ParseCommandRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		}

		if strings.TrimSpace(req.Command) == "" {
			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
		if err != nil {
			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
			return
		}

		backendType := backends.BackendTypeVllm

		options := &instance.CreateInstanceOptions{
			BackendType:       backendType,
			VllmServerOptions: vllmOptions,
		}

		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(options); err != nil {
			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
		}
	}
}

@@ -58,6 +58,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
			r.Route("/mlx", func(r chi.Router) {
				r.Post("/parse-command", handler.ParseMlxCommand())
			})
			r.Route("/vllm", func(r chi.Router) {
				r.Post("/parse-command", handler.ParseVllmCommand())
			})
		})

		// Instance management endpoints

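With the route registered, the endpoint can be exercised as below; a sketch, where the /api/v1 prefix and bearer key follow the README's other endpoint examples:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	body := bytes.NewBufferString(`{"command": "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2"}`)
	req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/backends/vllm/parse-command", body)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer your-key")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // parsed CreateInstanceOptions as JSON
}
```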
@@ -46,6 +46,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
		return validateLlamaCppOptions(options)
	case backends.BackendTypeMlxLm:
		return validateMlxOptions(options)
	case backends.BackendTypeVllm:
		return validateVllmOptions(options)
	default:
		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
	}
@@ -88,6 +90,25 @@ func validateMlxOptions(options *instance.CreateInstanceOptions) error {
	return nil
}

// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
	if options.VllmServerOptions == nil {
		return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
	}

	// Use reflection to check all string fields for injection patterns
	if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
		return err
	}

	// Basic network validation for port
	if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
		return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
	}

	return nil
}

// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
	val := reflect.ValueOf(v)

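A minimal sketch of the port check above; the import path for the validation package is an assumption (shown here as llamactl/pkg/validation), as this diff does not name it:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/instance"
	"llamactl/pkg/validation" // assumed package path for ValidateInstanceOptions
)

func main() {
	opts := &instance.CreateInstanceOptions{
		BackendType:       backends.BackendTypeVllm,
		VllmServerOptions: &vllm.VllmServerOptions{Port: 99999}, // out of range
	}
	// Expected: a ValidationError mentioning the invalid port range.
	fmt.Println(validation.ValidateInstanceOptions(opts))
}
```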
webui/src/components/BackendBadge.tsx (new file, 65 lines)
@@ -0,0 +1,65 @@

import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Cpu, Zap, Server } from "lucide-react";

interface BackendBadgeProps {
  backend?: BackendTypeValue;
}

const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
  if (!backend) {
    return null;
  }

  const getIcon = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return <Cpu className="h-3 w-3" />;
      case BackendType.MLX_LM:
        return <Zap className="h-3 w-3" />;
      case BackendType.VLLM:
        return <Server className="h-3 w-3" />;
      default:
        return <Server className="h-3 w-3" />;
    }
  };

  const getText = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return "llama.cpp";
      case BackendType.MLX_LM:
        return "MLX";
      case BackendType.VLLM:
        return "vLLM";
      default:
        return backend;
    }
  };

  const getVariant = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return "secondary";
      case BackendType.MLX_LM:
        return "outline";
      case BackendType.VLLM:
        return "default";
      default:
        return "secondary";
    }
  };

  return (
    <Badge
      variant={getVariant()}
      className="flex items-center gap-1.5"
    >
      {getIcon()}
      <span className="text-xs">{getText()}</span>
    </Badge>
  );
};

export default BackendBadge;
@@ -45,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        <Input
          id={fieldKey}
@@ -72,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        <Input
          id={fieldKey}
@@ -99,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        <Input
          id={fieldKey}

@@ -5,6 +5,7 @@ import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";

@@ -58,7 +59,10 @@ function InstanceCard({
      <CardHeader className="pb-3">
        <div className="flex items-center justify-between">
          <CardTitle className="text-lg">{instance.name}</CardTitle>
          {running && <HealthBadge health={health} />}
          <div className="flex flex-col items-end gap-2">
            {running && <HealthBadge health={health} />}
            <BackendBadge backend={instance.options?.backend_type} />
          </div>
        </div>
      </CardHeader>

@@ -11,11 +11,13 @@ import {
  DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { getAdvancedFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";
import BasicInstanceFields from "@/components/instance/BasicInstanceFields";
import BackendConfiguration from "@/components/instance/BackendConfiguration";
import AdvancedInstanceFields from "@/components/instance/AdvancedInstanceFields";

interface InstanceDialogProps {
  open: boolean;
@@ -39,9 +41,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
  const [showParseDialog, setShowParseDialog] = useState(false);

  // Get field lists dynamically from the type
  const basicFields = getBasicFields();
  const advancedFields = getAdvancedFields();
  const basicBackendFields = getBasicBackendFields(formData.backend_type);
  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);

  // Reset form when dialog opens/closes or when instance changes
@@ -163,8 +163,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
    setShowParseDialog(false);
  };

  // Check if auto_restart is enabled
  const isAutoRestartEnabled = formData.auto_restart === true;

  // Save button label logic
  let saveButtonLabel = "Create Instance";
@@ -212,70 +210,23 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
          </div>

          {/* Auto Restart Configuration Section */}
          <div className="space-y-4">
            <h3 className="text-lg font-medium">
              Auto Restart Configuration
            </h3>
          <AutoRestartConfiguration
            formData={formData}
            onChange={handleFieldChange}
          />

            {/* Auto Restart Toggle */}
            <ZodFormField
              fieldKey="auto_restart"
              value={formData.auto_restart}
              onChange={handleFieldChange}
            />

            {/* Show restart options only when auto restart is enabled */}
            {isAutoRestartEnabled && (
              <div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
                <ZodFormField
                  fieldKey="max_restarts"
                  value={formData.max_restarts}
                  onChange={handleFieldChange}
                />
                <ZodFormField
                  fieldKey="restart_delay"
                  value={formData.restart_delay}
                  onChange={handleFieldChange}
                />
              </div>
            )}
          </div>

          {/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
          <div className="space-y-4">
            <h3 className="text-lg font-medium">Basic Configuration</h3>
            {basicFields
              .filter(
                (fieldKey) =>
                  fieldKey !== "auto_restart" &&
                  fieldKey !== "max_restarts" &&
                  fieldKey !== "restart_delay" &&
                  fieldKey !== "backend_options" // backend_options is handled separately
              )
              .map((fieldKey) => (
                <ZodFormField
                  key={fieldKey}
                  fieldKey={fieldKey}
                  value={formData[fieldKey]}
                  onChange={handleFieldChange}
                />
              ))}
          </div>
          {/* Basic Fields */}
          <BasicInstanceFields
            formData={formData}
            onChange={handleFieldChange}
          />

          {/* Backend Configuration Section */}
          <div className="space-y-4">
            <h3 className="text-lg font-medium">Backend Configuration</h3>

            {/* Basic backend fields */}
            {basicBackendFields.map((fieldKey) => (
              <BackendFormField
                key={fieldKey}
                fieldKey={fieldKey}
                value={(formData.backend_options as any)?.[fieldKey]}
                onChange={handleBackendFieldChange}
              />
            ))}
          </div>
          <BackendConfiguration
            formData={formData}
            onBackendFieldChange={handleBackendFieldChange}
            showAdvanced={showAdvanced}
          />

          {/* Advanced Fields Toggle */}
          <div className="border-t pt-4">
@@ -314,54 +265,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
            </div>
          </div>

          {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
          {/* Advanced Fields */}
          {showAdvanced && (
            <div className="space-y-4 pl-6 border-l-2 border-muted">
              {/* Advanced instance fields */}
              {advancedFields
                .filter(
                  (fieldKey) =>
                    !["max_restarts", "restart_delay", "backend_options"].includes(
                      fieldKey as string
                    )
                ).length > 0 && (
                <div className="space-y-4">
                  <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
                  {advancedFields
                    .filter(
                      (fieldKey) =>
                        !["max_restarts", "restart_delay", "backend_options"].includes(
                          fieldKey as string
                        )
                    )
                    .sort()
                    .map((fieldKey) => (
                      <ZodFormField
                        key={fieldKey}
                        fieldKey={fieldKey}
                        value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
                        onChange={handleFieldChange}
                      />
                    ))}
                </div>
              )}

              {/* Advanced backend fields */}
              {advancedBackendFields.length > 0 && (
                <div className="space-y-4">
                  <h4 className="text-md font-medium">Advanced Backend Configuration</h4>
                  {advancedBackendFields
                    .sort()
                    .map((fieldKey) => (
                      <BackendFormField
                        key={fieldKey}
                        fieldKey={fieldKey}
                        value={(formData.backend_options as any)?.[fieldKey]}
                        onChange={handleBackendFieldChange}
                      />
                    ))}
                </div>
              )}
              <AdvancedInstanceFields
                formData={formData}
                onChange={handleFieldChange}
              />
            </div>
          )}
        </div>

@@ -9,7 +9,7 @@ import {
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { type CreateInstanceOptions } from "@/types/instance";
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";

@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
  onParsed,
}) => {
  const [command, setCommand] = useState('');
  const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
    setError(null);

    try {
      const options = await backendsApi.llamaCpp.parseCommand(command);
      let options: CreateInstanceOptions;

      // Parse based on selected backend type
      switch (backendType) {
        case BackendType.LLAMA_CPP:
          options = await backendsApi.llamaCpp.parseCommand(command);
          break;
        case BackendType.MLX_LM:
          options = await backendsApi.mlx.parseCommand(command);
          break;
        case BackendType.VLLM:
          options = await backendsApi.vllm.parseCommand(command);
          break;
        default:
          throw new Error(`Unsupported backend type: ${backendType}`);
      }

      onParsed(options);
      onOpenChange(false);
      // Reset form
      setCommand('');
      setError(null);
      // Show success toast
      toast.success('Command parsed successfully');
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
      setError(errorMessage);
      // Show error toast
      toast.error('Failed to parse command', {
        description: errorMessage
      });
@@ -60,31 +74,55 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({

  const handleOpenChange = (open: boolean) => {
    if (!open) {
      // Reset form when closing
      setCommand('');
      setBackendType(BackendType.LLAMA_CPP);
      setError(null);
    }
    onOpenChange(open);
  };

  const backendPlaceholders: Record<BackendTypeValue, string> = {
    [BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
    [BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
    [BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
  };

  const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
    return backendPlaceholders[backendType] || "Enter your command here...";
  };

  return (
    <Dialog open={open} onOpenChange={handleOpenChange}>
      <DialogContent className="sm:max-w-[600px]">
        <DialogHeader>
          <DialogTitle>Parse Llama Server Command</DialogTitle>
          <DialogTitle>Parse Backend Command</DialogTitle>
          <DialogDescription>
            Paste your llama-server command to automatically populate the form fields
            Select your backend type and paste the command to automatically populate the form fields
          </DialogDescription>
        </DialogHeader>

        <div className="space-y-4">
          <div>
            <Label htmlFor="backend-type">Backend Type</Label>
            <select
              id="backend-type"
              value={backendType}
              onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
              className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
            >
              <option value={BackendType.LLAMA_CPP}>Llama Server</option>
              <option value={BackendType.MLX_LM}>MLX LM</option>
              <option value={BackendType.VLLM}>vLLM</option>
            </select>
          </div>

          <div>
            <Label htmlFor="command">Command</Label>
            <textarea
              id="command"
              value={command}
              onChange={(e) => setCommand(e.target.value)}
              placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
              placeholder={getPlaceholderForBackend(backendType)}
              className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
            />
          </div>

@@ -29,7 +29,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        <select
          id={fieldKey}
@@ -39,6 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
        >
          <option value={BackendType.LLAMA_CPP}>Llama Server</option>
          <option value={BackendType.MLX_LM}>MLX LM</option>
          <option value={BackendType.VLLM}>vLLM</option>
        </select>
        {config.description && (
          <p className="text-sm text-muted-foreground">{config.description}</p>
@@ -70,8 +70,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        </Label>
        <Input
          id={fieldKey}
          type="number"
@@ -97,8 +96,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        </Label>
        <Input
          id={fieldKey}
          type="text"
@@ -124,8 +122,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
      <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
          {config.label}
          {config.required && <span className="text-red-500 ml-1">*</span>}
        </Label>
        </Label>
        <Input
          id={fieldKey}
          type="text"

webui/src/components/form/ArrayInput.tsx (new file, 62 lines)
@@ -0,0 +1,62 @@

import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'

interface ArrayInputProps {
  id: string
  label: string
  value: string[] | undefined
  onChange: (value: string[] | undefined) => void
  placeholder?: string
  description?: string
  disabled?: boolean
  className?: string
}

const ArrayInput: React.FC<ArrayInputProps> = ({
  id,
  label,
  value,
  onChange,
  placeholder = "item1, item2, item3",
  description,
  disabled = false,
  className
}) => {
  const handleChange = (inputValue: string) => {
    if (inputValue === '') {
      onChange(undefined)
      return
    }

    const arrayValue = inputValue
      .split(',')
      .map(s => s.trim())
      .filter(Boolean)

    onChange(arrayValue.length > 0 ? arrayValue : undefined)
  }

  return (
    <div className="grid gap-2">
      <Label htmlFor={id}>
        {label}
      </Label>
      <Input
        id={id}
        type="text"
        value={Array.isArray(value) ? value.join(', ') : ''}
        onChange={(e) => handleChange(e.target.value)}
        placeholder={placeholder}
        disabled={disabled}
        className={className}
      />
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
    </div>
  )
}

export default ArrayInput
webui/src/components/form/CheckboxInput.tsx (new file, 42 lines)
@@ -0,0 +1,42 @@

import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'

interface CheckboxInputProps {
  id: string
  label: string
  value: boolean | undefined
  onChange: (value: boolean) => void
  description?: string
  disabled?: boolean
  className?: string
}

const CheckboxInput: React.FC<CheckboxInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
}) => {
  return (
    <div className={`flex items-center space-x-2 ${className || ''}`}>
      <Checkbox
        id={id}
        checked={value === true}
        onCheckedChange={(checked) => onChange(!!checked)}
        disabled={disabled}
      />
      <Label htmlFor={id} className="text-sm font-normal">
        {label}
        {description && (
          <span className="text-muted-foreground ml-1">- {description}</span>
        )}
      </Label>
    </div>
  )
}

export default CheckboxInput
webui/src/components/form/NumberInput.tsx (new file, 60 lines)
@@ -0,0 +1,60 @@

import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'

interface NumberInputProps {
  id: string
  label: string
  value: number | undefined
  onChange: (value: number | undefined) => void
  placeholder?: string
  description?: string
  disabled?: boolean
  className?: string
}

const NumberInput: React.FC<NumberInputProps> = ({
  id,
  label,
  value,
  onChange,
  placeholder,
  description,
  disabled = false,
  className
}) => {
  const handleChange = (inputValue: string) => {
    if (inputValue === '') {
      onChange(undefined)
      return
    }

    const numValue = parseFloat(inputValue)
    if (!isNaN(numValue)) {
      onChange(numValue)
    }
  }

  return (
    <div className="grid gap-2">
      <Label htmlFor={id}>
        {label}
      </Label>
      <Input
        id={id}
        type="number"
        step="any"
        value={value !== undefined ? value : ''}
        onChange={(e) => handleChange(e.target.value)}
        placeholder={placeholder}
        disabled={disabled}
        className={className}
      />
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
    </div>
  )
}

export default NumberInput
webui/src/components/form/SelectInput.tsx (new file, 55 lines)
@@ -0,0 +1,55 @@

import React from 'react'
import { Label } from '@/components/ui/label'

interface SelectOption {
  value: string
  label: string
}

interface SelectInputProps {
  id: string
  label: string
  value: string | undefined
  onChange: (value: string | undefined) => void
  options: SelectOption[]
  description?: string
  disabled?: boolean
  className?: string
}

const SelectInput: React.FC<SelectInputProps> = ({
  id,
  label,
  value,
  onChange,
  options,
  description,
  disabled = false,
  className
}) => {
  return (
    <div className="grid gap-2">
      <Label htmlFor={id}>
        {label}
      </Label>
      <select
        id={id}
        value={value || ''}
        onChange={(e) => onChange(e.target.value || undefined)}
        disabled={disabled}
        className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
      >
        {options.map(option => (
          <option key={option.value} value={option.value}>
            {option.label}
          </option>
        ))}
      </select>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
    </div>
  )
}

export default SelectInput
webui/src/components/form/TextInput.tsx (new file, 47 lines)
@@ -0,0 +1,47 @@

import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'

interface TextInputProps {
  id: string
  label: string
  value: string | number | undefined
  onChange: (value: string | undefined) => void
  placeholder?: string
  description?: string
  disabled?: boolean
  className?: string
}

const TextInput: React.FC<TextInputProps> = ({
  id,
  label,
  value,
  onChange,
  placeholder,
  description,
  disabled = false,
  className
}) => {
  return (
    <div className="grid gap-2">
      <Label htmlFor={id}>
        {label}
      </Label>
      <Input
        id={id}
        type="text"
        value={typeof value === 'string' || typeof value === 'number' ? value : ''}
        onChange={(e) => onChange(e.target.value || undefined)}
        placeholder={placeholder}
        disabled={disabled}
        className={className}
      />
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
    </div>
  )
}

export default TextInput
webui/src/components/instance/AdvancedInstanceFields.tsx (new file, 98 lines)
@@ -0,0 +1,98 @@

import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getAdvancedFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import ArrayInput from '@/components/form/ArrayInput'

interface AdvancedInstanceFieldsProps {
  formData: CreateInstanceOptions
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

const AdvancedInstanceFields: React.FC<AdvancedInstanceFieldsProps> = ({
  formData,
  onChange
}) => {
  const advancedFields = getAdvancedFields()

  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const fieldType = getFieldType(fieldKey)

    switch (fieldType) {
      case 'boolean':
        return (
          <CheckboxInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as boolean | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            description={config.description}
          />
        )

      case 'number':
        return (
          <NumberInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as number | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            placeholder={config.placeholder}
            description={config.description}
          />
        )

      case 'array':
        return (
          <ArrayInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as string[] | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            placeholder={config.placeholder}
            description={config.description}
          />
        )

      default:
        return (
          <TextInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as string | number | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            placeholder={config.placeholder}
            description={config.description}
          />
        )
    }
  }

  // Filter out restart options and backend_options (handled separately)
  const fieldsToRender = advancedFields.filter(
    fieldKey => !['max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
  )

  if (fieldsToRender.length === 0) {
    return null
  }

  return (
    <div className="space-y-4">
      <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
      {fieldsToRender
        .sort()
        .map(renderField)}
    </div>
  )
}

export default AdvancedInstanceFields
webui/src/components/instance/AutoRestartConfiguration.tsx (new file, 53 lines)
@@ -0,0 +1,53 @@

import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'

interface AutoRestartConfigurationProps {
  formData: CreateInstanceOptions
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
  formData,
  onChange
}) => {
  const isAutoRestartEnabled = formData.auto_restart === true

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Auto Restart Configuration</h3>

      <CheckboxInput
        id="auto_restart"
        label="Auto Restart"
        value={formData.auto_restart}
        onChange={(value) => onChange('auto_restart', value)}
        description="Automatically restart the instance on failure"
      />

      {isAutoRestartEnabled && (
        <div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
          <NumberInput
            id="max_restarts"
            label="Max Restarts"
            value={formData.max_restarts}
            onChange={(value) => onChange('max_restarts', value)}
            placeholder="3"
            description="Maximum number of restart attempts (0 = unlimited)"
          />
          <NumberInput
            id="restart_delay"
            label="Restart Delay (seconds)"
            value={formData.restart_delay}
            onChange={(value) => onChange('restart_delay', value)}
            placeholder="5"
            description="Delay in seconds before attempting restart"
          />
        </div>
      )}
    </div>
  )
}

export default AutoRestartConfiguration
54
webui/src/components/instance/BackendConfiguration.tsx
Normal file
54
webui/src/components/instance/BackendConfiguration.tsx
Normal file
@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'

interface BackendConfigurationProps {
  formData: CreateInstanceOptions
  onBackendFieldChange: (key: string, value: any) => void
  showAdvanced?: boolean
}

const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
  formData,
  onBackendFieldChange,
  showAdvanced = false
}) => {
  const basicBackendFields = getBasicBackendFields(formData.backend_type)
  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Backend Configuration</h3>

      {/* Basic backend fields */}
      {basicBackendFields.map((fieldKey) => (
        <BackendFormField
          key={fieldKey}
          fieldKey={fieldKey}
          value={(formData.backend_options as any)?.[fieldKey]}
          onChange={onBackendFieldChange}
        />
      ))}

      {/* Advanced backend fields */}
      {showAdvanced && advancedBackendFields.length > 0 && (
        <div className="space-y-4 pl-6 border-l-2 border-muted">
          <h4 className="text-md font-medium">Advanced Backend Configuration</h4>
          {advancedBackendFields
            .sort()
            .map((fieldKey) => (
              <BackendFormField
                key={fieldKey}
                fieldKey={fieldKey}
                value={(formData.backend_options as any)?.[fieldKey]}
                onChange={onBackendFieldChange}
              />
            ))}
        </div>
      )}
    </div>
  )
}

export default BackendConfiguration
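Because both field lists are derived from `formData.backend_type`, switching the backend in the parent form re-renders this component with a completely different set of inputs. A sketch of mounting it for a vLLM instance (the logging handler is a stand-in for real state management):

```tsx
import BackendConfiguration from '@/components/instance/BackendConfiguration'

// Hypothetical usage: basic vLLM fields plus the advanced section.
const Example = () => (
  <BackendConfiguration
    formData={{ backend_type: 'vllm', backend_options: { tensor_parallel_size: 2 } }}
    onBackendFieldChange={(key, value) => console.log('option changed:', key, value)}
    showAdvanced
  />
)
```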
99  webui/src/components/instance/BasicInstanceFields.tsx  Normal file
@@ -0,0 +1,99 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'

interface BasicInstanceFieldsProps {
  formData: CreateInstanceOptions
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
  formData,
  onChange
}) => {
  const basicFields = getBasicFields()

  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const fieldType = getFieldType(fieldKey)

    // Special handling for backend_type field
    if (fieldKey === 'backend_type') {
      return (
        <SelectInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] || BackendType.LLAMA_CPP}
          onChange={(value) => onChange(fieldKey, value)}
          options={[
            { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
            { value: BackendType.MLX_LM, label: 'MLX LM' },
            { value: BackendType.VLLM, label: 'vLLM' }
          ]}
          description={config.description}
        />
      )
    }

    // Render based on field type
    switch (fieldType) {
      case 'boolean':
        return (
          <CheckboxInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as boolean | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            description={config.description}
          />
        )

      case 'number':
        return (
          <NumberInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as number | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            placeholder={config.placeholder}
            description={config.description}
          />
        )

      default:
        return (
          <TextInput
            key={fieldKey}
            id={fieldKey}
            label={config.label}
            value={formData[fieldKey] as string | number | undefined}
            onChange={(value) => onChange(fieldKey, value)}
            placeholder={config.placeholder}
            description={config.description}
          />
        )
    }
  }

  // Filter out auto restart fields and backend_options (handled separately)
  const fieldsToRender = basicFields.filter(
    fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
  )

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Basic Configuration</h3>
      {fieldsToRender.map(renderField)}
    </div>
  )
}

export default BasicInstanceFields
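`renderField` dispatches purely on `getFieldType`, so adding a new top-level option to `CreateInstanceOptionsSchema` automatically yields a working input; only `backend_type` is special-cased into the three-entry select above. A sketch of the expected mappings, assuming the schema shapes shown later in this diff:

```ts
import { getFieldType } from '@/schemas/instanceOptions'

getFieldType('on_demand_start') // 'boolean' -> CheckboxInput
getFieldType('max_restarts')    // 'number'  -> NumberInput
```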
webui/src/lib/api.ts
@@ -1,4 +1,5 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
+import { handleApiError } from "./errorUtils";
 
 const API_BASE = "/api/v1";
 
@@ -30,25 +31,8 @@ async function apiCall<T>(
     headers,
   });
 
-  // Handle authentication errors
-  if (response.status === 401) {
-    throw new Error('Authentication required');
-  }
-
-  if (!response.ok) {
-    // Try to get error message from response
-    let errorMessage = `HTTP ${response.status}`;
-    try {
-      const errorText = await response.text();
-      if (errorText) {
-        errorMessage += `: ${errorText}`;
-      }
-    } catch {
-      // If we can't read the error, just use status
-    }
-
-    throw new Error(errorMessage);
-  }
+  // Handle errors using centralized error handler
+  await handleApiError(response);
 
   // Handle empty responses (like DELETE)
   if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
     const text = await response.text();
     return text as T;
   } else {
+    // Handle empty responses for JSON endpoints
+    const contentLength = response.headers.get('content-length');
+    if (contentLength === '0' || contentLength === null) {
+      const text = await response.text();
+      if (text.trim() === '') {
+        return {} as T; // Return empty object for empty JSON responses
+      }
+    }
     const data = await response.json() as T;
     return data;
   }
@@ -101,6 +93,14 @@ export const backendsApi = {
       body: JSON.stringify({ command }),
     }),
   },
+  vllm: {
+    // POST /backends/vllm/parse-command
+    parseCommand: (command: string) =>
+      apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
+        method: 'POST',
+        body: JSON.stringify({ command }),
+      }),
+  },
 };
 
 // Instance API functions
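The new `vllm` entry mirrors the existing llama.cpp parser endpoint, so the web UI can turn a pasted `vllm serve` command into structured instance options. A sketch (the command string is illustrative):

```ts
import { backendsApi } from '@/lib/api'

const options = await backendsApi.vllm.parseCommand(
  'vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2'
)
// Expected: options.backend_type === 'vllm' with backend_options populated from the flags.
```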
32  webui/src/lib/errorUtils.ts  Normal file
@@ -0,0 +1,32 @@
/**
 * Parses error response from API calls and returns a formatted error message
 */
export async function parseErrorResponse(response: Response): Promise<string> {
  let errorMessage = `HTTP ${response.status}`

  try {
    const errorText = await response.text()
    if (errorText) {
      errorMessage += `: ${errorText}`
    }
  } catch {
    // If we can't read the error, just use status
  }

  return errorMessage
}

/**
 * Handles common API call errors and throws appropriate Error objects
 */
export async function handleApiError(response: Response): Promise<void> {
  // Handle authentication errors
  if (response.status === 401) {
    throw new Error('Authentication required')
  }

  if (!response.ok) {
    const errorMessage = await parseErrorResponse(response)
    throw new Error(errorMessage)
  }
}
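With both helpers exported, any other fetch-based call site can reuse the same 401 and non-2xx handling that `apiCall` now delegates to. A minimal sketch of reuse outside `apiCall` (the `getJson` wrapper is hypothetical):

```ts
import { handleApiError } from '@/lib/errorUtils'

async function getJson<T>(url: string): Promise<T> {
  const response = await fetch(url)
  // Throws 'Authentication required' on 401, or 'HTTP <status>: <body>' on any other failure.
  await handleApiError(response)
  return await response.json() as T
}
```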
webui/src/lib/zodFormUtils.ts
@@ -2,13 +2,17 @@ import {
   type CreateInstanceOptions,
   type LlamaCppBackendOptions,
   type MlxBackendOptions,
+  type VllmBackendOptions,
   LlamaCppBackendOptionsSchema,
   MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
   getAllFieldKeys,
   getAllLlamaCppFieldKeys,
   getAllMlxFieldKeys,
+  getAllVllmFieldKeys,
   getLlamaCppFieldType,
-  getMlxFieldType
+  getMlxFieldType,
+  getVllmFieldType
 } from '@/schemas/instanceOptions'
 
 // Instance-level basic fields (not backend-specific)
@@ -16,7 +20,6 @@ export const basicFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   auto_restart: {
     label: 'Auto Restart',
@@ -52,13 +55,11 @@ const basicLlamaCppFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
-    description: 'Path to the model file',
-    required: true
+    description: 'Path to the model file'
   },
   hf_repo: {
     label: 'Hugging Face Repository',
@@ -82,13 +83,11 @@ const basicMlxFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   model: {
     label: 'Model',
     placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
-    description: 'The path to the MLX model weights, tokenizer, and config',
-    required: true
+    description: 'The path to the MLX model weights, tokenizer, and config'
   },
   temp: {
     label: 'Temperature',
@@ -117,11 +116,46 @@ const basicMlxFieldsConfig: Record<string, {
   }
 }
 
+// vLLM backend-specific basic fields
+const basicVllmFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+}> = {
+  model: {
+    label: 'Model',
+    placeholder: 'microsoft/DialoGPT-medium',
+    description: 'The name or path of the Hugging Face model to use'
+  },
+  tensor_parallel_size: {
+    label: 'Tensor Parallel Size',
+    placeholder: '1',
+    description: 'Number of GPUs to use for distributed serving'
+  },
+  gpu_memory_utilization: {
+    label: 'GPU Memory Utilization',
+    placeholder: '0.9',
+    description: 'The fraction of GPU memory to be used for the model executor'
+  }
+}
+
+// Backend field configuration lookup
+const backendFieldConfigs = {
+  mlx_lm: basicMlxFieldsConfig,
+  vllm: basicVllmFieldsConfig,
+  llama_cpp: basicLlamaCppFieldsConfig,
+} as const
+
+const backendFieldGetters = {
+  mlx_lm: getAllMlxFieldKeys,
+  vllm: getAllVllmFieldKeys,
+  llama_cpp: getAllLlamaCppFieldKeys,
+} as const
+
 function isBasicField(key: keyof CreateInstanceOptions): boolean {
   return key in basicFieldsConfig
 }
 
 
 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
   return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
 }
@@ -130,25 +164,18 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
   return getAllFieldKeys().filter(key => !isBasicField(key))
 }
 
 
 export function getBasicBackendFields(backendType?: string): string[] {
-  if (backendType === 'mlx_lm') {
-    return Object.keys(basicMlxFieldsConfig)
-  } else if (backendType === 'llama_cpp') {
-    return Object.keys(basicLlamaCppFieldsConfig)
-  }
-  // Default to LlamaCpp for backward compatibility
-  return Object.keys(basicLlamaCppFieldsConfig)
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
+  const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+  return Object.keys(config)
 }
 
 export function getAdvancedBackendFields(backendType?: string): string[] {
-  if (backendType === 'mlx_lm') {
-    return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
-  } else if (backendType === 'llama_cpp') {
-    return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
-  }
-  // Default to LlamaCpp for backward compatibility
-  return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
+  const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
+  const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+
+  return fieldGetter().filter(key => !(key in basicConfig))
 }
 
 // Combined backend fields config for use in BackendFormField
@@ -156,10 +183,10 @@ export const basicBackendFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   ...basicLlamaCppFieldsConfig,
-  ...basicMlxFieldsConfig
+  ...basicMlxFieldsConfig,
+  ...basicVllmFieldsConfig
 }
 
 // Get field type for any backend option (union type)
@@ -182,6 +209,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
     // Schema might not be available
   }
 
+  // Try vLLM schema
+  try {
+    if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
+      return getVllmFieldType(key as keyof VllmBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
   // Default fallback
   return 'text'
 }
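The table-driven rewrite makes the basic/advanced split for each backend a pure data lookup; unknown or missing backend types still fall back to the llama.cpp field set. Expected behavior for vLLM, given the config above:

```ts
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'

getBasicBackendFields('vllm')
// -> ['model', 'tensor_parallel_size', 'gpu_memory_utilization']

getAdvancedBackendFields('vllm')
// -> every other key of VllmBackendOptionsSchema, e.g. 'dtype', 'max_model_len', 'swap_space'
```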
4  webui/src/schemas/backends/index.ts  Normal file
@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'
192  webui/src/schemas/backends/llamacpp.ts  Normal file
@@ -0,0 +1,192 @@
import { z } from 'zod'

// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
  // Common params
  verbose_prompt: z.boolean().optional(),
  threads: z.number().optional(),
  threads_batch: z.number().optional(),
  cpu_mask: z.string().optional(),
  cpu_range: z.string().optional(),
  cpu_strict: z.number().optional(),
  prio: z.number().optional(),
  poll: z.number().optional(),
  cpu_mask_batch: z.string().optional(),
  cpu_range_batch: z.string().optional(),
  cpu_strict_batch: z.number().optional(),
  prio_batch: z.number().optional(),
  poll_batch: z.number().optional(),
  ctx_size: z.number().optional(),
  predict: z.number().optional(),
  batch_size: z.number().optional(),
  ubatch_size: z.number().optional(),
  keep: z.number().optional(),
  flash_attn: z.boolean().optional(),
  no_perf: z.boolean().optional(),
  escape: z.boolean().optional(),
  no_escape: z.boolean().optional(),
  rope_scaling: z.string().optional(),
  rope_scale: z.number().optional(),
  rope_freq_base: z.number().optional(),
  rope_freq_scale: z.number().optional(),
  yarn_orig_ctx: z.number().optional(),
  yarn_ext_factor: z.number().optional(),
  yarn_attn_factor: z.number().optional(),
  yarn_beta_slow: z.number().optional(),
  yarn_beta_fast: z.number().optional(),
  dump_kv_cache: z.boolean().optional(),
  no_kv_offload: z.boolean().optional(),
  cache_type_k: z.string().optional(),
  cache_type_v: z.string().optional(),
  defrag_thold: z.number().optional(),
  parallel: z.number().optional(),
  mlock: z.boolean().optional(),
  no_mmap: z.boolean().optional(),
  numa: z.string().optional(),
  device: z.string().optional(),
  override_tensor: z.array(z.string()).optional(),
  gpu_layers: z.number().optional(),
  split_mode: z.string().optional(),
  tensor_split: z.string().optional(),
  main_gpu: z.number().optional(),
  check_tensors: z.boolean().optional(),
  override_kv: z.array(z.string()).optional(),
  lora: z.array(z.string()).optional(),
  lora_scaled: z.array(z.string()).optional(),
  control_vector: z.array(z.string()).optional(),
  control_vector_scaled: z.array(z.string()).optional(),
  control_vector_layer_range: z.string().optional(),
  model: z.string().optional(),
  model_url: z.string().optional(),
  hf_repo: z.string().optional(),
  hf_repo_draft: z.string().optional(),
  hf_file: z.string().optional(),
  hf_repo_v: z.string().optional(),
  hf_file_v: z.string().optional(),
  hf_token: z.string().optional(),
  log_disable: z.boolean().optional(),
  log_file: z.string().optional(),
  log_colors: z.boolean().optional(),
  verbose: z.boolean().optional(),
  verbosity: z.number().optional(),
  log_prefix: z.boolean().optional(),
  log_timestamps: z.boolean().optional(),

  // Sampling params
  samplers: z.string().optional(),
  seed: z.number().optional(),
  sampling_seq: z.string().optional(),
  ignore_eos: z.boolean().optional(),
  temp: z.number().optional(),
  top_k: z.number().optional(),
  top_p: z.number().optional(),
  min_p: z.number().optional(),
  xtc_probability: z.number().optional(),
  xtc_threshold: z.number().optional(),
  typical: z.number().optional(),
  repeat_last_n: z.number().optional(),
  repeat_penalty: z.number().optional(),
  presence_penalty: z.number().optional(),
  frequency_penalty: z.number().optional(),
  dry_multiplier: z.number().optional(),
  dry_base: z.number().optional(),
  dry_allowed_length: z.number().optional(),
  dry_penalty_last_n: z.number().optional(),
  dry_sequence_breaker: z.array(z.string()).optional(),
  dynatemp_range: z.number().optional(),
  dynatemp_exp: z.number().optional(),
  mirostat: z.number().optional(),
  mirostat_lr: z.number().optional(),
  mirostat_ent: z.number().optional(),
  logit_bias: z.array(z.string()).optional(),
  grammar: z.string().optional(),
  grammar_file: z.string().optional(),
  json_schema: z.string().optional(),
  json_schema_file: z.string().optional(),

  // Example-specific params
  no_context_shift: z.boolean().optional(),
  special: z.boolean().optional(),
  no_warmup: z.boolean().optional(),
  spm_infill: z.boolean().optional(),
  pooling: z.string().optional(),
  cont_batching: z.boolean().optional(),
  no_cont_batching: z.boolean().optional(),
  mmproj: z.string().optional(),
  mmproj_url: z.string().optional(),
  no_mmproj: z.boolean().optional(),
  no_mmproj_offload: z.boolean().optional(),
  alias: z.string().optional(),
  host: z.string().optional(),
  port: z.number().optional(),
  path: z.string().optional(),
  no_webui: z.boolean().optional(),
  embedding: z.boolean().optional(),
  reranking: z.boolean().optional(),
  api_key: z.string().optional(),
  api_key_file: z.string().optional(),
  ssl_key_file: z.string().optional(),
  ssl_cert_file: z.string().optional(),
  chat_template_kwargs: z.string().optional(),
  timeout: z.number().optional(),
  threads_http: z.number().optional(),
  cache_reuse: z.number().optional(),
  metrics: z.boolean().optional(),
  slots: z.boolean().optional(),
  props: z.boolean().optional(),
  no_slots: z.boolean().optional(),
  slot_save_path: z.string().optional(),
  jinja: z.boolean().optional(),
  reasoning_format: z.string().optional(),
  reasoning_budget: z.number().optional(),
  chat_template: z.string().optional(),
  chat_template_file: z.string().optional(),
  no_prefill_assistant: z.boolean().optional(),
  slot_prompt_similarity: z.number().optional(),
  lora_init_without_apply: z.boolean().optional(),
  draft_max: z.number().optional(),
  draft_min: z.number().optional(),
  draft_p_min: z.number().optional(),
  ctx_size_draft: z.number().optional(),
  device_draft: z.string().optional(),
  gpu_layers_draft: z.number().optional(),
  model_draft: z.string().optional(),
  cache_type_k_draft: z.string().optional(),
  cache_type_v_draft: z.string().optional(),

  // Audio/TTS params
  model_vocoder: z.string().optional(),
  tts_use_guide_tokens: z.boolean().optional(),

  // Default model params
  embd_bge_small_en_default: z.boolean().optional(),
  embd_e5_small_en_default: z.boolean().optional(),
  embd_gte_small_default: z.boolean().optional(),
  fim_qwen_1_5b_default: z.boolean().optional(),
  fim_qwen_3b_default: z.boolean().optional(),
  fim_qwen_7b_default: z.boolean().optional(),
  fim_qwen_7b_spec: z.boolean().optional(),
  fim_qwen_14b_spec: z.boolean().optional(),
})

// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>

// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}

// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  return 'text' // ZodString and others default to text
}
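`getLlamaCppFieldType` drives which form control the UI renders: it unwraps the `.optional()` layer and inspects the inner Zod type. A few expected mappings from the schema above:

```ts
import { getLlamaCppFieldType } from '@/schemas/backends/llamacpp'

getLlamaCppFieldType('flash_attn') // 'boolean'
getLlamaCppFieldType('ctx_size')   // 'number'
getLlamaCppFieldType('lora')       // 'array' (z.array(z.string()))
getLlamaCppFieldType('hf_repo')    // 'text'  (strings fall through to the default)
```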
51  webui/src/schemas/backends/mlx.ts  Normal file
@@ -0,0 +1,51 @@
import { z } from 'zod'

// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
  // Basic connection options
  model: z.string().optional(),
  host: z.string().optional(),
  port: z.number().optional(),

  // Model and adapter options
  adapter_path: z.string().optional(),
  draft_model: z.string().optional(),
  num_draft_tokens: z.number().optional(),
  trust_remote_code: z.boolean().optional(),

  // Logging and templates
  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
  chat_template: z.string().optional(),
  use_default_chat_template: z.boolean().optional(),
  chat_template_args: z.string().optional(), // JSON string

  // Sampling defaults
  temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
  top_p: z.number().optional(),
  top_k: z.number().optional(),
  min_p: z.number().optional(),
  max_tokens: z.number().optional(),
})

// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>

// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}

// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = MlxBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
  return 'text' // ZodString and others default to text
}
150  webui/src/schemas/backends/vllm.ts  Normal file
@@ -0,0 +1,150 @@
import { z } from 'zod'

// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
  // Basic connection options (auto-assigned by llamactl)
  host: z.string().optional(),
  port: z.number().optional(),

  // Model and engine configuration
  model: z.string().optional(),
  tokenizer: z.string().optional(),
  skip_tokenizer_init: z.boolean().optional(),
  revision: z.string().optional(),
  code_revision: z.string().optional(),
  tokenizer_revision: z.string().optional(),
  tokenizer_mode: z.string().optional(),
  trust_remote_code: z.boolean().optional(),
  download_dir: z.string().optional(),
  load_format: z.string().optional(),
  config_format: z.string().optional(),
  dtype: z.string().optional(),
  kv_cache_dtype: z.string().optional(),
  quantization_param_path: z.string().optional(),
  seed: z.number().optional(),
  max_model_len: z.number().optional(),
  guided_decoding_backend: z.string().optional(),
  distributed_executor_backend: z.string().optional(),
  worker_use_ray: z.boolean().optional(),
  ray_workers_use_nsight: z.boolean().optional(),

  // Performance and serving configuration
  block_size: z.number().optional(),
  enable_prefix_caching: z.boolean().optional(),
  disable_sliding_window: z.boolean().optional(),
  use_v2_block_manager: z.boolean().optional(),
  num_lookahead_slots: z.number().optional(),
  swap_space: z.number().optional(),
  cpu_offload_gb: z.number().optional(),
  gpu_memory_utilization: z.number().optional(),
  num_gpu_blocks_override: z.number().optional(),
  max_num_batched_tokens: z.number().optional(),
  max_num_seqs: z.number().optional(),
  max_logprobs: z.number().optional(),
  disable_log_stats: z.boolean().optional(),
  quantization: z.string().optional(),
  rope_scaling: z.string().optional(),
  rope_theta: z.number().optional(),
  enforce_eager: z.boolean().optional(),
  max_context_len_to_capture: z.number().optional(),
  max_seq_len_to_capture: z.number().optional(),
  disable_custom_all_reduce: z.boolean().optional(),
  tokenizer_pool_size: z.number().optional(),
  tokenizer_pool_type: z.string().optional(),
  tokenizer_pool_extra_config: z.string().optional(),
  enable_lora_bias: z.boolean().optional(),
  lora_extra_vocab_size: z.number().optional(),
  lora_rank: z.number().optional(),
  prompt_lookback_distance: z.number().optional(),
  preemption_mode: z.string().optional(),

  // Distributed and parallel processing
  tensor_parallel_size: z.number().optional(),
  pipeline_parallel_size: z.number().optional(),
  max_parallel_loading_workers: z.number().optional(),
  disable_async_output_proc: z.boolean().optional(),
  worker_class: z.string().optional(),
  enabled_lora_modules: z.string().optional(),
  max_lora_rank: z.number().optional(),
  fully_sharded_loras: z.boolean().optional(),
  lora_modules: z.string().optional(),
  prompt_adapters: z.string().optional(),
  max_prompt_adapter_token: z.number().optional(),
  device: z.string().optional(),
  scheduler_delay: z.number().optional(),
  enable_chunked_prefill: z.boolean().optional(),
  speculative_model: z.string().optional(),
  speculative_model_quantization: z.string().optional(),
  speculative_revision: z.string().optional(),
  speculative_max_model_len: z.number().optional(),
  speculative_disable_by_batch_size: z.number().optional(),
  ngpt_speculative_length: z.number().optional(),
  speculative_disable_mqa: z.boolean().optional(),
  model_loader_extra_config: z.string().optional(),
  ignore_patterns: z.string().optional(),
  preloaded_lora_modules: z.string().optional(),

  // OpenAI server specific options
  uds: z.string().optional(),
  uvicorn_log_level: z.string().optional(),
  response_role: z.string().optional(),
  ssl_keyfile: z.string().optional(),
  ssl_certfile: z.string().optional(),
  ssl_ca_certs: z.string().optional(),
  ssl_cert_reqs: z.number().optional(),
  root_path: z.string().optional(),
  middleware: z.array(z.string()).optional(),
  return_tokens_as_token_ids: z.boolean().optional(),
  disable_frontend_multiprocessing: z.boolean().optional(),
  enable_auto_tool_choice: z.boolean().optional(),
  tool_call_parser: z.string().optional(),
  tool_server: z.string().optional(),
  chat_template: z.string().optional(),
  chat_template_content_format: z.string().optional(),
  allow_credentials: z.boolean().optional(),
  allowed_origins: z.array(z.string()).optional(),
  allowed_methods: z.array(z.string()).optional(),
  allowed_headers: z.array(z.string()).optional(),
  api_key: z.array(z.string()).optional(),
  enable_log_outputs: z.boolean().optional(),
  enable_token_usage: z.boolean().optional(),
  enable_async_engine_debug: z.boolean().optional(),
  engine_use_ray: z.boolean().optional(),
  disable_log_requests: z.boolean().optional(),
  max_log_len: z.number().optional(),

  // Additional engine configuration
  task: z.string().optional(),
  multi_modal_config: z.string().optional(),
  limit_mm_per_prompt: z.string().optional(),
  enable_sleep_mode: z.boolean().optional(),
  enable_chunking_request: z.boolean().optional(),
  compilation_config: z.string().optional(),
  disable_sliding_window_mask: z.boolean().optional(),
  enable_trtllm_engine_latency: z.boolean().optional(),
  override_pooling_config: z.string().optional(),
  override_neuron_config: z.string().optional(),
  override_kv_cache_align_size: z.number().optional(),
})

// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>

// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
  return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}

// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = VllmBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  return 'text' // ZodString and others default to text
}
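Since every key is optional, the schema is useful mainly for type-checking and for catching malformed values before a create request; note that a plain `z.object` silently strips unknown keys rather than rejecting them. A validation sketch:

```ts
import { VllmBackendOptionsSchema } from '@/schemas/backends/vllm'

const result = VllmBackendOptionsSchema.safeParse({
  model: 'microsoft/DialoGPT-medium',
  tensor_parallel_size: 2,
  gpu_memory_utilization: 0.9,
})
if (result.success) {
  // result.data is typed as VllmBackendOptions
} else {
  console.error(result.error.issues)
}
```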
webui/src/schemas/instanceOptions.ts
@@ -1,206 +1,27 @@
 import { BackendType } from '@/types/instance'
 import { z } from 'zod'
 
-// Define the LlamaCpp backend options schema
-export const LlamaCppBackendOptionsSchema = z.object({
-  /* ...removed verbatim; identical to the schema now in webui/src/schemas/backends/llamacpp.ts above... */
-})
-
-// Define the MLX backend options schema
-export const MlxBackendOptionsSchema = z.object({
-  /* ...removed verbatim; identical to the schema now in webui/src/schemas/backends/mlx.ts above... */
-})
+// Import backend schemas from separate files
+import {
+  LlamaCppBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getLlamaCppFieldType,
+  MlxBackendOptionsSchema,
+  type MlxBackendOptions,
+  getAllMlxFieldKeys,
+  getMlxFieldType,
+  VllmBackendOptionsSchema,
+  type VllmBackendOptions,
+  getAllVllmFieldKeys,
+  getVllmFieldType
+} from './backends'
+
+// Backend options union
+export const BackendOptionsSchema = z.union([
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
+])
 
 // Define the main create instance options schema
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
   on_demand_start: z.boolean().optional(),
 
   // Backend configuration
-  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
+  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
 })
 
+// Re-export types and schemas from backend files
+export {
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  type MlxBackendOptions,
+  type VllmBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getAllMlxFieldKeys,
+  getAllVllmFieldKeys,
+  getLlamaCppFieldType,
+  getMlxFieldType,
+  getVllmFieldType
+}
+
 // Infer the TypeScript types from the schemas
-export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
-export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
 export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
 
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }
 
-// Helper to get all LlamaCpp backend option field keys
-export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
-  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
-}
-
-// Helper to get all MLX backend option field keys
-export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
-  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
-}
-
 // Get field type from Zod schema
 export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   const fieldSchema = CreateInstanceOptionsSchema.shape[key]
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
   if (innerSchema instanceof z.ZodObject) return 'object'
   return 'text' // ZodString and others default to text
 }
-
-// Get field type for LlamaCpp backend options
-export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
-  if (!fieldSchema) return 'text'
-
-  // Handle ZodOptional wrapper
-  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
-
-  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
-  if (innerSchema instanceof z.ZodNumber) return 'number'
-  if (innerSchema instanceof z.ZodArray) return 'array'
-  return 'text' // ZodString and others default to text
-}
-
-// Get field type for MLX backend options
-export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = MlxBackendOptionsSchema.shape[key]
-  if (!fieldSchema) return 'text'
-
-  // Handle ZodOptional wrapper
-  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
-
-  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
-  if (innerSchema instanceof z.ZodNumber) return 'number'
-  if (innerSchema instanceof z.ZodArray) return 'array'
-  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
-  return 'text' // ZodString and others default to text
-}
webui/src/types/instance.ts
@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
 export const BackendType = {
   LLAMA_CPP: 'llama_cpp',
   MLX_LM: 'mlx_lm',
+  VLLM: 'vllm',
   // MLX_VLM: 'mlx_vlm', // Future expansion
 } as const
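End to end, this new enum member is what ties the UI, schema, and API together: the select input stores it, `CreateInstanceOptionsSchema` accepts it, and the server routes the instance to the vLLM backend. A final sketch of a payload that now validates:

```ts
import { BackendType } from '@/types/instance'
import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'

const payload = CreateInstanceOptionsSchema.parse({
  backend_type: BackendType.VLLM, // 'vllm'
  backend_options: { model: 'microsoft/DialoGPT-medium', tensor_parallel_size: 2 },
})
```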