Merge pull request #34 from lordmathis/feat/vllm-backend

feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
committed by GitHub
53 changed files with 3078 additions and 2968 deletions

View File

@@ -13,7 +13,7 @@
### 🔗 Universal Compatibility ### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by model name - **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized) - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
### 🌐 User-Friendly Interface ### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -31,6 +31,7 @@
# 1. Install backend (one-time setup) # 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start # For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm # For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# 2. Download and run llamactl # 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -47,7 +48,7 @@ llamactl
### Create and manage instances via web dashboard: ### Create and manage instances via web dashboard:
1. Open http://localhost:8080 1. Open http://localhost:8080
2. Click "Create Instance" 2. Click "Create Instance"
3. Choose backend type (llama.cpp or MLX) 3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options 4. Set model path and backend-specific options
5. Start or stop the instance 5. Start or stop the instance
@@ -63,6 +64,11 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
# Use with OpenAI SDK # Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \ curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
@@ -121,6 +127,21 @@ source mlx-env/bin/activate
pip install mlx-lm pip install mlx-lm
``` ```
**For vLLM backend:**
You need vLLM installed:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Configuration ## Configuration
llamactl works out of the box with sensible defaults. llamactl works out of the box with sensible defaults.
@@ -135,6 +156,7 @@ server:
backends: backends:
llama_executable: llama-server # Path to llama-server executable llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances

View File

@@ -19,6 +19,159 @@ const docTemplate = `{
"host": "{{.Host}}", "host": "{{.Host}}",
"basePath": "{{.BasePath}}", "basePath": "{{.BasePath}}",
"paths": { "paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -681,522 +834,46 @@ const docTemplate = `{
} }
}, },
"definitions": { "definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
}, },
"idle_timeout": { "idle_timeout": {
"description": "Idle timeout", "description": "Idle timeout",
"type": "integer" "type": "integer"
}, },
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1264,6 +941,14 @@ const docTemplate = `{
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
} }
} }
}` }`

View File

@@ -12,6 +12,159 @@
}, },
"basePath": "/api/v1", "basePath": "/api/v1",
"paths": { "paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -674,522 +827,46 @@
} }
}, },
"definitions": { "definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
}, },
"idle_timeout": { "idle_timeout": {
"description": "Idle timeout", "description": "Idle timeout",
"type": "integer" "type": "integer"
}, },
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1257,6 +934,14 @@
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
} }
} }
} }

View File

@@ -1,352 +1,35 @@
basePath: /api/v1 basePath: /api/v1
definitions: definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions: instance.CreateInstanceOptions:
properties: properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart: auto_restart:
description: Auto restart description: Auto restart
type: boolean type: boolean
batch_size: backend_options:
type: integer additionalProperties: {}
cache_reuse: type: object
type: integer backend_type:
cache_type_k: $ref: '#/definitions/backends.BackendType'
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
idle_timeout: idle_timeout:
description: Idle timeout description: Idle timeout
type: integer type: integer
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
type: integer
max_restarts: max_restarts:
type: integer type: integer
metrics:
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
on_demand_start: on_demand_start:
description: On demand start description: On demand start
type: boolean type: boolean
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
prio:
type: integer
prio_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay: restart_delay:
type: integer description: seconds
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temp:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
type: integer type: integer
type: object type: object
instance.InstanceStatus: instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
object: object:
type: string type: string
type: object type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info: info:
contact: {} contact: {}
description: llamactl is a control server for managing Llama Server instances. description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,102 @@ info:
title: llamactl API title: llamactl API
version: "1.0" version: "1.0"
paths: paths:
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances: /instances:
get: get:
description: Returns a list of all instances managed by the server description: Returns a list of all instances managed by the server

View File

@@ -22,6 +22,7 @@ server:
backends: backends:
llama_executable: llama-server # Path to llama-server executable llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances
@@ -94,11 +95,13 @@ server:
backends: backends:
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server") llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server") mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
``` ```
**Environment Variables:** **Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable - `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
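The executable paths above can also be overridden per run. A minimal sketch (the virtual-environment path is illustrative, not from this PR):

```bash
# Point llamactl at a vllm binary inside a dedicated virtual environment (illustrative path)
LLAMACTL_VLLM_EXECUTABLE=/opt/vllm-env/bin/vllm llamactl
```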
### Instance Configuration ### Instance Configuration

View File

@@ -37,6 +37,22 @@ pip install mlx-lm
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.) Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods ## Installation Methods
### Option 1: Download Binary (Recommended) ### Option 1: Download Binary (Recommended)

View File

@@ -29,8 +29,9 @@ You should see the Llamactl web interface.
1. Click the "Add Instance" button 1. Click the "Add Instance" button
2. Fill in the instance configuration: 2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name - **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Additional Options**: Any extra Llama.cpp parameters - **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance" 3. Click "Create Instance"
@@ -43,17 +44,46 @@ Once created, you can:
- **View logs** by clicking the logs button - **View logs** by clicking the logs button
- **Stop** the instance when needed - **Stop** the instance when needed
## Example Configuration ## Example Configurations
Here's a basic example configuration for a Llama 2 model: Here are basic example configurations for each backend:
**llama.cpp backend:**
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"model_path": "/path/to/llama-2-7b-chat.gguf", "backend_type": "llama_cpp",
"options": { "backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4, "threads": 4,
"context_size": 2048 "ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
} }
} }
``` ```
@@ -66,12 +96,14 @@ You can also manage instances via the REST API:
# List all instances # List all instances
curl http://localhost:8080/api/instances curl http://localhost:8080/api/instances
# Create a new instance # Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances \ curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"name": "my-model", "backend_type": "llama_cpp",
"model_path": "/path/to/model.gguf", "backend_options": {
"model": "/path/to/model.gguf"
}
}' }'
# Start an instance # Start an instance

View File

@@ -170,7 +170,7 @@ POST /api/v1/instances/{name}/start
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "starting", "status": "running",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -191,7 +191,7 @@ POST /api/v1/instances/{name}/stop
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "stopping", "status": "stopped",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -208,7 +208,7 @@ POST /api/v1/instances/{name}/restart
```json ```json
{ {
"name": "llama2-7b", "name": "llama2-7b",
"status": "restarting", "status": "running",
"created": 1705312200 "created": 1705312200
} }
``` ```
@@ -401,6 +401,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
}' }'
``` ```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
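For example, a sketch of calling the llama.cpp parse endpoint with curl (assuming the default port and an API key; adjust both for your setup):

```bash
# Parse a llama-server command string into instance options
curl -X POST http://localhost:8080/api/v1/backends/llama-cpp/parse-command \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"}'
```

The response is the parsed options object shown above.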
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation ## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at: If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:

View File

@@ -1,6 +1,6 @@
# Managing Instances # Managing Instances
Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API. Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview ## Overview
@@ -42,9 +42,11 @@ Each instance is displayed as a card showing:
3. **Choose Backend Type**: 3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server - **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only) - **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source: 4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo - **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings: 5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure - **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts - **Max Restarts**: Maximum number of restart attempts
@@ -54,6 +56,7 @@ Each instance is displayed as a card showing:
6. Configure backend-specific options: 6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc. - **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance 7. Click **"Create"** to save the instance
### Via API ### Via API
@@ -87,6 +90,20 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
"max_restarts": 3 "max_restarts": 3
}' }'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true
}'
# Create llama.cpp instance with HuggingFace model # Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
@@ -179,16 +196,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy ## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server or MLX). Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash ```bash
# Get instance details # Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/ curl http://localhost:8080/api/instances/{name}/proxy/
``` ```
Both backends provide OpenAI-compatible endpoints. Check the respective documentation: All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) - [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md) - [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
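Because every backend speaks the OpenAI API, requests can also go through the shared OpenAI-compatible route, which routes by model name. A sketch assuming the default port, an API key, and an instance named `my-model`:

```bash
# Chat completion routed to the instance whose name matches the "model" field
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}'
```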
### Instance Health ### Instance Health

View File

@@ -5,5 +5,6 @@ type BackendType string
const ( const (
BackendTypeLlamaCpp BackendType = "llama_cpp" BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm" BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion // BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
) )

pkg/backends/builder.go (new file, 70 lines)
View File

@@ -0,0 +1,70 @@
package backends
import (
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
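To illustrate the builder, a minimal sketch of how BuildCommandArgs maps JSON-tagged fields to flags; the options struct here is hypothetical and only stands in for a real backend options type:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// demoOptions is a hypothetical options struct used only for this example.
type demoOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &demoOptions{
		Model:     "/path/to/model.gguf",
		GPULayers: 32,
		Verbose:   true,
		Lora:      []string{"a.bin", "b.bin"},
	}
	// "lora" is declared as a repeated flag, so it expands to --lora a.bin --lora b.bin;
	// any other string slice would be emitted as a single comma-separated value.
	args := backends.BuildCommandArgs(opts, map[string]bool{"lora": true})
	fmt.Println(args)
	// [--model /path/to/model.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]
}
```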

View File

@@ -2,9 +2,9 @@ package llamacpp
import ( import (
"encoding/json" "encoding/json"
"llamactl/pkg/backends"
"reflect" "reflect"
"strconv" "strconv"
"strings"
) )
type LlamaServerOptions struct { type LlamaServerOptions struct {
@@ -315,62 +315,44 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments // BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string { func (o *LlamaServerOptions) BuildCommandArgs() []string {
var args []string // Llama uses multiple flags for arrays by default (not comma-separated)
multipleFlags := map[string]bool{
v := reflect.ValueOf(o).Elem() "override-tensor": true,
t := v.Type() "override-kv": true,
"lora": true,
for i := 0; i < v.NumField(); i++ { "lora-scaled": true,
field := v.Field(i) "control-vector": true,
fieldType := t.Field(i) "control-vector-scaled": true,
"dry-sequence-breaker": true,
// Skip unexported fields "logit-bias": true,
if !field.CanInterface() { }
continue return backends.BuildCommandArgs(o, multipleFlags)
} }
// Get the JSON tag to determine the flag name // ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
jsonTag := fieldType.Tag.Get("json") // Supports multiple formats:
if jsonTag == "" || jsonTag == "-" { // 1. Full command: "llama-server --model file.gguf"
continue // 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
multiValuedFlags := map[string]bool{
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
} }
// Remove ",omitempty" from the tag var llamaOptions LlamaServerOptions
flagName := jsonTag if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 { return nil, err
flagName = jsonTag[:commaIndex]
} }
// Convert snake_case to kebab-case for CLI flags return &llamaOptions, nil
flagName = strings.ReplaceAll(flagName, "_", "-")
// Add the appropriate arguments based on field type and value
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
// Handle []string fields
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
}
}
}
return args
} }

View File

@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
} }
} }
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions // Helper functions
func contains(slice []string, item string) bool { func contains(slice []string, item string) bool {
return slices.Contains(slice, item) return slices.Contains(slice, item)

View File

@@ -1,286 +0,0 @@
package llamacpp
import (
"encoding/json"
"errors"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
// Known multi-valued flags (snake_case form)
multiValued := map[string]struct{}{
"override_tensor": {},
"override_kv": {},
"lora": {},
"lora_scaled": {},
"control_vector": {},
"control_vector_scaled": {},
"dry_sequence_breaker": {},
"logit_bias": {},
}
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
// Determine if multi-valued flag
_, isMulti := multiValued[flagName]
// Normalization helper: ensure slice for multi-valued flags
appendValue := func(valStr string) {
if existing, ok := options[flagName]; ok {
// Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
if slice, ok := existing.([]string); ok {
options[flagName] = append(slice, valStr)
return
}
// Convert scalar to slice
options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
return
}
// First value
if isMulti {
options[flagName] = []string{valStr}
} else {
// We'll parse type below for single-valued flags
options[flagName] = valStr
}
}
if valueProvided {
// Use raw token for multi-valued flags; else allow typed parsing
appendValue(rawValue)
if !isMulti { // convert to typed value if scalar
if strVal, ok := options[flagName].(string); ok { // still scalar
options[flagName] = parseValue(strVal)
}
}
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value)
options[flagName] = true
i++
}
// 4. Convert to LlamaServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var llamaOptions LlamaServerOptions
if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return LlamaServerOptions
return &llamaOptions, nil
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with llama-server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "llama-server" command
if strings.ToLower(firstToken) == "llama-server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, errors.New("unterminated quoted string")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
if !strings.HasPrefix(arg, "-") {
return false
}
// Special case: if it's a negative number, treat it as a value
if _, err := strconv.ParseFloat(arg, 64); err == nil {
return false
}
return true
}

View File

@@ -1,413 +0,0 @@
package llamacpp
import (
"testing"
)
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command with model",
command: "llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "command with multiple flags",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
expectErr: false,
},
{
name: "command with short flags",
command: "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
expectErr: false,
},
{
name: "command with equals format",
command: "llama-server --model=/path/to/model.gguf --gpu-layers=32",
expectErr: false,
},
{
name: "command with boolean flags",
command: "llama-server --model /path/to/model.gguf --verbose --no-mmap",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "case insensitive command",
command: "LLAMA-SERVER --model /path/to/model.gguf",
expectErr: false,
},
// New test cases for improved functionality
{
name: "args only without llama-server",
command: "--model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "full path to executable",
command: "/usr/local/bin/llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "negative number handling",
command: "llama-server --gpu-layers -1 --model test.gguf",
expectErr: false,
},
{
name: "multiline command with backslashes",
command: "llama-server --model /path/to/model.gguf \\\n --ctx-size 4096 \\\n --batch-size 512",
expectErr: false,
},
{
name: "quoted string with special characters",
command: `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
expectErr: false,
},
{
name: "unterminated quoted string",
command: `llama-server --model test.gguf --chat-template "unterminated quote`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
})
}
}
func TestParseLlamaCommandSpecificValues(t *testing.T) {
// Test specific value parsing
command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandArrayFlags(t *testing.T) {
// Test array flag handling (critical for lora, override-tensor, etc.)
command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
}
}
func TestParseLlamaCommandMixedFormats(t *testing.T) {
// Test mixing --flag=value and --flag value formats
command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/model.gguf" {
t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandTypeConversion(t *testing.T) {
// Test that values are converted to appropriate types
command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
}
}
func TestParseLlamaCommandArgsOnly(t *testing.T) {
// Test parsing arguments without llama-server command
command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
}
func TestParseLlamaCommandFullPath(t *testing.T) {
// Test full path to executable
command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
// Test negative number parsing
command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Seed != -12345 {
t.Errorf("expected seed -12345, got %d", result.Seed)
}
}
func TestParseLlamaCommandMultiline(t *testing.T) {
// Test multiline command with backslashes
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers 32`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandQuotedStrings(t *testing.T) {
// Test quoted strings with special characters
command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
expectedTemplate := "User: {user}\\nAssistant: "
if result.ChatTemplate != expectedTemplate {
t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
}
}
func TestParseLlamaCommandUnslothExample(t *testing.T) {
// Test with realistic unsloth-style command
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers -1 \
--temp 0.7 \
--repeat-penalty 1.1 \
--top-k 40 \
--top-p 0.95 \
--host 0.0.0.0 \
--port 8000 \
--api-key "sk-1234567890abcdef"`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Verify key fields
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.RepeatPenalty != 1.1 {
t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if result.TopP != 0.95 {
t.Errorf("expected top_p 0.95, got %f", result.TopP)
}
if result.Host != "0.0.0.0" {
t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
}
if result.Port != 8000 {
t.Errorf("expected port 8000, got %d", result.Port)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
}
// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "my model.gguf" {
t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
}
if result.Alias != "Test Alias" {
t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
}
}
func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
// Same multi-value flag using --flag value and --flag=value forms
cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 3 {
t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
}
expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
func TestParseLlamaCommandMalformedFlag(t *testing.T) {
cmd := "llama-server ---model test.gguf"
_, err := ParseLlamaCommand(cmd)
if err == nil {
t.Fatalf("expected error for malformed flag but got none")
}
}

View File

@@ -1,9 +1,7 @@
package mlx
import (
- "encoding/json"
- "reflect"
- "strconv"
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
@@ -25,181 +23,34 @@ type MlxServerOptions struct {
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
- Temp float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to handle multiple field names
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions MlxServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = MlxServerOptions(temp)
// Handle alternative field names
fieldMappings := map[string]string{
// Basic connection options
"m": "model",
"host": "host",
"port": "port",
// "python_path": "python_path", // removed
// Model and adapter options
"adapter-path": "adapter_path",
"draft-model": "draft_model",
"num-draft-tokens": "num_draft_tokens",
"trust-remote-code": "trust_remote_code",
// Logging and templates
"log-level": "log_level",
"chat-template": "chat_template",
"use-default-chat-template": "use_default_chat_template",
"chat-template-args": "chat_template_args",
// Sampling defaults
"temperature": "temp", // Support both temp and temperature
"top-p": "top_p",
"top-k": "top_k",
"min-p": "min_p",
"max-tokens": "max_tokens",
}
// Process alternative field names
for altName, canonicalName := range fieldMappings {
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
field := v.FieldByNameFunc(func(fieldName string) bool {
field, _ := v.Type().FieldByName(fieldName)
jsonTag := field.Tag.Get("json")
return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
}
}
}
return nil
}
// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
return &MlxServerOptions{
Host: "127.0.0.1", // MLX default (different from llama-server)
Port: 8080, // MLX default
NumDraftTokens: 3, // MLX default for speculative decoding
LogLevel: "INFO", // MLX default
Temp: 0.0, // MLX default
TopP: 1.0, // MLX default
TopK: 0, // MLX default (disabled)
MinP: 0.0, // MLX default (disabled)
MaxTokens: 512, // MLX default
ChatTemplateArgs: "{}", // MLX default (empty JSON object)
}
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
- var args []string
- // Required and basic options
- if o.Model != "" {
- args = append(args, "--model", o.Model)
- }
- if o.Host != "" {
- args = append(args, "--host", o.Host)
- }
- if o.Port != 0 {
- args = append(args, "--port", strconv.Itoa(o.Port))
- }
- // Model and adapter options
- if o.AdapterPath != "" {
- args = append(args, "--adapter-path", o.AdapterPath)
- }
- if o.DraftModel != "" {
- args = append(args, "--draft-model", o.DraftModel)
- }
- if o.NumDraftTokens != 0 {
- args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
- }
- if o.TrustRemoteCode {
- args = append(args, "--trust-remote-code")
- }
- // Logging and templates
- if o.LogLevel != "" {
- args = append(args, "--log-level", o.LogLevel)
- }
- if o.ChatTemplate != "" {
- args = append(args, "--chat-template", o.ChatTemplate)
- }
- if o.UseDefaultChatTemplate {
- args = append(args, "--use-default-chat-template")
- }
- if o.ChatTemplateArgs != "" {
- args = append(args, "--chat-template-args", o.ChatTemplateArgs)
- }
- // Sampling defaults
- if o.Temp != 0 {
- args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
- }
- if o.TopP != 0 {
- args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
- }
- if o.TopK != 0 {
- args = append(args, "--top-k", strconv.Itoa(o.TopK))
- }
- if o.MinP != 0 {
- args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
- }
- if o.MaxTokens != 0 {
- args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
- }
- return args
- }
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}
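For orientation, a minimal usage sketch of the refactored MLX API (illustrative only; the model name is a placeholder):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Parse a command string into typed options...
	opts, err := mlx.ParseMlxCommand("mlx_lm.server --model my-org/my-mlx-model --temp 0.7 --max-tokens 2048")
	if err != nil {
		panic(err)
	}

	// ...and turn them back into the flag list passed to mlx_lm.server.
	fmt.Println(opts.BuildCommandArgs())
}
```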

View File

@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

View File

@@ -1,254 +0,0 @@
package mlx
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
if valueProvided {
// MLX-specific validation for certain flags
if flagName == "log_level" && !isValidLogLevel(rawValue) {
return nil, fmt.Errorf("invalid log level: %s", rawValue)
}
options[flagName] = parseValue(rawValue)
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value) - MLX specific boolean flags
if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
options[flagName] = true
} else {
options[flagName] = true
}
i++
}
// 4. Convert to MlxServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var mlxOptions MlxServerOptions
if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return MlxServerOptions
return &mlxOptions, nil
}
// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
for _, valid := range validLevels {
if level == valid {
return true
}
}
return false
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "mlx_lm.server" command
if strings.ToLower(firstToken) == "mlx_lm.server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, fmt.Errorf("unclosed quote in command")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
return strings.HasPrefix(s, "-")
}

213
pkg/backends/parser.go Normal file
View File

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
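To make the contract of ParseCommand concrete, here is a small illustrative sketch with a hypothetical target struct (not code from this PR); the JSON tags on the struct drive the flag-to-field mapping:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// toyOptions is a hypothetical target struct for illustration only.
type toyOptions struct {
	Model   string   `json:"model,omitempty"`
	Port    int      `json:"port,omitempty"`
	Verbose bool     `json:"verbose,omitempty"`
	Lora    []string `json:"lora,omitempty"`
}

func main() {
	var opts toyOptions
	multi := map[string]bool{"lora": true} // flags that may repeat

	err := backends.ParseCommand(
		"my-server --model /m.gguf --port 8001 --verbose --lora a.bin --lora b.bin",
		[]string{"my-server"}, // executable names to strip
		nil,                   // no subcommands
		multi,
		&opts,
	)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", opts)
}
```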

189
pkg/backends/vllm/vllm.go Normal file
View File

@@ -0,0 +1,189 @@
package vllm
import (
"llamactl/pkg/backends"
)
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand; that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy of the options without the Model field to avoid including it as --model flag
optionsCopy := *o
optionsCopy.Model = "" // Clear model field so it won't be included as a flag
multipleFlags := map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
// Build the rest of the arguments as flags
flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}
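A usage sketch of the two functions above (illustrative only; the model name is a placeholder): ParseVllmCommand accepts the multiline form, and BuildCommandArgs emits the model as the leading positional argument, leaving "serve" to the instance-level BuildCommandArgs:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm"
)

func main() {
	opts, err := vllm.ParseVllmCommand(`vllm serve my-org/my-model \
		--tensor-parallel-size 2 \
		--gpu-memory-utilization 0.9`)
	if err != nil {
		panic(err)
	}

	// Positional model first, then the remaining flags; the "serve"
	// subcommand is prepended later at the instance level, not here.
	fmt.Println(opts.BuildCommandArgs())
}
```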

View File

@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

View File

@@ -17,6 +17,9 @@ type BackendConfig struct {
// Path to mlx_lm executable (MLX-LM backend)
MLXLMExecutable string `yaml:"mlx_lm_executable"`
// Path to vllm executable (vLLM backend)
VllmExecutable string `yaml:"vllm_executable"`
}
// AppConfig represents the configuration for llamactl
@@ -122,6 +125,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Backends: BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
VllmExecutable: "vllm",
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
@@ -246,6 +250,9 @@ func loadEnvVars(cfg *AppConfig) {
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
cfg.Backends.MLXLMExecutable = mlxLMExec
}
if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
cfg.Backends.VllmExecutable = vllmExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b

View File

@@ -105,6 +105,10 @@ func (i *Process) GetPort() int {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
}
return 0
@@ -123,6 +127,10 @@ func (i *Process) GetHost() string {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
@@ -176,6 +184,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))

View File

@@ -52,6 +52,8 @@ func (i *Process) Start() error {
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
case backends.BackendTypeVllm:
executable = i.globalBackendSettings.VllmExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
@@ -200,6 +202,11 @@ func (i *Process) WaitForHealthy(timeout int) error {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"

View File

@@ -6,6 +6,7 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx" "llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
) )
@@ -26,6 +27,7 @@ type CreateInstanceOptions struct {
// Backend-specific options // Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"` LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"` MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
} }
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -69,6 +71,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return fmt.Errorf("failed to unmarshal MLX options: %w", err) return fmt.Errorf("failed to unmarshal MLX options: %w", err)
} }
} }
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default: default:
return fmt.Errorf("unknown backend type: %s", c.BackendType) return fmt.Errorf("unknown backend type: %s", c.BackendType)
} }
@@ -114,6 +128,20 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err) return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
} }
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts aux.BackendOptions = backendOpts
} }
} }
@@ -171,6 +199,13 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
if c.MlxServerOptions != nil { if c.MlxServerOptions != nil {
return c.MlxServerOptions.BuildCommandArgs() return c.MlxServerOptions.BuildCommandArgs()
} }
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
// Prepend "serve" as first argument
args := []string{"serve"}
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
return args
}
} }
return []string{} return []string{}
} }
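Putting the config and options pieces together, a simplified sketch (not the actual lifecycle code) of how a vLLM instance's process command is assembled:

```go
package main

import (
	"fmt"
	"os/exec"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
)

// buildCmd is a simplified sketch, not the real process-management code: the
// executable comes from the backend config and the args begin with "serve"
// followed by the positional model and the remaining flags.
func buildCmd(backendCfg config.BackendConfig, opts *instance.CreateInstanceOptions) *exec.Cmd {
	return exec.Command(backendCfg.VllmExecutable, opts.BuildCommandArgs()...)
}

func main() {
	cfg := config.BackendConfig{VllmExecutable: "vllm"}
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeVllm,
		VllmServerOptions: &vllm.VllmServerOptions{
			Model:              "my-org/my-model", // placeholder model name
			TensorParallelSize: 2,
		},
	}
	fmt.Println(buildCmd(cfg, opts).String())
}
```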

View File

@@ -264,6 +264,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
}
@@ -279,6 +283,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
}

View File

@@ -8,6 +8,7 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx" "llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/manager" "llamactl/pkg/manager"
@@ -739,3 +740,56 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
} }
} }
} }
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
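For completeness, a hypothetical client call against the new endpoint (a sketch; the "command" field name and the /api/v1 prefix follow the existing handlers and routes, and the API key header is only needed when auth is enabled):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical request body; the model name is a placeholder.
	body := []byte(`{"command": "vllm serve my-org/my-model --tensor-parallel-size 2"}`)

	req, err := http.NewRequest("POST",
		"http://localhost:8080/api/v1/backends/vllm/parse-command",
		bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// On success the response is the CreateInstanceOptions JSON produced above.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```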

View File

@@ -58,6 +58,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route("/mlx", func(r chi.Router) { r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand()) r.Post("/parse-command", handler.ParseMlxCommand())
}) })
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
}) })
// Instance management endpoints // Instance management endpoints

View File

@@ -46,6 +46,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
@@ -88,6 +90,25 @@ func validateMlxOptions(options *instance.CreateInstanceOptions) error {
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)

View File

@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Cpu, Zap, Server } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
if (!backend) {
return null;
}
const getIcon = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return <Cpu className="h-3 w-3" />;
case BackendType.MLX_LM:
return <Zap className="h-3 w-3" />;
case BackendType.VLLM:
return <Server className="h-3 w-3" />;
default:
return <Server className="h-3 w-3" />;
}
};
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getVariant = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "secondary";
case BackendType.MLX_LM:
return "outline";
case BackendType.VLLM:
return "default";
default:
return "secondary";
}
};
return (
<Badge
variant={getVariant()}
className="flex items-center gap-1.5"
>
{getIcon()}
<span className="text-xs">{getText()}</span>
</Badge>
);
};
export default BackendBadge;
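
A brief usage sketch (hypothetical, mirroring how InstanceCard uses it below): the badge resolves its icon, label, and variant from the switches above and renders nothing when no backend is set.

```tsx
import React from "react";
import BackendBadge from "@/components/BackendBadge";
import { BackendType } from "@/types/instance";

// Illustrative only:
const badges = (
  <>
    <BackendBadge backend={BackendType.LLAMA_CPP} /> {/* Cpu icon, "llama.cpp", secondary */}
    <BackendBadge backend={BackendType.VLLM} />      {/* Server icon, "vLLM", default */}
    <BackendBadge />                                  {/* no backend -> renders null */}
  </>
);
```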

View File

@@ -45,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -72,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -99,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}

View File

@@ -5,6 +5,7 @@ import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react"; import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import LogsDialog from "@/components/LogDialog"; import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge"; import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react"; import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth"; import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -58,7 +59,10 @@ function InstanceCard({
<CardHeader className="pb-3"> <CardHeader className="pb-3">
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle> <CardTitle className="text-lg">{instance.name}</CardTitle>
<div className="flex flex-col items-end gap-2">
{running && <HealthBadge health={health} />} {running && <HealthBadge health={health} />}
<BackendBadge backend={instance.options?.backend_type} />
</div>
</div> </div>
</CardHeader> </CardHeader>

View File

@@ -11,11 +11,13 @@ import {
DialogTitle, DialogTitle,
} from "@/components/ui/dialog"; } from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance"; import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils"; import { getAdvancedFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight, Terminal } from "lucide-react"; import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
import ParseCommandDialog from "@/components/ParseCommandDialog"; import ParseCommandDialog from "@/components/ParseCommandDialog";
import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";
import BasicInstanceFields from "@/components/instance/BasicInstanceFields";
import BackendConfiguration from "@/components/instance/BackendConfiguration";
import AdvancedInstanceFields from "@/components/instance/AdvancedInstanceFields";
interface InstanceDialogProps { interface InstanceDialogProps {
open: boolean; open: boolean;
@@ -39,9 +41,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
const [showParseDialog, setShowParseDialog] = useState(false); const [showParseDialog, setShowParseDialog] = useState(false);
// Get field lists dynamically from the type // Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields(); const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields(formData.backend_type);
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type); const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);
// Reset form when dialog opens/closes or when instance changes // Reset form when dialog opens/closes or when instance changes
@@ -163,8 +163,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setShowParseDialog(false); setShowParseDialog(false);
}; };
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic // Save button label logic
let saveButtonLabel = "Create Instance"; let saveButtonLabel = "Create Instance";
@@ -212,70 +210,23 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div> </div>
{/* Auto Restart Configuration Section */} {/* Auto Restart Configuration Section */}
<div className="space-y-4"> <AutoRestartConfiguration
<h3 className="text-lg font-medium"> formData={formData}
Auto Restart Configuration
</h3>
{/* Auto Restart Toggle */}
<ZodFormField
fieldKey="auto_restart"
value={formData.auto_restart}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
{/* Show restart options only when auto restart is enabled */} {/* Basic Fields */}
{isAutoRestartEnabled && ( <BasicInstanceFields
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4"> formData={formData}
<ZodFormField
fieldKey="max_restarts"
value={formData.max_restarts}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
<ZodFormField
fieldKey="restart_delay"
value={formData.restart_delay}
onChange={handleFieldChange}
/>
</div>
)}
</div>
{/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{basicFields
.filter(
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay" &&
fieldKey !== "backend_options" // backend_options is handled separately
)
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
{/* Backend Configuration Section */} {/* Backend Configuration Section */}
<div className="space-y-4"> <BackendConfiguration
<h3 className="text-lg font-medium">Backend Configuration</h3> formData={formData}
onBackendFieldChange={handleBackendFieldChange}
{/* Basic backend fields */} showAdvanced={showAdvanced}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/> />
))}
</div>
{/* Advanced Fields Toggle */} {/* Advanced Fields Toggle */}
<div className="border-t pt-4"> <div className="border-t pt-4">
@@ -314,54 +265,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div> </div>
</div> </div>
{/* Advanced Fields - Automatically generated from type (excluding restart options) */} {/* Advanced Fields */}
{showAdvanced && ( {showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted"> <div className="space-y-4 pl-6 border-l-2 border-muted">
{/* Advanced instance fields */} <AdvancedInstanceFields
{advancedFields formData={formData}
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
).length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
)
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
))}
</div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
</div> </div>
)} )}
</div> </div>

View File

@@ -9,7 +9,7 @@ import {
DialogHeader, DialogHeader,
DialogTitle, DialogTitle,
} from "@/components/ui/dialog"; } from "@/components/ui/dialog";
import { type CreateInstanceOptions } from "@/types/instance"; import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api"; import { backendsApi } from "@/lib/api";
import { toast } from "sonner"; import { toast } from "sonner";
@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
onParsed, onParsed,
}) => { }) => {
const [command, setCommand] = useState(''); const [command, setCommand] = useState('');
const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
setError(null); setError(null);
try { try {
const options = await backendsApi.llamaCpp.parseCommand(command); let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options); onParsed(options);
onOpenChange(false); onOpenChange(false);
// Reset form
setCommand(''); setCommand('');
setError(null); setError(null);
// Show success toast
toast.success('Command parsed successfully'); toast.success('Command parsed successfully');
} catch (err) { } catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command'; const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage); setError(errorMessage);
// Show error toast
toast.error('Failed to parse command', { toast.error('Failed to parse command', {
description: errorMessage description: errorMessage
}); });
@@ -60,31 +74,55 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
const handleOpenChange = (open: boolean) => { const handleOpenChange = (open: boolean) => {
if (!open) { if (!open) {
// Reset form when closing
setCommand(''); setCommand('');
setBackendType(BackendType.LLAMA_CPP);
setError(null); setError(null);
} }
onOpenChange(open); onOpenChange(open);
}; };
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return ( return (
<Dialog open={open} onOpenChange={handleOpenChange}> <Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]"> <DialogContent className="sm:max-w-[600px]">
<DialogHeader> <DialogHeader>
<DialogTitle>Parse Llama Server Command</DialogTitle> <DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription> <DialogDescription>
Paste your llama-server command to automatically populate the form fields Select your backend type and paste the command to automatically populate the form fields
</DialogDescription> </DialogDescription>
</DialogHeader> </DialogHeader>
<div className="space-y-4"> <div className="space-y-4">
<div>
<Label htmlFor="backend-type">Backend Type</Label>
<select
id="backend-type"
value={backendType}
onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
</div>
<div> <div>
<Label htmlFor="command">Command</Label> <Label htmlFor="command">Command</Label>
<textarea <textarea
id="command" id="command"
value={command} value={command}
onChange={(e) => setCommand(e.target.value)} onChange={(e) => setCommand(e.target.value)}
placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096" placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2" className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/> />
</div> </div>

View File

@@ -29,7 +29,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<select <select
id={fieldKey} id={fieldKey}
@@ -39,6 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
> >
<option value={BackendType.LLAMA_CPP}>Llama Server</option> <option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option> <option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select> </select>
{config.description && ( {config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p> <p className="text-sm text-muted-foreground">{config.description}</p>
@@ -70,7 +70,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -97,7 +96,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}
@@ -124,7 +122,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2"> <div className="grid gap-2">
<Label htmlFor={fieldKey}> <Label htmlFor={fieldKey}>
{config.label} {config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label> </Label>
<Input <Input
id={fieldKey} id={fieldKey}

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput
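
A hedged usage sketch: the component keeps form state as `string[] | undefined`, so clearing the field drops the option from the payload instead of sending an empty array. The field name and wrapper component below are hypothetical.

```tsx
import React, { useState } from "react";
import ArrayInput from "@/components/form/ArrayInput";

// Illustrative only: typing "GET, POST" stores ["GET", "POST"]; clearing the
// input stores undefined so the option is omitted entirely.
function AllowedMethodsExample() {
  const [methods, setMethods] = useState<string[] | undefined>(undefined);
  return (
    <ArrayInput
      id="allowed_methods"
      label="Allowed Methods"
      value={methods}
      onChange={setMethods}
      placeholder="GET, POST"
    />
  );
}

export default AllowedMethodsExample;
```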

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,98 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getAdvancedFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import ArrayInput from '@/components/form/ArrayInput'
interface AdvancedInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AdvancedInstanceFields: React.FC<AdvancedInstanceFieldsProps> = ({
formData,
onChange
}) => {
const advancedFields = getAdvancedFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
case 'array':
return (
<ArrayInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string[] | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out restart options and backend_options (handled separately)
const fieldsToRender = advancedFields.filter(
fieldKey => !['max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
if (fieldsToRender.length === 0) {
return null
}
return (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{fieldsToRender
.sort()
.map(renderField)}
</div>
)
}
export default AdvancedInstanceFields

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
const isAutoRestartEnabled = formData.auto_restart === true
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,99 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
formData,
onChange
}) => {
const basicFields = getBasicFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
// Special handling for backend_type field
if (fieldKey === 'backend_type') {
return (
<SelectInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] || BackendType.LLAMA_CPP}
onChange={(value) => onChange(fieldKey, value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description={config.description}
/>
)
}
// Render based on field type
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out auto restart fields and backend_options (handled separately)
const fieldsToRender = basicFields.filter(
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{fieldsToRender.map(renderField)}
</div>
)
}
export default BasicInstanceFields

View File

@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance"; import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1"; const API_BASE = "/api/v1";
@@ -30,25 +31,8 @@ async function apiCall<T>(
headers, headers,
}); });
// Handle authentication errors // Handle errors using centralized error handler
if (response.status === 401) { await handleApiError(response);
throw new Error('Authentication required');
}
if (!response.ok) {
// Try to get error message from response
let errorMessage = `HTTP ${response.status}`;
try {
const errorText = await response.text();
if (errorText) {
errorMessage += `: ${errorText}`;
}
} catch {
// If we can't read the error, just use status
}
throw new Error(errorMessage);
}
// Handle empty responses (like DELETE) // Handle empty responses (like DELETE)
if (response.status === 204) { if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
const text = await response.text(); const text = await response.text();
return text as T; return text as T;
} else { } else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
}
const data = await response.json() as T; const data = await response.json() as T;
return data; return data;
} }
@@ -101,6 +93,14 @@ export const backendsApi = {
body: JSON.stringify({ command }), body: JSON.stringify({ command }),
}), }),
}, },
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
}; };
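
A short sketch of the new client call (values hypothetical); it mirrors the existing llamaCpp and mlx parse helpers:

```typescript
import { backendsApi } from "@/lib/api";

// Illustrative only: parse a pasted vLLM command into CreateInstanceOptions.
async function prefillFromVllmCommand() {
  const options = await backendsApi.vllm.parseCommand(
    "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9"
  );
  // options.backend_type is "vllm"; options.backend_options carries the parsed flags.
  return options;
}
```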
// Instance API functions // Instance API functions

View File

@@ -0,0 +1,32 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
throw new Error(errorMessage)
}
}
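
A minimal sketch of the pattern `apiCall` adopts above: run the helper before touching the body so 401s and other non-2xx statuses become thrown Errors in one place. The `@/lib/errorUtils` alias is assumed to match the project's `@/lib` convention.

```typescript
import { handleApiError } from "@/lib/errorUtils";

// Illustrative only:
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  // Throws "Authentication required" on 401, "HTTP <status>: <body>" otherwise.
  await handleApiError(response);
  return (await response.json()) as T;
}
```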

View File

@@ -2,13 +2,17 @@ import {
type CreateInstanceOptions, type CreateInstanceOptions,
type LlamaCppBackendOptions, type LlamaCppBackendOptions,
type MlxBackendOptions, type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema, LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema, MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllFieldKeys, getAllFieldKeys,
getAllLlamaCppFieldKeys, getAllLlamaCppFieldKeys,
getAllMlxFieldKeys, getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType, getLlamaCppFieldType,
getMlxFieldType getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions' } from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific) // Instance-level basic fields (not backend-specific)
@@ -16,7 +20,6 @@ export const basicFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
auto_restart: { auto_restart: {
label: 'Auto Restart', label: 'Auto Restart',
@@ -52,13 +55,11 @@ const basicLlamaCppFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
model: { model: {
label: 'Model Path', label: 'Model Path',
placeholder: '/path/to/model.gguf', placeholder: '/path/to/model.gguf',
description: 'Path to the model file', description: 'Path to the model file'
required: true
}, },
hf_repo: { hf_repo: {
label: 'Hugging Face Repository', label: 'Hugging Face Repository',
@@ -82,13 +83,11 @@ const basicMlxFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
model: { model: {
label: 'Model', label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit', placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config', description: 'The path to the MLX model weights, tokenizer, and config'
required: true
}, },
temp: { temp: {
label: 'Temperature', label: 'Temperature',
@@ -117,11 +116,46 @@ const basicMlxFieldsConfig: Record<string, {
} }
} }
// vLLM backend-specific basic fields
const basicVllmFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'microsoft/DialoGPT-medium',
description: 'The name or path of the Hugging Face model to use'
},
tensor_parallel_size: {
label: 'Tensor Parallel Size',
placeholder: '1',
description: 'Number of GPUs to use for distributed serving'
},
gpu_memory_utilization: {
label: 'GPU Memory Utilization',
placeholder: '0.9',
description: 'The fraction of GPU memory to be used for the model executor'
}
}
// Backend field configuration lookup
const backendFieldConfigs = {
mlx_lm: basicMlxFieldsConfig,
vllm: basicVllmFieldsConfig,
llama_cpp: basicLlamaCppFieldsConfig,
} as const
const backendFieldGetters = {
mlx_lm: getAllMlxFieldKeys,
vllm: getAllVllmFieldKeys,
llama_cpp: getAllLlamaCppFieldKeys,
} as const
function isBasicField(key: keyof CreateInstanceOptions): boolean { function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig return key in basicFieldsConfig
} }
export function getBasicFields(): (keyof CreateInstanceOptions)[] { export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[] return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
} }
@@ -130,25 +164,18 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key)) return getAllFieldKeys().filter(key => !isBasicField(key))
} }
export function getBasicBackendFields(backendType?: string): string[] { export function getBasicBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') { const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
return Object.keys(basicMlxFieldsConfig) const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
} else if (backendType === 'llama_cpp') { return Object.keys(config)
return Object.keys(basicLlamaCppFieldsConfig)
}
// Default to LlamaCpp for backward compatibility
return Object.keys(basicLlamaCppFieldsConfig)
} }
export function getAdvancedBackendFields(backendType?: string): string[] { export function getAdvancedBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') { const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig)) const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
} else if (backendType === 'llama_cpp') { const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
} return fieldGetter().filter(key => !(key in basicConfig))
// Default to LlamaCpp for backward compatibility
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
} }
// Combined backend fields config for use in BackendFormField // Combined backend fields config for use in BackendFormField
@@ -156,10 +183,10 @@ export const basicBackendFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = { }> = {
...basicLlamaCppFieldsConfig, ...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig ...basicMlxFieldsConfig,
...basicVllmFieldsConfig
} }
// Get field type for any backend option (union type) // Get field type for any backend option (union type)
@@ -182,6 +209,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
// Schema might not be available // Schema might not be available
} }
// Try vLLM schema
try {
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
return getVllmFieldType(key as keyof VllmBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback // Default fallback
return 'text' return 'text'
} }
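
To illustrate the lookup-table refactor and the new vLLM wiring, a few calls with their results noted as comments (they follow directly from the configs and schemas above):

```typescript
import {
  getBasicBackendFields,
  getAdvancedBackendFields,
  getBackendFieldType,
} from "@/lib/zodFormUtils";

// Basic vLLM fields come straight from basicVllmFieldsConfig:
getBasicBackendFields("vllm");
// -> ["model", "tensor_parallel_size", "gpu_memory_utilization"]

// Everything else in the vLLM schema counts as advanced:
getAdvancedBackendFields("vllm").includes("gpu_memory_utilization"); // false
getAdvancedBackendFields("vllm").includes("max_model_len");          // true

// Field types resolve through the schemas, vLLM included:
getBackendFieldType("tensor_parallel_size"); // "number"

// Unknown backend types fall back to the llama.cpp field set:
getBasicBackendFields("something-else")[0];  // "model"
```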

View File

@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'

View File

@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,51 @@
import { z } from 'zod'
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,150 @@
import { z } from 'zod'
// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
// Basic connection options (auto-assigned by llamactl)
host: z.string().optional(),
port: z.number().optional(),
// Model and engine configuration
model: z.string().optional(),
tokenizer: z.string().optional(),
skip_tokenizer_init: z.boolean().optional(),
revision: z.string().optional(),
code_revision: z.string().optional(),
tokenizer_revision: z.string().optional(),
tokenizer_mode: z.string().optional(),
trust_remote_code: z.boolean().optional(),
download_dir: z.string().optional(),
load_format: z.string().optional(),
config_format: z.string().optional(),
dtype: z.string().optional(),
kv_cache_dtype: z.string().optional(),
quantization_param_path: z.string().optional(),
seed: z.number().optional(),
max_model_len: z.number().optional(),
guided_decoding_backend: z.string().optional(),
distributed_executor_backend: z.string().optional(),
worker_use_ray: z.boolean().optional(),
ray_workers_use_nsight: z.boolean().optional(),
// Performance and serving configuration
block_size: z.number().optional(),
enable_prefix_caching: z.boolean().optional(),
disable_sliding_window: z.boolean().optional(),
use_v2_block_manager: z.boolean().optional(),
num_lookahead_slots: z.number().optional(),
swap_space: z.number().optional(),
cpu_offload_gb: z.number().optional(),
gpu_memory_utilization: z.number().optional(),
num_gpu_blocks_override: z.number().optional(),
max_num_batched_tokens: z.number().optional(),
max_num_seqs: z.number().optional(),
max_logprobs: z.number().optional(),
disable_log_stats: z.boolean().optional(),
quantization: z.string().optional(),
rope_scaling: z.string().optional(),
rope_theta: z.number().optional(),
enforce_eager: z.boolean().optional(),
max_context_len_to_capture: z.number().optional(),
max_seq_len_to_capture: z.number().optional(),
disable_custom_all_reduce: z.boolean().optional(),
tokenizer_pool_size: z.number().optional(),
tokenizer_pool_type: z.string().optional(),
tokenizer_pool_extra_config: z.string().optional(),
enable_lora_bias: z.boolean().optional(),
lora_extra_vocab_size: z.number().optional(),
lora_rank: z.number().optional(),
prompt_lookback_distance: z.number().optional(),
preemption_mode: z.string().optional(),
// Distributed and parallel processing
tensor_parallel_size: z.number().optional(),
pipeline_parallel_size: z.number().optional(),
max_parallel_loading_workers: z.number().optional(),
disable_async_output_proc: z.boolean().optional(),
worker_class: z.string().optional(),
enabled_lora_modules: z.string().optional(),
max_lora_rank: z.number().optional(),
fully_sharded_loras: z.boolean().optional(),
lora_modules: z.string().optional(),
prompt_adapters: z.string().optional(),
max_prompt_adapter_token: z.number().optional(),
device: z.string().optional(),
scheduler_delay: z.number().optional(),
enable_chunked_prefill: z.boolean().optional(),
speculative_model: z.string().optional(),
speculative_model_quantization: z.string().optional(),
speculative_revision: z.string().optional(),
speculative_max_model_len: z.number().optional(),
speculative_disable_by_batch_size: z.number().optional(),
ngpt_speculative_length: z.number().optional(),
speculative_disable_mqa: z.boolean().optional(),
model_loader_extra_config: z.string().optional(),
ignore_patterns: z.string().optional(),
preloaded_lora_modules: z.string().optional(),
// OpenAI server specific options
uds: z.string().optional(),
uvicorn_log_level: z.string().optional(),
response_role: z.string().optional(),
ssl_keyfile: z.string().optional(),
ssl_certfile: z.string().optional(),
ssl_ca_certs: z.string().optional(),
ssl_cert_reqs: z.number().optional(),
root_path: z.string().optional(),
middleware: z.array(z.string()).optional(),
return_tokens_as_token_ids: z.boolean().optional(),
disable_frontend_multiprocessing: z.boolean().optional(),
enable_auto_tool_choice: z.boolean().optional(),
tool_call_parser: z.string().optional(),
tool_server: z.string().optional(),
chat_template: z.string().optional(),
chat_template_content_format: z.string().optional(),
allow_credentials: z.boolean().optional(),
allowed_origins: z.array(z.string()).optional(),
allowed_methods: z.array(z.string()).optional(),
allowed_headers: z.array(z.string()).optional(),
api_key: z.array(z.string()).optional(),
enable_log_outputs: z.boolean().optional(),
enable_token_usage: z.boolean().optional(),
enable_async_engine_debug: z.boolean().optional(),
engine_use_ray: z.boolean().optional(),
disable_log_requests: z.boolean().optional(),
max_log_len: z.number().optional(),
// Additional engine configuration
task: z.string().optional(),
multi_modal_config: z.string().optional(),
limit_mm_per_prompt: z.string().optional(),
enable_sleep_mode: z.boolean().optional(),
enable_chunking_request: z.boolean().optional(),
compilation_config: z.string().optional(),
disable_sliding_window_mask: z.boolean().optional(),
enable_trtllm_engine_latency: z.boolean().optional(),
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}
// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = VllmBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
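
A few illustrative calls (results follow from the schema above; the `@/schemas/backends` import path assumes the re-export index shown earlier):

```typescript
import { getAllVllmFieldKeys, getVllmFieldType } from "@/schemas/backends";

// Illustrative only:
getVllmFieldType("gpu_memory_utilization"); // "number"
getVllmFieldType("trust_remote_code");      // "boolean"
getVllmFieldType("allowed_origins");        // "array"
getVllmFieldType("dtype");                  // "text"

// One key per property declared in VllmBackendOptionsSchema:
getAllVllmFieldKeys().includes("tensor_parallel_size"); // true
```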

View File

@@ -1,206 +1,27 @@
import { BackendType } from '@/types/instance' import { BackendType } from '@/types/instance'
import { z } from 'zod' import { z } from 'zod'
// Define the LlamaCpp backend options schema // Import backend schemas from separate files
export const LlamaCppBackendOptionsSchema = z.object({ import {
// Common params LlamaCppBackendOptionsSchema,
verbose_prompt: z.boolean().optional(), type LlamaCppBackendOptions,
threads: z.number().optional(), getAllLlamaCppFieldKeys,
threads_batch: z.number().optional(), getLlamaCppFieldType,
cpu_mask: z.string().optional(), MlxBackendOptionsSchema,
cpu_range: z.string().optional(), type MlxBackendOptions,
cpu_strict: z.number().optional(), getAllMlxFieldKeys,
prio: z.number().optional(), getMlxFieldType,
poll: z.number().optional(), VllmBackendOptionsSchema,
cpu_mask_batch: z.string().optional(), type VllmBackendOptions,
cpu_range_batch: z.string().optional(), getAllVllmFieldKeys,
cpu_strict_batch: z.number().optional(), getVllmFieldType
prio_batch: z.number().optional(), } from './backends'
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Backend options union
export const BackendOptionsSchema = z.union([
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
])
// Define the main create instance options schema
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
on_demand_start: z.boolean().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
})
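
As a rough sketch of how this schema might be used on the client side (the payload values below are examples, not part of this change), a create-instance request body can be checked with `safeParse` before it is sent to the API:

```typescript
// Illustrative only: validate the shape of a create-instance payload.
// Import paths follow the aliases used elsewhere in the webui code.
import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'
import { BackendType } from '@/types/instance'

const payload = {
  on_demand_start: true,
  backend_type: BackendType.VLLM,
  backend_options: {
    task: 'generate',        // example vLLM option from the schema above
    enable_sleep_mode: true, // example vLLM option from the schema above
  },
}

const result = CreateInstanceOptionsSchema.safeParse(payload)
if (!result.success) {
  console.error(result.error.issues)
}
```

Note that because every field in the union members is optional and Zod objects strip unknown keys by default, this check validates field types rather than backend-specific completeness; a payload with misspelled option names can still pass.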
// Re-export types and schemas from backend files
export {
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
}
// Infer the TypeScript types from the schemas
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type from Zod schema
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
if (innerSchema instanceof z.ZodObject) return 'object'
return 'text' // ZodString and others default to text
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
export const BackendType = {
LLAMA_CPP: 'llama_cpp',
MLX_LM: 'mlx_lm',
VLLM: 'vllm',
// MLX_VLM: 'mlx_vlm', // Future expansion
} as const
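
A small sketch of how the new `VLLM` backend type could be wired to the per-backend field-type helpers re-exported above. The `fieldTypeFor` function is hypothetical and only shown to illustrate the intended dispatch; the imports reference exports that appear in this diff.

```typescript
import { BackendType } from '@/types/instance'
import {
  getLlamaCppFieldType,
  getMlxFieldType,
  getVllmFieldType,
  type LlamaCppBackendOptions,
  type MlxBackendOptions,
  type VllmBackendOptions,
} from '@/schemas/instanceOptions'

type Backend = (typeof BackendType)[keyof typeof BackendType]

// Resolve the UI input type for a backend-specific option field
function fieldTypeFor(backend: Backend, key: string): 'text' | 'number' | 'boolean' | 'array' {
  switch (backend) {
    case BackendType.LLAMA_CPP:
      return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
    case BackendType.MLX_LM:
      return getMlxFieldType(key as keyof MlxBackendOptions)
    case BackendType.VLLM:
      return getVllmFieldType(key as keyof VllmBackendOptions)
    default:
      return 'text'
  }
}

console.log(fieldTypeFor(BackendType.VLLM, 'enable_sleep_mode')) // 'boolean'
```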