Merge pull request #34 from lordmathis/feat/vllm-backend

feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
committed by GitHub
53 changed files with 3078 additions and 2968 deletions


@@ -13,7 +13,7 @@
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -31,6 +31,7 @@
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -47,7 +48,7 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp or MLX)
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Start or stop the instance
@@ -63,6 +64,11 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \
@@ -121,6 +127,21 @@ source mlx-env/bin/activate
pip install mlx-lm
```
**For vLLM backend:**
You need vLLM installed:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Configuration
llamactl works out of the box with sensible defaults.
@@ -135,6 +156,7 @@ server:
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances:
port_range: [8000, 9000] # Port range for instances


@@ -19,6 +19,159 @@ const docTemplate = `{
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
@@ -681,522 +834,46 @@ const docTemplate = `{
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"cache_reuse": {
"type": "integer"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": {
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"description": "seconds",
"type": "integer"
}
}
@@ -1264,6 +941,14 @@ const docTemplate = `{
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}`


@@ -12,6 +12,159 @@
},
"basePath": "/api/v1",
"paths": {
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
@@ -674,522 +827,46 @@
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"cache_reuse": {
"type": "integer"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": {
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"description": "seconds",
"type": "integer"
}
}
@@ -1257,6 +934,14 @@
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}

View File

@@ -1,352 +1,35 @@
basePath: /api/v1
definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart:
description: Auto restart
type: boolean
batch_size:
type: integer
cache_reuse:
type: integer
cache_type_k:
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
backend_options:
additionalProperties: {}
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
idle_timeout:
description: Idle timeout
type: integer
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
type: integer
max_restarts:
type: integer
metrics:
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
on_demand_start:
description: On demand start
type: boolean
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
prio:
type: integer
prio_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay:
type: integer
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temp:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
description: seconds
type: integer
type: object
instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
object:
type: string
type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info:
contact: {}
description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,102 @@ info:
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
get:
description: Returns a list of all instances managed by the server


@@ -22,6 +22,7 @@ server:
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances:
port_range: [8000, 9000] # Port range for instances
@@ -94,11 +95,13 @@ server:
backends:
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
```
**Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
### Instance Configuration


@@ -37,6 +37,22 @@ pip install mlx-lm
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)


@@ -29,8 +29,9 @@ You should see the Llamactl web interface.
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file
- **Additional Options**: Any extra Llama.cpp parameters
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance"
@@ -43,17 +44,46 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configuration
## Example Configurations
Here's a basic example configuration for a Llama 2 model:
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"model_path": "/path/to/llama-2-7b-chat.gguf",
"options": {
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"context_size": 2048
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
@@ -66,12 +96,14 @@ You can also manage instances via the REST API:
# List all instances
curl http://localhost:8080/api/instances
# Create a new instance
curl -X POST http://localhost:8080/api/instances \
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"name": "my-model",
"model_path": "/path/to/model.gguf",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance


@@ -170,7 +170,7 @@ POST /api/v1/instances/{name}/start
```json
{
"name": "llama2-7b",
"status": "starting",
"status": "running",
"created": 1705312200
}
```
@@ -191,7 +191,7 @@ POST /api/v1/instances/{name}/stop
```json
{
"name": "llama2-7b",
"status": "stopping",
"status": "stopped",
"created": 1705312200
}
```
@@ -208,7 +208,7 @@ POST /api/v1/instances/{name}/restart
```json
{
"name": "llama2-7b",
"status": "restarting",
"status": "running",
"created": 1705312200
}
```
@@ -401,6 +401,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
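The following is a minimal Go client sketch (not part of this PR) that exercises one of these parse endpoints end to end. The `your-key` token and the model path are placeholders, and it assumes management API key auth is enabled as in the examples above:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Build the request body expected by server.ParseCommandRequest.
	body, _ := json.Marshal(map[string]string{
		"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080",
	})

	req, _ := http.NewRequest("POST",
		"http://localhost:8080/api/v1/backends/llama-cpp/parse-command",
		bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-key") // placeholder API key

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The response is the parsed instance.CreateInstanceOptions document.
	var parsed map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
		panic(err)
	}
	fmt.Printf("parsed options: %+v\n", parsed)
}
```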
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:


@@ -1,6 +1,6 @@
# Managing Instances
Learn how to effectively manage your llama.cpp and MLX instances with Llamactl through both the Web UI and API.
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
@@ -42,9 +42,11 @@ Each instance is displayed as a card showing:
3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
@@ -54,6 +56,7 @@ Each instance is displayed as a card showing:
6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
### Via API
@@ -87,6 +90,20 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
"max_restarts": 3
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
@@ -179,16 +196,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server or MLX).
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```
Both backends provide OpenAI-compatible endpoints. Check the respective documentation:
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health


@@ -5,5 +5,6 @@ type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)

pkg/backends/builder.go (new file, 70 lines)

@@ -0,0 +1,70 @@
package backends
import (
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
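A hedged usage sketch for the helper above; `ExampleOptions` is a hypothetical type invented for illustration, and the `llamactl/pkg/backends` import path is assumed from this PR:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// ExampleOptions is a hypothetical options struct; real backends define their own.
type ExampleOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &ExampleOptions{
		Model:     "/models/llama-2-7b.gguf",
		GPULayers: 32,
		Verbose:   true,
		Lora:      []string{"a.bin", "b.bin"},
	}
	// "lora" is marked multi-valued, so it expands to --lora a.bin --lora b.bin;
	// zero-valued fields are skipped entirely, and snake_case tags become kebab-case flags.
	args := backends.BuildCommandArgs(opts, map[string]bool{"lora": true})
	fmt.Println(args)
	// [--model /models/llama-2-7b.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]
}
```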


@@ -2,9 +2,9 @@ package llamacpp
import (
"encoding/json"
"llamactl/pkg/backends"
"reflect"
"strconv"
"strings"
)
type LlamaServerOptions struct {
@@ -315,62 +315,44 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
var args []string
// Llama uses multiple flags for arrays by default (not comma-separated)
multipleFlags := map[string]bool{
"override-tensor": true,
"override-kv": true,
"lora": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
return backends.BuildCommandArgs(o, multipleFlags)
}
v := reflect.ValueOf(o).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
// Skip unexported fields
if !field.CanInterface() {
continue
}
// Get the JSON tag to determine the flag name
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Remove ",omitempty" from the tag
flagName := jsonTag
if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
flagName = jsonTag[:commaIndex]
}
// Convert snake_case to kebab-case for CLI flags
flagName = strings.ReplaceAll(flagName, "_", "-")
// Add the appropriate arguments based on field type and value
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
// Handle []string fields
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
}
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
multiValuedFlags := map[string]bool{
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
}
return args
var llamaOptions LlamaServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
return nil, err
}
return &llamaOptions, nil
}
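A hedged round-trip sketch (not part of this PR) showing how the parser pairs with `BuildCommandArgs`; the `llamactl/pkg/backends/llamacpp` import path is assumed from the test files in this PR:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/llamacpp"
)

func main() {
	// Parse a full command line into typed LlamaServerOptions...
	opts, err := llamacpp.ParseLlamaCommand(
		"llama-server --model /models/llama-2-7b.gguf --gpu-layers 32 --lora a.bin --lora b.bin")
	if err != nil {
		panic(err)
	}

	// ...then rebuild the CLI arguments from those options.
	// Multi-valued flags such as --lora are emitted once per value,
	// in struct-field order.
	fmt.Println(opts.BuildCommandArgs())
}
```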


@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)


@@ -1,286 +0,0 @@
package llamacpp
import (
"encoding/json"
"errors"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
// Known multi-valued flags (snake_case form)
multiValued := map[string]struct{}{
"override_tensor": {},
"override_kv": {},
"lora": {},
"lora_scaled": {},
"control_vector": {},
"control_vector_scaled": {},
"dry_sequence_breaker": {},
"logit_bias": {},
}
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
// Determine if multi-valued flag
_, isMulti := multiValued[flagName]
// Normalization helper: ensure slice for multi-valued flags
appendValue := func(valStr string) {
if existing, ok := options[flagName]; ok {
// Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
if slice, ok := existing.([]string); ok {
options[flagName] = append(slice, valStr)
return
}
// Convert scalar to slice
options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
return
}
// First value
if isMulti {
options[flagName] = []string{valStr}
} else {
// We'll parse type below for single-valued flags
options[flagName] = valStr
}
}
if valueProvided {
// Use raw token for multi-valued flags; else allow typed parsing
appendValue(rawValue)
if !isMulti { // convert to typed value if scalar
if strVal, ok := options[flagName].(string); ok { // still scalar
options[flagName] = parseValue(strVal)
}
}
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value)
options[flagName] = true
i++
}
// 4. Convert to LlamaServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var llamaOptions LlamaServerOptions
if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return LlamaServerOptions
return &llamaOptions, nil
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with llama-server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "llama-server" command
if strings.ToLower(firstToken) == "llama-server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, errors.New("unterminated quoted string")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
if !strings.HasPrefix(arg, "-") {
return false
}
// Special case: if it's a negative number, treat it as a value
if _, err := strconv.ParseFloat(arg, 64); err == nil {
return false
}
return true
}


@@ -1,413 +0,0 @@
package llamacpp
import (
"testing"
)
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command with model",
command: "llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "command with multiple flags",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
expectErr: false,
},
{
name: "command with short flags",
command: "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
expectErr: false,
},
{
name: "command with equals format",
command: "llama-server --model=/path/to/model.gguf --gpu-layers=32",
expectErr: false,
},
{
name: "command with boolean flags",
command: "llama-server --model /path/to/model.gguf --verbose --no-mmap",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "case insensitive command",
command: "LLAMA-SERVER --model /path/to/model.gguf",
expectErr: false,
},
// New test cases for improved functionality
{
name: "args only without llama-server",
command: "--model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "full path to executable",
command: "/usr/local/bin/llama-server --model /path/to/model.gguf",
expectErr: false,
},
{
name: "negative number handling",
command: "llama-server --gpu-layers -1 --model test.gguf",
expectErr: false,
},
{
name: "multiline command with backslashes",
command: "llama-server --model /path/to/model.gguf \\\n --ctx-size 4096 \\\n --batch-size 512",
expectErr: false,
},
{
name: "quoted string with special characters",
command: `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
expectErr: false,
},
{
name: "unterminated quoted string",
command: `llama-server --model test.gguf --chat-template "unterminated quote`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
})
}
}
func TestParseLlamaCommandSpecificValues(t *testing.T) {
// Test specific value parsing
command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandArrayFlags(t *testing.T) {
// Test array flag handling (critical for lora, override-tensor, etc.)
command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
}
}
func TestParseLlamaCommandMixedFormats(t *testing.T) {
// Test mixing --flag=value and --flag value formats
command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/model.gguf" {
t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if !result.Verbose {
t.Errorf("expected verbose to be true, got %v", result.Verbose)
}
}
func TestParseLlamaCommandTypeConversion(t *testing.T) {
// Test that values are converted to appropriate types
command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
}
}
func TestParseLlamaCommandArgsOnly(t *testing.T) {
// Test parsing arguments without llama-server command
command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
}
func TestParseLlamaCommandFullPath(t *testing.T) {
// Test full path to executable
command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
// Test negative number parsing
command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Seed != -12345 {
t.Errorf("expected seed -12345, got %d", result.Seed)
}
}
func TestParseLlamaCommandMultiline(t *testing.T) {
// Test multiline command with backslashes
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers 32`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
}
func TestParseLlamaCommandQuotedStrings(t *testing.T) {
// Test quoted strings with special characters
command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test.gguf" {
t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
expectedTemplate := "User: {user}\\nAssistant: "
if result.ChatTemplate != expectedTemplate {
t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
}
}
func TestParseLlamaCommandUnslothExample(t *testing.T) {
// Test with realistic unsloth-style command
command := `llama-server --model /path/to/model.gguf \
--ctx-size 4096 \
--batch-size 512 \
--gpu-layers -1 \
--temp 0.7 \
--repeat-penalty 1.1 \
--top-k 40 \
--top-p 0.95 \
--host 0.0.0.0 \
--port 8000 \
--api-key "sk-1234567890abcdef"`
result, err := ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
// Verify key fields
if result.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
}
if result.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
}
if result.BatchSize != 512 {
t.Errorf("expected batch_size 512, got %d", result.BatchSize)
}
if result.GPULayers != -1 {
t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if result.RepeatPenalty != 1.1 {
t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
}
if result.TopK != 40 {
t.Errorf("expected top_k 40, got %d", result.TopK)
}
if result.TopP != 0.95 {
t.Errorf("expected top_p 0.95, got %f", result.TopP)
}
if result.Host != "0.0.0.0" {
t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
}
if result.Port != 8000 {
t.Errorf("expected port 8000, got %d", result.Port)
}
if result.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
}
}
// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "my model.gguf" {
t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
}
if result.Alias != "Test Alias" {
t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
}
}
func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
// Same multi-value flag using --flag value and --flag=value forms
cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
result, err := ParseLlamaCommand(cmd)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 3 {
t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
}
expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
func TestParseLlamaCommandMalformedFlag(t *testing.T) {
cmd := "llama-server ---model test.gguf"
_, err := ParseLlamaCommand(cmd)
if err == nil {
t.Fatalf("expected error for malformed flag but got none")
}
}

View File

@@ -1,16 +1,14 @@
package mlx
import (
"encoding/json"
"reflect"
"strconv"
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
@@ -19,187 +17,40 @@ type MlxServerOptions struct {
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to handle multiple field names
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions MlxServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = MlxServerOptions(temp)
// Handle alternative field names
fieldMappings := map[string]string{
// Basic connection options
"m": "model",
"host": "host",
"port": "port",
// "python_path": "python_path", // removed
// Model and adapter options
"adapter-path": "adapter_path",
"draft-model": "draft_model",
"num-draft-tokens": "num_draft_tokens",
"trust-remote-code": "trust_remote_code",
// Logging and templates
"log-level": "log_level",
"chat-template": "chat_template",
"use-default-chat-template": "use_default_chat_template",
"chat-template-args": "chat_template_args",
// Sampling defaults
"temperature": "temp", // Support both temp and temperature
"top-p": "top_p",
"top-k": "top_k",
"min-p": "min_p",
"max-tokens": "max_tokens",
}
// Process alternative field names
for altName, canonicalName := range fieldMappings {
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
field := v.FieldByNameFunc(func(fieldName string) bool {
field, _ := v.Type().FieldByName(fieldName)
jsonTag := field.Tag.Get("json")
return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
}
}
}
return nil
}
// NewMlxServerOptions creates MlxServerOptions with MLX defaults
func NewMlxServerOptions() *MlxServerOptions {
return &MlxServerOptions{
Host: "127.0.0.1", // MLX default (different from llama-server)
Port: 8080, // MLX default
NumDraftTokens: 3, // MLX default for speculative decoding
LogLevel: "INFO", // MLX default
Temp: 0.0, // MLX default
TopP: 1.0, // MLX default
TopK: 0, // MLX default (disabled)
MinP: 0.0, // MLX default (disabled)
MaxTokens: 512, // MLX default
ChatTemplateArgs: "{}", // MLX default (empty JSON object)
}
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
var args []string
// Required and basic options
if o.Model != "" {
args = append(args, "--model", o.Model)
}
if o.Host != "" {
args = append(args, "--host", o.Host)
}
if o.Port != 0 {
args = append(args, "--port", strconv.Itoa(o.Port))
}
// Model and adapter options
if o.AdapterPath != "" {
args = append(args, "--adapter-path", o.AdapterPath)
}
if o.DraftModel != "" {
args = append(args, "--draft-model", o.DraftModel)
}
if o.NumDraftTokens != 0 {
args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
}
if o.TrustRemoteCode {
args = append(args, "--trust-remote-code")
}
// Logging and templates
if o.LogLevel != "" {
args = append(args, "--log-level", o.LogLevel)
}
if o.ChatTemplate != "" {
args = append(args, "--chat-template", o.ChatTemplate)
}
if o.UseDefaultChatTemplate {
args = append(args, "--use-default-chat-template")
}
if o.ChatTemplateArgs != "" {
args = append(args, "--chat-template-args", o.ChatTemplateArgs)
}
// Sampling defaults
if o.Temp != 0 {
args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
}
if o.TopP != 0 {
args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
}
if o.TopK != 0 {
args = append(args, "--top-k", strconv.Itoa(o.TopK))
}
if o.MinP != 0 {
args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
}
if o.MaxTokens != 0 {
args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
}
return args
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}
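For orientation, a minimal usage sketch of the slimmed-down MLX backend; the model path and sampling values below are illustrative, not defaults:

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/mlx"
)

func main() {
	// Parse a pasted command into typed options.
	opts, err := mlx.ParseMlxCommand("mlx_lm.server --model /models/mistral-7b --port 8081 --temp 0.7")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, opts.Port, opts.Temp) // /models/mistral-7b 8081 0.7

	// Convert typed options back into CLI arguments for mlx_lm.server.
	fmt.Println(opts.BuildCommandArgs())
}
```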

View File

@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

View File

@@ -1,254 +0,0 @@
package mlx
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
// 1. Normalize the command - handle multiline with backslashes
trimmed := normalizeMultilineCommand(command)
if trimmed == "" {
return nil, fmt.Errorf("command cannot be empty")
}
// 2. Extract arguments from command
args, err := extractArgumentsFromCommand(trimmed)
if err != nil {
return nil, err
}
// 3. Parse arguments into map
options := make(map[string]any)
i := 0
for i < len(args) {
arg := args[i]
if !strings.HasPrefix(arg, "-") { // skip positional / stray values
i++
continue
}
// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Unified parsing for --flag=value vs --flag value
var rawFlag, rawValue string
hasEquals := false
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
rawFlag = parts[0]
rawValue = parts[1] // may be empty string
hasEquals = true
} else {
rawFlag = arg
}
flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
flagName := strings.ReplaceAll(flagCore, "-", "_")
// Detect value if not in equals form
valueProvided := hasEquals
if !hasEquals {
if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
rawValue = args[i+1]
valueProvided = true
}
}
if valueProvided {
// MLX-specific validation for certain flags
if flagName == "log_level" && !isValidLogLevel(rawValue) {
return nil, fmt.Errorf("invalid log level: %s", rawValue)
}
options[flagName] = parseValue(rawValue)
// Advance index: if we consumed a following token as value (non equals form), skip it
if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
i += 2
} else {
i++
}
continue
}
// Boolean flag (no value) - MLX specific boolean flags
if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
options[flagName] = true
} else {
options[flagName] = true
}
i++
}
// 4. Convert to MlxServerOptions using existing UnmarshalJSON
jsonData, err := json.Marshal(options)
if err != nil {
return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
}
var mlxOptions MlxServerOptions
if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
return nil, fmt.Errorf("failed to parse command options: %w", err)
}
// 5. Return MlxServerOptions
return &mlxOptions, nil
}
// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
for _, valid := range validLevels {
if level == valid {
return true
}
}
return false
}
// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
// Surrounding matching quotes (single or double)
if l := len(value); l >= 2 {
if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
value = value[1 : l-1]
}
}
lower := strings.ToLower(value)
if lower == "true" {
return true
}
if lower == "false" {
return false
}
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
return value
}
// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
// Handle escaped newlines (backslash followed by newline)
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
// Clean up extra whitespace
re = regexp.MustCompile(`\s+`)
normalized = re.ReplaceAllString(normalized, " ")
return strings.TrimSpace(normalized)
}
// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
// Split command into tokens respecting quotes
tokens, err := splitCommandTokens(command)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return nil, fmt.Errorf("no command tokens found")
}
// Check if first token looks like an executable
firstToken := tokens[0]
// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
if strings.Contains(firstToken, string(filepath.Separator)) ||
strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
return tokens[1:], nil // Return everything except the executable
}
// Case 2: Just "mlx_lm.server" command
if strings.ToLower(firstToken) == "mlx_lm.server" {
return tokens[1:], nil // Return everything except the command
}
// Case 3: Arguments only (starts with a flag)
if strings.HasPrefix(firstToken, "-") {
return tokens, nil // Return all tokens as arguments
}
// Case 4: Unknown format - might be a different executable name
// Be permissive and assume it's the executable
return tokens[1:], nil
}
// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
var tokens []string
var current strings.Builder
inQuotes := false
quoteChar := byte(0)
escaped := false
for i := 0; i < len(command); i++ {
c := command[i]
if escaped {
current.WriteByte(c)
escaped = false
continue
}
if c == '\\' {
escaped = true
current.WriteByte(c)
continue
}
if !inQuotes && (c == '"' || c == '\'') {
inQuotes = true
quoteChar = c
current.WriteByte(c)
} else if inQuotes && c == quoteChar {
inQuotes = false
quoteChar = 0
current.WriteByte(c)
} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
if current.Len() > 0 {
tokens = append(tokens, current.String())
current.Reset()
}
} else {
current.WriteByte(c)
}
}
if inQuotes {
return nil, fmt.Errorf("unclosed quote in command")
}
if current.Len() > 0 {
tokens = append(tokens, current.String())
}
return tokens, nil
}
// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
return strings.HasPrefix(s, "-")
}

213 pkg/backends/parser.go Normal file
View File

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
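A minimal sketch of how another backend would plug into this shared parser; the struct, binary name, and flag set below are hypothetical:

```go
package mybackend

import (
	"llamactl/pkg/backends"
)

// MyServerOptions is an illustrative target struct; ParseCommand fills it
// via JSON tags after converting kebab-case flags to snake_case keys.
type MyServerOptions struct {
	Model string   `json:"model,omitempty"`
	Port  int      `json:"port,omitempty"`
	Lora  []string `json:"lora,omitempty"`
}

// ParseMyCommand shows the expected call pattern: known executable names,
// optional subcommands, multi-valued flags, and a pointer target.
func ParseMyCommand(command string) (*MyServerOptions, error) {
	executableNames := []string{"my-server"}          // hypothetical binary name
	var subcommandNames []string                      // none for this backend
	multiValuedFlags := map[string]bool{"lora": true} // flags that may repeat

	var opts MyServerOptions
	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &opts); err != nil {
		return nil, err
	}
	return &opts, nil
}
```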

189 pkg/backends/vllm/vllm.go Normal file
View File

@@ -0,0 +1,189 @@
package vllm
import (
"llamactl/pkg/backends"
)
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand; that is handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy of the options without the Model field to avoid including it as --model flag
optionsCopy := *o
optionsCopy.Model = "" // Clear model field so it won't be included as a flag
multipleFlags := map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
// Build the rest of the arguments as flags
flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}
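A small round-trip sketch for the vLLM options; the model name and flag values are illustrative, and the exact flag order in the output comes from the shared argument builder:

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/vllm"
)

func main() {
	// The model is a positional argument after "serve", not a --model flag.
	opts, err := vllm.ParseVllmCommand("vllm serve meta-llama/Llama-3-8B --tensor-parallel-size 2")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model)              // meta-llama/Llama-3-8B
	fmt.Println(opts.TensorParallelSize) // 2

	// BuildCommandArgs re-emits the model first as a positional argument,
	// followed by the remaining options as flags, e.g.
	// ["meta-llama/Llama-3-8B", "--tensor-parallel-size", "2"].
	fmt.Println(opts.BuildCommandArgs())
}
```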

View File

@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

View File

@@ -17,6 +17,9 @@ type BackendConfig struct {
// Path to mlx_lm executable (MLX-LM backend)
MLXLMExecutable string `yaml:"mlx_lm_executable"`
// Path to vllm executable (vLLM backend)
VllmExecutable string `yaml:"vllm_executable"`
}
// AppConfig represents the configuration for llamactl
@@ -122,6 +125,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Backends: BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
VllmExecutable: "vllm",
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
@@ -246,6 +250,9 @@ func loadEnvVars(cfg *AppConfig) {
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
cfg.Backends.MLXLMExecutable = mlxLMExec
}
if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
cfg.Backends.VllmExecutable = vllmExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b

View File

@@ -105,6 +105,10 @@ func (i *Process) GetPort() int {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
}
return 0
@@ -123,6 +127,10 @@ func (i *Process) GetHost() string {
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
@@ -176,6 +184,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))

View File

@@ -52,6 +52,8 @@ func (i *Process) Start() error {
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
case backends.BackendTypeVllm:
executable = i.globalBackendSettings.VllmExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
@@ -200,6 +202,11 @@ func (i *Process) WaitForHealthy(timeout int) error {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"

View File

@@ -6,6 +6,7 @@ import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"log"
)
@@ -26,6 +27,7 @@ type CreateInstanceOptions struct {
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -69,6 +71,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
@@ -114,6 +128,20 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
}
@@ -171,6 +199,13 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
if c.MlxServerOptions != nil {
return c.MlxServerOptions.BuildCommandArgs()
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
// Prepend "serve" as first argument
args := []string{"serve"}
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
return args
}
}
return []string{}
}

View File

@@ -264,6 +264,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
}
@@ -279,6 +283,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
}

View File

@@ -8,6 +8,7 @@ import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
@@ -739,3 +740,56 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
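For manual testing, a hedged Go client sketch against the new endpoint; the base URL, the /api/v1 prefix, the API key, and the "command" body field are assumptions about the deployment and the ParseCommandRequest shape:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Illustrative request body; field name assumed from ParseCommandRequest.
	body := []byte(`{"command": "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2"}`)

	req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/backends/vllm/parse-command", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer your-key")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Expect 200 with a CreateInstanceOptions JSON payload on success.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```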

View File

@@ -58,6 +58,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand())
})
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
})
// Instance management endpoints

View File

@@ -46,6 +46,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
@@ -88,6 +90,25 @@ func validateMlxOptions(options *instance.CreateInstanceOptions) error {
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)

View File

@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Cpu, Zap, Server } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
if (!backend) {
return null;
}
const getIcon = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return <Cpu className="h-3 w-3" />;
case BackendType.MLX_LM:
return <Zap className="h-3 w-3" />;
case BackendType.VLLM:
return <Server className="h-3 w-3" />;
default:
return <Server className="h-3 w-3" />;
}
};
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getVariant = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "secondary";
case BackendType.MLX_LM:
return "outline";
case BackendType.VLLM:
return "default";
default:
return "secondary";
}
};
return (
<Badge
variant={getVariant()}
className="flex items-center gap-1.5"
>
{getIcon()}
<span className="text-xs">{getText()}</span>
</Badge>
);
};
export default BackendBadge;

View File

@@ -45,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -72,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -99,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}

View File

@@ -5,6 +5,7 @@ import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -58,7 +59,10 @@ function InstanceCard({
<CardHeader className="pb-3">
<div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle>
{running && <HealthBadge health={health} />}
<div className="flex flex-col items-end gap-2">
{running && <HealthBadge health={health} />}
<BackendBadge backend={instance.options?.backend_type} />
</div>
</div>
</CardHeader>

View File

@@ -11,11 +11,13 @@ import {
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { getAdvancedFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";
import BasicInstanceFields from "@/components/instance/BasicInstanceFields";
import BackendConfiguration from "@/components/instance/BackendConfiguration";
import AdvancedInstanceFields from "@/components/instance/AdvancedInstanceFields";
interface InstanceDialogProps {
open: boolean;
@@ -39,9 +41,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
const [showParseDialog, setShowParseDialog] = useState(false);
// Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields(formData.backend_type);
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);
// Reset form when dialog opens/closes or when instance changes
@@ -163,8 +163,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setShowParseDialog(false);
};
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic
let saveButtonLabel = "Create Instance";
@@ -212,70 +210,23 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div>
{/* Auto Restart Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">
Auto Restart Configuration
</h3>
<AutoRestartConfiguration
formData={formData}
onChange={handleFieldChange}
/>
{/* Auto Restart Toggle */}
<ZodFormField
fieldKey="auto_restart"
value={formData.auto_restart}
onChange={handleFieldChange}
/>
{/* Show restart options only when auto restart is enabled */}
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<ZodFormField
fieldKey="max_restarts"
value={formData.max_restarts}
onChange={handleFieldChange}
/>
<ZodFormField
fieldKey="restart_delay"
value={formData.restart_delay}
onChange={handleFieldChange}
/>
</div>
)}
</div>
{/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{basicFields
.filter(
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay" &&
fieldKey !== "backend_options" // backend_options is handled separately
)
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
{/* Basic Fields */}
<BasicInstanceFields
formData={formData}
onChange={handleFieldChange}
/>
{/* Backend Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
<BackendConfiguration
formData={formData}
onBackendFieldChange={handleBackendFieldChange}
showAdvanced={showAdvanced}
/>
{/* Advanced Fields Toggle */}
<div className="border-t pt-4">
@@ -314,54 +265,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</div>
</div>
{/* Advanced Fields - Automatically generated from type (excluding restart options) */}
{/* Advanced Fields */}
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
{/* Advanced instance fields */}
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
).length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
)
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
<AdvancedInstanceFields
formData={formData}
onChange={handleFieldChange}
/>
</div>
)}
</div>

View File

@@ -9,7 +9,7 @@ import {
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { type CreateInstanceOptions } from "@/types/instance";
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";
@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
onParsed,
}) => {
const [command, setCommand] = useState('');
const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
setError(null);
try {
const options = await backendsApi.llamaCpp.parseCommand(command);
let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options);
onOpenChange(false);
// Reset form
setCommand('');
setError(null);
// Show success toast
toast.success('Command parsed successfully');
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage);
// Show error toast
toast.error('Failed to parse command', {
description: errorMessage
});
@@ -60,31 +74,55 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
const handleOpenChange = (open: boolean) => {
if (!open) {
// Reset form when closing
setCommand('');
setBackendType(BackendType.LLAMA_CPP);
setError(null);
}
onOpenChange(open);
};
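// Example commands shown as textarea placeholders for each backend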
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return (
<Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]">
<DialogHeader>
<DialogTitle>Parse Llama Server Command</DialogTitle>
<DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription>
Paste your llama-server command to automatically populate the form fields
Select your backend type and paste the command to automatically populate the form fields
</DialogDescription>
</DialogHeader>
<div className="space-y-4">
<div>
<Label htmlFor="backend-type">Backend Type</Label>
<select
id="backend-type"
value={backendType}
onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
</div>
<div>
<Label htmlFor="command">Command</Label>
<textarea
id="command"
value={command}
onChange={(e) => setCommand(e.target.value)}
placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/>
</div>

View File

@@ -29,7 +29,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<select
id={fieldKey}
@@ -39,6 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
@@ -70,8 +70,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
</Label>
<Input
id={fieldKey}
type="number"
@@ -97,8 +96,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
</Label>
<Input
id={fieldKey}
type="text"
@@ -124,8 +122,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
</Label>
<Input
id={fieldKey}
type="text"

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
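  // Parse comma-separated input into a trimmed string array; empty input clears the value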
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
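  // Treat empty input as "unset" and ignore values that do not parse as numbers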
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,98 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getAdvancedFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import ArrayInput from '@/components/form/ArrayInput'
interface AdvancedInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AdvancedInstanceFields: React.FC<AdvancedInstanceFieldsProps> = ({
formData,
onChange
}) => {
const advancedFields = getAdvancedFields()
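  // Map each field's zod-derived type to the matching input component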
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
case 'array':
return (
<ArrayInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string[] | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out restart options and backend_options (handled separately)
const fieldsToRender = advancedFields.filter(
fieldKey => !['max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
if (fieldsToRender.length === 0) {
return null
}
return (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{fieldsToRender
.sort()
.map(renderField)}
</div>
)
}
export default AdvancedInstanceFields

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
const isAutoRestartEnabled = formData.auto_restart === true
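  // Restart limit fields are only rendered while auto restart is enabled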
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
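  // Derive field lists from the selected backend's schema (falls back to llama.cpp when unset)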
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,99 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
formData,
onChange
}) => {
const basicFields = getBasicFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
// Special handling for backend_type field
if (fieldKey === 'backend_type') {
return (
<SelectInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] || BackendType.LLAMA_CPP}
onChange={(value) => onChange(fieldKey, value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description={config.description}
/>
)
}
// Render based on field type
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out auto restart fields and backend_options (handled separately)
const fieldsToRender = basicFields.filter(
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{fieldsToRender.map(renderField)}
</div>
)
}
export default BasicInstanceFields

View File

@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
@@ -30,25 +31,8 @@ async function apiCall<T>(
headers,
});
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required');
}
if (!response.ok) {
// Try to get error message from response
let errorMessage = `HTTP ${response.status}`;
try {
const errorText = await response.text();
if (errorText) {
errorMessage += `: ${errorText}`;
}
} catch {
// If we can't read the error, just use status
}
throw new Error(errorMessage);
}
// Handle errors using centralized error handler
await handleApiError(response);
// Handle empty responses (like DELETE)
if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
const text = await response.text();
return text as T;
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
  const text = await response.text();
  if (text.trim() === '') {
    return {} as T; // Return empty object for empty JSON responses
  }
  // The body has already been consumed above, so parse the text directly
  return JSON.parse(text) as T;
}
const data = await response.json() as T;
return data;
}
@@ -101,6 +93,14 @@ export const backendsApi = {
body: JSON.stringify({ command }),
}),
},
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
};
// Instance API functions

View File

@@ -0,0 +1,32 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
throw new Error(errorMessage)
}
}

View File

@@ -2,13 +2,17 @@ import {
type CreateInstanceOptions,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllFieldKeys,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType
getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific)
@@ -16,7 +20,6 @@ export const basicFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
auto_restart: {
label: 'Auto Restart',
@@ -52,13 +55,11 @@ const basicLlamaCppFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
description: 'Path to the model file',
required: true
description: 'Path to the model file'
},
hf_repo: {
label: 'Hugging Face Repository',
@@ -82,13 +83,11 @@ const basicMlxFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
model: {
label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config',
required: true
description: 'The path to the MLX model weights, tokenizer, and config'
},
temp: {
label: 'Temperature',
@@ -117,11 +116,46 @@ const basicMlxFieldsConfig: Record<string, {
}
}
// vLLM backend-specific basic fields
const basicVllmFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'microsoft/DialoGPT-medium',
description: 'The name or path of the Hugging Face model to use'
},
tensor_parallel_size: {
label: 'Tensor Parallel Size',
placeholder: '1',
description: 'Number of GPUs to use for distributed serving'
},
gpu_memory_utilization: {
label: 'GPU Memory Utilization',
placeholder: '0.9',
description: 'The fraction of GPU memory to be used for the model executor'
}
}
// Backend field configuration lookup
const backendFieldConfigs = {
mlx_lm: basicMlxFieldsConfig,
vllm: basicVllmFieldsConfig,
llama_cpp: basicLlamaCppFieldsConfig,
} as const
const backendFieldGetters = {
mlx_lm: getAllMlxFieldKeys,
vllm: getAllVllmFieldKeys,
llama_cpp: getAllLlamaCppFieldKeys,
} as const
function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
}
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
}
@@ -130,25 +164,18 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
}
export function getBasicBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') {
return Object.keys(basicMlxFieldsConfig)
} else if (backendType === 'llama_cpp') {
return Object.keys(basicLlamaCppFieldsConfig)
}
// Default to LlamaCpp for backward compatibility
return Object.keys(basicLlamaCppFieldsConfig)
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return Object.keys(config)
}
export function getAdvancedBackendFields(backendType?: string): string[] {
if (backendType === 'mlx_lm') {
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
} else if (backendType === 'llama_cpp') {
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
}
// Default to LlamaCpp for backward compatibility
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return fieldGetter().filter(key => !(key in basicConfig))
}
// Combined backend fields config for use in BackendFormField
@@ -156,10 +183,10 @@ export const basicBackendFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig
...basicMlxFieldsConfig,
...basicVllmFieldsConfig
}
// Get field type for any backend option (union type)
@@ -182,6 +209,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
// Schema might not be available
}
// Try vLLM schema
try {
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
return getVllmFieldType(key as keyof VllmBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback
return 'text'
}

View File

@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'

View File

@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,51 @@
import { z } from 'zod'
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -0,0 +1,150 @@
import { z } from 'zod'
// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
// Basic connection options (auto-assigned by llamactl)
host: z.string().optional(),
port: z.number().optional(),
// Model and engine configuration
model: z.string().optional(),
tokenizer: z.string().optional(),
skip_tokenizer_init: z.boolean().optional(),
revision: z.string().optional(),
code_revision: z.string().optional(),
tokenizer_revision: z.string().optional(),
tokenizer_mode: z.string().optional(),
trust_remote_code: z.boolean().optional(),
download_dir: z.string().optional(),
load_format: z.string().optional(),
config_format: z.string().optional(),
dtype: z.string().optional(),
kv_cache_dtype: z.string().optional(),
quantization_param_path: z.string().optional(),
seed: z.number().optional(),
max_model_len: z.number().optional(),
guided_decoding_backend: z.string().optional(),
distributed_executor_backend: z.string().optional(),
worker_use_ray: z.boolean().optional(),
ray_workers_use_nsight: z.boolean().optional(),
// Performance and serving configuration
block_size: z.number().optional(),
enable_prefix_caching: z.boolean().optional(),
disable_sliding_window: z.boolean().optional(),
use_v2_block_manager: z.boolean().optional(),
num_lookahead_slots: z.number().optional(),
swap_space: z.number().optional(),
cpu_offload_gb: z.number().optional(),
gpu_memory_utilization: z.number().optional(),
num_gpu_blocks_override: z.number().optional(),
max_num_batched_tokens: z.number().optional(),
max_num_seqs: z.number().optional(),
max_logprobs: z.number().optional(),
disable_log_stats: z.boolean().optional(),
quantization: z.string().optional(),
rope_scaling: z.string().optional(),
rope_theta: z.number().optional(),
enforce_eager: z.boolean().optional(),
max_context_len_to_capture: z.number().optional(),
max_seq_len_to_capture: z.number().optional(),
disable_custom_all_reduce: z.boolean().optional(),
tokenizer_pool_size: z.number().optional(),
tokenizer_pool_type: z.string().optional(),
tokenizer_pool_extra_config: z.string().optional(),
enable_lora_bias: z.boolean().optional(),
lora_extra_vocab_size: z.number().optional(),
lora_rank: z.number().optional(),
prompt_lookback_distance: z.number().optional(),
preemption_mode: z.string().optional(),
// Distributed and parallel processing
tensor_parallel_size: z.number().optional(),
pipeline_parallel_size: z.number().optional(),
max_parallel_loading_workers: z.number().optional(),
disable_async_output_proc: z.boolean().optional(),
worker_class: z.string().optional(),
enabled_lora_modules: z.string().optional(),
max_lora_rank: z.number().optional(),
fully_sharded_loras: z.boolean().optional(),
lora_modules: z.string().optional(),
prompt_adapters: z.string().optional(),
max_prompt_adapter_token: z.number().optional(),
device: z.string().optional(),
scheduler_delay: z.number().optional(),
enable_chunked_prefill: z.boolean().optional(),
speculative_model: z.string().optional(),
speculative_model_quantization: z.string().optional(),
speculative_revision: z.string().optional(),
speculative_max_model_len: z.number().optional(),
speculative_disable_by_batch_size: z.number().optional(),
ngpt_speculative_length: z.number().optional(),
speculative_disable_mqa: z.boolean().optional(),
model_loader_extra_config: z.string().optional(),
ignore_patterns: z.string().optional(),
preloaded_lora_modules: z.string().optional(),
// OpenAI server specific options
uds: z.string().optional(),
uvicorn_log_level: z.string().optional(),
response_role: z.string().optional(),
ssl_keyfile: z.string().optional(),
ssl_certfile: z.string().optional(),
ssl_ca_certs: z.string().optional(),
ssl_cert_reqs: z.number().optional(),
root_path: z.string().optional(),
middleware: z.array(z.string()).optional(),
return_tokens_as_token_ids: z.boolean().optional(),
disable_frontend_multiprocessing: z.boolean().optional(),
enable_auto_tool_choice: z.boolean().optional(),
tool_call_parser: z.string().optional(),
tool_server: z.string().optional(),
chat_template: z.string().optional(),
chat_template_content_format: z.string().optional(),
allow_credentials: z.boolean().optional(),
allowed_origins: z.array(z.string()).optional(),
allowed_methods: z.array(z.string()).optional(),
allowed_headers: z.array(z.string()).optional(),
api_key: z.array(z.string()).optional(),
enable_log_outputs: z.boolean().optional(),
enable_token_usage: z.boolean().optional(),
enable_async_engine_debug: z.boolean().optional(),
engine_use_ray: z.boolean().optional(),
disable_log_requests: z.boolean().optional(),
max_log_len: z.number().optional(),
// Additional engine configuration
task: z.string().optional(),
multi_modal_config: z.string().optional(),
limit_mm_per_prompt: z.string().optional(),
enable_sleep_mode: z.boolean().optional(),
enable_chunking_request: z.boolean().optional(),
compilation_config: z.string().optional(),
disable_sliding_window_mask: z.boolean().optional(),
enable_trtllm_engine_latency: z.boolean().optional(),
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}
// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = VllmBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}

View File

@@ -1,206 +1,27 @@
import { BackendType } from '@/types/instance'
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Import backend schemas from separate files
import {
LlamaCppBackendOptionsSchema,
type LlamaCppBackendOptions,
getAllLlamaCppFieldKeys,
getLlamaCppFieldType,
MlxBackendOptionsSchema,
type MlxBackendOptions,
getAllMlxFieldKeys,
getMlxFieldType,
VllmBackendOptionsSchema,
type VllmBackendOptions,
getAllVllmFieldKeys,
getVllmFieldType
} from './backends'
// Backend options union
export const BackendOptionsSchema = z.union([
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
])
// Define the main create instance options schema
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
on_demand_start: z.boolean().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
})
// Re-export types and schemas from backend files
export {
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
}
// Infer the TypeScript types from the schemas
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type from Zod schema
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
if (innerSchema instanceof z.ZodObject) return 'object'
return 'text' // ZodString and others default to text
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}

View File

@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
export const BackendType = {
LLAMA_CPP: 'llama_cpp',
MLX_LM: 'mlx_lm',
VLLM: 'vllm',
// MLX_VLM: 'mlx_vlm', // Future expansion
} as const