mirror of https://github.com/lordmathis/llamactl.git
synced 2025-11-06 09:04:27 +00:00

Merge pull request #34 from lordmathis/feat/vllm-backend

feat: Implement vLLM backend

README.md (26 lines changed)
@@ -13,7 +13,7 @@
 ### 🔗 Universal Compatibility
 - **OpenAI API Compatible**: Drop-in replacement - route requests by model name
-- **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
+- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 
 ### 🌐 User-Friendly Interface
 - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -31,6 +31,7 @@
 # 1. Install backend (one-time setup)
 # For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
 # For MLX on macOS: pip install mlx-lm
+# For vLLM: pip install vllm
 
 # 2. Download and run llamactl
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -47,7 +48,7 @@ llamactl
 ### Create and manage instances via web dashboard:
 1. Open http://localhost:8080
 2. Click "Create Instance"
-3. Choose backend type (llama.cpp or MLX)
+3. Choose backend type (llama.cpp, MLX, or vLLM)
 4. Set model path and backend-specific options
 5. Start or stop the instance
 
@@ -63,6 +64,11 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
   -H "Authorization: Bearer your-key" \
   -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
 
+# Create vLLM instance
+curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
+  -H "Authorization: Bearer your-key" \
+  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
+
 # Use with OpenAI SDK
 curl -X POST localhost:8080/v1/chat/completions \
   -H "Authorization: Bearer your-key" \
@@ -121,6 +127,21 @@ source mlx-env/bin/activate
 pip install mlx-lm
 ```
 
+**For vLLM backend:**
+You need vLLM installed:
+
+```bash
+# Install via pip (requires Python 3.8+, GPU required)
+pip install vllm
+
+# Or in a virtual environment (recommended)
+python -m venv vllm-env
+source vllm-env/bin/activate
+pip install vllm
+
+# For production deployments, consider container-based installation
+```
+
 ## Configuration
 
 llamactl works out of the box with sensible defaults.
@@ -135,6 +156,7 @@ server:
 backends:
   llama_executable: llama-server  # Path to llama-server executable
   mlx_lm_executable: mlx_lm.server  # Path to mlx_lm.server executable
+  vllm_executable: vllm  # Path to vllm executable
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
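
Because llamactl's endpoint is OpenAI-compatible and routes requests by model name (per the README changes above), a vLLM instance created this way can be queried with the standard OpenAI Python client. A minimal sketch, assuming the `openai` package, llamactl on localhost:8080, and the `my-vllm-model` instance from the curl example; the API key is a placeholder:

```python
# Query a llamactl-managed vLLM instance through the OpenAI-compatible API.
# Assumes: pip install openai; llamactl running on localhost:8080; an
# instance named "my-vllm-model" created as in the README diff above.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8080/v1",  # llamactl's OpenAI-compatible endpoint
    api_key="your-key",                   # placeholder inference API key
)

response = client.chat.completions.create(
    model="my-vllm-model",  # llamactl routes by instance name
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```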
apidocs/docs.go (675 lines changed)
@@ -19,6 +19,159 @@ const docTemplate = `{
     "host": "{{.Host}}",
     "basePath": "{{.BasePath}}",
     "paths": {
+        "/backends/llama-cpp/parse-command": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Parses a llama-server command string into instance options",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "backends"
+                ],
+                "summary": "Parse llama-server command",
+                "parameters": [
+                    {
+                        "description": "Command to parse",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/server.ParseCommandRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Parsed options",
+                        "schema": {
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request or command",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/backends/mlx/parse-command": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Parses MLX-LM server command string into instance options",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "backends"
+                ],
+                "summary": "Parse mlx_lm.server command",
+                "parameters": [
+                    {
+                        "description": "Command to parse",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/server.ParseCommandRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Parsed options",
+                        "schema": {
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request or command",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/backends/vllm/parse-command": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Parses a vLLM serve command string into instance options",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "backends"
+                ],
+                "summary": "Parse vllm serve command",
+                "parameters": [
+                    {
+                        "description": "Command to parse",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/server.ParseCommandRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Parsed options",
+                        "schema": {
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request or command",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
         "/instances": {
             "get": {
                 "security": [
@@ -681,522 +834,46 @@ const docTemplate = `{
         }
     },
     "definitions": {
+        "backends.BackendType": {
+            "type": "string",
+            "enum": [
+                "llama_cpp",
+                "mlx_lm",
+                "vllm"
+            ],
+            "x-enum-varnames": [
+                "BackendTypeLlamaCpp",
+                "BackendTypeMlxLm",
+                "BackendTypeVllm"
+            ]
+        },
         "instance.CreateInstanceOptions": {
             "type": "object",
             "properties": {
[roughly 120 per-flag llama-server properties, "alias" and "api_key" through "yarn_orig_ctx" (cache_*, cpu_*, ctx_size*, draft_*, dry_*, gpu_layers*, hf_*, lora*, mirostat*, rope_*, sampling and YaRN options), are removed throughout this definition; backend flags now travel in "backend_options"]
                 "auto_restart": {
                     "description": "Auto restart",
                     "type": "boolean"
                 },
+                "backend_options": {
+                    "type": "object",
+                    "additionalProperties": {}
+                },
+                "backend_type": {
+                    "$ref": "#/definitions/backends.BackendType"
+                },
                 "idle_timeout": {
                     "description": "Idle timeout",
                     "type": "integer"
                 },
                 "max_restarts": {
                     "type": "integer"
                 },
                 "on_demand_start": {
                     "description": "On demand start",
                     "type": "boolean"
                 },
                 "restart_delay": {
-                    "type": "integer"
+                    "description": "seconds",
+                    "type": "integer"
                 }
             }
         }
@@ -1264,6 +941,14 @@ const docTemplate = `{
                 "type": "string"
             }
         }
+        },
+        "server.ParseCommandRequest": {
+            "type": "object",
+            "properties": {
+                "command": {
+                    "type": "string"
+                }
+            }
         }
     }
}`
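
The three parse-command endpoints above accept a raw command string and return it converted into the reworked `instance.CreateInstanceOptions` shape, where backend flags now sit under `backend_options` instead of being top-level properties. A hedged sketch of calling the vLLM variant with the `requests` package; the host, port, and API key are assumptions:

```python
# Sketch: convert a raw "vllm serve" command into instance options via the
# /backends/vllm/parse-command endpoint documented above.
# Assumes llamactl on localhost:8080 and a management API key "your-key".
import requests

resp = requests.post(
    "http://localhost:8080/api/v1/backends/vllm/parse-command",
    headers={"Authorization": "Bearer your-key"},
    json={"command": "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2"},
    timeout=10,
)
resp.raise_for_status()

options = resp.json()
# Expected shape per the swagger definitions: backend_type is one of the
# backends.BackendType enum values and flags sit under backend_options, e.g.
# {"backend_type": "vllm",
#  "backend_options": {"model": "microsoft/DialoGPT-medium",
#                      "tensor_parallel_size": 2}}
print(options)
```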
apidocs/swagger.json
@@ -12,6 +12,159 @@
     },
     "basePath": "/api/v1",
     "paths": {
+        "/backends/llama-cpp/parse-command": { ... },
+        "/backends/mlx/parse-command": { ... },
+        "/backends/vllm/parse-command": { ... },
         "/instances": {
             "get": {
                 "security": [
@@ -674,522 +827,46 @@
[the generated swagger.json receives the same changes as the template in apidocs/docs.go above: "backends.BackendType" is added, "instance.CreateInstanceOptions" drops the per-flag llama-server properties in favor of "backend_type" and "backend_options", and "restart_delay" gains the "seconds" description]
@@ -1257,6 +934,14 @@
[same "server.ParseCommandRequest" definition added as in apidocs/docs.go]
apidocs/swagger.yaml

@@ -1,352 +1,35 @@
 basePath: /api/v1
 definitions:
+  backends.BackendType:
+    enum:
+    - llama_cpp
+    - mlx_lm
+    - vllm
+    type: string
+    x-enum-varnames:
+    - BackendTypeLlamaCpp
+    - BackendTypeMlxLm
+    - BackendTypeVllm
   instance.CreateInstanceOptions:
     properties:
[the same per-flag llama-server properties, alias through yarn_orig_ctx, are removed here as well]
       auto_restart:
         description: Auto restart
         type: boolean
+      backend_options:
+        additionalProperties: {}
+        type: object
+      backend_type:
+        $ref: '#/definitions/backends.BackendType'
       idle_timeout:
         description: Idle timeout
         type: integer
       max_restarts:
         type: integer
       on_demand_start:
         description: On demand start
         type: boolean
       restart_delay:
-        type: integer
+        description: seconds
+        type: integer
     type: object
   instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
     object:
       type: string
     type: object
+  server.ParseCommandRequest:
+    properties:
+      command:
+        type: string
+    type: object
 info:
   contact: {}
   description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,102 @@ info:
   title: llamactl API
   version: "1.0"
 paths:
+  /backends/llama-cpp/parse-command:
+    post:
+      consumes:
+      - application/json
+      description: Parses a llama-server command string into instance options
+      parameters:
+      - description: Command to parse
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/server.ParseCommandRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Parsed options
+          schema:
+            $ref: '#/definitions/instance.CreateInstanceOptions'
+        "400":
+          description: Invalid request or command
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+        "500":
+          description: Internal Server Error
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Parse llama-server command
+      tags:
+      - backends
+  /backends/mlx/parse-command:
+    post:
+      consumes:
+      - application/json
+      description: Parses MLX-LM server command string into instance options
+      parameters:
+      - description: Command to parse
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/server.ParseCommandRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Parsed options
+          schema:
+            $ref: '#/definitions/instance.CreateInstanceOptions'
+        "400":
+          description: Invalid request or command
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Parse mlx_lm.server command
+      tags:
+      - backends
+  /backends/vllm/parse-command:
+    post:
+      consumes:
+      - application/json
+      description: Parses a vLLM serve command string into instance options
+      parameters:
+      - description: Command to parse
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/server.ParseCommandRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Parsed options
+          schema:
+            $ref: '#/definitions/instance.CreateInstanceOptions'
+        "400":
+          description: Invalid request or command
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Parse vllm serve command
+      tags:
+      - backends
   /instances:
     get:
       description: Returns a list of all instances managed by the server
@@ -22,6 +22,7 @@ server:
 backends:
   llama_executable: llama-server  # Path to llama-server executable
   mlx_lm_executable: mlx_lm.server  # Path to mlx_lm.server executable
+  vllm_executable: vllm  # Path to vllm executable
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
@@ -94,11 +95,13 @@ server:
 backends:
   llama_executable: "llama-server"  # Path to llama-server executable (default: "llama-server")
   mlx_lm_executable: "mlx_lm.server"  # Path to mlx_lm.server executable (default: "mlx_lm.server")
+  vllm_executable: "vllm"  # Path to vllm executable (default: "vllm")
 ```
 
 **Environment Variables:**
 - `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
 - `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
+- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
 
 ### Instance Configuration
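
The executable paths above can also be overridden through the environment variables this hunk documents. A small sketch, assuming a `llamactl` binary on PATH; the virtualenv path is a placeholder:

```python
# Sketch: start llamactl with the vLLM executable overridden via the
# documented LLAMACTL_VLLM_EXECUTABLE environment variable.
import os
import subprocess

env = os.environ.copy()
env["LLAMACTL_VLLM_EXECUTABLE"] = "/opt/vllm-env/bin/vllm"  # placeholder path

subprocess.run(["llamactl"], env=env, check=True)
```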
@@ -37,6 +37,22 @@ pip install mlx-lm
 
 Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
 
+**For vLLM backend:**
+
+vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
+
+```bash
+# Install via pip (requires Python 3.8+, GPU required)
+pip install vllm
+
+# Or in a virtual environment (recommended)
+python -m venv vllm-env
+source vllm-env/bin/activate
+pip install vllm
+
+# For production deployments, consider container-based installation
+```
+
 ## Installation Methods
 
 ### Option 1: Download Binary (Recommended)
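
Since llamactl launches the configured `vllm` executable for this backend, it can be worth confirming the executable is visible before creating instances. A minimal sketch under that assumption:

```python
# Sketch: confirm the vllm executable is reachable before pointing
# llamactl at it.
import shutil

path = shutil.which("vllm")
if path is None:
    raise SystemExit("vllm executable not found on PATH; activate vllm-env first")
print(f"vllm executable: {path}")
```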
@@ -29,8 +29,9 @@ You should see the Llamactl web interface.

1. Click the "Add Instance" button
2. Fill in the instance configuration:
   - **Name**: Give your instance a descriptive name
   - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
   - **Model**: Model path or identifier for your chosen backend
   - **Additional Options**: Backend-specific parameters

3. Click "Create Instance"
@@ -43,17 +44,46 @@ Once created, you can:

- **View logs** by clicking the logs button
- **Stop** the instance when needed

## Example Configurations

Here are basic example configurations for each backend:

**llama.cpp backend:**
```json
{
  "name": "llama2-7b",
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/llama-2-7b-chat.gguf",
    "threads": 4,
    "ctx_size": 2048,
    "gpu_layers": 32
  }
}
```

**MLX backend (macOS only):**
```json
{
  "name": "mistral-mlx",
  "backend_type": "mlx_lm",
  "backend_options": {
    "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
    "temp": 0.7,
    "max_tokens": 2048
  }
}
```

**vLLM backend:**
```json
{
  "name": "dialogpt-vllm",
  "backend_type": "vllm",
  "backend_options": {
    "model": "microsoft/DialoGPT-medium",
    "tensor_parallel_size": 2,
    "gpu_memory_utilization": 0.9
  }
}
```
@@ -66,12 +96,14 @@ You can also manage instances via the REST API:

# List all instances
curl http://localhost:8080/api/instances

# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'

# Start an instance
@@ -170,7 +170,7 @@ POST /api/v1/instances/{name}/start

```json
{
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
}
```
@@ -191,7 +191,7 @@ POST /api/v1/instances/{name}/stop

```json
{
  "name": "llama2-7b",
  "status": "stopped",
  "created": 1705312200
}
```
@@ -208,7 +208,7 @@ POST /api/v1/instances/{name}/restart

```json
{
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
}
```
@@ -401,6 +401,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \

  }'
```

## Backend-Specific Endpoints

### Parse Commands

Llamactl provides endpoints to parse command strings from different backends into instance configuration options.

#### Parse Llama.cpp Command

Parse a llama-server command string into instance options.

```http
POST /api/v1/backends/llama-cpp/parse-command
```

**Request Body:**
```json
{
  "command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "llama_cpp",
  "llama_server_options": {
    "model": "/path/to/model.gguf",
    "ctx_size": 2048,
    "port": 8080
  }
}
```
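For example, an existing launch command can be converted into instance options with a single request (the Authorization header applies when management API keys are enabled):

```bash
curl -X POST http://localhost:8080/api/v1/backends/llama-cpp/parse-command \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-key" \
  -d '{"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"}'
```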
#### Parse MLX-LM Command

Parse an MLX-LM server command string into instance options.

```http
POST /api/v1/backends/mlx/parse-command
```

**Request Body:**
```json
{
  "command": "mlx_lm.server --model /path/to/model --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "mlx_lm",
  "mlx_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
}
```
#### Parse vLLM Command

Parse a vLLM serve command string into instance options.

```http
POST /api/v1/backends/vllm/parse-command
```

**Request Body:**
```json
{
  "command": "vllm serve /path/to/model --port 8080"
}
```

**Response:**
```json
{
  "backend_type": "vllm",
  "vllm_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
}
```
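The vLLM endpoint accepts `vllm serve` command strings the same way (again assuming API key auth is enabled):

```bash
curl -X POST http://localhost:8080/api/v1/backends/vllm/parse-command \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-key" \
  -d '{"command": "vllm serve /path/to/model --port 8080"}'
```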
**Error Responses for Parse Commands:**

- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error

## Auto-Generated Documentation

The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:

1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`

## Swagger Documentation

If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
@@ -1,6 +1,6 @@

# Managing Instances

Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.

## Overview
@@ -42,9 +42,11 @@ Each instance is displayed as a card showing:

3. **Choose Backend Type**:
   - **llama.cpp**: For GGUF models using llama-server
   - **MLX**: For MLX-optimized models (macOS only)
   - **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
   - **For llama.cpp**: GGUF model path or HuggingFace repo
   - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
   - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
   - **Auto Restart**: Automatically restart instance on failure
   - **Max Restarts**: Maximum number of restart attempts
@@ -54,6 +56,7 @@ Each instance is displayed as a card showing:
6. Configure backend-specific options:
   - **llama.cpp**: Threads, context size, GPU layers, port, etc.
   - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
   - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance

### Via API
@@ -87,6 +90,20 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \

    "max_restarts": 3
  }'

# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "vllm",
    "backend_options": {
      "model": "microsoft/DialoGPT-medium",
      "tensor_parallel_size": 2,
      "gpu_memory_utilization": 0.9
    },
    "auto_restart": true,
    "on_demand_start": true
  }'

# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
  -H "Content-Type: application/json" \
@@ -179,16 +196,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}

## Instance Proxy

Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).

```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```

All backends provide OpenAI-compatible endpoints. Check the respective documentation:

- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
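For example, a backend's own health endpoint can be reached through the proxy (a sketch; it assumes the underlying server exposes `/health`, as llama-server does):

```bash
curl http://localhost:8080/api/instances/my-model/proxy/health
```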
### Instance Health
@@ -5,5 +5,6 @@ type BackendType string

const (
	BackendTypeLlamaCpp BackendType = "llama_cpp"
	BackendTypeMlxLm    BackendType = "mlx_lm"
	BackendTypeVllm     BackendType = "vllm"
	// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
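A sketch of how these constants might be switched on when routing a parse request to the right backend (a hypothetical dispatcher, assuming the constants live in the `backends` package; the `vllm.ParseVllmCommand` name is inferred by analogy with the llama.cpp and MLX parsers below):

```go
package server

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/backends/mlx"
	"llamactl/pkg/backends/vllm"
)

// parseForBackend routes a raw command string to the matching parser.
func parseForBackend(t backends.BackendType, command string) (any, error) {
	switch t {
	case backends.BackendTypeLlamaCpp:
		return llamacpp.ParseLlamaCommand(command)
	case backends.BackendTypeMlxLm:
		return mlx.ParseMlxCommand(command)
	case backends.BackendTypeVllm:
		return vllm.ParseVllmCommand(command) // assumed name
	default:
		return nil, fmt.Errorf("unsupported backend type: %s", t)
	}
}
```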
pkg/backends/builder.go (new file, 70 lines)
@@ -0,0 +1,70 @@
package backends

import (
	"reflect"
	"strconv"
	"strings"
)

// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
	var args []string

	v := reflect.ValueOf(options).Elem()
	t := v.Type()

	for i := 0; i < v.NumField(); i++ {
		field := v.Field(i)
		fieldType := t.Field(i)

		if !field.CanInterface() {
			continue
		}

		jsonTag := fieldType.Tag.Get("json")
		if jsonTag == "" || jsonTag == "-" {
			continue
		}

		// Get flag name from JSON tag
		flagName := strings.Split(jsonTag, ",")[0]
		flagName = strings.ReplaceAll(flagName, "_", "-")

		switch field.Kind() {
		case reflect.Bool:
			if field.Bool() {
				args = append(args, "--"+flagName)
			}
		case reflect.Int:
			if field.Int() != 0 {
				args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
			}
		case reflect.Float64:
			if field.Float() != 0 {
				args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
			}
		case reflect.String:
			if field.String() != "" {
				args = append(args, "--"+flagName, field.String())
			}
		case reflect.Slice:
			if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
				if multipleFlags[flagName] {
					// Multiple flags: --flag value1 --flag value2
					for j := 0; j < field.Len(); j++ {
						args = append(args, "--"+flagName, field.Index(j).String())
					}
				} else {
					// Comma-separated: --flag value1,value2
					var values []string
					for j := 0; j < field.Len(); j++ {
						values = append(values, field.Index(j).String())
					}
					args = append(args, "--"+flagName, strings.Join(values, ","))
				}
			}
		}
	}

	return args
}
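To illustrate the tag-to-flag mapping, a minimal sketch (the `demoOptions` struct is made up for illustration; only `BuildCommandArgs` comes from this commit):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// demoOptions stands in for a backend options struct with json tags.
type demoOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &demoOptions{
		Model:     "model.gguf",
		GPULayers: 32,
		Verbose:   true,
		Lora:      []string{"a.bin", "b.bin"},
	}
	// Marking "lora" as multi-valued repeats the flag instead of comma-joining.
	fmt.Println(backends.BuildCommandArgs(opts, map[string]bool{"lora": true}))
	// [--model model.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]
}
```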
@@ -2,9 +2,9 @@ package llamacpp
 
 import (
 	"encoding/json"
+	"llamactl/pkg/backends"
 	"reflect"
 	"strconv"
-	"strings"
 )
 
 type LlamaServerOptions struct {
@@ -315,62 +315,44 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 
 // BuildCommandArgs converts InstanceOptions to command line arguments
 func (o *LlamaServerOptions) BuildCommandArgs() []string {
-	var args []string
-
-	v := reflect.ValueOf(o).Elem()
-	t := v.Type()
-
-	for i := 0; i < v.NumField(); i++ {
-		field := v.Field(i)
-		fieldType := t.Field(i)
-
-		// Skip unexported fields
-		if !field.CanInterface() {
-			continue
-		}
-
-		// Get the JSON tag to determine the flag name
-		jsonTag := fieldType.Tag.Get("json")
-		if jsonTag == "" || jsonTag == "-" {
-			continue
-		}
-
-		// Remove ",omitempty" from the tag
-		flagName := jsonTag
-		if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
-			flagName = jsonTag[:commaIndex]
-		}
-
-		// Convert snake_case to kebab-case for CLI flags
-		flagName = strings.ReplaceAll(flagName, "_", "-")
-
-		// Add the appropriate arguments based on field type and value
-		switch field.Kind() {
-		case reflect.Bool:
-			if field.Bool() {
-				args = append(args, "--"+flagName)
-			}
-		case reflect.Int:
-			if field.Int() != 0 {
-				args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
-			}
-		case reflect.Float64:
-			if field.Float() != 0 {
-				args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
-			}
-		case reflect.String:
-			if field.String() != "" {
-				args = append(args, "--"+flagName, field.String())
-			}
-		case reflect.Slice:
-			if field.Type().Elem().Kind() == reflect.String {
-				// Handle []string fields
-				for j := 0; j < field.Len(); j++ {
-					args = append(args, "--"+flagName, field.Index(j).String())
-				}
-			}
-		}
-	}
-
-	return args
+	// Llama uses multiple flags for arrays by default (not comma-separated)
+	multipleFlags := map[string]bool{
+		"override-tensor":       true,
+		"override-kv":           true,
+		"lora":                  true,
+		"lora-scaled":           true,
+		"control-vector":        true,
+		"control-vector-scaled": true,
+		"dry-sequence-breaker":  true,
+		"logit-bias":            true,
+	}
+	return backends.BuildCommandArgs(o, multipleFlags)
 }
+
+// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
+// Supports multiple formats:
+// 1. Full command: "llama-server --model file.gguf"
+// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
+// 3. Args only: "--model file.gguf --gpu-layers 32"
+// 4. Multiline commands with backslashes
+func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
+	executableNames := []string{"llama-server"}
+	var subcommandNames []string // Llama has no subcommands
+	multiValuedFlags := map[string]bool{
+		"override_tensor":       true,
+		"override_kv":           true,
+		"lora":                  true,
+		"lora_scaled":           true,
+		"control_vector":        true,
+		"control_vector_scaled": true,
+		"dry_sequence_breaker":  true,
+		"logit_bias":            true,
+	}
+
+	var llamaOptions LlamaServerOptions
+	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
+		return nil, err
+	}
+
+	return &llamaOptions, nil
+}
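A short sketch of the new parser in use (caller code assumed, not part of this commit):

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/llamacpp"
)

func main() {
	opts, err := llamacpp.ParseLlamaCommand(
		"llama-server --model /models/llama-2-7b.gguf --ctx-size 4096 --lora a.bin --lora b.bin")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, opts.CtxSize, opts.Lora) // /models/llama-2-7b.gguf 4096 [a.bin b.bin]
	fmt.Println(opts.BuildCommandArgs())             // round-trips back into argv form
}
```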
@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {

	}
}

func TestParseLlamaCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic command",
			command:   "llama-server --model /path/to/model.gguf --gpu-layers 32",
			expectErr: false,
		},
		{
			name:      "args only",
			command:   "--model /path/to/model.gguf --ctx-size 4096",
			expectErr: false,
		},
		{
			name:      "mixed flag formats",
			command:   "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
			expectErr: false,
		},
		{
			name:      "quoted strings",
			command:   `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "unterminated quote",
			command:   `llama-server --model test.gguf --api-key "unterminated`,
			expectErr: true,
		},
		{
			name:      "malformed flag",
			command:   "llama-server ---model test.gguf",
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := llamacpp.ParseLlamaCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
			}
		})
	}
}

func TestParseLlamaCommandValues(t *testing.T) {
	command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
	result, err := llamacpp.ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/test/model.gguf" {
		t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}

	if result.Temperature != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", result.Temperature)
	}

	if !result.Verbose {
		t.Errorf("expected verbose to be true")
	}

	if !result.NoMmap {
		t.Errorf("expected no_mmap to be true")
	}
}

func TestParseLlamaCommandArrays(t *testing.T) {
	command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
	result, err := llamacpp.ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if len(result.Lora) != 2 {
		t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
	}

	expected := []string{"adapter1.bin", "adapter2.bin"}
	for i, v := range expected {
		if result.Lora[i] != v {
			t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
		}
	}
}

// Helper functions
func contains(slice []string, item string) bool {
	return slices.Contains(slice, item)
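These tests exercise the shared parser through the public package API and can be run in isolation with the standard toolchain:

```bash
go test ./pkg/backends/... -run TestParseLlamaCommand -v
```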
@@ -1,286 +0,0 @@
package llamacpp

import (
	"encoding/json"
	"errors"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
	// 1. Normalize the command - handle multiline with backslashes
	trimmed := normalizeMultilineCommand(command)
	if trimmed == "" {
		return nil, fmt.Errorf("command cannot be empty")
	}

	// 2. Extract arguments from command
	args, err := extractArgumentsFromCommand(trimmed)
	if err != nil {
		return nil, err
	}

	// 3. Parse arguments into map
	options := make(map[string]any)

	// Known multi-valued flags (snake_case form)
	multiValued := map[string]struct{}{
		"override_tensor":       {},
		"override_kv":           {},
		"lora":                  {},
		"lora_scaled":           {},
		"control_vector":        {},
		"control_vector_scaled": {},
		"dry_sequence_breaker":  {},
		"logit_bias":            {},
	}

	i := 0
	for i < len(args) {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") { // skip positional / stray values
			i++
			continue
		}

		// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Unified parsing for --flag=value vs --flag value
		var rawFlag, rawValue string
		hasEquals := false
		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			rawFlag = parts[0]
			rawValue = parts[1] // may be empty string
			hasEquals = true
		} else {
			rawFlag = arg
		}

		flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
		flagName := strings.ReplaceAll(flagCore, "-", "_")

		// Detect value if not in equals form
		valueProvided := hasEquals
		if !hasEquals {
			if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
				rawValue = args[i+1]
				valueProvided = true
			}
		}

		// Determine if multi-valued flag
		_, isMulti := multiValued[flagName]

		// Normalization helper: ensure slice for multi-valued flags
		appendValue := func(valStr string) {
			if existing, ok := options[flagName]; ok {
				// Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
				if slice, ok := existing.([]string); ok {
					options[flagName] = append(slice, valStr)
					return
				}
				// Convert scalar to slice
				options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
				return
			}
			// First value
			if isMulti {
				options[flagName] = []string{valStr}
			} else {
				// We'll parse type below for single-valued flags
				options[flagName] = valStr
			}
		}

		if valueProvided {
			// Use raw token for multi-valued flags; else allow typed parsing
			appendValue(rawValue)
			if !isMulti { // convert to typed value if scalar
				if strVal, ok := options[flagName].(string); ok { // still scalar
					options[flagName] = parseValue(strVal)
				}
			}
			// Advance index: if we consumed a following token as value (non equals form), skip it
			if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
				i += 2
			} else {
				i++
			}
			continue
		}

		// Boolean flag (no value)
		options[flagName] = true
		i++
	}

	// 4. Convert to LlamaServerOptions using existing UnmarshalJSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
	}

	var llamaOptions LlamaServerOptions
	if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
		return nil, fmt.Errorf("failed to parse command options: %w", err)
	}

	// 5. Return LlamaServerOptions
	return &llamaOptions, nil
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
	// Surrounding matching quotes (single or double)
	if l := len(value); l >= 2 {
		if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
			value = value[1 : l-1]
		}
	}

	lower := strings.ToLower(value)
	if lower == "true" {
		return true
	}
	if lower == "false" {
		return false
	}

	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}
	return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
	// Handle escaped newlines (backslash followed by newline)
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")

	// Clean up extra whitespace
	re = regexp.MustCompile(`\s+`)
	normalized = re.ReplaceAllString(normalized, " ")

	return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
	// Split command into tokens respecting quotes
	tokens, err := splitCommandTokens(command)
	if err != nil {
		return nil, err
	}

	if len(tokens) == 0 {
		return nil, fmt.Errorf("no command tokens found")
	}

	// Check if first token looks like an executable
	firstToken := tokens[0]

	// Case 1: Full path to executable (contains path separator or ends with llama-server)
	if strings.Contains(firstToken, string(filepath.Separator)) ||
		strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
		return tokens[1:], nil // Return everything except the executable
	}

	// Case 2: Just "llama-server" command
	if strings.ToLower(firstToken) == "llama-server" {
		return tokens[1:], nil // Return everything except the command
	}

	// Case 3: Arguments only (starts with a flag)
	if strings.HasPrefix(firstToken, "-") {
		return tokens, nil // Return all tokens as arguments
	}

	// Case 4: Unknown format - might be a different executable name
	// Be permissive and assume it's the executable
	return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
	var tokens []string
	var current strings.Builder
	inQuotes := false
	quoteChar := byte(0)
	escaped := false

	for i := 0; i < len(command); i++ {
		c := command[i]

		if escaped {
			current.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' {
			escaped = true
			current.WriteByte(c)
			continue
		}

		if !inQuotes && (c == '"' || c == '\'') {
			inQuotes = true
			quoteChar = c
			current.WriteByte(c)
		} else if inQuotes && c == quoteChar {
			inQuotes = false
			quoteChar = 0
			current.WriteByte(c)
		} else if !inQuotes && (c == ' ' || c == '\t') {
			if current.Len() > 0 {
				tokens = append(tokens, current.String())
				current.Reset()
			}
		} else {
			current.WriteByte(c)
		}
	}

	if inQuotes {
		return nil, errors.New("unterminated quoted string")
	}

	if current.Len() > 0 {
		tokens = append(tokens, current.String())
	}

	return tokens, nil
}

// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
	if !strings.HasPrefix(arg, "-") {
		return false
	}

	// Special case: if it's a negative number, treat it as a value
	if _, err := strconv.ParseFloat(arg, 64); err == nil {
		return false
	}

	return true
}
@@ -1,413 +0,0 @@
package llamacpp

import (
	"testing"
)

func TestParseLlamaCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic command with model",
			command:   "llama-server --model /path/to/model.gguf",
			expectErr: false,
		},
		{
			name:      "command with multiple flags",
			command:   "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
			expectErr: false,
		},
		{
			name:      "command with short flags",
			command:   "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
			expectErr: false,
		},
		{
			name:      "command with equals format",
			command:   "llama-server --model=/path/to/model.gguf --gpu-layers=32",
			expectErr: false,
		},
		{
			name:      "command with boolean flags",
			command:   "llama-server --model /path/to/model.gguf --verbose --no-mmap",
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "case insensitive command",
			command:   "LLAMA-SERVER --model /path/to/model.gguf",
			expectErr: false,
		},
		// New test cases for improved functionality
		{
			name:      "args only without llama-server",
			command:   "--model /path/to/model.gguf --gpu-layers 32",
			expectErr: false,
		},
		{
			name:      "full path to executable",
			command:   "/usr/local/bin/llama-server --model /path/to/model.gguf",
			expectErr: false,
		},
		{
			name:      "negative number handling",
			command:   "llama-server --gpu-layers -1 --model test.gguf",
			expectErr: false,
		},
		{
			name:      "multiline command with backslashes",
			command:   "llama-server --model /path/to/model.gguf \\\n  --ctx-size 4096 \\\n  --batch-size 512",
			expectErr: false,
		},
		{
			name:      "quoted string with special characters",
			command:   `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
			expectErr: false,
		},
		{
			name:      "unterminated quoted string",
			command:   `llama-server --model test.gguf --chat-template "unterminated quote`,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := ParseLlamaCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
				return
			}
		})
	}
}

func TestParseLlamaCommandSpecificValues(t *testing.T) {
	// Test specific value parsing
	command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/test/model.gguf" {
		t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if !result.Verbose {
		t.Errorf("expected verbose to be true, got %v", result.Verbose)
	}
}

func TestParseLlamaCommandArrayFlags(t *testing.T) {
	// Test array flag handling (critical for lora, override-tensor, etc.)
	command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if len(result.Lora) != 2 {
		t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
	}

	if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
		t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
	}
}

func TestParseLlamaCommandMixedFormats(t *testing.T) {
	// Test mixing --flag=value and --flag value formats
	command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/model.gguf" {
		t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 16 {
		t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if !result.Verbose {
		t.Errorf("expected verbose to be true, got %v", result.Verbose)
	}
}

func TestParseLlamaCommandTypeConversion(t *testing.T) {
	// Test that values are converted to appropriate types
	command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Temperature != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", result.Temperature)
	}

	if result.TopK != 40 {
		t.Errorf("expected top_k 40, got %d", result.TopK)
	}

	if !result.NoMmap {
		t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
	}
}

func TestParseLlamaCommandArgsOnly(t *testing.T) {
	// Test parsing arguments without llama-server command
	command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}
}

func TestParseLlamaCommandFullPath(t *testing.T) {
	// Test full path to executable
	command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test.gguf" {
		t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 16 {
		t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
	}
}

func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
	// Test negative number parsing
	command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.GPULayers != -1 {
		t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
	}

	if result.Seed != -12345 {
		t.Errorf("expected seed -12345, got %d", result.Seed)
	}
}

func TestParseLlamaCommandMultiline(t *testing.T) {
	// Test multiline command with backslashes
	command := `llama-server --model /path/to/model.gguf \
    --ctx-size 4096 \
    --batch-size 512 \
    --gpu-layers 32`

	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}
}

func TestParseLlamaCommandQuotedStrings(t *testing.T) {
	// Test quoted strings with special characters
	command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test.gguf" {
		t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
	}

	if result.APIKey != "sk-1234567890abcdef" {
		t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
	}

	expectedTemplate := "User: {user}\\nAssistant: "
	if result.ChatTemplate != expectedTemplate {
		t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
	}
}

func TestParseLlamaCommandUnslothExample(t *testing.T) {
	// Test with realistic unsloth-style command
	command := `llama-server --model /path/to/model.gguf \
    --ctx-size 4096 \
    --batch-size 512 \
    --gpu-layers -1 \
    --temp 0.7 \
    --repeat-penalty 1.1 \
    --top-k 40 \
    --top-p 0.95 \
    --host 0.0.0.0 \
    --port 8000 \
    --api-key "sk-1234567890abcdef"`

	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Verify key fields
	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if result.GPULayers != -1 {
		t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
	}

	if result.Temperature != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", result.Temperature)
	}

	if result.RepeatPenalty != 1.1 {
		t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
	}

	if result.TopK != 40 {
		t.Errorf("expected top_k 40, got %d", result.TopK)
	}

	if result.TopP != 0.95 {
		t.Errorf("expected top_p 0.95, got %f", result.TopP)
	}

	if result.Host != "0.0.0.0" {
		t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
	}

	if result.Port != 8000 {
		t.Errorf("expected port 8000, got %d", result.Port)
	}

	if result.APIKey != "sk-1234567890abcdef" {
		t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
	}
}

// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
	cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
	result, err := ParseLlamaCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if result.Model != "my model.gguf" {
		t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
	}
	if result.Alias != "Test Alias" {
		t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
	}
}

func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
	// Same multi-value flag using --flag value and --flag=value forms
	cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
	result, err := ParseLlamaCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(result.Lora) != 3 {
		t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
	}
	expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
	for i, v := range expected {
		if result.Lora[i] != v {
			t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
		}
	}
}

func TestParseLlamaCommandMalformedFlag(t *testing.T) {
	cmd := "llama-server ---model test.gguf"
	_, err := ParseLlamaCommand(cmd)
	if err == nil {
		t.Fatalf("expected error for malformed flag but got none")
	}
}
@@ -1,9 +1,7 @@
 package mlx
 
 import (
-	"encoding/json"
-	"reflect"
-	"strconv"
+	"llamactl/pkg/backends"
 )
 
 type MlxServerOptions struct {
@@ -25,181 +23,34 @@ type MlxServerOptions struct {
 	ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
 
 	// Sampling defaults
-	Temp      float64 `json:"temp,omitempty"` // Note: MLX uses "temp" not "temperature"
+	Temp      float64 `json:"temp,omitempty"`
 	TopP      float64 `json:"top_p,omitempty"`
 	TopK      int     `json:"top_k,omitempty"`
 	MinP      float64 `json:"min_p,omitempty"`
 	MaxTokens int     `json:"max_tokens,omitempty"`
 }
 
-// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
-func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
-	// First unmarshal into a map to handle multiple field names
-	var raw map[string]any
-	if err := json.Unmarshal(data, &raw); err != nil {
-		return err
-	}
-
-	// Create a temporary struct for standard unmarshaling
-	type tempOptions MlxServerOptions
-	temp := tempOptions{}
-
-	// Standard unmarshal first
-	if err := json.Unmarshal(data, &temp); err != nil {
-		return err
-	}
-
-	// Copy to our struct
-	*o = MlxServerOptions(temp)
-
-	// Handle alternative field names
-	fieldMappings := map[string]string{
-		// Basic connection options
-		"m":    "model",
-		"host": "host",
-		"port": "port",
-		// "python_path": "python_path", // removed
-
-		// Model and adapter options
-		"adapter-path":      "adapter_path",
-		"draft-model":       "draft_model",
-		"num-draft-tokens":  "num_draft_tokens",
-		"trust-remote-code": "trust_remote_code",
-
-		// Logging and templates
-		"log-level":                 "log_level",
-		"chat-template":             "chat_template",
-		"use-default-chat-template": "use_default_chat_template",
-		"chat-template-args":        "chat_template_args",
-
-		// Sampling defaults
-		"temperature": "temp", // Support both temp and temperature
-		"top-p":       "top_p",
-		"top-k":       "top_k",
-		"min-p":       "min_p",
-		"max-tokens":  "max_tokens",
-	}
-
-	// Process alternative field names
-	for altName, canonicalName := range fieldMappings {
-		if value, exists := raw[altName]; exists {
-			// Use reflection to set the field value
-			v := reflect.ValueOf(o).Elem()
-			field := v.FieldByNameFunc(func(fieldName string) bool {
-				field, _ := v.Type().FieldByName(fieldName)
-				jsonTag := field.Tag.Get("json")
-				return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
-			})
-
-			if field.IsValid() && field.CanSet() {
-				switch field.Kind() {
-				case reflect.Int:
-					if intVal, ok := value.(float64); ok {
-						field.SetInt(int64(intVal))
-					} else if strVal, ok := value.(string); ok {
-						if intVal, err := strconv.Atoi(strVal); err == nil {
-							field.SetInt(int64(intVal))
-						}
-					}
-				case reflect.Float64:
-					if floatVal, ok := value.(float64); ok {
-						field.SetFloat(floatVal)
-					} else if strVal, ok := value.(string); ok {
-						if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
-							field.SetFloat(floatVal)
-						}
-					}
-				case reflect.String:
-					if strVal, ok := value.(string); ok {
-						field.SetString(strVal)
-					}
-				case reflect.Bool:
-					if boolVal, ok := value.(bool); ok {
-						field.SetBool(boolVal)
-					}
-				}
-			}
-		}
-	}
-
-	return nil
-}
-
-// NewMlxServerOptions creates MlxServerOptions with MLX defaults
-func NewMlxServerOptions() *MlxServerOptions {
-	return &MlxServerOptions{
-		Host:             "127.0.0.1", // MLX default (different from llama-server)
-		Port:             8080,        // MLX default
-		NumDraftTokens:   3,           // MLX default for speculative decoding
-		LogLevel:         "INFO",      // MLX default
-		Temp:             0.0,         // MLX default
-		TopP:             1.0,         // MLX default
-		TopK:             0,           // MLX default (disabled)
-		MinP:             0.0,         // MLX default (disabled)
-		MaxTokens:        512,         // MLX default
-		ChatTemplateArgs: "{}",        // MLX default (empty JSON object)
-	}
-}
-
 // BuildCommandArgs converts to command line arguments
 func (o *MlxServerOptions) BuildCommandArgs() []string {
-	var args []string
-
-	// Required and basic options
-	if o.Model != "" {
-		args = append(args, "--model", o.Model)
-	}
-	if o.Host != "" {
-		args = append(args, "--host", o.Host)
-	}
-	if o.Port != 0 {
-		args = append(args, "--port", strconv.Itoa(o.Port))
-	}
-
-	// Model and adapter options
-	if o.AdapterPath != "" {
-		args = append(args, "--adapter-path", o.AdapterPath)
-	}
-	if o.DraftModel != "" {
-		args = append(args, "--draft-model", o.DraftModel)
-	}
-	if o.NumDraftTokens != 0 {
-		args = append(args, "--num-draft-tokens", strconv.Itoa(o.NumDraftTokens))
-	}
-	if o.TrustRemoteCode {
-		args = append(args, "--trust-remote-code")
-	}
-
-	// Logging and templates
-	if o.LogLevel != "" {
-		args = append(args, "--log-level", o.LogLevel)
-	}
-	if o.ChatTemplate != "" {
-		args = append(args, "--chat-template", o.ChatTemplate)
-	}
-	if o.UseDefaultChatTemplate {
-		args = append(args, "--use-default-chat-template")
-	}
-	if o.ChatTemplateArgs != "" {
-		args = append(args, "--chat-template-args", o.ChatTemplateArgs)
-	}
-
-	// Sampling defaults
-	if o.Temp != 0 {
-		args = append(args, "--temp", strconv.FormatFloat(o.Temp, 'f', -1, 64))
-	}
-	if o.TopP != 0 {
-		args = append(args, "--top-p", strconv.FormatFloat(o.TopP, 'f', -1, 64))
-	}
-	if o.TopK != 0 {
-		args = append(args, "--top-k", strconv.Itoa(o.TopK))
-	}
-	if o.MinP != 0 {
-		args = append(args, "--min-p", strconv.FormatFloat(o.MinP, 'f', -1, 64))
-	}
-	if o.MaxTokens != 0 {
-		args = append(args, "--max-tokens", strconv.Itoa(o.MaxTokens))
-	}
-
-	return args
+	multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
+	return backends.BuildCommandArgs(o, multipleFlags)
 }
+
+// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
+// Supports multiple formats:
+// 1. Full command: "mlx_lm.server --model model/path"
+// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
+// 3. Args only: "--model model/path --host 0.0.0.0"
+// 4. Multiline commands with backslashes
+func ParseMlxCommand(command string) (*MlxServerOptions, error) {
+	executableNames := []string{"mlx_lm.server"}
+	var subcommandNames []string          // MLX has no subcommands
+	multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
+
+	var mlxOptions MlxServerOptions
+	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
+		return nil, err
+	}
+
+	return &mlxOptions, nil
+}
pkg/backends/mlx/mlx_test.go (new file, 157 lines)
@@ -0,0 +1,157 @@
package mlx_test

import (
	"llamactl/pkg/backends/mlx"
	"testing"
)

func TestParseMlxCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic command",
			command:   "mlx_lm.server --model /path/to/model --host 0.0.0.0",
			expectErr: false,
		},
		{
			name:      "args only",
			command:   "--model /path/to/model --port 8080",
			expectErr: false,
		},
		{
			name:      "mixed flag formats",
			command:   "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
			expectErr: false,
		},
		{
			name:      "quoted strings",
			command:   `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "unterminated quote",
			command:   `mlx_lm.server --model test.mlx --chat-template "unterminated`,
			expectErr: true,
		},
		{
			name:      "malformed flag",
			command:   "mlx_lm.server ---model test.mlx",
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := mlx.ParseMlxCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
			}
		})
	}
}

func TestParseMlxCommandValues(t *testing.T) {
	command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
	result, err := mlx.ParseMlxCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/test/model.mlx" {
		t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
	}

	if result.Port != 8080 {
		t.Errorf("expected port 8080, got %d", result.Port)
	}

	if result.Temp != 0.7 {
		t.Errorf("expected temp 0.7, got %f", result.Temp)
	}

	if !result.TrustRemoteCode {
		t.Errorf("expected trust_remote_code to be true")
	}

	if result.LogLevel != "DEBUG" {
		t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
	}
}

func TestBuildCommandArgs(t *testing.T) {
	options := &mlx.MlxServerOptions{
		Model:           "/test/model.mlx",
		Host:            "127.0.0.1",
		Port:            8080,
		Temp:            0.7,
		TopP:            0.9,
		TopK:            40,
		MaxTokens:       2048,
		TrustRemoteCode: true,
		LogLevel:        "DEBUG",
		ChatTemplate:    "custom template",
	}

	args := options.BuildCommandArgs()

	// Check that all expected flags are present
	expectedFlags := map[string]string{
		"--model":         "/test/model.mlx",
		"--host":          "127.0.0.1",
		"--port":          "8080",
		"--log-level":     "DEBUG",
		"--chat-template": "custom template",
		"--temp":          "0.7",
		"--top-p":         "0.9",
		"--top-k":         "40",
		"--max-tokens":    "2048",
	}

	for i := 0; i < len(args); i++ {
		if args[i] == "--trust-remote-code" {
|
||||||
|
continue // Boolean flag with no value
|
||||||
|
}
|
||||||
|
if args[i] == "--use-default-chat-template" {
|
||||||
|
continue // Boolean flag with no value
|
||||||
|
}
|
||||||
|
|
||||||
|
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
|
||||||
|
if args[i+1] != expectedValue {
|
||||||
|
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check boolean flags
|
||||||
|
foundTrustRemoteCode := false
|
||||||
|
for _, arg := range args {
|
||||||
|
if arg == "--trust-remote-code" {
|
||||||
|
foundTrustRemoteCode = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundTrustRemoteCode {
|
||||||
|
t.Errorf("expected --trust-remote-code flag to be present")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,254 +0,0 @@ (removed: the MLX-specific command parsing, superseded by the shared parser in pkg/backends/parser.go)

```go
package mlx

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
	// 1. Normalize the command - handle multiline with backslashes
	trimmed := normalizeMultilineCommand(command)
	if trimmed == "" {
		return nil, fmt.Errorf("command cannot be empty")
	}

	// 2. Extract arguments from command
	args, err := extractArgumentsFromCommand(trimmed)
	if err != nil {
		return nil, err
	}

	// 3. Parse arguments into map
	options := make(map[string]any)

	i := 0
	for i < len(args) {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") { // skip positional / stray values
			i++
			continue
		}

		// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Unified parsing for --flag=value vs --flag value
		var rawFlag, rawValue string
		hasEquals := false
		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			rawFlag = parts[0]
			rawValue = parts[1] // may be empty string
			hasEquals = true
		} else {
			rawFlag = arg
		}

		flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
		flagName := strings.ReplaceAll(flagCore, "-", "_")

		// Detect value if not in equals form
		valueProvided := hasEquals
		if !hasEquals {
			if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
				rawValue = args[i+1]
				valueProvided = true
			}
		}

		if valueProvided {
			// MLX-specific validation for certain flags
			if flagName == "log_level" && !isValidLogLevel(rawValue) {
				return nil, fmt.Errorf("invalid log level: %s", rawValue)
			}

			options[flagName] = parseValue(rawValue)

			// Advance index: if we consumed a following token as value (non equals form), skip it
			if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
				i += 2
			} else {
				i++
			}
			continue
		}

		// Boolean flag (no value) - MLX specific boolean flags
		if flagName == "trust_remote_code" || flagName == "use_default_chat_template" {
			options[flagName] = true
		} else {
			options[flagName] = true
		}
		i++
	}

	// 4. Convert to MlxServerOptions using existing UnmarshalJSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
	}

	var mlxOptions MlxServerOptions
	if err := json.Unmarshal(jsonData, &mlxOptions); err != nil {
		return nil, fmt.Errorf("failed to parse command options: %w", err)
	}

	// 5. Return MlxServerOptions
	return &mlxOptions, nil
}

// isValidLogLevel validates MLX log levels
func isValidLogLevel(level string) bool {
	validLevels := []string{"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
	for _, valid := range validLevels {
		if level == valid {
			return true
		}
	}
	return false
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
	// Strip surrounding matching quotes (single or double)
	if l := len(value); l >= 2 {
		if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
			value = value[1 : l-1]
		}
	}

	lower := strings.ToLower(value)
	if lower == "true" {
		return true
	}
	if lower == "false" {
		return false
	}

	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}
	return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
	// Handle escaped newlines (backslash followed by newline)
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")

	// Clean up extra whitespace
	re = regexp.MustCompile(`\s+`)
	normalized = re.ReplaceAllString(normalized, " ")

	return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
	// Split command into tokens respecting quotes
	tokens, err := splitCommandTokens(command)
	if err != nil {
		return nil, err
	}

	if len(tokens) == 0 {
		return nil, fmt.Errorf("no command tokens found")
	}

	// Check if first token looks like an executable
	firstToken := tokens[0]

	// Case 1: Full path to executable (contains path separator or ends with mlx_lm.server)
	if strings.Contains(firstToken, string(filepath.Separator)) ||
		strings.HasSuffix(filepath.Base(firstToken), "mlx_lm.server") {
		return tokens[1:], nil // Return everything except the executable
	}

	// Case 2: Just "mlx_lm.server" command
	if strings.ToLower(firstToken) == "mlx_lm.server" {
		return tokens[1:], nil // Return everything except the command
	}

	// Case 3: Arguments only (starts with a flag)
	if strings.HasPrefix(firstToken, "-") {
		return tokens, nil // Return all tokens as arguments
	}

	// Case 4: Unknown format - might be a different executable name
	// Be permissive and assume it's the executable
	return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
	var tokens []string
	var current strings.Builder
	inQuotes := false
	quoteChar := byte(0)
	escaped := false

	for i := 0; i < len(command); i++ {
		c := command[i]

		if escaped {
			current.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' {
			escaped = true
			current.WriteByte(c)
			continue
		}

		if !inQuotes && (c == '"' || c == '\'') {
			inQuotes = true
			quoteChar = c
			current.WriteByte(c)
		} else if inQuotes && c == quoteChar {
			inQuotes = false
			quoteChar = 0
			current.WriteByte(c)
		} else if !inQuotes && (c == ' ' || c == '\t' || c == '\n') {
			if current.Len() > 0 {
				tokens = append(tokens, current.String())
				current.Reset()
			}
		} else {
			current.WriteByte(c)
		}
	}

	if inQuotes {
		return nil, fmt.Errorf("unclosed quote in command")
	}

	if current.Len() > 0 {
		tokens = append(tokens, current.String())
	}

	return tokens, nil
}

// isFlag checks if a string looks like a command line flag
func isFlag(s string) bool {
	return strings.HasPrefix(s, "-")
}
```
pkg/backends/parser.go (new file, +213 lines)

```go
package backends

import (
	"encoding/json"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
	// Normalize multiline commands
	command = normalizeCommand(command)
	if command == "" {
		return fmt.Errorf("command cannot be empty")
	}

	// Extract arguments and positional model
	args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
	if err != nil {
		return err
	}

	// Parse flags into map
	options, err := parseFlags(args, multiValuedFlags)
	if err != nil {
		return err
	}

	// If we found a positional model and no --model flag was provided, set the model
	if modelFromPositional != "" {
		if _, hasModelFlag := options["model"]; !hasModelFlag {
			options["model"] = modelFromPositional
		}
	}

	// Convert to target struct via JSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return fmt.Errorf("failed to marshal options: %w", err)
	}

	if err := json.Unmarshal(jsonData, target); err != nil {
		return fmt.Errorf("failed to unmarshal to target: %w", err)
	}

	return nil
}

// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")
	re = regexp.MustCompile(`\s+`)
	return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}

// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
	// Check for unterminated quotes
	if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
		return nil, "", fmt.Errorf("unterminated quoted string")
	}

	tokens := strings.Fields(command)
	if len(tokens) == 0 {
		return nil, "", fmt.Errorf("no tokens found")
	}

	// Skip executable
	start := 0
	firstToken := tokens[0]

	// Check for executable name (with or without path)
	if strings.Contains(firstToken, string(filepath.Separator)) {
		baseName := filepath.Base(firstToken)
		for _, execName := range executableNames {
			if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
				start = 1
				break
			}
		}
	} else {
		for _, execName := range executableNames {
			if strings.EqualFold(firstToken, execName) {
				start = 1
				break
			}
		}
	}

	// Skip subcommand if present
	if start < len(tokens) {
		for _, subCmd := range subcommandNames {
			if strings.EqualFold(tokens[start], subCmd) {
				start++
				break
			}
		}
	}

	// Handle case where command starts with subcommand (no executable)
	if start == 0 {
		for _, subCmd := range subcommandNames {
			if strings.EqualFold(firstToken, subCmd) {
				start = 1
				break
			}
		}
	}

	args := tokens[start:]

	// Extract first positional argument (model) if present and not a flag
	var modelFromPositional string
	if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
		modelFromPositional = args[0]
		args = args[1:] // Remove the model from args to process remaining flags
	}

	return args, modelFromPositional, nil
}

// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
	options := make(map[string]any)

	for i := 0; i < len(args); i++ {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") {
			continue
		}

		// Check for malformed flags (more than two leading dashes)
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Get flag name and value
		var flagName, value string
		var hasValue bool

		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			flagName = strings.TrimLeft(parts[0], "-")
			value = parts[1]
			hasValue = true
		} else {
			flagName = strings.TrimLeft(arg, "-")
			if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
				value = args[i+1]
				hasValue = true
				i++ // Skip next arg since we consumed it
			}
		}

		// Convert kebab-case to snake_case for JSON
		flagName = strings.ReplaceAll(flagName, "-", "_")

		if hasValue {
			// Handle multi-valued flags
			if multiValuedFlags[flagName] {
				if existing, ok := options[flagName].([]string); ok {
					options[flagName] = append(existing, value)
				} else {
					options[flagName] = []string{value}
				}
			} else {
				options[flagName] = parseValue(value)
			}
		} else {
			// Boolean flag
			options[flagName] = true
		}
	}

	return options, nil
}

// parseValue converts string to appropriate type
func parseValue(value string) any {
	// Remove quotes
	if len(value) >= 2 {
		if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
			value = value[1 : len(value)-1]
		}
	}

	// Try boolean
	switch strings.ToLower(value) {
	case "true":
		return true
	case "false":
		return false
	}

	// Try integer
	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}

	// Try float
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}

	// Return as string
	return value
}
```
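The parser is deliberately backend-agnostic. A hedged sketch with an invented target (`echoOptions` and `echo-server` are hypothetical, not part of llamactl):

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends"
)

// echoOptions is a hypothetical target struct; any struct with snake_case
// json tags works, since ParseCommand round-trips the flag map through JSON.
type echoOptions struct {
	Model   string `json:"model,omitempty"`
	Threads int    `json:"threads,omitempty"`
	Verbose bool   `json:"verbose,omitempty"`
}

func main() {
	var opts echoOptions
	err := backends.ParseCommand(
		"echo-server my/model --threads 8 --verbose",
		[]string{"echo-server"}, // executable names to strip
		nil,                     // no subcommands
		map[string]bool{},       // no multi-valued flags
		&opts,
	)
	if err != nil {
		log.Fatal(err)
	}
	// The positional token became Model; --threads parsed as int, --verbose as bool.
	fmt.Printf("%+v\n", opts) // {Model:my/model Threads:8 Verbose:true}
}
```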
pkg/backends/vllm/vllm.go (new file, +189 lines)

```go
package vllm

import (
	"llamactl/pkg/backends"
)

type VllmServerOptions struct {
	// Basic connection options (auto-assigned by llamactl)
	Host string `json:"host,omitempty"`
	Port int    `json:"port,omitempty"`

	// Model and engine configuration
	Model                      string `json:"model,omitempty"`
	Tokenizer                  string `json:"tokenizer,omitempty"`
	SkipTokenizerInit          bool   `json:"skip_tokenizer_init,omitempty"`
	Revision                   string `json:"revision,omitempty"`
	CodeRevision               string `json:"code_revision,omitempty"`
	TokenizerRevision          string `json:"tokenizer_revision,omitempty"`
	TokenizerMode              string `json:"tokenizer_mode,omitempty"`
	TrustRemoteCode            bool   `json:"trust_remote_code,omitempty"`
	DownloadDir                string `json:"download_dir,omitempty"`
	LoadFormat                 string `json:"load_format,omitempty"`
	ConfigFormat               string `json:"config_format,omitempty"`
	Dtype                      string `json:"dtype,omitempty"`
	KVCacheDtype               string `json:"kv_cache_dtype,omitempty"`
	QuantizationParamPath      string `json:"quantization_param_path,omitempty"`
	Seed                       int    `json:"seed,omitempty"`
	MaxModelLen                int    `json:"max_model_len,omitempty"`
	GuidedDecodingBackend      string `json:"guided_decoding_backend,omitempty"`
	DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
	WorkerUseRay               bool   `json:"worker_use_ray,omitempty"`
	RayWorkersUseNSight        bool   `json:"ray_workers_use_nsight,omitempty"`

	// Performance and serving configuration
	BlockSize                int     `json:"block_size,omitempty"`
	EnablePrefixCaching      bool    `json:"enable_prefix_caching,omitempty"`
	DisableSlidingWindow     bool    `json:"disable_sliding_window,omitempty"`
	UseV2BlockManager        bool    `json:"use_v2_block_manager,omitempty"`
	NumLookaheadSlots        int     `json:"num_lookahead_slots,omitempty"`
	SwapSpace                int     `json:"swap_space,omitempty"`
	CPUOffloadGB             int     `json:"cpu_offload_gb,omitempty"`
	GPUMemoryUtilization     float64 `json:"gpu_memory_utilization,omitempty"`
	NumGPUBlocksOverride     int     `json:"num_gpu_blocks_override,omitempty"`
	MaxNumBatchedTokens      int     `json:"max_num_batched_tokens,omitempty"`
	MaxNumSeqs               int     `json:"max_num_seqs,omitempty"`
	MaxLogprobs              int     `json:"max_logprobs,omitempty"`
	DisableLogStats          bool    `json:"disable_log_stats,omitempty"`
	Quantization             string  `json:"quantization,omitempty"`
	RopeScaling              string  `json:"rope_scaling,omitempty"`
	RopeTheta                float64 `json:"rope_theta,omitempty"`
	EnforceEager             bool    `json:"enforce_eager,omitempty"`
	MaxContextLenToCapture   int     `json:"max_context_len_to_capture,omitempty"`
	MaxSeqLenToCapture       int     `json:"max_seq_len_to_capture,omitempty"`
	DisableCustomAllReduce   bool    `json:"disable_custom_all_reduce,omitempty"`
	TokenizerPoolSize        int     `json:"tokenizer_pool_size,omitempty"`
	TokenizerPoolType        string  `json:"tokenizer_pool_type,omitempty"`
	TokenizerPoolExtraConfig string  `json:"tokenizer_pool_extra_config,omitempty"`
	EnableLoraBias           bool    `json:"enable_lora_bias,omitempty"`
	LoraExtraVocabSize       int     `json:"lora_extra_vocab_size,omitempty"`
	LoraRank                 int     `json:"lora_rank,omitempty"`
	PromptLookbackDistance   int     `json:"prompt_lookback_distance,omitempty"`
	PreemptionMode           string  `json:"preemption_mode,omitempty"`

	// Distributed and parallel processing
	TensorParallelSize            int     `json:"tensor_parallel_size,omitempty"`
	PipelineParallelSize          int     `json:"pipeline_parallel_size,omitempty"`
	MaxParallelLoadingWorkers     int     `json:"max_parallel_loading_workers,omitempty"`
	DisableAsyncOutputProc        bool    `json:"disable_async_output_proc,omitempty"`
	WorkerClass                   string  `json:"worker_class,omitempty"`
	EnabledLoraModules            string  `json:"enabled_lora_modules,omitempty"`
	MaxLoraRank                   int     `json:"max_lora_rank,omitempty"`
	FullyShardedLoras             bool    `json:"fully_sharded_loras,omitempty"`
	LoraModules                   string  `json:"lora_modules,omitempty"`
	PromptAdapters                string  `json:"prompt_adapters,omitempty"`
	MaxPromptAdapterToken         int     `json:"max_prompt_adapter_token,omitempty"`
	Device                        string  `json:"device,omitempty"`
	SchedulerDelay                float64 `json:"scheduler_delay,omitempty"`
	EnableChunkedPrefill          bool    `json:"enable_chunked_prefill,omitempty"`
	SpeculativeModel              string  `json:"speculative_model,omitempty"`
	SpeculativeModelQuantization  string  `json:"speculative_model_quantization,omitempty"`
	SpeculativeRevision           string  `json:"speculative_revision,omitempty"`
	SpeculativeMaxModelLen        int     `json:"speculative_max_model_len,omitempty"`
	SpeculativeDisableByBatchSize int     `json:"speculative_disable_by_batch_size,omitempty"`
	NgptSpeculativeLength         int     `json:"ngpt_speculative_length,omitempty"`
	SpeculativeDisableMqa         bool    `json:"speculative_disable_mqa,omitempty"`
	ModelLoaderExtraConfig        string  `json:"model_loader_extra_config,omitempty"`
	IgnorePatterns                string  `json:"ignore_patterns,omitempty"`
	PreloadedLoraModules          string  `json:"preloaded_lora_modules,omitempty"`

	// OpenAI server specific options
	UDS                            string   `json:"uds,omitempty"`
	UvicornLogLevel                string   `json:"uvicorn_log_level,omitempty"`
	ResponseRole                   string   `json:"response_role,omitempty"`
	SSLKeyfile                     string   `json:"ssl_keyfile,omitempty"`
	SSLCertfile                    string   `json:"ssl_certfile,omitempty"`
	SSLCACerts                     string   `json:"ssl_ca_certs,omitempty"`
	SSLCertReqs                    int      `json:"ssl_cert_reqs,omitempty"`
	RootPath                       string   `json:"root_path,omitempty"`
	Middleware                     []string `json:"middleware,omitempty"`
	ReturnTokensAsTokenIDS         bool     `json:"return_tokens_as_token_ids,omitempty"`
	DisableFrontendMultiprocessing bool     `json:"disable_frontend_multiprocessing,omitempty"`
	EnableAutoToolChoice           bool     `json:"enable_auto_tool_choice,omitempty"`
	ToolCallParser                 string   `json:"tool_call_parser,omitempty"`
	ToolServer                     string   `json:"tool_server,omitempty"`
	ChatTemplate                   string   `json:"chat_template,omitempty"`
	ChatTemplateContentFormat      string   `json:"chat_template_content_format,omitempty"`
	AllowCredentials               bool     `json:"allow_credentials,omitempty"`
	AllowedOrigins                 []string `json:"allowed_origins,omitempty"`
	AllowedMethods                 []string `json:"allowed_methods,omitempty"`
	AllowedHeaders                 []string `json:"allowed_headers,omitempty"`
	APIKey                         []string `json:"api_key,omitempty"`
	EnableLogOutputs               bool     `json:"enable_log_outputs,omitempty"`
	EnableTokenUsage               bool     `json:"enable_token_usage,omitempty"`
	EnableAsyncEngineDebug         bool     `json:"enable_async_engine_debug,omitempty"`
	EngineUseRay                   bool     `json:"engine_use_ray,omitempty"`
	DisableLogRequests             bool     `json:"disable_log_requests,omitempty"`
	MaxLogLen                      int      `json:"max_log_len,omitempty"`

	// Additional engine configuration
	Task                      string `json:"task,omitempty"`
	MultiModalConfig          string `json:"multi_modal_config,omitempty"`
	LimitMmPerPrompt          string `json:"limit_mm_per_prompt,omitempty"`
	EnableSleepMode           bool   `json:"enable_sleep_mode,omitempty"`
	EnableChunkingRequest     bool   `json:"enable_chunking_request,omitempty"`
	CompilationConfig         string `json:"compilation_config,omitempty"`
	DisableSlidingWindowMask  bool   `json:"disable_sliding_window_mask,omitempty"`
	EnableTRTLLMEngineLatency bool   `json:"enable_trtllm_engine_latency,omitempty"`
	OverridePoolingConfig     string `json:"override_pooling_config,omitempty"`
	OverrideNeuronConfig      string `json:"override_neuron_config,omitempty"`
	OverrideKVCacheALIGNSize  int    `json:"override_kv_cache_align_size,omitempty"`
}

// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
func (o *VllmServerOptions) BuildCommandArgs() []string {
	var args []string

	// Add model as positional argument if specified
	if o.Model != "" {
		args = append(args, o.Model)
	}

	// Create a copy of the options without the Model field to avoid including it as --model flag
	optionsCopy := *o
	optionsCopy.Model = "" // Clear model field so it won't be included as a flag

	multipleFlags := map[string]bool{
		"api-key":         true,
		"allowed-origins": true,
		"allowed-methods": true,
		"allowed-headers": true,
		"middleware":      true,
	}

	// Build the rest of the arguments as flags
	flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
	args = append(args, flagArgs...)

	return args
}

// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
	executableNames := []string{"vllm"}
	subcommandNames := []string{"serve"}
	multiValuedFlags := map[string]bool{
		"middleware":      true,
		"api_key":         true,
		"allowed_origins": true,
		"allowed_methods": true,
		"allowed_headers": true,
		"lora_modules":    true,
		"prompt_adapters": true,
	}

	var vllmOptions VllmServerOptions
	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
		return nil, err
	}

	return &vllmOptions, nil
}
```
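A short sketch of the vLLM round trip; the behavior shown is what the tests below assert:

```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/vllm"
)

func main() {
	// The positional token after "serve" becomes Model (see extractArgs above).
	opts, err := vllm.ParseVllmCommand("vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2")
	if err != nil {
		log.Fatal(err)
	}

	// Model is re-emitted positionally, followed by flags; the instance layer
	// prepends the "serve" subcommand before spawning the process.
	fmt.Println(opts.BuildCommandArgs())
	// e.g. [microsoft/DialoGPT-medium --tensor-parallel-size 2]
}
```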
pkg/backends/vllm/vllm_test.go (new file, +153 lines)

```go
package vllm_test

import (
	"llamactl/pkg/backends/vllm"
	"slices"
	"testing"
)

func TestParseVllmCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic vllm serve command",
			command:   "vllm serve microsoft/DialoGPT-medium",
			expectErr: false,
		},
		{
			name:      "serve only command",
			command:   "serve microsoft/DialoGPT-medium",
			expectErr: false,
		},
		{
			name:      "positional model with flags",
			command:   "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
			expectErr: false,
		},
		{
			name:      "model with path",
			command:   "vllm serve /path/to/model --gpu-memory-utilization 0.8",
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "unterminated quote",
			command:   `vllm serve "unterminated`,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := vllm.ParseVllmCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
			}
		})
	}
}

func TestParseVllmCommandValues(t *testing.T) {
	command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
	result, err := vllm.ParseVllmCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test-model" {
		t.Errorf("expected model 'test-model', got '%s'", result.Model)
	}
	if result.TensorParallelSize != 4 {
		t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
	}
	if result.GPUMemoryUtilization != 0.8 {
		t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
	}
	if !result.EnableLogOutputs {
		t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
	}
}

func TestBuildCommandArgs(t *testing.T) {
	options := vllm.VllmServerOptions{
		Model:                "microsoft/DialoGPT-medium",
		Port:                 8080,
		Host:                 "localhost",
		TensorParallelSize:   2,
		GPUMemoryUtilization: 0.8,
		EnableLogOutputs:     true,
		AllowedOrigins:       []string{"http://localhost:3000", "https://example.com"},
	}

	args := options.BuildCommandArgs()

	// Check that model is the first positional argument (not a --model flag)
	if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
	}

	// Check that --model flag is NOT present (since model should be positional)
	if contains(args, "--model") {
		t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
	}

	// Check other flags
	if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
	}
	if !contains(args, "--enable-log-outputs") {
		t.Errorf("Expected --enable-log-outputs not found in %v", args)
	}
	if !contains(args, "--host") {
		t.Errorf("Expected --host not found in %v", args)
	}
	if !contains(args, "--port") {
		t.Errorf("Expected --port not found in %v", args)
	}

	// Check array handling (multiple flags)
	allowedOriginsCount := 0
	for i := range args {
		if args[i] == "--allowed-origins" {
			allowedOriginsCount++
		}
	}
	if allowedOriginsCount != 2 {
		t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
	}
}

// Helper functions
func contains(slice []string, item string) bool {
	return slices.Contains(slice, item)
}

func containsFlagWithValue(args []string, flag, value string) bool {
	for i, arg := range args {
		if arg == flag && i+1 < len(args) && args[i+1] == value {
			return true
		}
	}
	return false
}
```
```diff
@@ -17,6 +17,9 @@ type BackendConfig struct {
 	// Path to mlx_lm executable (MLX-LM backend)
 	MLXLMExecutable string `yaml:"mlx_lm_executable"`
+
+	// Path to vllm executable (vLLM backend)
+	VllmExecutable string `yaml:"vllm_executable"`
 }
 
 // AppConfig represents the configuration for llamactl
@@ -122,6 +125,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
 		Backends: BackendConfig{
 			LlamaExecutable: "llama-server",
 			MLXLMExecutable: "mlx_lm.server",
+			VllmExecutable:  "vllm",
 		},
 		Instances: InstancesConfig{
 			PortRange: [2]int{8000, 9000},
@@ -246,6 +250,9 @@ func loadEnvVars(cfg *AppConfig) {
 	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
 		cfg.Backends.MLXLMExecutable = mlxLMExec
 	}
+	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
+		cfg.Backends.VllmExecutable = vllmExec
+	}
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
 			cfg.Instances.DefaultAutoRestart = b
```
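A hedged sketch of the new override; that an empty config path yields defaults plus environment overrides is an assumption here, and the executable path is illustrative:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// New env var added by this commit; the path is a placeholder.
	os.Setenv("LLAMACTL_VLLM_EXECUTABLE", "/opt/vllm-env/bin/vllm")

	// Assumption: an empty path falls back to defaults + env vars.
	cfg, err := config.LoadConfig("")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Backends.VllmExecutable) // "/opt/vllm-env/bin/vllm"
}
```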
```diff
@@ -105,6 +105,10 @@ func (i *Process) GetPort() int {
 		if i.options.MlxServerOptions != nil {
 			return i.options.MlxServerOptions.Port
 		}
+	case backends.BackendTypeVllm:
+		if i.options.VllmServerOptions != nil {
+			return i.options.VllmServerOptions.Port
+		}
 		}
 	}
 	return 0
@@ -123,6 +127,10 @@ func (i *Process) GetHost() string {
 		if i.options.MlxServerOptions != nil {
 			return i.options.MlxServerOptions.Host
 		}
+	case backends.BackendTypeVllm:
+		if i.options.VllmServerOptions != nil {
+			return i.options.VllmServerOptions.Host
+		}
 		}
 	}
 	return ""
@@ -176,6 +184,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 			host = i.options.MlxServerOptions.Host
 			port = i.options.MlxServerOptions.Port
 		}
+	case backends.BackendTypeVllm:
+		if i.options.VllmServerOptions != nil {
+			host = i.options.VllmServerOptions.Host
+			port = i.options.VllmServerOptions.Port
+		}
 	}
 
 	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
@@ -52,6 +52,8 @@ func (i *Process) Start() error {
 		executable = i.globalBackendSettings.LlamaExecutable
 	case backends.BackendTypeMlxLm:
 		executable = i.globalBackendSettings.MLXLMExecutable
+	case backends.BackendTypeVllm:
+		executable = i.globalBackendSettings.VllmExecutable
 	default:
 		return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
 	}
@@ -200,6 +202,11 @@ func (i *Process) WaitForHealthy(timeout int) error {
 			host = opts.MlxServerOptions.Host
 			port = opts.MlxServerOptions.Port
 		}
+	case backends.BackendTypeVllm:
+		if opts.VllmServerOptions != nil {
+			host = opts.VllmServerOptions.Host
+			port = opts.VllmServerOptions.Port
+		}
 	}
 	if host == "" {
 		host = "localhost"
```
```diff
@@ -6,6 +6,7 @@ import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/backends/mlx"
+	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
 	"log"
 )
@@ -26,6 +27,7 @@ type CreateInstanceOptions struct {
 	// Backend-specific options
 	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
 	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
+	VllmServerOptions  *vllm.VllmServerOptions      `json:"-"`
 }
 
 // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -69,6 +71,18 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 				return fmt.Errorf("failed to unmarshal MLX options: %w", err)
 			}
 		}
+	case backends.BackendTypeVllm:
+		if c.BackendOptions != nil {
+			optionsData, err := json.Marshal(c.BackendOptions)
+			if err != nil {
+				return fmt.Errorf("failed to marshal backend options: %w", err)
+			}
+
+			c.VllmServerOptions = &vllm.VllmServerOptions{}
+			if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
+				return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
+			}
+		}
 	default:
 		return fmt.Errorf("unknown backend type: %s", c.BackendType)
 	}
```
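A sketch of the JSON this accepts; the top-level `backend_type`/`backend_options` keys follow the project's API examples, and the nested keys follow the vLLM struct's json tags:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	"llamactl/pkg/instance"
)

func main() {
	payload := []byte(`{
		"backend_type": "vllm",
		"backend_options": {
			"model": "microsoft/DialoGPT-medium",
			"tensor_parallel_size": 2,
			"gpu_memory_utilization": 0.8
		}
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(payload, &opts); err != nil {
		log.Fatal(err)
	}
	// The custom UnmarshalJSON routed backend_options into VllmServerOptions.
	fmt.Println(opts.VllmServerOptions.TensorParallelSize) // 2
}
```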
```diff
@@ -114,6 +128,20 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
 				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
 			}
 
 			aux.BackendOptions = backendOpts
 		}
+	case backends.BackendTypeVllm:
+		if c.VllmServerOptions != nil {
+			data, err := json.Marshal(c.VllmServerOptions)
+			if err != nil {
+				return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
+			}
+
+			var backendOpts map[string]any
+			if err := json.Unmarshal(data, &backendOpts); err != nil {
+				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+			}
+
+			aux.BackendOptions = backendOpts
+		}
 	}
@@ -171,6 +199,13 @@ func (c *CreateInstanceOptions) BuildCommandArgs() []string {
 		if c.MlxServerOptions != nil {
 			return c.MlxServerOptions.BuildCommandArgs()
 		}
+	case backends.BackendTypeVllm:
+		if c.VllmServerOptions != nil {
+			// Prepend "serve" as first argument
+			args := []string{"serve"}
+			args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
+			return args
+		}
 	}
 	return []string{}
 }
```
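Putting the layers together, a sketch of the resulting argv; the kebab-case flag spelling is what the vLLM tests assert for the shared builder:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/vllm"
	"llamactl/pkg/instance"
)

func main() {
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeVllm,
		VllmServerOptions: &vllm.VllmServerOptions{
			Model:              "microsoft/DialoGPT-medium",
			TensorParallelSize: 2,
		},
	}
	// "serve" first, then the positional model, then flags.
	fmt.Println(opts.BuildCommandArgs())
	// [serve microsoft/DialoGPT-medium --tensor-parallel-size 2]
}
```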
```diff
@@ -264,6 +264,10 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
 		if options.MlxServerOptions != nil {
 			return options.MlxServerOptions.Port
 		}
+	case backends.BackendTypeVllm:
+		if options.VllmServerOptions != nil {
+			return options.VllmServerOptions.Port
+		}
 	}
 	return 0
 }
@@ -279,6 +283,10 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
 		if options.MlxServerOptions != nil {
 			options.MlxServerOptions.Port = port
 		}
+	case backends.BackendTypeVllm:
+		if options.VllmServerOptions != nil {
+			options.VllmServerOptions.Port = port
+		}
 	}
 }
```
```diff
@@ -8,6 +8,7 @@ import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/backends/mlx"
+	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
@@ -739,3 +740,56 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
 		}
 	}
 }
+
+// ParseVllmCommand godoc
+// @Summary Parse vllm serve command
+// @Description Parses a vLLM serve command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Router /backends/vllm/parse-command [post]
+func (h *Handler) ParseVllmCommand() http.HandlerFunc {
+	type errorResponse struct {
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) {
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" {
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		backendType := backends.BackendTypeVllm
+
+		options := &instance.CreateInstanceOptions{
+			BackendType:       backendType,
+			VllmServerOptions: vllmOptions,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
+		}
+	}
+}
```
|
|||||||
r.Route("/mlx", func(r chi.Router) {
|
r.Route("/mlx", func(r chi.Router) {
|
||||||
r.Post("/parse-command", handler.ParseMlxCommand())
|
r.Post("/parse-command", handler.ParseMlxCommand())
|
||||||
})
|
})
|
||||||
|
r.Route("/vllm", func(r chi.Router) {
|
||||||
|
r.Post("/parse-command", handler.ParseVllmCommand())
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
// Instance management endpoints
|
// Instance management endpoints
|
||||||
|
|||||||
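A hedged sketch of calling the new route; the `/api/v1` prefix and the request body's `command` key are assumptions drawn from the surrounding API conventions and the godoc `@Router` annotation:

```go
package main

import (
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	body := strings.NewReader(`{"command": "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2"}`)
	req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/backends/vllm/parse-command", body)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Authorization", "Bearer your-key")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	// On success the handler responds with the JSON-encoded CreateInstanceOptions.
	fmt.Println(resp.Status)
}
```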
```diff
@@ -46,6 +46,8 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 		return validateLlamaCppOptions(options)
 	case backends.BackendTypeMlxLm:
 		return validateMlxOptions(options)
+	case backends.BackendTypeVllm:
+		return validateVllmOptions(options)
 	default:
 		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
 	}
@@ -88,6 +90,25 @@ func validateMlxOptions(options *instance.CreateInstanceOptions) error {
 	return nil
 }
+
+// validateVllmOptions validates vLLM backend specific options
+func validateVllmOptions(options *instance.CreateInstanceOptions) error {
+	if options.VllmServerOptions == nil {
+		return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
+	}
+
+	// Use reflection to check all string fields for injection patterns
+	if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
+		return err
+	}
+
+	// Basic network validation for port
+	if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
+	}
+
+	return nil
+}
+
 // validateStructStrings recursively validates all string fields in a struct
 func validateStructStrings(v any, fieldPath string) error {
 	val := reflect.ValueOf(v)
```
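A hypothetical call site for the new validator (the `validation` package import name is assumed); an out-of-range port should trip the check:

```go
opts := &instance.CreateInstanceOptions{
	BackendType: backends.BackendTypeVllm,
	VllmServerOptions: &vllm.VllmServerOptions{
		Model: "microsoft/DialoGPT-medium",
		Port:  70000, // deliberately outside 0-65535
	},
}
if err := validation.ValidateInstanceOptions(opts); err != nil {
	fmt.Println(err) // invalid port range: 70000
}
```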
webui/src/components/BackendBadge.tsx (new file, +65 lines)

```tsx
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Cpu, Zap, Server } from "lucide-react";

interface BackendBadgeProps {
  backend?: BackendTypeValue;
}

const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
  if (!backend) {
    return null;
  }

  const getIcon = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return <Cpu className="h-3 w-3" />;
      case BackendType.MLX_LM:
        return <Zap className="h-3 w-3" />;
      case BackendType.VLLM:
        return <Server className="h-3 w-3" />;
      default:
        return <Server className="h-3 w-3" />;
    }
  };

  const getText = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return "llama.cpp";
      case BackendType.MLX_LM:
        return "MLX";
      case BackendType.VLLM:
        return "vLLM";
      default:
        return backend;
    }
  };

  const getVariant = () => {
    switch (backend) {
      case BackendType.LLAMA_CPP:
        return "secondary";
      case BackendType.MLX_LM:
        return "outline";
      case BackendType.VLLM:
        return "default";
      default:
        return "secondary";
    }
  };

  return (
    <Badge
      variant={getVariant()}
      className="flex items-center gap-1.5"
    >
      {getIcon()}
      <span className="text-xs">{getText()}</span>
    </Badge>
  );
};

export default BackendBadge;
```
```diff
@@ -45,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
     <div className="grid gap-2">
       <Label htmlFor={fieldKey}>
         {config.label}
-        {config.required && <span className="text-red-500 ml-1">*</span>}
       </Label>
       <Input
         id={fieldKey}
@@ -72,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
     <div className="grid gap-2">
       <Label htmlFor={fieldKey}>
         {config.label}
-        {config.required && <span className="text-red-500 ml-1">*</span>}
       </Label>
       <Input
         id={fieldKey}
@@ -99,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
     <div className="grid gap-2">
       <Label htmlFor={fieldKey}>
         {config.label}
-        {config.required && <span className="text-red-500 ml-1">*</span>}
       </Label>
       <Input
         id={fieldKey}
```
```diff
@@ -5,6 +5,7 @@ import type { Instance } from "@/types/instance";
 import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
 import LogsDialog from "@/components/LogDialog";
 import HealthBadge from "@/components/HealthBadge";
+import BackendBadge from "@/components/BackendBadge";
 import { useState } from "react";
 import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -58,7 +59,10 @@ function InstanceCard({
       <CardHeader className="pb-3">
         <div className="flex items-center justify-between">
           <CardTitle className="text-lg">{instance.name}</CardTitle>
+          <div className="flex flex-col items-end gap-2">
             {running && <HealthBadge health={health} />}
+            <BackendBadge backend={instance.options?.backend_type} />
+          </div>
         </div>
       </CardHeader>
```
webui/src/components/InstanceDialog.tsx
@@ -11,11 +11,13 @@ import {
   DialogTitle,
 } from "@/components/ui/dialog";
 import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
-import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
+import { getAdvancedFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
 import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
-import ZodFormField from "@/components/ZodFormField";
-import BackendFormField from "@/components/BackendFormField";
 import ParseCommandDialog from "@/components/ParseCommandDialog";
+import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";
+import BasicInstanceFields from "@/components/instance/BasicInstanceFields";
+import BackendConfiguration from "@/components/instance/BackendConfiguration";
+import AdvancedInstanceFields from "@/components/instance/AdvancedInstanceFields";

 interface InstanceDialogProps {
   open: boolean;
@@ -39,9 +41,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   const [showParseDialog, setShowParseDialog] = useState(false);

   // Get field lists dynamically from the type
-  const basicFields = getBasicFields();
   const advancedFields = getAdvancedFields();
-  const basicBackendFields = getBasicBackendFields(formData.backend_type);
   const advancedBackendFields = getAdvancedBackendFields(formData.backend_type);

   // Reset form when dialog opens/closes or when instance changes
@@ -163,8 +163,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
     setShowParseDialog(false);
   };

-  // Check if auto_restart is enabled
-  const isAutoRestartEnabled = formData.auto_restart === true;

   // Save button label logic
   let saveButtonLabel = "Create Instance";
@@ -212,70 +210,23 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
           </div>

           {/* Auto Restart Configuration Section */}
-          <div className="space-y-4">
-            <h3 className="text-lg font-medium">
-              Auto Restart Configuration
-            </h3>
-
-            {/* Auto Restart Toggle */}
-            <ZodFormField
-              fieldKey="auto_restart"
-              value={formData.auto_restart}
-              onChange={handleFieldChange}
-            />
-
-            {/* Show restart options only when auto restart is enabled */}
-            {isAutoRestartEnabled && (
-              <div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
-                <ZodFormField
-                  fieldKey="max_restarts"
-                  value={formData.max_restarts}
-                  onChange={handleFieldChange}
-                />
-                <ZodFormField
-                  fieldKey="restart_delay"
-                  value={formData.restart_delay}
-                  onChange={handleFieldChange}
-                />
-              </div>
-            )}
-          </div>
-
-          {/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
-          <div className="space-y-4">
-            <h3 className="text-lg font-medium">Basic Configuration</h3>
-            {basicFields
-              .filter(
-                (fieldKey) =>
-                  fieldKey !== "auto_restart" &&
-                  fieldKey !== "max_restarts" &&
-                  fieldKey !== "restart_delay" &&
-                  fieldKey !== "backend_options" // backend_options is handled separately
-              )
-              .map((fieldKey) => (
-                <ZodFormField
-                  key={fieldKey}
-                  fieldKey={fieldKey}
-                  value={formData[fieldKey]}
-                  onChange={handleFieldChange}
-                />
-              ))}
-          </div>
+          <AutoRestartConfiguration
+            formData={formData}
+            onChange={handleFieldChange}
+          />
+
+          {/* Basic Fields */}
+          <BasicInstanceFields
+            formData={formData}
+            onChange={handleFieldChange}
+          />

           {/* Backend Configuration Section */}
-          <div className="space-y-4">
-            <h3 className="text-lg font-medium">Backend Configuration</h3>
-
-            {/* Basic backend fields */}
-            {basicBackendFields.map((fieldKey) => (
-              <BackendFormField
-                key={fieldKey}
-                fieldKey={fieldKey}
-                value={(formData.backend_options as any)?.[fieldKey]}
-                onChange={handleBackendFieldChange}
-              />
-            ))}
-          </div>
+          <BackendConfiguration
+            formData={formData}
+            onBackendFieldChange={handleBackendFieldChange}
+            showAdvanced={showAdvanced}
+          />

           {/* Advanced Fields Toggle */}
           <div className="border-t pt-4">
@@ -314,54 +265,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
             </div>
           </div>

-          {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
+          {/* Advanced Fields */}
           {showAdvanced && (
             <div className="space-y-4 pl-6 border-l-2 border-muted">
-              {/* Advanced instance fields */}
-              {advancedFields
-                .filter(
-                  (fieldKey) =>
-                    !["max_restarts", "restart_delay", "backend_options"].includes(
-                      fieldKey as string
-                    )
-                ).length > 0 && (
-                <div className="space-y-4">
-                  <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
-                  {advancedFields
-                    .filter(
-                      (fieldKey) =>
-                        !["max_restarts", "restart_delay", "backend_options"].includes(
-                          fieldKey as string
-                        )
-                    )
-                    .sort()
-                    .map((fieldKey) => (
-                      <ZodFormField
-                        key={fieldKey}
-                        fieldKey={fieldKey}
-                        value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
-                        onChange={handleFieldChange}
-                      />
-                    ))}
-                </div>
-              )}
-
-              {/* Advanced backend fields */}
-              {advancedBackendFields.length > 0 && (
-                <div className="space-y-4">
-                  <h4 className="text-md font-medium">Advanced Backend Configuration</h4>
-                  {advancedBackendFields
-                    .sort()
-                    .map((fieldKey) => (
-                      <BackendFormField
-                        key={fieldKey}
-                        fieldKey={fieldKey}
-                        value={(formData.backend_options as any)?.[fieldKey]}
-                        onChange={handleBackendFieldChange}
-                      />
-                    ))}
-                </div>
-              )}
+              <AdvancedInstanceFields
+                formData={formData}
+                onChange={handleFieldChange}
+              />
             </div>
           )}
         </div>
webui/src/components/ParseCommandDialog.tsx
@@ -9,7 +9,7 @@ import {
   DialogHeader,
   DialogTitle,
 } from "@/components/ui/dialog";
-import { type CreateInstanceOptions } from "@/types/instance";
+import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
 import { backendsApi } from "@/lib/api";
 import { toast } from "sonner";

@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
   onParsed,
 }) => {
   const [command, setCommand] = useState('');
+  const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
   const [loading, setLoading] = useState(false);
   const [error, setError] = useState<string | null>(null);

@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
     setError(null);

     try {
-      const options = await backendsApi.llamaCpp.parseCommand(command);
+      let options: CreateInstanceOptions;
+
+      // Parse based on selected backend type
+      switch (backendType) {
+        case BackendType.LLAMA_CPP:
+          options = await backendsApi.llamaCpp.parseCommand(command);
+          break;
+        case BackendType.MLX_LM:
+          options = await backendsApi.mlx.parseCommand(command);
+          break;
+        case BackendType.VLLM:
+          options = await backendsApi.vllm.parseCommand(command);
+          break;
+        default:
+          throw new Error(`Unsupported backend type: ${backendType}`);
+      }
+
       onParsed(options);
       onOpenChange(false);
-      // Reset form
       setCommand('');
       setError(null);
-      // Show success toast
       toast.success('Command parsed successfully');
     } catch (err) {
       const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
       setError(errorMessage);
-      // Show error toast
       toast.error('Failed to parse command', {
         description: errorMessage
       });
@@ -60,31 +74,55 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({

   const handleOpenChange = (open: boolean) => {
     if (!open) {
-      // Reset form when closing
       setCommand('');
+      setBackendType(BackendType.LLAMA_CPP);
       setError(null);
     }
     onOpenChange(open);
   };

+  const backendPlaceholders: Record<BackendTypeValue, string> = {
+    [BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
+    [BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
+    [BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
+  };
+
+  const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
+    return backendPlaceholders[backendType] || "Enter your command here...";
+  };
+
   return (
     <Dialog open={open} onOpenChange={handleOpenChange}>
       <DialogContent className="sm:max-w-[600px]">
         <DialogHeader>
-          <DialogTitle>Parse Llama Server Command</DialogTitle>
+          <DialogTitle>Parse Backend Command</DialogTitle>
           <DialogDescription>
-            Paste your llama-server command to automatically populate the form fields
+            Select your backend type and paste the command to automatically populate the form fields
           </DialogDescription>
         </DialogHeader>

         <div className="space-y-4">
+          <div>
+            <Label htmlFor="backend-type">Backend Type</Label>
+            <select
+              id="backend-type"
+              value={backendType}
+              onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
+              className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
+            >
+              <option value={BackendType.LLAMA_CPP}>Llama Server</option>
+              <option value={BackendType.MLX_LM}>MLX LM</option>
+              <option value={BackendType.VLLM}>vLLM</option>
+            </select>
+          </div>
+
           <div>
             <Label htmlFor="command">Command</Label>
             <textarea
               id="command"
               value={command}
               onChange={(e) => setCommand(e.target.value)}
-              placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
+              placeholder={getPlaceholderForBackend(backendType)}
               className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
             />
           </div>
webui/src/components/ZodFormField.tsx
@@ -29,7 +29,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <select
           id={fieldKey}
@@ -39,6 +38,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
         >
           <option value={BackendType.LLAMA_CPP}>Llama Server</option>
           <option value={BackendType.MLX_LM}>MLX LM</option>
+          <option value={BackendType.VLLM}>vLLM</option>
         </select>
         {config.description && (
           <p className="text-sm text-muted-foreground">{config.description}</p>
@@ -70,7 +70,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}
@@ -97,7 +96,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}
@@ -124,7 +122,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
       <div className="grid gap-2">
        <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}
webui/src/components/form/ArrayInput.tsx (new file, 62 lines)
@@ -0,0 +1,62 @@
+import React from 'react'
+import { Input } from '@/components/ui/input'
+import { Label } from '@/components/ui/label'
+
+interface ArrayInputProps {
+  id: string
+  label: string
+  value: string[] | undefined
+  onChange: (value: string[] | undefined) => void
+  placeholder?: string
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+const ArrayInput: React.FC<ArrayInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  placeholder = "item1, item2, item3",
+  description,
+  disabled = false,
+  className
+}) => {
+  const handleChange = (inputValue: string) => {
+    if (inputValue === '') {
+      onChange(undefined)
+      return
+    }
+
+    const arrayValue = inputValue
+      .split(',')
+      .map(s => s.trim())
+      .filter(Boolean)
+
+    onChange(arrayValue.length > 0 ? arrayValue : undefined)
+  }
+
+  return (
+    <div className="grid gap-2">
+      <Label htmlFor={id}>
+        {label}
+      </Label>
+      <Input
+        id={id}
+        type="text"
+        value={Array.isArray(value) ? value.join(', ') : ''}
+        onChange={(e) => handleChange(e.target.value)}
+        placeholder={placeholder}
+        disabled={disabled}
+        className={className}
+      />
+      {description && (
+        <p className="text-sm text-muted-foreground">{description}</p>
+      )}
+      <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
+    </div>
+  )
+}
+
+export default ArrayInput
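As an aside, a minimal sketch of the parsing behavior `ArrayInput` implements above; the standalone `parse` helper is illustrative only, not part of the commit:

```ts
// Mirrors ArrayInput's handleChange: split on commas, trim, drop empties.
// "a, b, , c" -> ["a", "b", "c"]; "" -> undefined (field cleared)
const parse = (s: string): string[] | undefined =>
  s === '' ? undefined : s.split(',').map(v => v.trim()).filter(Boolean)
```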
webui/src/components/form/CheckboxInput.tsx (new file, 42 lines)
@@ -0,0 +1,42 @@
+import React from 'react'
+import { Checkbox } from '@/components/ui/checkbox'
+import { Label } from '@/components/ui/label'
+
+interface CheckboxInputProps {
+  id: string
+  label: string
+  value: boolean | undefined
+  onChange: (value: boolean) => void
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+const CheckboxInput: React.FC<CheckboxInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  description,
+  disabled = false,
+  className
+}) => {
+  return (
+    <div className={`flex items-center space-x-2 ${className || ''}`}>
+      <Checkbox
+        id={id}
+        checked={value === true}
+        onCheckedChange={(checked) => onChange(!!checked)}
+        disabled={disabled}
+      />
+      <Label htmlFor={id} className="text-sm font-normal">
+        {label}
+        {description && (
+          <span className="text-muted-foreground ml-1">- {description}</span>
+        )}
+      </Label>
+    </div>
+  )
+}
+
+export default CheckboxInput
webui/src/components/form/NumberInput.tsx (new file, 60 lines)
@@ -0,0 +1,60 @@
+import React from 'react'
+import { Input } from '@/components/ui/input'
+import { Label } from '@/components/ui/label'
+
+interface NumberInputProps {
+  id: string
+  label: string
+  value: number | undefined
+  onChange: (value: number | undefined) => void
+  placeholder?: string
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+const NumberInput: React.FC<NumberInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  placeholder,
+  description,
+  disabled = false,
+  className
+}) => {
+  const handleChange = (inputValue: string) => {
+    if (inputValue === '') {
+      onChange(undefined)
+      return
+    }
+
+    const numValue = parseFloat(inputValue)
+    if (!isNaN(numValue)) {
+      onChange(numValue)
+    }
+  }
+
+  return (
+    <div className="grid gap-2">
+      <Label htmlFor={id}>
+        {label}
+      </Label>
+      <Input
+        id={id}
+        type="number"
+        step="any"
+        value={value !== undefined ? value : ''}
+        onChange={(e) => handleChange(e.target.value)}
+        placeholder={placeholder}
+        disabled={disabled}
+        className={className}
+      />
+      {description && (
+        <p className="text-sm text-muted-foreground">{description}</p>
+      )}
+    </div>
+  )
+}
+
+export default NumberInput
webui/src/components/form/SelectInput.tsx (new file, 55 lines)
@@ -0,0 +1,55 @@
+import React from 'react'
+import { Label } from '@/components/ui/label'
+
+interface SelectOption {
+  value: string
+  label: string
+}
+
+interface SelectInputProps {
+  id: string
+  label: string
+  value: string | undefined
+  onChange: (value: string | undefined) => void
+  options: SelectOption[]
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+const SelectInput: React.FC<SelectInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  options,
+  description,
+  disabled = false,
+  className
+}) => {
+  return (
+    <div className="grid gap-2">
+      <Label htmlFor={id}>
+        {label}
+      </Label>
+      <select
+        id={id}
+        value={value || ''}
+        onChange={(e) => onChange(e.target.value || undefined)}
+        disabled={disabled}
+        className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
+      >
+        {options.map(option => (
+          <option key={option.value} value={option.value}>
+            {option.label}
+          </option>
+        ))}
+      </select>
+      {description && (
+        <p className="text-sm text-muted-foreground">{description}</p>
+      )}
+    </div>
+  )
+}
+
+export default SelectInput
webui/src/components/form/TextInput.tsx (new file, 47 lines)
@@ -0,0 +1,47 @@
+import React from 'react'
+import { Input } from '@/components/ui/input'
+import { Label } from '@/components/ui/label'
+
+interface TextInputProps {
+  id: string
+  label: string
+  value: string | number | undefined
+  onChange: (value: string | undefined) => void
+  placeholder?: string
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+const TextInput: React.FC<TextInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  placeholder,
+  description,
+  disabled = false,
+  className
+}) => {
+  return (
+    <div className="grid gap-2">
+      <Label htmlFor={id}>
+        {label}
+      </Label>
+      <Input
+        id={id}
+        type="text"
+        value={typeof value === 'string' || typeof value === 'number' ? value : ''}
+        onChange={(e) => onChange(e.target.value || undefined)}
+        placeholder={placeholder}
+        disabled={disabled}
+        className={className}
+      />
+      {description && (
+        <p className="text-sm text-muted-foreground">{description}</p>
+      )}
+    </div>
+  )
+}
+
+export default TextInput
webui/src/components/instance/AdvancedInstanceFields.tsx (new file, 98 lines)
@@ -0,0 +1,98 @@
+import React from 'react'
+import type { CreateInstanceOptions } from '@/types/instance'
+import { getAdvancedFields, basicFieldsConfig } from '@/lib/zodFormUtils'
+import { getFieldType } from '@/schemas/instanceOptions'
+import TextInput from '@/components/form/TextInput'
+import NumberInput from '@/components/form/NumberInput'
+import CheckboxInput from '@/components/form/CheckboxInput'
+import ArrayInput from '@/components/form/ArrayInput'
+
+interface AdvancedInstanceFieldsProps {
+  formData: CreateInstanceOptions
+  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+}
+
+const AdvancedInstanceFields: React.FC<AdvancedInstanceFieldsProps> = ({
+  formData,
+  onChange
+}) => {
+  const advancedFields = getAdvancedFields()
+
+  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
+    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
+    const fieldType = getFieldType(fieldKey)
+
+    switch (fieldType) {
+      case 'boolean':
+        return (
+          <CheckboxInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as boolean | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            description={config.description}
+          />
+        )
+
+      case 'number':
+        return (
+          <NumberInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as number | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            placeholder={config.placeholder}
+            description={config.description}
+          />
+        )
+
+      case 'array':
+        return (
+          <ArrayInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as string[] | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            placeholder={config.placeholder}
+            description={config.description}
+          />
+        )
+
+      default:
+        return (
+          <TextInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as string | number | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            placeholder={config.placeholder}
+            description={config.description}
+          />
+        )
+    }
+  }
+
+  // Filter out restart options and backend_options (handled separately)
+  const fieldsToRender = advancedFields.filter(
+    fieldKey => !['max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
+  )
+
+  if (fieldsToRender.length === 0) {
+    return null
+  }
+
+  return (
+    <div className="space-y-4">
+      <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
+      {fieldsToRender
+        .sort()
+        .map(renderField)}
+    </div>
+  )
+}
+
+export default AdvancedInstanceFields
webui/src/components/instance/AutoRestartConfiguration.tsx (new file, 53 lines)
@@ -0,0 +1,53 @@
+import React from 'react'
+import type { CreateInstanceOptions } from '@/types/instance'
+import CheckboxInput from '@/components/form/CheckboxInput'
+import NumberInput from '@/components/form/NumberInput'
+
+interface AutoRestartConfigurationProps {
+  formData: CreateInstanceOptions
+  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+}
+
+const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
+  formData,
+  onChange
+}) => {
+  const isAutoRestartEnabled = formData.auto_restart === true
+
+  return (
+    <div className="space-y-4">
+      <h3 className="text-lg font-medium">Auto Restart Configuration</h3>
+
+      <CheckboxInput
+        id="auto_restart"
+        label="Auto Restart"
+        value={formData.auto_restart}
+        onChange={(value) => onChange('auto_restart', value)}
+        description="Automatically restart the instance on failure"
+      />
+
+      {isAutoRestartEnabled && (
+        <div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
+          <NumberInput
+            id="max_restarts"
+            label="Max Restarts"
+            value={formData.max_restarts}
+            onChange={(value) => onChange('max_restarts', value)}
+            placeholder="3"
+            description="Maximum number of restart attempts (0 = unlimited)"
+          />
+          <NumberInput
+            id="restart_delay"
+            label="Restart Delay (seconds)"
+            value={formData.restart_delay}
+            onChange={(value) => onChange('restart_delay', value)}
+            placeholder="5"
+            description="Delay in seconds before attempting restart"
+          />
+        </div>
+      )}
+    </div>
+  )
+}
+
+export default AutoRestartConfiguration
webui/src/components/instance/BackendConfiguration.tsx (new file, 54 lines)
@@ -0,0 +1,54 @@
+import React from 'react'
+import type { CreateInstanceOptions } from '@/types/instance'
+import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
+import BackendFormField from '@/components/BackendFormField'
+
+interface BackendConfigurationProps {
+  formData: CreateInstanceOptions
+  onBackendFieldChange: (key: string, value: any) => void
+  showAdvanced?: boolean
+}
+
+const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
+  formData,
+  onBackendFieldChange,
+  showAdvanced = false
+}) => {
+  const basicBackendFields = getBasicBackendFields(formData.backend_type)
+  const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
+
+  return (
+    <div className="space-y-4">
+      <h3 className="text-lg font-medium">Backend Configuration</h3>
+
+      {/* Basic backend fields */}
+      {basicBackendFields.map((fieldKey) => (
+        <BackendFormField
+          key={fieldKey}
+          fieldKey={fieldKey}
+          value={(formData.backend_options as any)?.[fieldKey]}
+          onChange={onBackendFieldChange}
+        />
+      ))}
+
+      {/* Advanced backend fields */}
+      {showAdvanced && advancedBackendFields.length > 0 && (
+        <div className="space-y-4 pl-6 border-l-2 border-muted">
+          <h4 className="text-md font-medium">Advanced Backend Configuration</h4>
+          {advancedBackendFields
+            .sort()
+            .map((fieldKey) => (
+              <BackendFormField
+                key={fieldKey}
+                fieldKey={fieldKey}
+                value={(formData.backend_options as any)?.[fieldKey]}
+                onChange={onBackendFieldChange}
+              />
+            ))}
+        </div>
+      )}
+    </div>
+  )
+}
+
+export default BackendConfiguration
webui/src/components/instance/BasicInstanceFields.tsx (new file, 99 lines)
@@ -0,0 +1,99 @@
+import React from 'react'
+import { BackendType, type CreateInstanceOptions } from '@/types/instance'
+import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
+import { getFieldType } from '@/schemas/instanceOptions'
+import TextInput from '@/components/form/TextInput'
+import NumberInput from '@/components/form/NumberInput'
+import CheckboxInput from '@/components/form/CheckboxInput'
+import SelectInput from '@/components/form/SelectInput'
+
+interface BasicInstanceFieldsProps {
+  formData: CreateInstanceOptions
+  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+}
+
+const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
+  formData,
+  onChange
+}) => {
+  const basicFields = getBasicFields()
+
+  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
+    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
+    const fieldType = getFieldType(fieldKey)
+
+    // Special handling for backend_type field
+    if (fieldKey === 'backend_type') {
+      return (
+        <SelectInput
+          key={fieldKey}
+          id={fieldKey}
+          label={config.label}
+          value={formData[fieldKey] || BackendType.LLAMA_CPP}
+          onChange={(value) => onChange(fieldKey, value)}
+          options={[
+            { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
+            { value: BackendType.MLX_LM, label: 'MLX LM' },
+            { value: BackendType.VLLM, label: 'vLLM' }
+          ]}
+          description={config.description}
+        />
+      )
+    }
+
+    // Render based on field type
+    switch (fieldType) {
+      case 'boolean':
+        return (
+          <CheckboxInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as boolean | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            description={config.description}
+          />
+        )
+
+      case 'number':
+        return (
+          <NumberInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as number | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            placeholder={config.placeholder}
+            description={config.description}
+          />
+        )
+
+      default:
+        return (
+          <TextInput
+            key={fieldKey}
+            id={fieldKey}
+            label={config.label}
+            value={formData[fieldKey] as string | number | undefined}
+            onChange={(value) => onChange(fieldKey, value)}
+            placeholder={config.placeholder}
+            description={config.description}
+          />
+        )
+    }
+  }
+
+  // Filter out auto restart fields and backend_options (handled separately)
+  const fieldsToRender = basicFields.filter(
+    fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
+  )
+
+  return (
+    <div className="space-y-4">
+      <h3 className="text-lg font-medium">Basic Configuration</h3>
+      {fieldsToRender.map(renderField)}
+    </div>
+  )
+}
+
+export default BasicInstanceFields
webui/src/lib/api.ts
@@ -1,4 +1,5 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
+import { handleApiError } from "./errorUtils";

 const API_BASE = "/api/v1";

@@ -30,25 +31,8 @@ async function apiCall<T>(
     headers,
   });

-  // Handle authentication errors
-  if (response.status === 401) {
-    throw new Error('Authentication required');
-  }
-
-  if (!response.ok) {
-    // Try to get error message from response
-    let errorMessage = `HTTP ${response.status}`;
-    try {
-      const errorText = await response.text();
-      if (errorText) {
-        errorMessage += `: ${errorText}`;
-      }
-    } catch {
-      // If we can't read the error, just use status
-    }
-
-    throw new Error(errorMessage);
-  }
+  // Handle errors using centralized error handler
+  await handleApiError(response);

   // Handle empty responses (like DELETE)
   if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
     const text = await response.text();
     return text as T;
   } else {
+    // Handle empty responses for JSON endpoints
+    const contentLength = response.headers.get('content-length');
+    if (contentLength === '0' || contentLength === null) {
+      const text = await response.text();
+      if (text.trim() === '') {
+        return {} as T; // Return empty object for empty JSON responses
+      }
+    }
     const data = await response.json() as T;
     return data;
   }
@@ -101,6 +93,14 @@ export const backendsApi = {
       body: JSON.stringify({ command }),
     }),
   },
+  vllm: {
+    // POST /backends/vllm/parse-command
+    parseCommand: (command: string) =>
+      apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
+        method: 'POST',
+        body: JSON.stringify({ command }),
+      }),
+  },
 };

 // Instance API functions
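For illustration, a hedged usage sketch of the new endpoint wrapper; the example command string is an assumption, and the returned shape is whatever the server's parser emits as `CreateInstanceOptions`:

```ts
// Expected flow: the backend parses the CLI string into instance options.
const options = await backendsApi.vllm.parseCommand(
  'vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2'
)
// options.backend_options should then carry the parsed vLLM flags.
```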
webui/src/lib/errorUtils.ts (new file, 32 lines)
@@ -0,0 +1,32 @@
+/**
+ * Parses error response from API calls and returns a formatted error message
+ */
+export async function parseErrorResponse(response: Response): Promise<string> {
+  let errorMessage = `HTTP ${response.status}`
+
+  try {
+    const errorText = await response.text()
+    if (errorText) {
+      errorMessage += `: ${errorText}`
+    }
+  } catch {
+    // If we can't read the error, just use status
+  }
+
+  return errorMessage
+}
+
+/**
+ * Handles common API call errors and throws appropriate Error objects
+ */
+export async function handleApiError(response: Response): Promise<void> {
+  // Handle authentication errors
+  if (response.status === 401) {
+    throw new Error('Authentication required')
+  }
+
+  if (!response.ok) {
+    const errorMessage = await parseErrorResponse(response)
+    throw new Error(errorMessage)
+  }
+}
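A minimal usage sketch, assuming the same `fetch`-based flow as `apiCall` in api.ts above (the URL is illustrative):

```ts
const response = await fetch('/api/v1/instances')
await handleApiError(response) // throws on 401 or any other non-OK status
const data = await response.json() // only reached for successful responses
```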
webui/src/lib/zodFormUtils.ts
@@ -2,13 +2,17 @@ import {
   type CreateInstanceOptions,
   type LlamaCppBackendOptions,
   type MlxBackendOptions,
+  type VllmBackendOptions,
   LlamaCppBackendOptionsSchema,
   MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
   getAllFieldKeys,
   getAllLlamaCppFieldKeys,
   getAllMlxFieldKeys,
+  getAllVllmFieldKeys,
   getLlamaCppFieldType,
-  getMlxFieldType
+  getMlxFieldType,
+  getVllmFieldType
 } from '@/schemas/instanceOptions'

 // Instance-level basic fields (not backend-specific)
@@ -16,7 +20,6 @@ export const basicFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   auto_restart: {
     label: 'Auto Restart',
@@ -52,13 +55,11 @@ const basicLlamaCppFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
-    description: 'Path to the model file',
-    required: true
+    description: 'Path to the model file'
   },
   hf_repo: {
     label: 'Hugging Face Repository',
@@ -82,13 +83,11 @@ const basicMlxFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   model: {
     label: 'Model',
     placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
-    description: 'The path to the MLX model weights, tokenizer, and config',
-    required: true
+    description: 'The path to the MLX model weights, tokenizer, and config'
   },
   temp: {
     label: 'Temperature',
@@ -117,11 +116,46 @@ const basicMlxFieldsConfig: Record<string, {
   }
 }

+// vLLM backend-specific basic fields
+const basicVllmFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+}> = {
+  model: {
+    label: 'Model',
+    placeholder: 'microsoft/DialoGPT-medium',
+    description: 'The name or path of the Hugging Face model to use'
+  },
+  tensor_parallel_size: {
+    label: 'Tensor Parallel Size',
+    placeholder: '1',
+    description: 'Number of GPUs to use for distributed serving'
+  },
+  gpu_memory_utilization: {
+    label: 'GPU Memory Utilization',
+    placeholder: '0.9',
+    description: 'The fraction of GPU memory to be used for the model executor'
+  }
+}
+
+// Backend field configuration lookup
+const backendFieldConfigs = {
+  mlx_lm: basicMlxFieldsConfig,
+  vllm: basicVllmFieldsConfig,
+  llama_cpp: basicLlamaCppFieldsConfig,
+} as const
+
+const backendFieldGetters = {
+  mlx_lm: getAllMlxFieldKeys,
+  vllm: getAllVllmFieldKeys,
+  llama_cpp: getAllLlamaCppFieldKeys,
+} as const
+
 function isBasicField(key: keyof CreateInstanceOptions): boolean {
   return key in basicFieldsConfig
 }

 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
   return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
 }
@@ -130,25 +164,18 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
   return getAllFieldKeys().filter(key => !isBasicField(key))
 }

 export function getBasicBackendFields(backendType?: string): string[] {
-  if (backendType === 'mlx_lm') {
-    return Object.keys(basicMlxFieldsConfig)
-  } else if (backendType === 'llama_cpp') {
-    return Object.keys(basicLlamaCppFieldsConfig)
-  }
-  // Default to LlamaCpp for backward compatibility
-  return Object.keys(basicLlamaCppFieldsConfig)
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
+  const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+  return Object.keys(config)
 }

 export function getAdvancedBackendFields(backendType?: string): string[] {
-  if (backendType === 'mlx_lm') {
-    return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
-  } else if (backendType === 'llama_cpp') {
-    return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
-  }
-  // Default to LlamaCpp for backward compatibility
-  return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
+  const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
+  const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+  return fieldGetter().filter(key => !(key in basicConfig))
 }

 // Combined backend fields config for use in BackendFormField
@@ -156,10 +183,10 @@ export const basicBackendFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
-  required?: boolean
 }> = {
   ...basicLlamaCppFieldsConfig,
-  ...basicMlxFieldsConfig
+  ...basicMlxFieldsConfig,
+  ...basicVllmFieldsConfig
 }

 // Get field type for any backend option (union type)
@@ -182,6 +209,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
     // Schema might not be available
   }

+  // Try vLLM schema
+  try {
+    if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
+      return getVllmFieldType(key as keyof VllmBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+
   // Default fallback
   return 'text'
 }
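Assuming the basic field configs above, the refactored lookup tables resolve per backend like this (illustrative values):

```ts
getBasicBackendFields('vllm')
// -> ['model', 'tensor_parallel_size', 'gpu_memory_utilization']
getBasicBackendFields()          // no type given: falls back to llama_cpp fields
getAdvancedBackendFields('vllm') // all vLLM schema keys minus the basic three
```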
webui/src/schemas/backends/index.ts (new file, 4 lines)
@@ -0,0 +1,4 @@
+// Re-export all backend schemas from one place
+export * from './llamacpp'
+export * from './mlx'
+export * from './vllm'
192
webui/src/schemas/backends/llamacpp.ts
Normal file
192
webui/src/schemas/backends/llamacpp.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import { z } from 'zod'
|
||||||
|
|
||||||
|
// Define the LlamaCpp backend options schema
|
||||||
|
export const LlamaCppBackendOptionsSchema = z.object({
|
||||||
|
// Common params
|
||||||
|
verbose_prompt: z.boolean().optional(),
|
||||||
|
threads: z.number().optional(),
|
||||||
|
threads_batch: z.number().optional(),
|
||||||
|
cpu_mask: z.string().optional(),
|
||||||
|
cpu_range: z.string().optional(),
|
||||||
|
cpu_strict: z.number().optional(),
|
||||||
|
prio: z.number().optional(),
|
||||||
|
poll: z.number().optional(),
|
||||||
|
cpu_mask_batch: z.string().optional(),
|
||||||
|
cpu_range_batch: z.string().optional(),
|
||||||
|
cpu_strict_batch: z.number().optional(),
|
||||||
|
prio_batch: z.number().optional(),
|
||||||
|
poll_batch: z.number().optional(),
|
||||||
|
ctx_size: z.number().optional(),
|
||||||
|
predict: z.number().optional(),
|
||||||
|
batch_size: z.number().optional(),
|
||||||
|
ubatch_size: z.number().optional(),
|
||||||
|
keep: z.number().optional(),
|
||||||
|
flash_attn: z.boolean().optional(),
|
||||||
|
no_perf: z.boolean().optional(),
|
||||||
|
escape: z.boolean().optional(),
|
||||||
|
no_escape: z.boolean().optional(),
|
||||||
|
rope_scaling: z.string().optional(),
|
||||||
|
rope_scale: z.number().optional(),
|
||||||
|
rope_freq_base: z.number().optional(),
|
||||||
|
rope_freq_scale: z.number().optional(),
|
||||||
|
yarn_orig_ctx: z.number().optional(),
|
||||||
|
yarn_ext_factor: z.number().optional(),
|
||||||
|
yarn_attn_factor: z.number().optional(),
|
||||||
|
yarn_beta_slow: z.number().optional(),
|
||||||
|
yarn_beta_fast: z.number().optional(),
|
||||||
|
dump_kv_cache: z.boolean().optional(),
|
||||||
|
no_kv_offload: z.boolean().optional(),
|
||||||
|
cache_type_k: z.string().optional(),
|
||||||
|
cache_type_v: z.string().optional(),
|
||||||
|
defrag_thold: z.number().optional(),
|
||||||
|
parallel: z.number().optional(),
|
||||||
|
mlock: z.boolean().optional(),
|
||||||
|
no_mmap: z.boolean().optional(),
|
||||||
|
numa: z.string().optional(),
|
||||||
|
device: z.string().optional(),
|
||||||
|
override_tensor: z.array(z.string()).optional(),
|
||||||
|
gpu_layers: z.number().optional(),
|
||||||
|
split_mode: z.string().optional(),
|
||||||
|
tensor_split: z.string().optional(),
|
||||||
|
main_gpu: z.number().optional(),
|
||||||
|
check_tensors: z.boolean().optional(),
|
||||||
|
override_kv: z.array(z.string()).optional(),
|
||||||
|
lora: z.array(z.string()).optional(),
|
||||||
|
lora_scaled: z.array(z.string()).optional(),
|
||||||
|
control_vector: z.array(z.string()).optional(),
|
||||||
|
control_vector_scaled: z.array(z.string()).optional(),
|
||||||
|
control_vector_layer_range: z.string().optional(),
|
||||||
|
model: z.string().optional(),
|
||||||
|
model_url: z.string().optional(),
|
||||||
|
hf_repo: z.string().optional(),
|
||||||
|
hf_repo_draft: z.string().optional(),
|
||||||
|
hf_file: z.string().optional(),
|
||||||
|
hf_repo_v: z.string().optional(),
|
||||||
|
hf_file_v: z.string().optional(),
|
||||||
|
hf_token: z.string().optional(),
|
||||||
|
log_disable: z.boolean().optional(),
|
||||||
|
log_file: z.string().optional(),
|
||||||
|
log_colors: z.boolean().optional(),
|
||||||
|
verbose: z.boolean().optional(),
|
||||||
|
verbosity: z.number().optional(),
|
||||||
|
log_prefix: z.boolean().optional(),
|
||||||
|
log_timestamps: z.boolean().optional(),
|
||||||
|
|
||||||
  // Sampling params
  samplers: z.string().optional(),
  seed: z.number().optional(),
  sampling_seq: z.string().optional(),
  ignore_eos: z.boolean().optional(),
  temp: z.number().optional(),
  top_k: z.number().optional(),
  top_p: z.number().optional(),
  min_p: z.number().optional(),
  xtc_probability: z.number().optional(),
  xtc_threshold: z.number().optional(),
  typical: z.number().optional(),
  repeat_last_n: z.number().optional(),
  repeat_penalty: z.number().optional(),
  presence_penalty: z.number().optional(),
  frequency_penalty: z.number().optional(),
  dry_multiplier: z.number().optional(),
  dry_base: z.number().optional(),
  dry_allowed_length: z.number().optional(),
  dry_penalty_last_n: z.number().optional(),
  dry_sequence_breaker: z.array(z.string()).optional(),
  dynatemp_range: z.number().optional(),
  dynatemp_exp: z.number().optional(),
  mirostat: z.number().optional(),
  mirostat_lr: z.number().optional(),
  mirostat_ent: z.number().optional(),
  logit_bias: z.array(z.string()).optional(),
  grammar: z.string().optional(),
  grammar_file: z.string().optional(),
  json_schema: z.string().optional(),
  json_schema_file: z.string().optional(),

  // Example-specific params
  no_context_shift: z.boolean().optional(),
  special: z.boolean().optional(),
  no_warmup: z.boolean().optional(),
  spm_infill: z.boolean().optional(),
  pooling: z.string().optional(),
  cont_batching: z.boolean().optional(),
  no_cont_batching: z.boolean().optional(),
  mmproj: z.string().optional(),
  mmproj_url: z.string().optional(),
  no_mmproj: z.boolean().optional(),
  no_mmproj_offload: z.boolean().optional(),
  alias: z.string().optional(),
  host: z.string().optional(),
  port: z.number().optional(),
  path: z.string().optional(),
  no_webui: z.boolean().optional(),
  embedding: z.boolean().optional(),
  reranking: z.boolean().optional(),
  api_key: z.string().optional(),
  api_key_file: z.string().optional(),
  ssl_key_file: z.string().optional(),
  ssl_cert_file: z.string().optional(),
  chat_template_kwargs: z.string().optional(),
  timeout: z.number().optional(),
  threads_http: z.number().optional(),
  cache_reuse: z.number().optional(),
  metrics: z.boolean().optional(),
  slots: z.boolean().optional(),
  props: z.boolean().optional(),
  no_slots: z.boolean().optional(),
  slot_save_path: z.string().optional(),
  jinja: z.boolean().optional(),
  reasoning_format: z.string().optional(),
  reasoning_budget: z.number().optional(),
  chat_template: z.string().optional(),
  chat_template_file: z.string().optional(),
  no_prefill_assistant: z.boolean().optional(),
  slot_prompt_similarity: z.number().optional(),
  lora_init_without_apply: z.boolean().optional(),
  draft_max: z.number().optional(),
  draft_min: z.number().optional(),
  draft_p_min: z.number().optional(),
  ctx_size_draft: z.number().optional(),
  device_draft: z.string().optional(),
  gpu_layers_draft: z.number().optional(),
  model_draft: z.string().optional(),
  cache_type_k_draft: z.string().optional(),
  cache_type_v_draft: z.string().optional(),

  // Audio/TTS params
  model_vocoder: z.string().optional(),
  tts_use_guide_tokens: z.boolean().optional(),

  // Default model params
  embd_bge_small_en_default: z.boolean().optional(),
  embd_e5_small_en_default: z.boolean().optional(),
  embd_gte_small_default: z.boolean().optional(),
  fim_qwen_1_5b_default: z.boolean().optional(),
  fim_qwen_3b_default: z.boolean().optional(),
  fim_qwen_7b_default: z.boolean().optional(),
  fim_qwen_7b_spec: z.boolean().optional(),
  fim_qwen_14b_spec: z.boolean().optional(),
})

// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>

// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}

// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  return 'text' // ZodString and others default to text
}
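The two helpers above are what lets the dashboard derive an input widget for every llama.cpp option straight from the Zod schema. A minimal usage sketch (not part of the commit; it assumes this file lives at `webui/src/schemas/backends/llamacpp.ts` and is reachable via the `@/` alias):

```typescript
import {
  getAllLlamaCppFieldKeys,
  getLlamaCppFieldType,
} from '@/schemas/backends/llamacpp'

// Derive a rendering hint for every llama.cpp option from the schema,
// so the UI never hard-codes per-field widget choices.
const fieldTypes = getAllLlamaCppFieldKeys().map((key) => ({
  key,
  widget: getLlamaCppFieldType(key), // 'text' | 'number' | 'boolean' | 'array'
}))

console.log(fieldTypes.find((f) => f.key === 'gpu_layers'))
// -> { key: 'gpu_layers', widget: 'number' }
```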
webui/src/schemas/backends/mlx.ts (new file, 51 lines)
@@ -0,0 +1,51 @@
import { z } from 'zod'

// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
  // Basic connection options
  model: z.string().optional(),
  host: z.string().optional(),
  port: z.number().optional(),

  // Model and adapter options
  adapter_path: z.string().optional(),
  draft_model: z.string().optional(),
  num_draft_tokens: z.number().optional(),
  trust_remote_code: z.boolean().optional(),

  // Logging and templates
  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
  chat_template: z.string().optional(),
  use_default_chat_template: z.boolean().optional(),
  chat_template_args: z.string().optional(), // JSON string

  // Sampling defaults
  temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
  top_p: z.number().optional(),
  top_k: z.number().optional(),
  min_p: z.number().optional(),
  max_tokens: z.number().optional(),
})

// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>

// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}

// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = MlxBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
  return 'text' // ZodString and others default to text
}
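Because every field in this schema is optional, `safeParse` mainly guards against wrong types rather than missing keys. A hedged sketch of validating form input before it reaches the llamactl API (not part of the commit; the model id is a made-up placeholder):

```typescript
import { MlxBackendOptionsSchema, type MlxBackendOptions } from '@/schemas/backends/mlx'

// Validate untrusted form input before sending it to the server.
const result = MlxBackendOptionsSchema.safeParse({
  model: 'mlx-community/Example-7B-4bit', // placeholder model id
  temp: 0.7, // MLX spells it "temp", not "temperature"
  max_tokens: 512,
})

if (result.success) {
  const options: MlxBackendOptions = result.data
  console.log('valid MLX options', options)
} else {
  console.error(result.error.issues) // e.g. temp: 'hot' -> "Expected number"
}
```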
webui/src/schemas/backends/vllm.ts (new file, 150 lines)
@@ -0,0 +1,150 @@
import { z } from 'zod'

// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
  // Basic connection options (auto-assigned by llamactl)
  host: z.string().optional(),
  port: z.number().optional(),

  // Model and engine configuration
  model: z.string().optional(),
  tokenizer: z.string().optional(),
  skip_tokenizer_init: z.boolean().optional(),
  revision: z.string().optional(),
  code_revision: z.string().optional(),
  tokenizer_revision: z.string().optional(),
  tokenizer_mode: z.string().optional(),
  trust_remote_code: z.boolean().optional(),
  download_dir: z.string().optional(),
  load_format: z.string().optional(),
  config_format: z.string().optional(),
  dtype: z.string().optional(),
  kv_cache_dtype: z.string().optional(),
  quantization_param_path: z.string().optional(),
  seed: z.number().optional(),
  max_model_len: z.number().optional(),
  guided_decoding_backend: z.string().optional(),
  distributed_executor_backend: z.string().optional(),
  worker_use_ray: z.boolean().optional(),
  ray_workers_use_nsight: z.boolean().optional(),

  // Performance and serving configuration
  block_size: z.number().optional(),
  enable_prefix_caching: z.boolean().optional(),
  disable_sliding_window: z.boolean().optional(),
  use_v2_block_manager: z.boolean().optional(),
  num_lookahead_slots: z.number().optional(),
  swap_space: z.number().optional(),
  cpu_offload_gb: z.number().optional(),
  gpu_memory_utilization: z.number().optional(),
  num_gpu_blocks_override: z.number().optional(),
  max_num_batched_tokens: z.number().optional(),
  max_num_seqs: z.number().optional(),
  max_logprobs: z.number().optional(),
  disable_log_stats: z.boolean().optional(),
  quantization: z.string().optional(),
  rope_scaling: z.string().optional(),
  rope_theta: z.number().optional(),
  enforce_eager: z.boolean().optional(),
  max_context_len_to_capture: z.number().optional(),
  max_seq_len_to_capture: z.number().optional(),
  disable_custom_all_reduce: z.boolean().optional(),
  tokenizer_pool_size: z.number().optional(),
  tokenizer_pool_type: z.string().optional(),
  tokenizer_pool_extra_config: z.string().optional(),
  enable_lora_bias: z.boolean().optional(),
  lora_extra_vocab_size: z.number().optional(),
  lora_rank: z.number().optional(),
  prompt_lookback_distance: z.number().optional(),
  preemption_mode: z.string().optional(),

  // Distributed and parallel processing
  tensor_parallel_size: z.number().optional(),
  pipeline_parallel_size: z.number().optional(),
  max_parallel_loading_workers: z.number().optional(),
  disable_async_output_proc: z.boolean().optional(),
  worker_class: z.string().optional(),
  enabled_lora_modules: z.string().optional(),
  max_lora_rank: z.number().optional(),
  fully_sharded_loras: z.boolean().optional(),
  lora_modules: z.string().optional(),
  prompt_adapters: z.string().optional(),
  max_prompt_adapter_token: z.number().optional(),
  device: z.string().optional(),
  scheduler_delay: z.number().optional(),
  enable_chunked_prefill: z.boolean().optional(),
  speculative_model: z.string().optional(),
  speculative_model_quantization: z.string().optional(),
  speculative_revision: z.string().optional(),
  speculative_max_model_len: z.number().optional(),
  speculative_disable_by_batch_size: z.number().optional(),
  ngpt_speculative_length: z.number().optional(),
  speculative_disable_mqa: z.boolean().optional(),
  model_loader_extra_config: z.string().optional(),
  ignore_patterns: z.string().optional(),
  preloaded_lora_modules: z.string().optional(),

  // OpenAI server specific options
  uds: z.string().optional(),
  uvicorn_log_level: z.string().optional(),
  response_role: z.string().optional(),
  ssl_keyfile: z.string().optional(),
  ssl_certfile: z.string().optional(),
  ssl_ca_certs: z.string().optional(),
  ssl_cert_reqs: z.number().optional(),
  root_path: z.string().optional(),
  middleware: z.array(z.string()).optional(),
  return_tokens_as_token_ids: z.boolean().optional(),
  disable_frontend_multiprocessing: z.boolean().optional(),
  enable_auto_tool_choice: z.boolean().optional(),
  tool_call_parser: z.string().optional(),
  tool_server: z.string().optional(),
  chat_template: z.string().optional(),
  chat_template_content_format: z.string().optional(),
  allow_credentials: z.boolean().optional(),
  allowed_origins: z.array(z.string()).optional(),
  allowed_methods: z.array(z.string()).optional(),
  allowed_headers: z.array(z.string()).optional(),
  api_key: z.array(z.string()).optional(),
  enable_log_outputs: z.boolean().optional(),
  enable_token_usage: z.boolean().optional(),
  enable_async_engine_debug: z.boolean().optional(),
  engine_use_ray: z.boolean().optional(),
  disable_log_requests: z.boolean().optional(),
  max_log_len: z.number().optional(),

  // Additional engine configuration
  task: z.string().optional(),
  multi_modal_config: z.string().optional(),
  limit_mm_per_prompt: z.string().optional(),
  enable_sleep_mode: z.boolean().optional(),
  enable_chunking_request: z.boolean().optional(),
  compilation_config: z.string().optional(),
  disable_sliding_window_mask: z.boolean().optional(),
  enable_trtllm_engine_latency: z.boolean().optional(),
  override_pooling_config: z.string().optional(),
  override_neuron_config: z.string().optional(),
  override_kv_cache_align_size: z.number().optional(),
})

// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>

// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
  return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}

// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = VllmBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'

  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema

  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  return 'text' // ZodString and others default to text
}
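The same pattern applies to vLLM: since all fields are optional, validation only rejects type mismatches. A sketch (not part of the commit; the values are illustrative):

```typescript
import { VllmBackendOptionsSchema } from '@/schemas/backends/vllm'

// A type error such as tensor_parallel_size: 'two' would fail this parse.
const parsed = VllmBackendOptionsSchema.safeParse({
  model: 'org/example-model', // placeholder model id
  tensor_parallel_size: 2,
  gpu_memory_utilization: 0.9,
})

console.log(parsed.success) // true for the object above
```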
webui/src/schemas/instanceOptions.ts
@@ -1,206 +1,27 @@
 import { BackendType } from '@/types/instance'
 import { z } from 'zod'
 
-// Define the LlamaCpp backend options schema
-export const LlamaCppBackendOptionsSchema = z.object({
-  // Common params
-  verbose_prompt: z.boolean().optional(),
-  threads: z.number().optional(),
-  threads_batch: z.number().optional(),
-  cpu_mask: z.string().optional(),
-  cpu_range: z.string().optional(),
-  cpu_strict: z.number().optional(),
-  prio: z.number().optional(),
-  poll: z.number().optional(),
-  cpu_mask_batch: z.string().optional(),
-  cpu_range_batch: z.string().optional(),
-  cpu_strict_batch: z.number().optional(),
-  prio_batch: z.number().optional(),
-  poll_batch: z.number().optional(),
-  ctx_size: z.number().optional(),
-  predict: z.number().optional(),
-  batch_size: z.number().optional(),
-  ubatch_size: z.number().optional(),
-  keep: z.number().optional(),
-  flash_attn: z.boolean().optional(),
-  no_perf: z.boolean().optional(),
-  escape: z.boolean().optional(),
-  no_escape: z.boolean().optional(),
-  rope_scaling: z.string().optional(),
-  rope_scale: z.number().optional(),
-  rope_freq_base: z.number().optional(),
-  rope_freq_scale: z.number().optional(),
-  yarn_orig_ctx: z.number().optional(),
-  yarn_ext_factor: z.number().optional(),
-  yarn_attn_factor: z.number().optional(),
-  yarn_beta_slow: z.number().optional(),
-  yarn_beta_fast: z.number().optional(),
-  dump_kv_cache: z.boolean().optional(),
-  no_kv_offload: z.boolean().optional(),
-  cache_type_k: z.string().optional(),
-  cache_type_v: z.string().optional(),
-  defrag_thold: z.number().optional(),
-  parallel: z.number().optional(),
-  mlock: z.boolean().optional(),
-  no_mmap: z.boolean().optional(),
-  numa: z.string().optional(),
-  device: z.string().optional(),
-  override_tensor: z.array(z.string()).optional(),
-  gpu_layers: z.number().optional(),
-  split_mode: z.string().optional(),
-  tensor_split: z.string().optional(),
-  main_gpu: z.number().optional(),
-  check_tensors: z.boolean().optional(),
-  override_kv: z.array(z.string()).optional(),
-  lora: z.array(z.string()).optional(),
-  lora_scaled: z.array(z.string()).optional(),
-  control_vector: z.array(z.string()).optional(),
-  control_vector_scaled: z.array(z.string()).optional(),
-  control_vector_layer_range: z.string().optional(),
-  model: z.string().optional(),
-  model_url: z.string().optional(),
-  hf_repo: z.string().optional(),
-  hf_repo_draft: z.string().optional(),
-  hf_file: z.string().optional(),
-  hf_repo_v: z.string().optional(),
-  hf_file_v: z.string().optional(),
-  hf_token: z.string().optional(),
-  log_disable: z.boolean().optional(),
-  log_file: z.string().optional(),
-  log_colors: z.boolean().optional(),
-  verbose: z.boolean().optional(),
-  verbosity: z.number().optional(),
-  log_prefix: z.boolean().optional(),
-  log_timestamps: z.boolean().optional(),
-
-  // Sampling params
-  samplers: z.string().optional(),
-  seed: z.number().optional(),
-  sampling_seq: z.string().optional(),
-  ignore_eos: z.boolean().optional(),
-  temp: z.number().optional(),
-  top_k: z.number().optional(),
-  top_p: z.number().optional(),
-  min_p: z.number().optional(),
-  xtc_probability: z.number().optional(),
-  xtc_threshold: z.number().optional(),
-  typical: z.number().optional(),
-  repeat_last_n: z.number().optional(),
-  repeat_penalty: z.number().optional(),
-  presence_penalty: z.number().optional(),
-  frequency_penalty: z.number().optional(),
-  dry_multiplier: z.number().optional(),
-  dry_base: z.number().optional(),
-  dry_allowed_length: z.number().optional(),
-  dry_penalty_last_n: z.number().optional(),
-  dry_sequence_breaker: z.array(z.string()).optional(),
-  dynatemp_range: z.number().optional(),
-  dynatemp_exp: z.number().optional(),
-  mirostat: z.number().optional(),
-  mirostat_lr: z.number().optional(),
-  mirostat_ent: z.number().optional(),
-  logit_bias: z.array(z.string()).optional(),
-  grammar: z.string().optional(),
-  grammar_file: z.string().optional(),
-  json_schema: z.string().optional(),
-  json_schema_file: z.string().optional(),
-
-  // Example-specific params
-  no_context_shift: z.boolean().optional(),
-  special: z.boolean().optional(),
-  no_warmup: z.boolean().optional(),
-  spm_infill: z.boolean().optional(),
-  pooling: z.string().optional(),
-  cont_batching: z.boolean().optional(),
-  no_cont_batching: z.boolean().optional(),
-  mmproj: z.string().optional(),
-  mmproj_url: z.string().optional(),
-  no_mmproj: z.boolean().optional(),
-  no_mmproj_offload: z.boolean().optional(),
-  alias: z.string().optional(),
-  host: z.string().optional(),
-  port: z.number().optional(),
-  path: z.string().optional(),
-  no_webui: z.boolean().optional(),
-  embedding: z.boolean().optional(),
-  reranking: z.boolean().optional(),
-  api_key: z.string().optional(),
-  api_key_file: z.string().optional(),
-  ssl_key_file: z.string().optional(),
-  ssl_cert_file: z.string().optional(),
-  chat_template_kwargs: z.string().optional(),
-  timeout: z.number().optional(),
-  threads_http: z.number().optional(),
-  cache_reuse: z.number().optional(),
-  metrics: z.boolean().optional(),
-  slots: z.boolean().optional(),
-  props: z.boolean().optional(),
-  no_slots: z.boolean().optional(),
-  slot_save_path: z.string().optional(),
-  jinja: z.boolean().optional(),
-  reasoning_format: z.string().optional(),
-  reasoning_budget: z.number().optional(),
-  chat_template: z.string().optional(),
-  chat_template_file: z.string().optional(),
-  no_prefill_assistant: z.boolean().optional(),
-  slot_prompt_similarity: z.number().optional(),
-  lora_init_without_apply: z.boolean().optional(),
-  draft_max: z.number().optional(),
-  draft_min: z.number().optional(),
-  draft_p_min: z.number().optional(),
-  ctx_size_draft: z.number().optional(),
-  device_draft: z.string().optional(),
-  gpu_layers_draft: z.number().optional(),
-  model_draft: z.string().optional(),
-  cache_type_k_draft: z.string().optional(),
-  cache_type_v_draft: z.string().optional(),
-
-  // Audio/TTS params
-  model_vocoder: z.string().optional(),
-  tts_use_guide_tokens: z.boolean().optional(),
-
-  // Default model params
-  embd_bge_small_en_default: z.boolean().optional(),
-  embd_e5_small_en_default: z.boolean().optional(),
-  embd_gte_small_default: z.boolean().optional(),
-  fim_qwen_1_5b_default: z.boolean().optional(),
-  fim_qwen_3b_default: z.boolean().optional(),
-  fim_qwen_7b_default: z.boolean().optional(),
-  fim_qwen_7b_spec: z.boolean().optional(),
-  fim_qwen_14b_spec: z.boolean().optional(),
-})
-
-// Define the MLX backend options schema
-export const MlxBackendOptionsSchema = z.object({
-  // Basic connection options
-  model: z.string().optional(),
-  host: z.string().optional(),
-  port: z.number().optional(),
-
-  // Model and adapter options
-  adapter_path: z.string().optional(),
-  draft_model: z.string().optional(),
-  num_draft_tokens: z.number().optional(),
-  trust_remote_code: z.boolean().optional(),
-
-  // Logging and templates
-  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
-  chat_template: z.string().optional(),
-  use_default_chat_template: z.boolean().optional(),
-  chat_template_args: z.string().optional(), // JSON string
-
-  // Sampling defaults
-  temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
-  top_p: z.number().optional(),
-  top_k: z.number().optional(),
-  min_p: z.number().optional(),
-  max_tokens: z.number().optional(),
-})
-
+// Import backend schemas from separate files
+import {
+  LlamaCppBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getLlamaCppFieldType,
+  MlxBackendOptionsSchema,
+  type MlxBackendOptions,
+  getAllMlxFieldKeys,
+  getMlxFieldType,
+  VllmBackendOptionsSchema,
+  type VllmBackendOptions,
+  getAllVllmFieldKeys,
+  getVllmFieldType
+} from './backends'
+
 // Backend options union
 export const BackendOptionsSchema = z.union([
   LlamaCppBackendOptionsSchema,
   MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
 ])
 
 // Define the main create instance options schema
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
   on_demand_start: z.boolean().optional(),
 
   // Backend configuration
-  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
+  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
 })
 
+// Re-export types and schemas from backend files
+export {
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  type MlxBackendOptions,
+  type VllmBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getAllMlxFieldKeys,
+  getAllVllmFieldKeys,
+  getLlamaCppFieldType,
+  getMlxFieldType,
+  getVllmFieldType
+}
+
 // Infer the TypeScript types from the schemas
-export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
-export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
 export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
 
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }
 
-// Helper to get all LlamaCpp backend option field keys
-export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
-  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
-}
-
-// Helper to get all MLX backend option field keys
-export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
-  return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
-}
-
 // Get field type from Zod schema
 export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   const fieldSchema = CreateInstanceOptionsSchema.shape[key]
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   if (innerSchema instanceof z.ZodObject) return 'object'
   return 'text' // ZodString and others default to text
 }
-
-// Get field type for LlamaCpp backend options
-export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
-  if (!fieldSchema) return 'text'
-
-  // Handle ZodOptional wrapper
-  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
-
-  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
-  if (innerSchema instanceof z.ZodNumber) return 'number'
-  if (innerSchema instanceof z.ZodArray) return 'array'
-  return 'text' // ZodString and others default to text
-}
-
-// Get field type for MLX backend options
-export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = MlxBackendOptionsSchema.shape[key]
-  if (!fieldSchema) return 'text'
-
-  // Handle ZodOptional wrapper
-  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
-
-  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
-  if (innerSchema instanceof z.ZodNumber) return 'number'
-  if (innerSchema instanceof z.ZodArray) return 'array'
-  if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
-  return 'text' // ZodString and others default to text
-}
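After this refactor, instanceOptions.ts only composes the per-backend schemas. A sketch of end-to-end validation of a create-instance payload (not part of the commit):

```typescript
import { BackendType } from '@/types/instance'
import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'

// Parse a full create-instance payload; backend_options is checked
// against the BackendOptionsSchema union defined above.
const payload = CreateInstanceOptionsSchema.parse({
  backend_type: BackendType.VLLM,
  backend_options: { tensor_parallel_size: 2 },
})

console.log(payload.backend_type) // 'vllm'
```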
webui/src/types/instance.ts
@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
 export const BackendType = {
   LLAMA_CPP: 'llama_cpp',
   MLX_LM: 'mlx_lm',
+  VLLM: 'vllm',
   // MLX_VLM: 'mlx_vlm', // Future expansion
 } as const
 
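With the new `VLLM` member, UI code can dispatch on the backend type to the matching schema helpers. A sketch of that dispatch (not part of the commit; the function name is illustrative):

```typescript
import { BackendType } from '@/types/instance'
import {
  getAllLlamaCppFieldKeys,
  getAllMlxFieldKeys,
  getAllVllmFieldKeys,
} from '@/schemas/instanceOptions'

// Return the advanced-options keys to render for a given backend.
function fieldKeysFor(backendType: string): string[] {
  switch (backendType) {
    case BackendType.LLAMA_CPP: return getAllLlamaCppFieldKeys()
    case BackendType.MLX_LM:    return getAllMlxFieldKeys()
    case BackendType.VLLM:      return getAllVllmFieldKeys()
    default:                    return []
  }
}
```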