diff --git a/.gitignore b/.gitignore
index 4075d71..d96fc8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,4 +42,7 @@ site/
llamactl.dev.yaml
# Debug files
-__debug*
\ No newline at end of file
+__debug*
+
+# Binary
+llamactl-*
\ No newline at end of file
diff --git a/docs/docs.go b/docs/docs.go
index f46ac36..8d6a8f1 100644
--- a/docs/docs.go
+++ b/docs/docs.go
@@ -256,6 +256,34 @@ const docTemplate = `{
}
}
},
+ "/api/v1/config": {
+ "get": {
+ "security": [
+ {
+ "ApiKeyAuth": []
+ }
+ ],
+ "description": "Returns the current server configuration (sanitized)",
+ "tags": [
+ "System"
+ ],
+ "summary": "Get server configuration",
+ "responses": {
+ "200": {
+ "description": "Sanitized configuration",
+ "schema": {
+ "$ref": "#/definitions/config.AppConfig"
+ }
+ },
+ "500": {
+ "description": "Internal Server Error",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
"/api/v1/instances": {
"get": {
"security": [
@@ -1475,6 +1503,247 @@ const docTemplate = `{
}
},
"definitions": {
+ "config.AppConfig": {
+ "type": "object",
+ "properties": {
+ "auth": {
+ "$ref": "#/definitions/config.AuthConfig"
+ },
+ "backends": {
+ "$ref": "#/definitions/config.BackendConfig"
+ },
+ "build_time": {
+ "type": "string"
+ },
+ "commit_hash": {
+ "type": "string"
+ },
+ "instances": {
+ "$ref": "#/definitions/config.InstancesConfig"
+ },
+ "local_node": {
+ "type": "string"
+ },
+ "nodes": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/definitions/config.NodeConfig"
+ }
+ },
+ "server": {
+ "$ref": "#/definitions/config.ServerConfig"
+ },
+ "version": {
+ "type": "string"
+ }
+ }
+ },
+ "config.AuthConfig": {
+ "type": "object",
+ "properties": {
+ "inference_keys": {
+ "description": "List of keys for OpenAI compatible inference endpoints",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "management_keys": {
+ "description": "List of keys for management endpoints",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "require_inference_auth": {
+ "description": "Require authentication for OpenAI compatible inference endpoints",
+ "type": "boolean"
+ },
+ "require_management_auth": {
+ "description": "Require authentication for management endpoints",
+ "type": "boolean"
+ }
+ }
+ },
+ "config.BackendConfig": {
+ "type": "object",
+ "properties": {
+ "llama-cpp": {
+ "$ref": "#/definitions/config.BackendSettings"
+ },
+ "mlx": {
+ "$ref": "#/definitions/config.BackendSettings"
+ },
+ "vllm": {
+ "$ref": "#/definitions/config.BackendSettings"
+ }
+ }
+ },
+ "config.BackendSettings": {
+ "type": "object",
+ "properties": {
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "command": {
+ "type": "string"
+ },
+ "docker": {
+ "$ref": "#/definitions/config.DockerSettings"
+ },
+ "environment": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "response_headers": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "config.DockerSettings": {
+ "type": "object",
+ "properties": {
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "enabled": {
+ "type": "boolean"
+ },
+ "environment": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "image": {
+ "type": "string"
+ }
+ }
+ },
+ "config.InstancesConfig": {
+ "type": "object",
+ "properties": {
+ "auto_create_dirs": {
+ "description": "Automatically create the data directory if it doesn't exist",
+ "type": "boolean"
+ },
+ "configs_dir": {
+ "description": "Instance config directory override",
+ "type": "string"
+ },
+ "data_dir": {
+ "description": "Directory where all llamactl data will be stored (instances.json, logs, etc.)",
+ "type": "string"
+ },
+ "default_auto_restart": {
+ "description": "Default auto-restart setting for new instances",
+ "type": "boolean"
+ },
+ "default_max_restarts": {
+ "description": "Default max restarts for new instances",
+ "type": "integer"
+ },
+ "default_on_demand_start": {
+ "description": "Default on-demand start setting for new instances",
+ "type": "boolean"
+ },
+ "default_restart_delay": {
+ "description": "Default restart delay for new instances (in seconds)",
+ "type": "integer"
+ },
+ "enable_lru_eviction": {
+ "description": "Enable LRU eviction for instance logs",
+ "type": "boolean"
+ },
+ "logs_dir": {
+ "description": "Logs directory override",
+ "type": "string"
+ },
+ "max_instances": {
+ "description": "Maximum number of instances that can be created",
+ "type": "integer"
+ },
+ "max_running_instances": {
+ "description": "Maximum number of instances that can be running at the same time",
+ "type": "integer"
+ },
+ "on_demand_start_timeout": {
+ "description": "How long to wait for an instance to start on demand (in seconds)",
+ "type": "integer"
+ },
+ "port_range": {
+ "description": "Port range for instances (e.g., 8000,9000)",
+ "type": "array",
+ "items": {
+ "type": "integer"
+ }
+ },
+ "timeout_check_interval": {
+ "description": "Interval for checking instance timeouts (in minutes)",
+ "type": "integer"
+ }
+ }
+ },
+ "config.NodeConfig": {
+ "type": "object",
+ "properties": {
+ "address": {
+ "type": "string"
+ },
+ "api_key": {
+ "type": "string"
+ }
+ }
+ },
+ "config.ServerConfig": {
+ "type": "object",
+ "properties": {
+ "allowed_headers": {
+ "description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "allowed_origins": {
+ "description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "enable_swagger": {
+ "description": "Enable Swagger UI for API documentation",
+ "type": "boolean"
+ },
+ "host": {
+ "description": "Server host to bind to",
+ "type": "string"
+ },
+ "port": {
+ "description": "Server port to bind to",
+ "type": "integer"
+ },
+ "response_headers": {
+ "description": "Response headers to send with responses",
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ },
"instance.Instance": {
"type": "object",
"properties": {
@@ -1494,6 +1763,13 @@ const docTemplate = `{
"description": "Auto restart",
"type": "boolean"
},
+ "command_override": {
+ "type": "string"
+ },
+ "docker_enabled": {
+ "description": "Execution context overrides",
+ "type": "boolean"
+ },
"environment": {
"description": "Environment variables",
"type": "object",
diff --git a/docs/managing-instances.md b/docs/managing-instances.md
index be5e768..7504905 100644
--- a/docs/managing-instances.md
+++ b/docs/managing-instances.md
@@ -42,33 +42,41 @@ Each instance is displayed as a card showing:

1. Click the **"Create Instance"** button on the dashboard
-2. *Optional*: Click **"Import"** in the dialog header to load a previously exported configuration
-2. Enter a unique **Name** for your instance (only required field)
-3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
-4. **Choose Backend Type**:
- - **llama.cpp**: For GGUF models using llama-server
- - **MLX**: For MLX-optimized models (macOS only)
+2. *Optional*: Click **"Import"** to load a previously exported configuration
+
+**Instance Settings:**
+
+3. Enter a unique **Instance Name** (required)
+4. **Select Node**: Choose which node to deploy the instance to
+5. Configure **Auto Restart** settings:
+ - Enable automatic restart on failure
+ - Set max restarts and delay between attempts
+6. Configure basic instance options:
+ - **Idle Timeout**: Minutes before stopping idle instance
+ - **On Demand Start**: Start instance only when needed
+
+**Backend Configuration:**
+
+7. **Select Backend Type**:
+ - **Llama Server**: For GGUF models using llama-server
+ - **MLX LM**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
-5. Configure model source:
- - **For llama.cpp**: GGUF model path or HuggingFace repo
- - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
-6. Configure optional instance management settings:
- - **Auto Restart**: Automatically restart instance on failure
- - **Max Restarts**: Maximum number of restart attempts
- - **Restart Delay**: Delay in seconds between restart attempts
- - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- - **Environment Variables**: Set custom environment variables for the instance process
-7. Configure backend-specific options:
- - **llama.cpp**: Threads, context size, GPU layers, port, etc.
- - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
+8. *Optional*: Click **"Parse Command"** to import settings from an existing backend command
+9. Configure **Execution Context**:
+ - **Enable Docker**: Run backend in Docker container
+ - **Command Override**: Custom path to backend executable
+ - **Environment Variables**: Custom environment variables
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
-
-8. Click **"Create"** to save the instance
+
+10. Configure **Basic Backend Options** (varies by backend):
+ - **llama.cpp**: Model path, threads, context size, GPU layers, etc.
+ - **MLX**: Model identifier, temperature, max tokens, etc.
+ - **vLLM**: Model identifier, tensor parallel size, GPU memory utilization, etc.
+11. *Optional*: Expand **Advanced Backend Options** for additional settings
+12. *Optional*: Add **Extra Args** as key-value pairs for custom command-line arguments
+13. Click **"Create"** to save the instance
**Via API**
@@ -83,11 +91,34 @@ curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
- "gpu_layers": 32
+ "gpu_layers": 32,
+ "flash_attn": "on"
},
+ "auto_restart": true,
+ "max_restarts": 3,
+ "docker_enabled": false,
+ "command_override": "/opt/llama-server-dev",
"nodes": ["main"]
}'
+# Create vLLM instance with environment variables
+curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer " \
+ -d '{
+ "backend_type": "vllm",
+ "backend_options": {
+ "model": "microsoft/DialoGPT-medium",
+ "tensor_parallel_size": 2,
+ "gpu_memory_utilization": 0.9
+ },
+ "on_demand_start": true,
+ "environment": {
+ "CUDA_VISIBLE_DEVICES": "0,1"
+ },
+ "nodes": ["worker1", "worker2"]
+ }'
+
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
-H "Content-Type: application/json" \
@@ -97,74 +128,10 @@ curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
- "top_p": 0.9,
"max_tokens": 2048
},
- "auto_restart": true,
- "max_restarts": 3,
"nodes": ["main"]
}'
-
-# Create vLLM instance
-curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer " \
- -d '{
- "backend_type": "vllm",
- "backend_options": {
- "model": "microsoft/DialoGPT-medium",
- "tensor_parallel_size": 2,
- "gpu_memory_utilization": 0.9
- },
- "auto_restart": true,
- "on_demand_start": true,
- "environment": {
- "CUDA_VISIBLE_DEVICES": "0,1",
- "NCCL_DEBUG": "INFO",
- "PYTHONPATH": "/custom/path"
- },
- "nodes": ["main"]
- }'
-
-# Create llama.cpp instance with HuggingFace model
-curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer " \
- -d '{
- "backend_type": "llama_cpp",
- "backend_options": {
- "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
- "hf_file": "gemma-3-27b-it-GGUF.gguf",
- "gpu_layers": 32
- },
- "nodes": ["main"]
- }'
-
-# Create instance on specific remote node
-curl -X POST http://localhost:8080/api/v1/instances/remote-llama \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer " \
- -d '{
- "backend_type": "llama_cpp",
- "backend_options": {
- "model": "/models/llama-7b.gguf",
- "gpu_layers": 32
- },
- "nodes": ["worker1"]
- }'
-
-# Create instance on multiple nodes for high availability
-curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \
- -H "Content-Type: application/json" \
- -H "Authorization: Bearer " \
- -d '{
- "backend_type": "llama_cpp",
- "backend_options": {
- "model": "/models/llama-7b.gguf",
- "gpu_layers": 32
- },
- "nodes": ["worker1", "worker2", "worker3"]
- }'
```
## Start Instance
diff --git a/docs/swagger.json b/docs/swagger.json
index 26f9662..f79a008 100644
--- a/docs/swagger.json
+++ b/docs/swagger.json
@@ -249,6 +249,34 @@
}
}
},
+ "/api/v1/config": {
+ "get": {
+ "security": [
+ {
+ "ApiKeyAuth": []
+ }
+ ],
+ "description": "Returns the current server configuration (sanitized)",
+ "tags": [
+ "System"
+ ],
+ "summary": "Get server configuration",
+ "responses": {
+ "200": {
+ "description": "Sanitized configuration",
+ "schema": {
+ "$ref": "#/definitions/config.AppConfig"
+ }
+ },
+ "500": {
+ "description": "Internal Server Error",
+ "schema": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
"/api/v1/instances": {
"get": {
"security": [
@@ -1468,6 +1496,247 @@
}
},
"definitions": {
+ "config.AppConfig": {
+ "type": "object",
+ "properties": {
+ "auth": {
+ "$ref": "#/definitions/config.AuthConfig"
+ },
+ "backends": {
+ "$ref": "#/definitions/config.BackendConfig"
+ },
+ "build_time": {
+ "type": "string"
+ },
+ "commit_hash": {
+ "type": "string"
+ },
+ "instances": {
+ "$ref": "#/definitions/config.InstancesConfig"
+ },
+ "local_node": {
+ "type": "string"
+ },
+ "nodes": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/definitions/config.NodeConfig"
+ }
+ },
+ "server": {
+ "$ref": "#/definitions/config.ServerConfig"
+ },
+ "version": {
+ "type": "string"
+ }
+ }
+ },
+ "config.AuthConfig": {
+ "type": "object",
+ "properties": {
+ "inference_keys": {
+ "description": "List of keys for OpenAI compatible inference endpoints",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "management_keys": {
+ "description": "List of keys for management endpoints",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "require_inference_auth": {
+ "description": "Require authentication for OpenAI compatible inference endpoints",
+ "type": "boolean"
+ },
+ "require_management_auth": {
+ "description": "Require authentication for management endpoints",
+ "type": "boolean"
+ }
+ }
+ },
+ "config.BackendConfig": {
+ "type": "object",
+ "properties": {
+ "llama-cpp": {
+ "$ref": "#/definitions/config.BackendSettings"
+ },
+ "mlx": {
+ "$ref": "#/definitions/config.BackendSettings"
+ },
+ "vllm": {
+ "$ref": "#/definitions/config.BackendSettings"
+ }
+ }
+ },
+ "config.BackendSettings": {
+ "type": "object",
+ "properties": {
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "command": {
+ "type": "string"
+ },
+ "docker": {
+ "$ref": "#/definitions/config.DockerSettings"
+ },
+ "environment": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "response_headers": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "config.DockerSettings": {
+ "type": "object",
+ "properties": {
+ "args": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "enabled": {
+ "type": "boolean"
+ },
+ "environment": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ },
+ "image": {
+ "type": "string"
+ }
+ }
+ },
+ "config.InstancesConfig": {
+ "type": "object",
+ "properties": {
+ "auto_create_dirs": {
+ "description": "Automatically create the data directory if it doesn't exist",
+ "type": "boolean"
+ },
+ "configs_dir": {
+ "description": "Instance config directory override",
+ "type": "string"
+ },
+ "data_dir": {
+ "description": "Directory where all llamactl data will be stored (instances.json, logs, etc.)",
+ "type": "string"
+ },
+ "default_auto_restart": {
+ "description": "Default auto-restart setting for new instances",
+ "type": "boolean"
+ },
+ "default_max_restarts": {
+ "description": "Default max restarts for new instances",
+ "type": "integer"
+ },
+ "default_on_demand_start": {
+ "description": "Default on-demand start setting for new instances",
+ "type": "boolean"
+ },
+ "default_restart_delay": {
+ "description": "Default restart delay for new instances (in seconds)",
+ "type": "integer"
+ },
+ "enable_lru_eviction": {
+ "description": "Enable LRU eviction for instance logs",
+ "type": "boolean"
+ },
+ "logs_dir": {
+ "description": "Logs directory override",
+ "type": "string"
+ },
+ "max_instances": {
+ "description": "Maximum number of instances that can be created",
+ "type": "integer"
+ },
+ "max_running_instances": {
+ "description": "Maximum number of instances that can be running at the same time",
+ "type": "integer"
+ },
+ "on_demand_start_timeout": {
+ "description": "How long to wait for an instance to start on demand (in seconds)",
+ "type": "integer"
+ },
+ "port_range": {
+ "description": "Port range for instances (e.g., 8000,9000)",
+ "type": "array",
+ "items": {
+ "type": "integer"
+ }
+ },
+ "timeout_check_interval": {
+ "description": "Interval for checking instance timeouts (in minutes)",
+ "type": "integer"
+ }
+ }
+ },
+ "config.NodeConfig": {
+ "type": "object",
+ "properties": {
+ "address": {
+ "type": "string"
+ },
+ "api_key": {
+ "type": "string"
+ }
+ }
+ },
+ "config.ServerConfig": {
+ "type": "object",
+ "properties": {
+ "allowed_headers": {
+ "description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "allowed_origins": {
+ "description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "enable_swagger": {
+ "description": "Enable Swagger UI for API documentation",
+ "type": "boolean"
+ },
+ "host": {
+ "description": "Server host to bind to",
+ "type": "string"
+ },
+ "port": {
+ "description": "Server port to bind to",
+ "type": "integer"
+ },
+ "response_headers": {
+ "description": "Response headers to send with responses",
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
+ }
+ }
+ }
+ },
"instance.Instance": {
"type": "object",
"properties": {
@@ -1487,6 +1756,13 @@
"description": "Auto restart",
"type": "boolean"
},
+ "command_override": {
+ "type": "string"
+ },
+ "docker_enabled": {
+ "description": "Execution context overrides",
+ "type": "boolean"
+ },
"environment": {
"description": "Environment variables",
"type": "object",
diff --git a/docs/swagger.yaml b/docs/swagger.yaml
index 7506036..2888ce1 100644
--- a/docs/swagger.yaml
+++ b/docs/swagger.yaml
@@ -1,5 +1,173 @@
basePath: /api/v1
definitions:
+ config.AppConfig:
+ properties:
+ auth:
+ $ref: '#/definitions/config.AuthConfig'
+ backends:
+ $ref: '#/definitions/config.BackendConfig'
+ build_time:
+ type: string
+ commit_hash:
+ type: string
+ instances:
+ $ref: '#/definitions/config.InstancesConfig'
+ local_node:
+ type: string
+ nodes:
+ additionalProperties:
+ $ref: '#/definitions/config.NodeConfig'
+ type: object
+ server:
+ $ref: '#/definitions/config.ServerConfig'
+ version:
+ type: string
+ type: object
+ config.AuthConfig:
+ properties:
+ inference_keys:
+ description: List of keys for OpenAI compatible inference endpoints
+ items:
+ type: string
+ type: array
+ management_keys:
+ description: List of keys for management endpoints
+ items:
+ type: string
+ type: array
+ require_inference_auth:
+ description: Require authentication for OpenAI compatible inference endpoints
+ type: boolean
+ require_management_auth:
+ description: Require authentication for management endpoints
+ type: boolean
+ type: object
+ config.BackendConfig:
+ properties:
+ llama-cpp:
+ $ref: '#/definitions/config.BackendSettings'
+ mlx:
+ $ref: '#/definitions/config.BackendSettings'
+ vllm:
+ $ref: '#/definitions/config.BackendSettings'
+ type: object
+ config.BackendSettings:
+ properties:
+ args:
+ items:
+ type: string
+ type: array
+ command:
+ type: string
+ docker:
+ $ref: '#/definitions/config.DockerSettings'
+ environment:
+ additionalProperties:
+ type: string
+ type: object
+ response_headers:
+ additionalProperties:
+ type: string
+ type: object
+ type: object
+ config.DockerSettings:
+ properties:
+ args:
+ items:
+ type: string
+ type: array
+ enabled:
+ type: boolean
+ environment:
+ additionalProperties:
+ type: string
+ type: object
+ image:
+ type: string
+ type: object
+ config.InstancesConfig:
+ properties:
+ auto_create_dirs:
+ description: Automatically create the data directory if it doesn't exist
+ type: boolean
+ configs_dir:
+ description: Instance config directory override
+ type: string
+ data_dir:
+ description: Directory where all llamactl data will be stored (instances.json,
+ logs, etc.)
+ type: string
+ default_auto_restart:
+ description: Default auto-restart setting for new instances
+ type: boolean
+ default_max_restarts:
+ description: Default max restarts for new instances
+ type: integer
+ default_on_demand_start:
+ description: Default on-demand start setting for new instances
+ type: boolean
+ default_restart_delay:
+ description: Default restart delay for new instances (in seconds)
+ type: integer
+ enable_lru_eviction:
+ description: Enable LRU eviction for instance logs
+ type: boolean
+ logs_dir:
+ description: Logs directory override
+ type: string
+ max_instances:
+ description: Maximum number of instances that can be created
+ type: integer
+ max_running_instances:
+ description: Maximum number of instances that can be running at the same time
+ type: integer
+ on_demand_start_timeout:
+ description: How long to wait for an instance to start on demand (in seconds)
+ type: integer
+ port_range:
+ description: Port range for instances (e.g., 8000,9000)
+ items:
+ type: integer
+ type: array
+ timeout_check_interval:
+ description: Interval for checking instance timeouts (in minutes)
+ type: integer
+ type: object
+ config.NodeConfig:
+ properties:
+ address:
+ type: string
+ api_key:
+ type: string
+ type: object
+ config.ServerConfig:
+ properties:
+ allowed_headers:
+ description: Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type",
+ "X-CSRF-Token")
+ items:
+ type: string
+ type: array
+ allowed_origins:
+ description: Allowed origins for CORS (e.g., "http://localhost:3000")
+ items:
+ type: string
+ type: array
+ enable_swagger:
+ description: Enable Swagger UI for API documentation
+ type: boolean
+ host:
+ description: Server host to bind to
+ type: string
+ port:
+ description: Server port to bind to
+ type: integer
+ response_headers:
+ additionalProperties:
+ type: string
+ description: Response headers to send with responses
+ type: object
+ type: object
instance.Instance:
properties:
created:
@@ -13,6 +181,11 @@ definitions:
auto_restart:
description: Auto restart
type: boolean
+ command_override:
+ type: string
+ docker_enabled:
+ description: Execution context overrides
+ type: boolean
environment:
additionalProperties:
type: string
@@ -216,6 +389,23 @@ paths:
summary: Parse vllm serve command
tags:
- Backends
+ /api/v1/config:
+ get:
+ description: Returns the current server configuration (sanitized)
+ responses:
+ "200":
+ description: Sanitized configuration
+ schema:
+ $ref: '#/definitions/config.AppConfig'
+ "500":
+ description: Internal Server Error
+ schema:
+ type: string
+ security:
+ - ApiKeyAuth: []
+ summary: Get server configuration
+ tags:
+ - System
/api/v1/instances:
get:
description: Returns a list of all instances managed by the server
diff --git a/pkg/backends/backend.go b/pkg/backends/backend.go
index 022e778..ad138a5 100644
--- a/pkg/backends/backend.go
+++ b/pkg/backends/backend.go
@@ -79,14 +79,8 @@ func (o *Options) UnmarshalJSON(data []byte) error {
}
func (o *Options) MarshalJSON() ([]byte, error) {
- type Alias Options
- aux := &struct {
- *Alias
- }{
- Alias: (*Alias)(o),
- }
-
// Get backend and marshal it
+ var backendOptions map[string]any
backend := o.getBackend()
if backend != nil {
optionsData, err := json.Marshal(backend)
@@ -94,13 +88,19 @@ func (o *Options) MarshalJSON() ([]byte, error) {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
// Create a new map to avoid concurrent map writes
- aux.BackendOptions = make(map[string]any)
- if err := json.Unmarshal(optionsData, &aux.BackendOptions); err != nil {
+ backendOptions = make(map[string]any)
+ if err := json.Unmarshal(optionsData, &backendOptions); err != nil {
return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err)
}
}
- return json.Marshal(aux)
+ return json.Marshal(&struct {
+ BackendType BackendType `json:"backend_type"`
+ BackendOptions map[string]any `json:"backend_options,omitempty"`
+ }{
+ BackendType: o.BackendType,
+ BackendOptions: backendOptions,
+ })
}
// setBackendOptions stores the backend in the appropriate typed field
@@ -142,32 +142,54 @@ func (o *Options) getBackend() backend {
}
}
-func (o *Options) isDockerEnabled(backend *config.BackendSettings) bool {
- if backend.Docker != nil && backend.Docker.Enabled && o.BackendType != BackendTypeMlxLm {
- return true
+// isDockerEnabled checks if Docker is enabled with an optional override
+func (o *Options) isDockerEnabled(backend *config.BackendSettings, dockerEnabledOverride *bool) bool {
+ // Check if backend supports Docker
+ if backend.Docker == nil {
+ return false
}
- return false
+
+ // MLX doesn't support Docker
+ if o.BackendType == BackendTypeMlxLm {
+ return false
+ }
+
+ // Check for instance-level override
+ if dockerEnabledOverride != nil {
+ return *dockerEnabledOverride
+ }
+
+ // Fall back to config value
+ return backend.Docker.Enabled
}
-func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig) bool {
+func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig, dockerEnabled *bool) bool {
backendSettings := o.getBackendSettings(backendConfig)
- return o.isDockerEnabled(backendSettings)
+ return o.isDockerEnabled(backendSettings, dockerEnabled)
}
// GetCommand builds the command to run the backend
-func (o *Options) GetCommand(backendConfig *config.BackendConfig) string {
-
+func (o *Options) GetCommand(backendConfig *config.BackendConfig, dockerEnabled *bool, commandOverride string) string {
backendSettings := o.getBackendSettings(backendConfig)
- if o.isDockerEnabled(backendSettings) {
+ // Determine if Docker is enabled
+ useDocker := o.isDockerEnabled(backendSettings, dockerEnabled)
+
+ if useDocker {
return "docker"
}
+ // Check for command override (only applies when not in Docker mode)
+ if commandOverride != "" {
+ return commandOverride
+ }
+
+ // Fall back to config command
return backendSettings.Command
}
// buildCommandArgs builds command line arguments for the backend
-func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string {
+func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig, dockerEnabled *bool) []string {
var args []string
@@ -177,7 +199,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string
return args
}
- if o.isDockerEnabled(backendSettings) {
+ if o.isDockerEnabled(backendSettings, dockerEnabled) {
// For Docker, start with Docker args
args = append(args, backendSettings.Docker.Args...)
args = append(args, backendSettings.Docker.Image)
@@ -193,7 +215,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string
}
// BuildEnvironment builds the environment variables for the backend process
-func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environment map[string]string) map[string]string {
+func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, dockerEnabled *bool, environment map[string]string) map[string]string {
backendSettings := o.getBackendSettings(backendConfig)
env := map[string]string{}
@@ -202,7 +224,7 @@ func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environm
maps.Copy(env, backendSettings.Environment)
}
- if o.isDockerEnabled(backendSettings) {
+ if o.isDockerEnabled(backendSettings, dockerEnabled) {
if backendSettings.Docker.Environment != nil {
maps.Copy(env, backendSettings.Docker.Environment)
}
diff --git a/pkg/backends/llama_test.go b/pkg/backends/llama_test.go
index 961967b..4440092 100644
--- a/pkg/backends/llama_test.go
+++ b/pkg/backends/llama_test.go
@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
+ "llamactl/pkg/config"
"llamactl/pkg/testutil"
"reflect"
"testing"
@@ -549,3 +550,79 @@ func TestParseLlamaCommand_ExtraArgs(t *testing.T) {
})
}
}
+func TestLlamaCppGetCommand_WithOverrides(t *testing.T) {
+ tests := []struct {
+ name string
+ dockerInConfig bool
+ dockerEnabled *bool
+ commandOverride string
+ expected string
+ }{
+ {
+ name: "no overrides - use config command",
+ dockerInConfig: false,
+ dockerEnabled: nil,
+ commandOverride: "",
+ expected: "/usr/bin/llama-server",
+ },
+ {
+ name: "override to enable docker",
+ dockerInConfig: false,
+ dockerEnabled: boolPtr(true),
+ commandOverride: "",
+ expected: "docker",
+ },
+ {
+ name: "override to disable docker",
+ dockerInConfig: true,
+ dockerEnabled: boolPtr(false),
+ commandOverride: "",
+ expected: "/usr/bin/llama-server",
+ },
+ {
+ name: "command override",
+ dockerInConfig: false,
+ dockerEnabled: nil,
+ commandOverride: "/custom/llama-server",
+ expected: "/custom/llama-server",
+ },
+ {
+ name: "docker takes precedence over command override",
+ dockerInConfig: false,
+ dockerEnabled: boolPtr(true),
+ commandOverride: "/custom/llama-server",
+ expected: "docker",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ backendConfig := &config.BackendConfig{
+ LlamaCpp: config.BackendSettings{
+ Command: "/usr/bin/llama-server",
+ Docker: &config.DockerSettings{
+ Enabled: tt.dockerInConfig,
+ Image: "test-image",
+ },
+ },
+ }
+
+ opts := backends.Options{
+ BackendType: backends.BackendTypeLlamaCpp,
+ LlamaServerOptions: &backends.LlamaServerOptions{
+ Model: "test-model.gguf",
+ },
+ }
+
+ result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride)
+ if result != tt.expected {
+ t.Errorf("GetCommand() = %v, want %v", result, tt.expected)
+ }
+ })
+ }
+}
+
+// Helper function to create bool pointer
+func boolPtr(b bool) *bool {
+ return &b
+}
diff --git a/pkg/backends/mlx_test.go b/pkg/backends/mlx_test.go
index f8a2ee5..f24d1a5 100644
--- a/pkg/backends/mlx_test.go
+++ b/pkg/backends/mlx_test.go
@@ -2,6 +2,7 @@ package backends_test
import (
"llamactl/pkg/backends"
+ "llamactl/pkg/config"
"llamactl/pkg/testutil"
"testing"
)
@@ -274,3 +275,57 @@ func TestParseMlxCommand_ExtraArgs(t *testing.T) {
})
}
}
+func TestMlxGetCommand_NoDocker(t *testing.T) {
+ // MLX backend should never use Docker
+ backendConfig := &config.BackendConfig{
+ MLX: config.BackendSettings{
+ Command: "/usr/bin/mlx-server",
+ Docker: &config.DockerSettings{
+ Enabled: true, // Even if enabled in config
+ Image: "test-image",
+ },
+ },
+ }
+
+ opts := backends.Options{
+ BackendType: backends.BackendTypeMlxLm,
+ MlxServerOptions: &backends.MlxServerOptions{
+ Model: "test-model",
+ },
+ }
+
+ tests := []struct {
+ name string
+ dockerEnabled *bool
+ commandOverride string
+ expected string
+ }{
+ {
+ name: "ignores docker in config",
+ dockerEnabled: nil,
+ commandOverride: "",
+ expected: "/usr/bin/mlx-server",
+ },
+ {
+ name: "ignores docker override",
+ dockerEnabled: boolPtr(true),
+ commandOverride: "",
+ expected: "/usr/bin/mlx-server",
+ },
+ {
+ name: "respects command override",
+ dockerEnabled: nil,
+ commandOverride: "/custom/mlx-server",
+ expected: "/custom/mlx-server",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride)
+ if result != tt.expected {
+ t.Errorf("GetCommand() = %v, want %v", result, tt.expected)
+ }
+ })
+ }
+}
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 6df9e42..5f85f20 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -1,6 +1,7 @@
package config
import (
+ "encoding/json"
"fmt"
"log"
"os"
@@ -14,126 +15,126 @@ import (
// BackendSettings contains structured backend configuration
type BackendSettings struct {
- Command string `yaml:"command"`
- Args []string `yaml:"args"`
- Environment map[string]string `yaml:"environment,omitempty"`
- Docker *DockerSettings `yaml:"docker,omitempty"`
- ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
+ Command string `yaml:"command" json:"command"`
+ Args []string `yaml:"args" json:"args"`
+ Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
+ Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
+ ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
- Enabled bool `yaml:"enabled"`
- Image string `yaml:"image"`
- Args []string `yaml:"args"`
- Environment map[string]string `yaml:"environment,omitempty"`
+ Enabled bool `yaml:"enabled" json:"enabled"`
+ Image string `yaml:"image" json:"image"`
+ Args []string `yaml:"args" json:"args"`
+ Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
- LlamaCpp BackendSettings `yaml:"llama-cpp"`
- VLLM BackendSettings `yaml:"vllm"`
- MLX BackendSettings `yaml:"mlx"`
+ LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
+ VLLM BackendSettings `yaml:"vllm" json:"vllm"`
+ MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
- Server ServerConfig `yaml:"server"`
- Backends BackendConfig `yaml:"backends"`
- Instances InstancesConfig `yaml:"instances"`
- Auth AuthConfig `yaml:"auth"`
- LocalNode string `yaml:"local_node,omitempty"`
- Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
- Version string `yaml:"-"`
- CommitHash string `yaml:"-"`
- BuildTime string `yaml:"-"`
+ Server ServerConfig `yaml:"server" json:"server"`
+ Backends BackendConfig `yaml:"backends" json:"backends"`
+ Instances InstancesConfig `yaml:"instances" json:"instances"`
+ Auth AuthConfig `yaml:"auth" json:"auth"`
+ LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
+ Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
+ Version string `yaml:"-" json:"version"`
+ CommitHash string `yaml:"-" json:"commit_hash"`
+ BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
- Host string `yaml:"host"`
+ Host string `yaml:"host" json:"host"`
// Server port to bind to
- Port int `yaml:"port"`
+ Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
- AllowedOrigins []string `yaml:"allowed_origins"`
+ AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
- AllowedHeaders []string `yaml:"allowed_headers"`
+ AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
- EnableSwagger bool `yaml:"enable_swagger"`
+ EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
- ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
+ ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
- PortRange [2]int `yaml:"port_range"`
+ PortRange [2]int `yaml:"port_range" json:"port_range"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
- DataDir string `yaml:"data_dir"`
+ DataDir string `yaml:"data_dir" json:"data_dir"`
// Instance config directory override
- InstancesDir string `yaml:"configs_dir"`
+ InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Logs directory override
- LogsDir string `yaml:"logs_dir"`
+ LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Automatically create the data directory if it doesn't exist
- AutoCreateDirs bool `yaml:"auto_create_dirs"`
+ AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
- MaxInstances int `yaml:"max_instances"`
+ MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
- MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
+ MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
- EnableLRUEviction bool `yaml:"enable_lru_eviction"`
+ EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
- DefaultAutoRestart bool `yaml:"default_auto_restart"`
+ DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
- DefaultMaxRestarts int `yaml:"default_max_restarts"`
+ DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
- DefaultRestartDelay int `yaml:"default_restart_delay"`
+ DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
- DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+ DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
- OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+ OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
- TimeoutCheckInterval int `yaml:"timeout_check_interval"`
+ TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
- RequireInferenceAuth bool `yaml:"require_inference_auth"`
+ RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
- InferenceKeys []string `yaml:"inference_keys"`
+ InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
- RequireManagementAuth bool `yaml:"require_management_auth"`
+ RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
- ManagementKeys []string `yaml:"management_keys"`
+ ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
- Address string `yaml:"address"`
- APIKey string `yaml:"api_key,omitempty"`
+ Address string `yaml:"address" json:"address"`
+ APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
@@ -610,3 +611,31 @@ func getDefaultConfigLocations() []string {
return locations
}
+
+// SanitizedCopy returns a copy of the AppConfig with sensitive information removed
+func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
+ // Deep copy via JSON round-trip so the returned copy can be mutated independently of the original (note: Marshal still reads cfg's maps, so cfg must not be mutated concurrently)
+ data, err := json.Marshal(cfg)
+ if err != nil {
+ log.Printf("Failed to marshal config for sanitization: %v", err)
+ return AppConfig{}, err
+ }
+
+ var sanitized AppConfig
+ if err := json.Unmarshal(data, &sanitized); err != nil {
+ log.Printf("Failed to unmarshal config for sanitization: %v", err)
+ return AppConfig{}, err
+ }
+
+ // Clear sensitive information
+ sanitized.Auth.InferenceKeys = []string{}
+ sanitized.Auth.ManagementKeys = []string{}
+
+ // Clear API keys from nodes
+ for nodeName, node := range sanitized.Nodes {
+ node.APIKey = ""
+ sanitized.Nodes[nodeName] = node
+ }
+
+ return sanitized, nil
+}
diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go
index 5e5dc27..376cc0c 100644
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -255,7 +255,7 @@ func (i *Instance) getCommand() string {
return ""
}
- return opts.BackendOptions.GetCommand(i.globalBackendSettings)
+ return opts.BackendOptions.GetCommand(i.globalBackendSettings, opts.DockerEnabled, opts.CommandOverride)
}
func (i *Instance) buildCommandArgs() []string {
@@ -264,7 +264,7 @@ func (i *Instance) buildCommandArgs() []string {
return nil
}
- return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings)
+ return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings, opts.DockerEnabled)
}
func (i *Instance) buildEnvironment() map[string]string {
@@ -273,29 +273,21 @@ func (i *Instance) buildEnvironment() map[string]string {
return nil
}
- return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.Environment)
+ return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.DockerEnabled, opts.Environment)
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
- // Get options
- opts := i.GetOptions()
-
- // Determine if docker is enabled for this instance's backend
- dockerEnabled := opts.BackendOptions.IsDockerEnabled(i.globalBackendSettings)
-
return json.Marshal(&struct {
- Name string `json:"name"`
- Status *status `json:"status"`
- Created int64 `json:"created,omitempty"`
- Options *options `json:"options,omitempty"`
- DockerEnabled bool `json:"docker_enabled,omitempty"`
+ Name string `json:"name"`
+ Status *status `json:"status"`
+ Created int64 `json:"created,omitempty"`
+ Options *options `json:"options,omitempty"`
}{
- Name: i.Name,
- Status: i.status,
- Created: i.Created,
- Options: i.options,
- DockerEnabled: dockerEnabled,
+ Name: i.Name,
+ Status: i.status,
+ Created: i.Created,
+ Options: i.options,
})
}
diff --git a/pkg/instance/options.go b/pkg/instance/options.go
index 0c4b582..57a3ce9 100644
--- a/pkg/instance/options.go
+++ b/pkg/instance/options.go
@@ -5,7 +5,9 @@ import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
+ "llamactl/pkg/validation"
"log"
+ "maps"
"slices"
"sync"
)
@@ -22,6 +24,11 @@ type Options struct {
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
+
+ // Execution context overrides
+ DockerEnabled *bool `json:"docker_enabled,omitempty"`
+ CommandOverride string `json:"command_override,omitempty"`
+
// Assigned nodes
Nodes map[string]struct{} `json:"-"`
// Backend options
@@ -138,15 +145,25 @@ func (c *Options) UnmarshalJSON(data []byte) error {
// MarshalJSON implements custom JSON marshaling for Options
func (c *Options) MarshalJSON() ([]byte, error) {
- // Use anonymous struct to avoid recursion
type Alias Options
- aux := struct {
+
+ // Make a copy of the struct
+ temp := *c
+
+ // Copy environment map to avoid concurrent access issues
+ if temp.Environment != nil {
+ envCopy := make(map[string]string, len(temp.Environment))
+ maps.Copy(envCopy, temp.Environment)
+ temp.Environment = envCopy
+ }
+
+ aux := &struct {
Nodes []string `json:"nodes,omitempty"` // Output as JSON array
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
- Alias: (*Alias)(c),
+ Alias: (*Alias)(&temp),
}
// Convert nodes map to array (sorted for consistency)
@@ -163,13 +180,12 @@ func (c *Options) MarshalJSON() ([]byte, error) {
aux.BackendType = c.BackendOptions.BackendType
// Marshal the backends.Options struct to get the properly formatted backend options
- // Marshal a pointer to trigger the pointer receiver MarshalJSON method
backendData, err := json.Marshal(&c.BackendOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
- // Unmarshal into a temporary struct to extract the backend_options map
+ // Unmarshal into a temporary struct to extract the backend_options map
var tempBackend struct {
BackendOptions map[string]any `json:"backend_options,omitempty"`
}
@@ -200,6 +216,28 @@ func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.I
*c.IdleTimeout = 0
}
+ // Validate docker_enabled and command_override relationship
+ if c.DockerEnabled != nil && *c.DockerEnabled && c.CommandOverride != "" {
+ log.Printf("Instance %s: command_override cannot be set when docker_enabled is true, ignoring command_override", name)
+ c.CommandOverride = "" // Clear invalid configuration
+ }
+
+ // Validate command_override if set
+ if c.CommandOverride != "" {
+ if err := validation.ValidateStringForInjection(c.CommandOverride); err != nil {
+ log.Printf("Instance %s: invalid command_override: %v, clearing value", name, err)
+ c.CommandOverride = "" // Clear invalid value
+ }
+ }
+
+ // Validate docker_enabled for MLX backend
+ if c.BackendOptions.BackendType == backends.BackendTypeMlxLm {
+ if c.DockerEnabled != nil && *c.DockerEnabled {
+ log.Printf("Instance %s: docker_enabled is not supported for MLX backend, ignoring", name)
+ c.DockerEnabled = nil // Clear invalid configuration
+ }
+ }
+
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go
index 46410f3..0af346c 100644
--- a/pkg/server/handlers_system.go
+++ b/pkg/server/handlers_system.go
@@ -20,3 +20,23 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
writeText(w, http.StatusOK, versionInfo)
}
}
+
+// ConfigHandler godoc
+// @Summary Get server configuration
+// @Description Returns the current server configuration (sanitized)
+// @Tags System
+// @Security ApiKeyAuth
+// @Produce json
+// @Success 200 {object} config.AppConfig "Sanitized configuration"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /api/v1/config [get]
+func (h *Handler) ConfigHandler() http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ sanitizedConfig, err := h.cfg.SanitizedCopy()
+ if err != nil {
+ writeError(w, http.StatusInternalServerError, "sanitized_copy_error", "Failed to get sanitized config")
+ return
+ }
+ writeJSON(w, http.StatusOK, sanitizedConfig)
+ }
+}
diff --git a/pkg/server/routes.go b/pkg/server/routes.go
index 618dbc0..b159968 100644
--- a/pkg/server/routes.go
+++ b/pkg/server/routes.go
@@ -42,7 +42,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
}
- r.Get("/version", handler.VersionHandler()) // Get server version
+ r.Get("/version", handler.VersionHandler())
+
+ r.Get("/config", handler.ConfigHandler())
// Backend-specific endpoints
r.Route("/backends", func(r chi.Router) {
diff --git a/webui/src/__tests__/App.test.tsx b/webui/src/__tests__/App.test.tsx
index 7497c31..eb212a4 100644
--- a/webui/src/__tests__/App.test.tsx
+++ b/webui/src/__tests__/App.test.tsx
@@ -4,8 +4,7 @@ import userEvent from '@testing-library/user-event'
import App from '@/App'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
-import type { Instance } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import {BackendType, type Instance } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -49,6 +48,21 @@ vi.mock('@/lib/healthService', () => ({
})),
}))
+// Mock the ConfigContext helper hooks
+vi.mock('@/hooks/useConfig', () => ({
+ useInstanceDefaults: () => ({
+ autoRestart: true,
+ maxRestarts: 3,
+ restartDelay: 5,
+ onDemandStart: false,
+ }),
+ useBackendSettings: () => ({
+ command: '/usr/bin/llama-server',
+ dockerEnabled: false,
+ dockerImage: '',
+ }),
+}))
+
function renderApp() {
return render(
@@ -119,8 +133,12 @@ describe('App Component - Critical Business Logic Only', () => {
// Verify correct API call
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
- auto_restart: true, // Default value
- backend_type: BackendType.LLAMA_CPP
+ auto_restart: true, // Default value from config
+ backend_type: BackendType.LLAMA_CPP,
+ docker_enabled: false,
+ max_restarts: 3,
+ on_demand_start: false,
+ restart_delay: 5
})
})
diff --git a/webui/src/components/InstanceCard.tsx b/webui/src/components/InstanceCard.tsx
index 657f1ab..d889655 100644
--- a/webui/src/components/InstanceCard.tsx
+++ b/webui/src/components/InstanceCard.tsx
@@ -59,12 +59,8 @@ function InstanceCard({
// Fetch the most up-to-date instance data from the backend
const instanceData = await instancesApi.get(instance.name);
- // Remove docker_enabled as it's a computed field, not persisted to disk
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
- const { docker_enabled, ...persistedData } = instanceData;
-
// Convert to JSON string with pretty formatting (matching backend format)
- const jsonString = JSON.stringify(persistedData, null, 2);
+ const jsonString = JSON.stringify(instanceData, null, 2);
// Create a blob and download link
const blob = new Blob([jsonString], { type: "application/json" });
@@ -101,7 +97,7 @@ function InstanceCard({
{/* Badges row */}
-
+
{running && }
diff --git a/webui/src/components/InstanceDialog.tsx b/webui/src/components/InstanceDialog.tsx
index 25d48a3..638a45a 100644
--- a/webui/src/components/InstanceDialog.tsx
+++ b/webui/src/components/InstanceDialog.tsx
@@ -14,6 +14,7 @@ import ParseCommandDialog from "@/components/ParseCommandDialog";
import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";
import { Upload } from "lucide-react";
+import { useInstanceDefaults, useBackendSettings } from "@/hooks/useConfig";
interface InstanceDialogProps {
open: boolean;
@@ -29,6 +30,7 @@ const InstanceDialog: React.FC = ({
instance,
}) => {
const isEditing = !!instance;
+ const instanceDefaults = useInstanceDefaults();
const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState({});
@@ -36,6 +38,10 @@ const InstanceDialog: React.FC = ({
const [showParseDialog, setShowParseDialog] = useState(false);
const fileInputRef = useRef(null);
+ // Get backend settings for all backends (we'll use this to update docker_enabled on backend type change)
+ const llamaCppSettings = useBackendSettings(BackendType.LLAMA_CPP);
+ const vllmSettings = useBackendSettings(BackendType.VLLM);
+ const mlxSettings = useBackendSettings(BackendType.MLX_LM);
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -45,25 +51,40 @@ const InstanceDialog: React.FC = ({
setInstanceName(instance.name);
setFormData(instance.options || {});
} else {
- // Reset form for new instance
+ // Reset form for new instance with defaults from config
setInstanceName("");
setFormData({
- auto_restart: true, // Default value
+ auto_restart: instanceDefaults?.autoRestart ?? true,
+ max_restarts: instanceDefaults?.maxRestarts,
+ restart_delay: instanceDefaults?.restartDelay,
+ on_demand_start: instanceDefaults?.onDemandStart,
backend_type: BackendType.LLAMA_CPP, // Default backend type
+ docker_enabled: llamaCppSettings?.dockerEnabled ?? false,
backend_options: {},
});
}
setNameError(""); // Reset any name errors
}
+ // eslint-disable-next-line react-hooks/exhaustive-deps
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: unknown) => {
setFormData((prev) => {
- // If backend_type is changing, clear backend_options
+ // If backend_type is changing, update docker_enabled default and clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
+ let dockerEnabled = false;
+ if (value === BackendType.LLAMA_CPP) {
+ dockerEnabled = llamaCppSettings?.dockerEnabled ?? false;
+ } else if (value === BackendType.VLLM) {
+ dockerEnabled = vllmSettings?.dockerEnabled ?? false;
+ } else if (value === BackendType.MLX_LM) {
+ dockerEnabled = mlxSettings?.dockerEnabled ?? false;
+ }
+
return {
...prev,
backend_type: value as CreateInstanceOptions['backend_type'],
+ docker_enabled: dockerEnabled,
backend_options: {}, // Clear backend options when backend type changes
};
}
@@ -106,6 +127,14 @@ const InstanceDialog: React.FC = ({
return;
}
+ // Validate docker_enabled and command_override relationship
+ if (formData.backend_type !== BackendType.MLX_LM) {
+ if (formData.docker_enabled === true && formData.command_override) {
+ setNameError("Command override cannot be set when Docker is enabled");
+ return;
+ }
+ }
+
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {} as CreateInstanceOptions;
Object.entries(formData).forEach(([key, value]) => {
diff --git a/webui/src/components/__tests__/InstanceModal.test.tsx b/webui/src/components/__tests__/InstanceModal.test.tsx
index a931ae9..3b84d25 100644
--- a/webui/src/components/__tests__/InstanceModal.test.tsx
+++ b/webui/src/components/__tests__/InstanceModal.test.tsx
@@ -2,8 +2,22 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
-import type { Instance } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import { BackendType, type Instance } from '@/types/instance'
+
+// Mock the ConfigContext helper hooks
+vi.mock('@/hooks/useConfig', () => ({
+ useInstanceDefaults: () => ({
+ autoRestart: true,
+ maxRestarts: 3,
+ restartDelay: 5,
+ onDemandStart: false,
+ }),
+ useBackendSettings: () => ({
+ command: '/usr/bin/llama-server',
+ dockerEnabled: false,
+ dockerImage: '',
+ }),
+}))
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -75,7 +89,7 @@ afterEach(() => {
it('submits form with correct data structure', async () => {
const user = userEvent.setup()
-
+
render(
{
// Fill required name
await user.type(screen.getByLabelText(/Instance Name/), 'my-instance')
-
+
// Submit form
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
- auto_restart: true, // Default value
- backend_type: BackendType.LLAMA_CPP
+ auto_restart: true, // Default value from config
+ backend_type: BackendType.LLAMA_CPP,
+ docker_enabled: false,
+ max_restarts: 3,
+ on_demand_start: false,
+ restart_delay: 5
})
})
@@ -253,7 +271,7 @@ afterEach(() => {
it('includes restart options in form submission when enabled', async () => {
const user = userEvent.setup()
-
+
render(
{
// Fill form
await user.type(screen.getByLabelText(/Instance Name/), 'test-instance')
-
- // Set restart options
- await user.type(screen.getByLabelText(/Max Restarts/), '5')
- await user.type(screen.getByLabelText(/Restart Delay/), '10')
+
+ // Clear default values and set new restart options
+ const maxRestartsInput = screen.getByLabelText(/Max Restarts/)
+ const restartDelayInput = screen.getByLabelText(/Restart Delay/)
+ await user.clear(maxRestartsInput)
+ await user.type(maxRestartsInput, '5')
+ await user.clear(restartDelayInput)
+ await user.type(restartDelayInput, '10')
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
+ docker_enabled: false,
max_restarts: 5,
+ on_demand_start: false,
restart_delay: 10
})
})
@@ -284,7 +308,7 @@ afterEach(() => {
describe('Form Data Handling', () => {
it('cleans up undefined values before submission', async () => {
const user = userEvent.setup()
-
+
render(
{
await user.click(screen.getByTestId('dialog-save-button'))
- // Should only include non-empty values
+ // Should include default values from config
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
- auto_restart: true, // Only this default value should be included
- backend_type: BackendType.LLAMA_CPP
+ auto_restart: true,
+ backend_type: BackendType.LLAMA_CPP,
+ docker_enabled: false,
+ max_restarts: 3,
+ on_demand_start: false,
+ restart_delay: 5
})
})
it('handles numeric fields correctly', async () => {
const user = userEvent.setup()
-
+
render(
{
)
await user.type(screen.getByLabelText(/Instance Name/), 'numeric-test')
-
+
// Test GPU layers field (numeric)
const gpuLayersInput = screen.getByLabelText(/GPU Layers/)
await user.type(gpuLayersInput, '15')
@@ -328,6 +356,10 @@ afterEach(() => {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
backend_options: { gpu_layers: 15 }, // Should be number, not string
+ docker_enabled: false,
+ max_restarts: 3,
+ on_demand_start: false,
+ restart_delay: 5
})
})
})
diff --git a/webui/src/components/form/EnvVarsInput.tsx b/webui/src/components/form/EnvVarsInput.tsx
index 476a98a..5c77433 100644
--- a/webui/src/components/form/EnvVarsInput.tsx
+++ b/webui/src/components/form/EnvVarsInput.tsx
@@ -18,7 +18,6 @@ const EnvVarsInput: React.FC = (props) => {
keyPlaceholder="Variable name"
valuePlaceholder="Variable value"
addButtonText="Add Variable"
- helperText="Environment variables that will be passed to the backend process"
allowEmptyValues={false}
/>
)
diff --git a/webui/src/components/instance/BackendConfigurationCard.tsx b/webui/src/components/instance/BackendConfigurationCard.tsx
index 799ea2b..00fa371 100644
--- a/webui/src/components/instance/BackendConfigurationCard.tsx
+++ b/webui/src/components/instance/BackendConfigurationCard.tsx
@@ -6,6 +6,7 @@ import { Terminal, ChevronDown, ChevronRight } from 'lucide-react'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
import SelectInput from '@/components/form/SelectInput'
+import ExecutionContextSection from '@/components/instance/ExecutionContextSection'
interface BackendConfigurationCardProps {
formData: CreateInstanceOptions
@@ -59,6 +60,12 @@ const BackendConfigurationCard: React.FC = ({
+ {/* Execution Context Section */}
+
+
{/* Basic Backend Options */}
{basicBackendFields.length > 0 && (
diff --git a/webui/src/components/instance/ExecutionContextSection.tsx b/webui/src/components/instance/ExecutionContextSection.tsx
new file mode 100644
index 0000000..cdd73cd
--- /dev/null
+++ b/webui/src/components/instance/ExecutionContextSection.tsx
@@ -0,0 +1,76 @@
+import React from 'react'
+import { BackendType, type CreateInstanceOptions } from '@/types/instance'
+import CheckboxInput from '@/components/form/CheckboxInput'
+import TextInput from '@/components/form/TextInput'
+import EnvVarsInput from '@/components/form/EnvVarsInput'
+import { useBackendSettings } from '@/hooks/useConfig'
+
+interface ExecutionContextSectionProps {
+ formData: CreateInstanceOptions
+ onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
+}
+
+const ExecutionContextSection: React.FC
= ({
+ formData,
+ onChange
+}) => {
+ const backendSettings = useBackendSettings(formData.backend_type)
+
+ // Get placeholder for command override based on backend type and config
+ const getCommandPlaceholder = () => {
+ if (backendSettings?.command) {
+ return backendSettings.command
+ }
+
+ // Fallback placeholders if config is not loaded
+ switch (formData.backend_type) {
+ case BackendType.LLAMA_CPP:
+ return "llama-server"
+ case BackendType.VLLM:
+ return "vllm"
+ case BackendType.MLX_LM:
+ return "mlx_lm.server"
+ default:
+ return ""
+ }
+ }
+
+ return (
+
+
Execution Context
+
+ {/* Docker Mode Toggle - only for backends that support Docker */}
+ {formData.backend_type !== BackendType.MLX_LM && (
+ onChange('docker_enabled', value)}
+ description="Run backend in Docker container"
+ />
+ )}
+
+ {/* Command Override - only shown when Docker is disabled or backend is MLX */}
+ {(formData.backend_type === BackendType.MLX_LM || formData.docker_enabled !== true) && (
+ onChange('command_override', value)}
+ placeholder={getCommandPlaceholder()}
+ description="Custom path to backend executable (leave empty to use config default)"
+ />
+ )}
+
+ onChange('environment', value)}
+ description="Custom environment variables for the instance"
+ />
+
+ )
+}
+
+export default ExecutionContextSection
diff --git a/webui/src/components/instance/InstanceSettingsCard.tsx b/webui/src/components/instance/InstanceSettingsCard.tsx
index 7b853cb..b5f945c 100644
--- a/webui/src/components/instance/InstanceSettingsCard.tsx
+++ b/webui/src/components/instance/InstanceSettingsCard.tsx
@@ -1,12 +1,11 @@
import React, { useState, useEffect } from 'react'
-import type { CreateInstanceOptions } from '@/types/instance'
+import { type CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
-import EnvVarsInput from '@/components/form/EnvVarsInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
@@ -131,14 +130,6 @@ const InstanceSettingsCard: React.FC = ({
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
-
- onChange('environment', value)}
- description="Custom environment variables for the instance"
- />
diff --git a/webui/src/contexts/AuthContext.tsx b/webui/src/contexts/AuthContext.tsx
index d3b8a12..edeb501 100644
--- a/webui/src/contexts/AuthContext.tsx
+++ b/webui/src/contexts/AuthContext.tsx
@@ -147,16 +147,3 @@ export const useAuth = (): AuthContextType => {
}
return context
}
-
-// Helper hook for getting auth headers
-export const useAuthHeaders = (): HeadersInit => {
- const { apiKey, isAuthenticated } = useAuth()
-
- if (!isAuthenticated || !apiKey) {
- return {}
- }
-
- return {
- 'Authorization': `Bearer ${apiKey}`
- }
-}
diff --git a/webui/src/contexts/ConfigContext.tsx b/webui/src/contexts/ConfigContext.tsx
new file mode 100644
index 0000000..a42e9b1
--- /dev/null
+++ b/webui/src/contexts/ConfigContext.tsx
@@ -0,0 +1,62 @@
+import { type ReactNode, createContext, useContext, useEffect, useState, useRef } from 'react'
+import { serverApi } from '@/lib/api'
+import type { AppConfig } from '@/types/config'
+import { useAuth } from './AuthContext'
+
+interface ConfigContextType {
+ config: AppConfig | null
+ isLoading: boolean
+ error: string | null
+}
+
+const ConfigContext = createContext(undefined)
+
+interface ConfigProviderProps {
+ children: ReactNode
+}
+
+export const ConfigProvider = ({ children }: ConfigProviderProps) => {
+ const { isAuthenticated } = useAuth()
+ const [config, setConfig] = useState(null)
+ const [isLoading, setIsLoading] = useState(true)
+ const [error, setError] = useState(null)
+ const loadedRef = useRef(false)
+
+ useEffect(() => {
+ if (!isAuthenticated || loadedRef.current) {
+ setIsLoading(false)
+ return
+ }
+
+ loadedRef.current = true
+
+ const loadConfig = async () => {
+ try {
+ const data = await serverApi.getConfig()
+ setConfig(data)
+ } catch (err) {
+ const errorMessage = err instanceof Error ? err.message : 'Failed to load configuration'
+ setError(errorMessage)
+ console.error('Error loading config:', err)
+ } finally {
+ setIsLoading(false)
+ }
+ }
+
+ void loadConfig()
+ }, [isAuthenticated])
+
+ return (
+
+ {children}
+
+ )
+}
+
+export const useConfig = (): ConfigContextType => {
+ const context = useContext(ConfigContext)
+ if (context === undefined) {
+ throw new Error('useConfig must be used within a ConfigProvider')
+ }
+ return context
+}
diff --git a/webui/src/hooks/useConfig.ts b/webui/src/hooks/useConfig.ts
new file mode 100644
index 0000000..4615be5
--- /dev/null
+++ b/webui/src/hooks/useConfig.ts
@@ -0,0 +1,51 @@
+import { useConfig } from '@/contexts/ConfigContext'
+
+// Helper hook to get instance default values from config
+export const useInstanceDefaults = () => {
+ const { config } = useConfig()
+
+ if (!config || !config.instances) {
+ return null
+ }
+
+ return {
+ autoRestart: config.instances.default_auto_restart,
+ maxRestarts: config.instances.default_max_restarts,
+ restartDelay: config.instances.default_restart_delay,
+ onDemandStart: config.instances.default_on_demand_start,
+ }
+}
+
+// Helper hook to get specific backend settings by backend type
+export const useBackendSettings = (backendType: string | undefined) => {
+ const { config } = useConfig()
+
+ if (!config || !config.backends || !backendType) {
+ return null
+ }
+
+ // Map backend type to config key
+ const backendKey = backendType === 'llama_cpp'
+ ? 'llama-cpp'
+ : backendType === 'mlx_lm'
+ ? 'mlx'
+ : backendType === 'vllm'
+ ? 'vllm'
+ : null
+
+ if (!backendKey) {
+ return null
+ }
+
+ const backendConfig = config.backends[backendKey as keyof typeof config.backends]
+
+ if (!backendConfig) {
+ return null
+ }
+
+ return {
+ command: backendConfig.command || '',
+ dockerEnabled: backendConfig.docker?.enabled ?? false,
+ dockerImage: backendConfig.docker?.image || '',
+ }
+}
diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts
index ef03408..2ac679c 100644
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
+import type { AppConfig } from "@/types/config";
import { handleApiError } from "./errorUtils";
// Adding baseURI as a prefix to support being served behind a subpath
@@ -73,6 +74,9 @@ export const serverApi = {
// GET /backends/llama-cpp/devices
getDevices: () => apiCall("/backends/llama-cpp/devices", {}, "text"),
+
+ // GET /config
+  getConfig: () => apiCall<AppConfig>("/config"),
};
// Backend API functions
diff --git a/webui/src/main.tsx b/webui/src/main.tsx
index ab046c2..6418a1e 100644
--- a/webui/src/main.tsx
+++ b/webui/src/main.tsx
@@ -4,13 +4,16 @@ import App from './App'
import { InstancesProvider } from './contexts/InstancesContext'
import './index.css'
import { AuthProvider } from './contexts/AuthContext'
+import { ConfigProvider } from './contexts/ConfigContext'
ReactDOM.createRoot(document.getElementById('root')!).render(
-    <InstancesProvider>
-      <App />
-    </InstancesProvider>
+    <ConfigProvider>
+      <InstancesProvider>
+        <App />
+      </InstancesProvider>
+    </ConfigProvider>
   </AuthProvider>,
)
\ No newline at end of file
diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts
index 3cbf523..b7530e1 100644
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -36,6 +36,10 @@ export const CreateInstanceOptionsSchema = z.object({
// Environment variables
environment: z.record(z.string(), z.string()).optional(),
+ // Execution context overrides
+ docker_enabled: z.boolean().optional(),
+ command_override: z.string().optional(),
+
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
diff --git a/webui/src/types/config.ts b/webui/src/types/config.ts
new file mode 100644
index 0000000..21f15fa
--- /dev/null
+++ b/webui/src/types/config.ts
@@ -0,0 +1,70 @@
+export interface BackendSettings {
+ command: string
+ args: string[]
+  environment?: Record<string, string>
+ docker?: DockerSettings
+  response_headers?: Record<string, string>
+}
+
+export interface DockerSettings {
+ enabled: boolean
+ image: string
+ args: string[]
+  environment?: Record<string, string>
+}
+
+export interface BackendConfig {
+ 'llama-cpp': BackendSettings
+ vllm: BackendSettings
+ mlx: BackendSettings
+}
+
+export interface ServerConfig {
+ host: string
+ port: number
+ allowed_origins: string[]
+ allowed_headers: string[]
+ enable_swagger: boolean
+  response_headers?: Record<string, string>
+}
+
+export interface InstancesConfig {
+ port_range: [number, number]
+ data_dir: string
+ configs_dir: string
+ logs_dir: string
+ auto_create_dirs: boolean
+ max_instances: number
+ max_running_instances: number
+ enable_lru_eviction: boolean
+ default_auto_restart: boolean
+ default_max_restarts: number
+ default_restart_delay: number
+ default_on_demand_start: boolean
+ on_demand_start_timeout: number
+ timeout_check_interval: number
+}
+
+export interface AuthConfig {
+ require_inference_auth: boolean
+ inference_keys: string[] // Will be empty in sanitized response
+ require_management_auth: boolean
+ management_keys: string[] // Will be empty in sanitized response
+}
+
+export interface NodeConfig {
+ address: string
+ api_key: string // Will be empty in sanitized response
+}
+
+export interface AppConfig {
+ server: ServerConfig
+ backends: BackendConfig
+ instances: InstancesConfig
+ auth: AuthConfig
+ local_node: string
+  nodes: Record<string, NodeConfig>
+ version?: string
+ commit_hash?: string
+ build_time?: string
+}
diff --git a/webui/src/types/instance.ts b/webui/src/types/instance.ts
index e243b72..0977233 100644
--- a/webui/src/types/instance.ts
+++ b/webui/src/types/instance.ts
@@ -27,5 +27,4 @@ export interface Instance {
name: string;
status: InstanceStatus;
options?: CreateInstanceOptions;
- docker_enabled?: boolean; // indicates backend is running via Docker
}
\ No newline at end of file