mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-07 09:34:22 +00:00
Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 12bbf34236 | |||
| 9a7255a52d | |||
| 97a7c9a4e3 | |||
| fa9335663a | |||
| d092518114 | |||
| ffa0a0c161 | |||
| 1fbf809a2d | |||
| c984d95723 | |||
| 50e1355205 | |||
| 7994fd05b3 | |||
|
|
f496a28f04 | ||
| f9371e876d | |||
|
|
3a979da815 | ||
| a824f066ec | |||
| 2cd9d374a7 | |||
| 031d6c7017 | |||
| 282344af23 | |||
| bc9e0535c3 | |||
| 2d925b473d | |||
| ba0f877185 | |||
| 840a7bc650 | |||
| 76ac93bedc | |||
| 72d2a601c8 | |||
| 9a56660f68 |
58
README.md
58
README.md
@@ -14,6 +14,7 @@
|
|||||||
### 🔗 Universal Compatibility
|
### 🔗 Universal Compatibility
|
||||||
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
|
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
|
||||||
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
|
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
|
||||||
|
- **Docker Support**: Run backends in containers
|
||||||
|
|
||||||
### 🌐 User-Friendly Interface
|
### 🌐 User-Friendly Interface
|
||||||
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
|
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
|
||||||
@@ -22,6 +23,7 @@
|
|||||||
### ⚡ Smart Operations
|
### ⚡ Smart Operations
|
||||||
- **Instance Monitoring**: Health checks, auto-restart, log management
|
- **Instance Monitoring**: Health checks, auto-restart, log management
|
||||||
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
||||||
|
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -32,6 +34,7 @@
|
|||||||
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
|
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
|
||||||
# For MLX on macOS: pip install mlx-lm
|
# For MLX on macOS: pip install mlx-lm
|
||||||
# For vLLM: pip install vllm
|
# For vLLM: pip install vllm
|
||||||
|
# Or use Docker - no local installation required
|
||||||
|
|
||||||
# 2. Download and run llamactl
|
# 2. Download and run llamactl
|
||||||
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
|
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
|
||||||
@@ -50,7 +53,8 @@ llamactl
|
|||||||
2. Click "Create Instance"
|
2. Click "Create Instance"
|
||||||
3. Choose backend type (llama.cpp, MLX, or vLLM)
|
3. Choose backend type (llama.cpp, MLX, or vLLM)
|
||||||
4. Set model path and backend-specific options
|
4. Set model path and backend-specific options
|
||||||
5. Start or stop the instance
|
5. Configure environment variables if needed (optional)
|
||||||
|
6. Start or stop the instance
|
||||||
|
|
||||||
### Or use the REST API:
|
### Or use the REST API:
|
||||||
```bash
|
```bash
|
||||||
@@ -64,10 +68,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
|
|||||||
-H "Authorization: Bearer your-key" \
|
-H "Authorization: Bearer your-key" \
|
||||||
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
|
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
|
||||||
|
|
||||||
# Create vLLM instance
|
# Create vLLM instance with environment variables
|
||||||
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
|
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
|
||||||
-H "Authorization: Bearer your-key" \
|
-H "Authorization: Bearer your-key" \
|
||||||
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
|
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
|
||||||
|
|
||||||
# Use with OpenAI SDK
|
# Use with OpenAI SDK
|
||||||
curl -X POST localhost:8080/v1/chat/completions \
|
curl -X POST localhost:8080/v1/chat/completions \
|
||||||
@@ -112,6 +116,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)
|
|||||||
brew install llama.cpp
|
brew install llama.cpp
|
||||||
|
|
||||||
# Or build from source - see llama.cpp docs
|
# Or build from source - see llama.cpp docs
|
||||||
|
# Or use Docker - no local installation required
|
||||||
```
|
```
|
||||||
|
|
||||||
**For MLX backend (macOS only):**
|
**For MLX backend (macOS only):**
|
||||||
@@ -139,9 +144,27 @@ python -m venv vllm-env
|
|||||||
source vllm-env/bin/activate
|
source vllm-env/bin/activate
|
||||||
pip install vllm
|
pip install vllm
|
||||||
|
|
||||||
# For production deployments, consider container-based installation
|
# Or use Docker - no local installation required
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Docker Support
|
||||||
|
|
||||||
|
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
backends:
|
||||||
|
llama-cpp:
|
||||||
|
docker:
|
||||||
|
enabled: true
|
||||||
|
vllm:
|
||||||
|
docker:
|
||||||
|
enabled: true
|
||||||
|
```
|
||||||
|
|
||||||
|
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
|
||||||
|
|
||||||
|
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
llamactl works out of the box with sensible defaults.
|
llamactl works out of the box with sensible defaults.
|
||||||
@@ -154,9 +177,30 @@ server:
|
|||||||
enable_swagger: false # Enable Swagger UI for API docs
|
enable_swagger: false # Enable Swagger UI for API docs
|
||||||
|
|
||||||
backends:
|
backends:
|
||||||
llama_executable: llama-server # Path to llama-server executable
|
llama-cpp:
|
||||||
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
|
command: "llama-server"
|
||||||
vllm_executable: vllm # Path to vllm executable
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false
|
||||||
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
|
environment: {} # Environment variables for the container
|
||||||
|
|
||||||
|
vllm:
|
||||||
|
command: "vllm"
|
||||||
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false
|
||||||
|
image: "vllm/vllm-openai:latest"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
|
environment: {} # Environment variables for the container
|
||||||
|
|
||||||
|
mlx:
|
||||||
|
command: "mlx_lm.server"
|
||||||
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
|
||||||
instances:
|
instances:
|
||||||
port_range: [8000, 9000] # Port range for instances
|
port_range: [8000, 9000] # Port range for instances
|
||||||
|
|||||||
@@ -20,9 +20,30 @@ server:
|
|||||||
enable_swagger: false # Enable Swagger UI for API docs
|
enable_swagger: false # Enable Swagger UI for API docs
|
||||||
|
|
||||||
backends:
|
backends:
|
||||||
llama_executable: llama-server # Path to llama-server executable
|
llama-cpp:
|
||||||
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
|
command: "llama-server"
|
||||||
vllm_executable: vllm # Path to vllm executable
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false
|
||||||
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
|
environment: {}
|
||||||
|
|
||||||
|
vllm:
|
||||||
|
command: "vllm"
|
||||||
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false
|
||||||
|
image: "vllm/vllm-openai:latest"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
|
environment: {}
|
||||||
|
|
||||||
|
mlx:
|
||||||
|
command: "mlx_lm.server"
|
||||||
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
|
||||||
instances:
|
instances:
|
||||||
port_range: [8000, 9000] # Port range for instances
|
port_range: [8000, 9000] # Port range for instances
|
||||||
@@ -90,18 +111,69 @@ server:
|
|||||||
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
|
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
|
||||||
|
|
||||||
### Backend Configuration
|
### Backend Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
backends:
|
backends:
|
||||||
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
|
llama-cpp:
|
||||||
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
|
command: "llama-server"
|
||||||
vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false # Enable Docker runtime (default: false)
|
||||||
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
|
environment: {}
|
||||||
|
|
||||||
|
vllm:
|
||||||
|
command: "vllm"
|
||||||
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
docker:
|
||||||
|
enabled: false
|
||||||
|
image: "vllm/vllm-openai:latest"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
|
environment: {}
|
||||||
|
|
||||||
|
mlx:
|
||||||
|
command: "mlx_lm.server"
|
||||||
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
# MLX does not support Docker
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Backend Configuration Fields:**
|
||||||
|
- `command`: Executable name/path for the backend
|
||||||
|
- `args`: Default arguments prepended to all instances
|
||||||
|
- `environment`: Environment variables for the backend process (optional)
|
||||||
|
- `docker`: Docker-specific configuration (optional)
|
||||||
|
- `enabled`: Boolean flag to enable Docker runtime
|
||||||
|
- `image`: Docker image to use
|
||||||
|
- `args`: Additional arguments passed to `docker run`
|
||||||
|
- `environment`: Environment variables for the container (optional)
|
||||||
|
|
||||||
**Environment Variables:**
|
**Environment Variables:**
|
||||||
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
|
|
||||||
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
|
**LlamaCpp Backend:**
|
||||||
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
|
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
|
||||||
|
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
|
||||||
|
**VLLM Backend:**
|
||||||
|
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
|
||||||
|
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
|
||||||
|
**MLX Backend:**
|
||||||
|
- `LLAMACTL_MLX_COMMAND` - MLX executable command
|
||||||
|
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
|
||||||
### Instance Configuration
|
### Instance Configuration
|
||||||
|
|
||||||
|
|||||||
@@ -88,6 +88,21 @@ Here are basic example configurations for each backend:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Docker Support
|
||||||
|
|
||||||
|
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
backends:
|
||||||
|
vllm:
|
||||||
|
command: "vllm"
|
||||||
|
args: ["serve"]
|
||||||
|
docker:
|
||||||
|
enabled: true
|
||||||
|
image: "vllm/vllm-openai:latest"
|
||||||
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
|
```
|
||||||
|
|
||||||
## Using the API
|
## Using the API
|
||||||
|
|
||||||
You can also manage instances via the REST API:
|
You can also manage instances via the REST API:
|
||||||
|
|||||||
@@ -116,7 +116,18 @@ Create and start a new instance.
|
|||||||
POST /api/v1/instances/{name}
|
POST /api/v1/instances/{name}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
|
**Request Body:** JSON object with instance configuration. Common fields include:
|
||||||
|
|
||||||
|
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
|
||||||
|
- `backend_options`: Backend-specific configuration
|
||||||
|
- `auto_restart`: Enable automatic restart on failure
|
||||||
|
- `max_restarts`: Maximum restart attempts
|
||||||
|
- `restart_delay`: Delay between restarts in seconds
|
||||||
|
- `on_demand_start`: Start instance when receiving requests
|
||||||
|
- `idle_timeout`: Idle timeout in minutes
|
||||||
|
- `environment`: Environment variables as key-value pairs
|
||||||
|
|
||||||
|
See [Managing Instances](managing-instances.md) for complete configuration options.
|
||||||
|
|
||||||
**Response:**
|
**Response:**
|
||||||
```json
|
```json
|
||||||
@@ -354,7 +365,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
|
|||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer your-api-key" \
|
-H "Authorization: Bearer your-api-key" \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "/models/llama-2-7b.gguf"
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-2-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"environment": {
|
||||||
|
"CUDA_VISIBLE_DEVICES": "0",
|
||||||
|
"OMP_NUM_THREADS": "8"
|
||||||
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
# Check instance status
|
# Check instance status
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ Each instance is displayed as a card showing:
|
|||||||
- **Restart Delay**: Delay in seconds between restart attempts
|
- **Restart Delay**: Delay in seconds between restart attempts
|
||||||
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
||||||
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
||||||
|
- **Environment Variables**: Set custom environment variables for the instance process
|
||||||
6. Configure backend-specific options:
|
6. Configure backend-specific options:
|
||||||
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
||||||
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
||||||
@@ -101,7 +102,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
|
|||||||
"gpu_memory_utilization": 0.9
|
"gpu_memory_utilization": 0.9
|
||||||
},
|
},
|
||||||
"auto_restart": true,
|
"auto_restart": true,
|
||||||
"on_demand_start": true
|
"on_demand_start": true,
|
||||||
|
"environment": {
|
||||||
|
"CUDA_VISIBLE_DEVICES": "0,1",
|
||||||
|
"NCCL_DEBUG": "INFO",
|
||||||
|
"PYTHONPATH": "/custom/path"
|
||||||
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
# Create llama.cpp instance with HuggingFace model
|
# Create llama.cpp instance with HuggingFace model
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package backends
|
package backends
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"llamactl/pkg/config"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
|
|||||||
|
|
||||||
return args
|
return args
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
|
||||||
|
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
|
||||||
|
// Start with configured Docker arguments (should include "run", "--rm", etc.)
|
||||||
|
dockerArgs := make([]string, len(backendConfig.Docker.Args))
|
||||||
|
copy(dockerArgs, backendConfig.Docker.Args)
|
||||||
|
|
||||||
|
// Add environment variables
|
||||||
|
for key, value := range backendConfig.Docker.Environment {
|
||||||
|
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add image name
|
||||||
|
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
|
||||||
|
|
||||||
|
// Add backend args and instance args
|
||||||
|
dockerArgs = append(dockerArgs, backendConfig.Args...)
|
||||||
|
dockerArgs = append(dockerArgs, instanceArgs...)
|
||||||
|
|
||||||
|
return "docker", dockerArgs, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,6 +7,28 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
|
||||||
|
// Used for both parsing (with underscores) and building (with dashes)
|
||||||
|
var multiValuedFlags = map[string]bool{
|
||||||
|
// Parsing keys (with underscores)
|
||||||
|
"override_tensor": true,
|
||||||
|
"override_kv": true,
|
||||||
|
"lora": true,
|
||||||
|
"lora_scaled": true,
|
||||||
|
"control_vector": true,
|
||||||
|
"control_vector_scaled": true,
|
||||||
|
"dry_sequence_breaker": true,
|
||||||
|
"logit_bias": true,
|
||||||
|
// Building keys (with dashes)
|
||||||
|
"override-tensor": true,
|
||||||
|
"override-kv": true,
|
||||||
|
"lora-scaled": true,
|
||||||
|
"control-vector": true,
|
||||||
|
"control-vector-scaled": true,
|
||||||
|
"dry-sequence-breaker": true,
|
||||||
|
"logit-bias": true,
|
||||||
|
}
|
||||||
|
|
||||||
type LlamaServerOptions struct {
|
type LlamaServerOptions struct {
|
||||||
// Common params
|
// Common params
|
||||||
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
|
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
|
||||||
@@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
|
|||||||
// BuildCommandArgs converts InstanceOptions to command line arguments
|
// BuildCommandArgs converts InstanceOptions to command line arguments
|
||||||
func (o *LlamaServerOptions) BuildCommandArgs() []string {
|
func (o *LlamaServerOptions) BuildCommandArgs() []string {
|
||||||
// Llama uses multiple flags for arrays by default (not comma-separated)
|
// Llama uses multiple flags for arrays by default (not comma-separated)
|
||||||
multipleFlags := map[string]bool{
|
// Use package-level multiValuedFlags variable
|
||||||
"override-tensor": true,
|
return backends.BuildCommandArgs(o, multiValuedFlags)
|
||||||
"override-kv": true,
|
}
|
||||||
"lora": true,
|
|
||||||
"lora-scaled": true,
|
func (o *LlamaServerOptions) BuildDockerArgs() []string {
|
||||||
"control-vector": true,
|
// For llama, Docker args are the same as normal args
|
||||||
"control-vector-scaled": true,
|
return o.BuildCommandArgs()
|
||||||
"dry-sequence-breaker": true,
|
|
||||||
"logit-bias": true,
|
|
||||||
}
|
|
||||||
return backends.BuildCommandArgs(o, multipleFlags)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
|
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
|
||||||
@@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
|
|||||||
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
|
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
|
||||||
executableNames := []string{"llama-server"}
|
executableNames := []string{"llama-server"}
|
||||||
var subcommandNames []string // Llama has no subcommands
|
var subcommandNames []string // Llama has no subcommands
|
||||||
multiValuedFlags := map[string]bool{
|
// Use package-level multiValuedFlags variable
|
||||||
"override_tensor": true,
|
|
||||||
"override_kv": true,
|
|
||||||
"lora": true,
|
|
||||||
"lora_scaled": true,
|
|
||||||
"control_vector": true,
|
|
||||||
"control_vector_scaled": true,
|
|
||||||
"dry_sequence_breaker": true,
|
|
||||||
"logit_bias": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
var llamaOptions LlamaServerOptions
|
var llamaOptions LlamaServerOptions
|
||||||
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
|
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
|
||||||
|
|||||||
@@ -4,6 +4,15 @@ import (
|
|||||||
"llamactl/pkg/backends"
|
"llamactl/pkg/backends"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
|
||||||
|
var multiValuedFlags = map[string]bool{
|
||||||
|
"api-key": true,
|
||||||
|
"allowed-origins": true,
|
||||||
|
"allowed-methods": true,
|
||||||
|
"allowed-headers": true,
|
||||||
|
"middleware": true,
|
||||||
|
}
|
||||||
|
|
||||||
type VllmServerOptions struct {
|
type VllmServerOptions struct {
|
||||||
// Basic connection options (auto-assigned by llamactl)
|
// Basic connection options (auto-assigned by llamactl)
|
||||||
Host string `json:"host,omitempty"`
|
Host string `json:"host,omitempty"`
|
||||||
@@ -131,30 +140,32 @@ type VllmServerOptions struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// BuildCommandArgs converts VllmServerOptions to command line arguments
|
// BuildCommandArgs converts VllmServerOptions to command line arguments
|
||||||
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
|
// For vLLM native, model is a positional argument after "serve"
|
||||||
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
|
|
||||||
func (o *VllmServerOptions) BuildCommandArgs() []string {
|
func (o *VllmServerOptions) BuildCommandArgs() []string {
|
||||||
var args []string
|
var args []string
|
||||||
|
|
||||||
// Add model as positional argument if specified
|
// Add model as positional argument if specified (for native execution)
|
||||||
if o.Model != "" {
|
if o.Model != "" {
|
||||||
args = append(args, o.Model)
|
args = append(args, o.Model)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a copy of the options without the Model field to avoid including it as --model flag
|
// Create a copy without Model field to avoid --model flag
|
||||||
optionsCopy := *o
|
optionsCopy := *o
|
||||||
optionsCopy.Model = "" // Clear model field so it won't be included as a flag
|
optionsCopy.Model = ""
|
||||||
|
|
||||||
multipleFlags := map[string]bool{
|
// Use package-level multiValuedFlags variable
|
||||||
"api-key": true,
|
|
||||||
"allowed-origins": true,
|
|
||||||
"allowed-methods": true,
|
|
||||||
"allowed-headers": true,
|
|
||||||
"middleware": true,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build the rest of the arguments as flags
|
flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
|
||||||
flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
|
args = append(args, flagArgs...)
|
||||||
|
|
||||||
|
return args
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *VllmServerOptions) BuildDockerArgs() []string {
|
||||||
|
var args []string
|
||||||
|
|
||||||
|
// Use package-level multiValuedFlags variable
|
||||||
|
flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
|
||||||
args = append(args, flagArgs...)
|
args = append(args, flagArgs...)
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
@@ -10,16 +11,27 @@ import (
|
|||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// BackendSettings contains structured backend configuration
|
||||||
|
type BackendSettings struct {
|
||||||
|
Command string `yaml:"command"`
|
||||||
|
Args []string `yaml:"args"`
|
||||||
|
Environment map[string]string `yaml:"environment,omitempty"`
|
||||||
|
Docker *DockerSettings `yaml:"docker,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// DockerSettings contains Docker-specific configuration
|
||||||
|
type DockerSettings struct {
|
||||||
|
Enabled bool `yaml:"enabled"`
|
||||||
|
Image string `yaml:"image"`
|
||||||
|
Args []string `yaml:"args"`
|
||||||
|
Environment map[string]string `yaml:"environment,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// BackendConfig contains backend executable configurations
|
// BackendConfig contains backend executable configurations
|
||||||
type BackendConfig struct {
|
type BackendConfig struct {
|
||||||
// Path to llama-server executable (llama.cpp backend)
|
LlamaCpp BackendSettings `yaml:"llama-cpp"`
|
||||||
LlamaExecutable string `yaml:"llama_executable"`
|
VLLM BackendSettings `yaml:"vllm"`
|
||||||
|
MLX BackendSettings `yaml:"mlx"`
|
||||||
// Path to mlx_lm executable (MLX-LM backend)
|
|
||||||
MLXLMExecutable string `yaml:"mlx_lm_executable"`
|
|
||||||
|
|
||||||
// Path to vllm executable (vLLM backend)
|
|
||||||
VllmExecutable string `yaml:"vllm_executable"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AppConfig represents the configuration for llamactl
|
// AppConfig represents the configuration for llamactl
|
||||||
@@ -123,15 +135,45 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
EnableSwagger: false,
|
EnableSwagger: false,
|
||||||
},
|
},
|
||||||
Backends: BackendConfig{
|
Backends: BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
VllmExecutable: "vllm",
|
Args: []string{},
|
||||||
|
Environment: map[string]string{},
|
||||||
|
Docker: &DockerSettings{
|
||||||
|
Enabled: false,
|
||||||
|
Image: "ghcr.io/ggml-org/llama.cpp:server",
|
||||||
|
Args: []string{
|
||||||
|
"run", "--rm", "--network", "host", "--gpus", "all",
|
||||||
|
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
|
||||||
|
Environment: map[string]string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
VLLM: BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
Docker: &DockerSettings{
|
||||||
|
Enabled: false,
|
||||||
|
Image: "vllm/vllm-openai:latest",
|
||||||
|
Args: []string{
|
||||||
|
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
|
||||||
|
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
|
||||||
|
},
|
||||||
|
Environment: map[string]string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
MLX: BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
// No Docker section for MLX - not supported
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Instances: InstancesConfig{
|
Instances: InstancesConfig{
|
||||||
PortRange: [2]int{8000, 9000},
|
PortRange: [2]int{8000, 9000},
|
||||||
DataDir: getDefaultDataDirectory(),
|
DataDir: getDefaultDataDirectory(),
|
||||||
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
|
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
|
||||||
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
|
// should be relative paths under DataDir if not explicitly set.
|
||||||
|
InstancesDir: "",
|
||||||
|
LogsDir: "",
|
||||||
AutoCreateDirs: true,
|
AutoCreateDirs: true,
|
||||||
MaxInstances: -1, // -1 means unlimited
|
MaxInstances: -1, // -1 means unlimited
|
||||||
MaxRunningInstances: -1, // -1 means unlimited
|
MaxRunningInstances: -1, // -1 means unlimited
|
||||||
@@ -159,6 +201,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
// 3. Override with environment variables
|
// 3. Override with environment variables
|
||||||
loadEnvVars(&cfg)
|
loadEnvVars(&cfg)
|
||||||
|
|
||||||
|
// If InstancesDir or LogsDir is not set, default it to a subdirectory of DataDir
|
||||||
|
if cfg.Instances.InstancesDir == "" {
|
||||||
|
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
|
||||||
|
}
|
||||||
|
if cfg.Instances.LogsDir == "" {
|
||||||
|
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
|
||||||
|
}
|
||||||
|
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,6 +229,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
|
|||||||
if err := yaml.Unmarshal(data, cfg); err != nil {
|
if err := yaml.Unmarshal(data, cfg); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
log.Printf("Read config at %s", path)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -244,15 +295,107 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Backend config
|
// Backend config
|
||||||
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
|
// LlamaCpp backend
|
||||||
cfg.Backends.LlamaExecutable = llamaExec
|
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
|
||||||
|
cfg.Backends.LlamaCpp.Command = llamaCmd
|
||||||
}
|
}
|
||||||
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
|
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
|
||||||
cfg.Backends.MLXLMExecutable = mlxLMExec
|
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
|
||||||
}
|
}
|
||||||
if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
|
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
|
||||||
cfg.Backends.VllmExecutable = vllmExec
|
if cfg.Backends.LlamaCpp.Environment == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
|
||||||
}
|
}
|
||||||
|
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
|
||||||
|
}
|
||||||
|
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
|
||||||
|
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
|
||||||
|
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.LlamaCpp.Docker.Enabled = b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
|
||||||
|
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
|
||||||
|
}
|
||||||
|
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
|
||||||
|
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
|
||||||
|
}
|
||||||
|
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
|
||||||
|
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// vLLM backend
|
||||||
|
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
|
||||||
|
cfg.Backends.VLLM.Command = vllmCmd
|
||||||
|
}
|
||||||
|
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
|
||||||
|
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
|
||||||
|
}
|
||||||
|
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
|
||||||
|
if cfg.Backends.VLLM.Environment == nil {
|
||||||
|
cfg.Backends.VLLM.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
|
||||||
|
}
|
||||||
|
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
|
||||||
|
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
|
||||||
|
if cfg.Backends.VLLM.Docker == nil {
|
||||||
|
cfg.Backends.VLLM.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.VLLM.Docker.Enabled = b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
|
||||||
|
if cfg.Backends.VLLM.Docker == nil {
|
||||||
|
cfg.Backends.VLLM.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
|
||||||
|
}
|
||||||
|
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
|
||||||
|
if cfg.Backends.VLLM.Docker == nil {
|
||||||
|
cfg.Backends.VLLM.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
|
||||||
|
}
|
||||||
|
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
|
||||||
|
if cfg.Backends.VLLM.Docker == nil {
|
||||||
|
cfg.Backends.VLLM.Docker = &DockerSettings{}
|
||||||
|
}
|
||||||
|
if cfg.Backends.VLLM.Docker.Environment == nil {
|
||||||
|
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MLX backend
|
||||||
|
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
|
||||||
|
cfg.Backends.MLX.Command = mlxCmd
|
||||||
|
}
|
||||||
|
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
|
||||||
|
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
|
||||||
|
}
|
||||||
|
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
|
||||||
|
if cfg.Backends.MLX.Environment == nil {
|
||||||
|
cfg.Backends.MLX.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instance defaults
|
||||||
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
|
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
|
||||||
if b, err := strconv.ParseBool(autoRestart); err == nil {
|
if b, err := strconv.ParseBool(autoRestart); err == nil {
|
||||||
cfg.Instances.DefaultAutoRestart = b
|
cfg.Instances.DefaultAutoRestart = b
|
||||||
@@ -325,6 +468,19 @@ func ParsePortRange(s string) [2]int {
|
|||||||
return [2]int{0, 0} // Invalid format
|
return [2]int{0, 0} // Invalid format
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseEnvVars parses environment variables in the format
// "KEY1=value1,KEY2=value2" and stores each pair in envMap.
//
// Every comma-separated entry is trimmed of surrounding whitespace and split
// on its first "=", so a value may itself contain "=". Whitespace around the
// key is removed as well. Entries without "=" or with an empty key are
// skipped, and a later occurrence of a key overwrites an earlier one.
//
// Because "," is the pair separator, a value cannot contain a comma.
// A nil envMap is treated as a no-op (writing to a nil map would panic).
func parseEnvVars(envString string, envMap map[string]string) {
	if envString == "" || envMap == nil {
		return
	}
	for _, envPair := range strings.Split(envString, ",") {
		key, value, found := strings.Cut(strings.TrimSpace(envPair), "=")
		if !found {
			continue
		}
		// "KEY = value" should define KEY, not "KEY ".
		key = strings.TrimSpace(key)
		if key == "" {
			continue
		}
		envMap[key] = value
	}
}
|
||||||
|
|
||||||
// getDefaultDataDirectory returns platform-specific default data directory
|
// getDefaultDataDirectory returns platform-specific default data directory
|
||||||
func getDefaultDataDirectory() string {
|
func getDefaultDataDirectory() string {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
@@ -357,6 +513,10 @@ func getDefaultDataDirectory() string {
|
|||||||
// getDefaultConfigLocations returns platform-specific config file locations
|
// getDefaultConfigLocations returns platform-specific config file locations
|
||||||
func getDefaultConfigLocations() []string {
|
func getDefaultConfigLocations() []string {
|
||||||
var locations []string
|
var locations []string
|
||||||
|
// Use ./llamactl.yaml and ./config.yaml as the default config file
|
||||||
|
locations = append(locations, "llamactl.yaml")
|
||||||
|
locations = append(locations, "config.yaml")
|
||||||
|
|
||||||
homeDir, _ := os.UserHomeDir()
|
homeDir, _ := os.UserHomeDir()
|
||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
@@ -386,3 +546,17 @@ func getDefaultConfigLocations() []string {
|
|||||||
|
|
||||||
return locations
|
return locations
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetBackendSettings resolves backend settings
|
||||||
|
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
|
||||||
|
switch backendType {
|
||||||
|
case "llama-cpp":
|
||||||
|
return bc.LlamaCpp
|
||||||
|
case "vllm":
|
||||||
|
return bc.VLLM
|
||||||
|
case "mlx":
|
||||||
|
return bc.MLX
|
||||||
|
default:
|
||||||
|
return BackendSettings{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
|
|||||||
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
|
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
|
||||||
"LLAMACTL_LOGS_DIR": "/env/logs",
|
"LLAMACTL_LOGS_DIR": "/env/logs",
|
||||||
"LLAMACTL_MAX_INSTANCES": "20",
|
"LLAMACTL_MAX_INSTANCES": "20",
|
||||||
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
|
|
||||||
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
|
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
|
||||||
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
|
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
|
||||||
"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
|
"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
|
||||||
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
|
|||||||
if cfg.Instances.MaxInstances != 20 {
|
if cfg.Instances.MaxInstances != 20 {
|
||||||
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
|
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
|
||||||
}
|
}
|
||||||
if cfg.Backends.LlamaExecutable != "/env/llama-server" {
|
if cfg.Backends.LlamaCpp.Command != "llama-server" {
|
||||||
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
|
t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
|
||||||
}
|
}
|
||||||
if cfg.Instances.DefaultAutoRestart {
|
if cfg.Instances.DefaultAutoRestart {
|
||||||
t.Error("Expected auto restart to be false")
|
t.Error("Expected auto restart to be false")
|
||||||
@@ -349,3 +348,165 @@ server:
|
|||||||
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
|
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
|
||||||
|
bc := &config.BackendConfig{
|
||||||
|
LlamaCpp: config.BackendSettings{
|
||||||
|
Command: "custom-llama",
|
||||||
|
Args: []string{"--verbose"},
|
||||||
|
Docker: &config.DockerSettings{
|
||||||
|
Enabled: true,
|
||||||
|
Image: "custom-llama:latest",
|
||||||
|
Args: []string{"--gpus", "all"},
|
||||||
|
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "custom-vllm",
|
||||||
|
Args: []string{"serve", "--debug"},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "custom-mlx",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test llama-cpp with Docker
|
||||||
|
settings := bc.GetBackendSettings("llama-cpp")
|
||||||
|
if settings.Command != "custom-llama" {
|
||||||
|
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
|
||||||
|
}
|
||||||
|
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
|
||||||
|
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
|
||||||
|
}
|
||||||
|
if settings.Docker == nil || !settings.Docker.Enabled {
|
||||||
|
t.Error("Expected Docker to be enabled")
|
||||||
|
}
|
||||||
|
if settings.Docker.Image != "custom-llama:latest" {
|
||||||
|
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test vLLM without Docker
|
||||||
|
settings = bc.GetBackendSettings("vllm")
|
||||||
|
if settings.Command != "custom-vllm" {
|
||||||
|
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
|
||||||
|
}
|
||||||
|
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
|
||||||
|
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
|
||||||
|
}
|
||||||
|
if settings.Docker != nil && settings.Docker.Enabled {
|
||||||
|
t.Error("Expected Docker to be disabled or nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test MLX
|
||||||
|
settings = bc.GetBackendSettings("mlx")
|
||||||
|
if settings.Command != "custom-mlx" {
|
||||||
|
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
|
||||||
|
bc := &config.BackendConfig{}
|
||||||
|
|
||||||
|
// Test empty llama-cpp
|
||||||
|
settings := bc.GetBackendSettings("llama-cpp")
|
||||||
|
if settings.Command != "" {
|
||||||
|
t.Errorf("Expected empty command, got %q", settings.Command)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test empty vLLM
|
||||||
|
settings = bc.GetBackendSettings("vllm")
|
||||||
|
if settings.Command != "" {
|
||||||
|
t.Errorf("Expected empty command, got %q", settings.Command)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test empty MLX
|
||||||
|
settings = bc.GetBackendSettings("mlx")
|
||||||
|
if settings.Command != "" {
|
||||||
|
t.Errorf("Expected empty command, got %q", settings.Command)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
|
||||||
|
// Test that backend environment variables work correctly
|
||||||
|
envVars := map[string]string{
|
||||||
|
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
|
||||||
|
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
|
||||||
|
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
|
||||||
|
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
|
||||||
|
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
|
||||||
|
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
|
||||||
|
"LLAMACTL_VLLM_COMMAND": "env-vllm",
|
||||||
|
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
|
||||||
|
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
|
||||||
|
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
|
||||||
|
"LLAMACTL_MLX_COMMAND": "env-mlx",
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set env vars and ensure cleanup
|
||||||
|
for key, value := range envVars {
|
||||||
|
os.Setenv(key, value)
|
||||||
|
defer os.Unsetenv(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig("nonexistent-file.yaml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify llama-cpp environment overrides
|
||||||
|
if cfg.Backends.LlamaCpp.Command != "env-llama" {
|
||||||
|
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
|
||||||
|
}
|
||||||
|
expectedArgs := []string{"--verbose", "--threads", "4"}
|
||||||
|
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
|
||||||
|
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
|
||||||
|
}
|
||||||
|
if !cfg.Backends.LlamaCpp.Docker.Enabled {
|
||||||
|
t.Error("Expected llama Docker to be enabled")
|
||||||
|
}
|
||||||
|
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
|
||||||
|
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
|
||||||
|
}
|
||||||
|
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
|
||||||
|
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
|
||||||
|
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
|
||||||
|
}
|
||||||
|
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
|
||||||
|
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
|
||||||
|
}
|
||||||
|
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
|
||||||
|
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify vLLM environment overrides
|
||||||
|
if cfg.Backends.VLLM.Command != "env-vllm" {
|
||||||
|
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
|
||||||
|
}
|
||||||
|
if cfg.Backends.VLLM.Docker.Enabled {
|
||||||
|
t.Error("Expected vLLM Docker to be disabled")
|
||||||
|
}
|
||||||
|
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
|
||||||
|
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify MLX environment overrides
|
||||||
|
if cfg.Backends.MLX.Command != "env-mlx" {
|
||||||
|
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
|
||||||
|
bc := &config.BackendConfig{
|
||||||
|
LlamaCpp: config.BackendSettings{
|
||||||
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test invalid backend type returns empty settings
|
||||||
|
settings := bc.GetBackendSettings("invalid-backend")
|
||||||
|
if settings.Command != "" {
|
||||||
|
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -221,14 +221,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
|
|||||||
i.mu.RLock()
|
i.mu.RLock()
|
||||||
defer i.mu.RUnlock()
|
defer i.mu.RUnlock()
|
||||||
|
|
||||||
|
// Determine if docker is enabled for this instance's backend
|
||||||
|
var dockerEnabled bool
|
||||||
|
if i.options != nil {
|
||||||
|
switch i.options.BackendType {
|
||||||
|
case backends.BackendTypeLlamaCpp:
|
||||||
|
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
|
||||||
|
dockerEnabled = true
|
||||||
|
}
|
||||||
|
case backends.BackendTypeVllm:
|
||||||
|
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
|
||||||
|
dockerEnabled = true
|
||||||
|
}
|
||||||
|
case backends.BackendTypeMlxLm:
|
||||||
|
// MLX does not support docker currently
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Use anonymous struct to avoid recursion
|
// Use anonymous struct to avoid recursion
|
||||||
type Alias Process
|
type Alias Process
|
||||||
return json.Marshal(&struct {
|
return json.Marshal(&struct {
|
||||||
*Alias
|
*Alias
|
||||||
Options *CreateInstanceOptions `json:"options,omitempty"`
|
Options *CreateInstanceOptions `json:"options,omitempty"`
|
||||||
|
DockerEnabled bool `json:"docker_enabled,omitempty"`
|
||||||
}{
|
}{
|
||||||
Alias: (*Alias)(i),
|
Alias: (*Alias)(i),
|
||||||
Options: i.options,
|
Options: i.options,
|
||||||
|
DockerEnabled: dockerEnabled,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,8 +12,18 @@ import (
|
|||||||
|
|
||||||
func TestNewInstance(t *testing.T) {
|
func TestNewInstance(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) {
|
|||||||
|
|
||||||
func TestNewInstance_WithRestartOptions(t *testing.T) {
|
func TestNewInstance_WithRestartOptions(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -112,8 +132,18 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
|
|||||||
|
|
||||||
func TestSetOptions(t *testing.T) {
|
func TestSetOptions(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) {
|
|||||||
|
|
||||||
func TestGetProxy(t *testing.T) {
|
func TestGetProxy(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) {
|
|||||||
|
|
||||||
func TestMarshalJSON(t *testing.T) {
|
func TestMarshalJSON(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
VLLM: config.BackendSettings{
|
||||||
|
Command: "vllm",
|
||||||
|
Args: []string{"serve"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"llamactl/pkg/backends"
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Start starts the llama server instance and returns an error if it fails.
|
// Start starts the llama server instance and returns an error if it fails.
|
||||||
@@ -41,24 +42,14 @@ func (i *Process) Start() error {
|
|||||||
return fmt.Errorf("failed to create log files: %w", err)
|
return fmt.Errorf("failed to create log files: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
args := i.options.BuildCommandArgs()
|
// Build command using backend-specific methods
|
||||||
i.ctx, i.cancel = context.WithCancel(context.Background())
|
cmd, cmdErr := i.buildCommand()
|
||||||
|
if cmdErr != nil {
|
||||||
var executable string
|
return fmt.Errorf("failed to build command: %w", cmdErr)
|
||||||
|
|
||||||
// Get executable from global configuration
|
|
||||||
switch i.options.BackendType {
|
|
||||||
case backends.BackendTypeLlamaCpp:
|
|
||||||
executable = i.globalBackendSettings.LlamaExecutable
|
|
||||||
case backends.BackendTypeMlxLm:
|
|
||||||
executable = i.globalBackendSettings.MLXLMExecutable
|
|
||||||
case backends.BackendTypeVllm:
|
|
||||||
executable = i.globalBackendSettings.VllmExecutable
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
i.cmd = exec.CommandContext(i.ctx, executable, args...)
|
i.ctx, i.cancel = context.WithCancel(context.Background())
|
||||||
|
i.cmd = cmd
|
||||||
|
|
||||||
if runtime.GOOS != "windows" {
|
if runtime.GOOS != "windows" {
|
||||||
setProcAttrs(i.cmd)
|
setProcAttrs(i.cmd)
|
||||||
@@ -372,3 +363,49 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i
|
|||||||
|
|
||||||
return true, maxRestarts, restartDelay
|
return true, maxRestarts, restartDelay
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// buildCommand builds the command to execute using backend-specific logic
|
||||||
|
func (i *Process) buildCommand() (*exec.Cmd, error) {
|
||||||
|
// Get backend configuration
|
||||||
|
backendConfig, err := i.getBackendConfig()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the environment variables
|
||||||
|
env := i.options.BuildEnvironment(backendConfig)
|
||||||
|
|
||||||
|
// Get the command to execute
|
||||||
|
command := i.options.GetCommand(backendConfig)
|
||||||
|
|
||||||
|
// Build command arguments
|
||||||
|
args := i.options.BuildCommandArgs(backendConfig)
|
||||||
|
|
||||||
|
// Create the exec.Cmd
|
||||||
|
cmd := exec.CommandContext(i.ctx, command, args...)
|
||||||
|
cmd.Env = []string{}
|
||||||
|
for k, v := range env {
|
||||||
|
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getBackendConfig resolves the backend configuration for the current instance
|
||||||
|
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
|
||||||
|
var backendTypeStr string
|
||||||
|
|
||||||
|
switch i.options.BackendType {
|
||||||
|
case backends.BackendTypeLlamaCpp:
|
||||||
|
backendTypeStr = "llama-cpp"
|
||||||
|
case backends.BackendTypeMlxLm:
|
||||||
|
backendTypeStr = "mlx"
|
||||||
|
case backends.BackendTypeVllm:
|
||||||
|
backendTypeStr = "vllm"
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
|
||||||
|
}
|
||||||
|
|
||||||
|
settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
|
||||||
|
return &settings, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"llamactl/pkg/backends/vllm"
|
"llamactl/pkg/backends/vllm"
|
||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"log"
|
"log"
|
||||||
|
"maps"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CreateInstanceOptions struct {
|
type CreateInstanceOptions struct {
|
||||||
@@ -20,6 +21,8 @@ type CreateInstanceOptions struct {
|
|||||||
OnDemandStart *bool `json:"on_demand_start,omitempty"`
|
OnDemandStart *bool `json:"on_demand_start,omitempty"`
|
||||||
// Idle timeout
|
// Idle timeout
|
||||||
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
|
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
|
||||||
|
// Environment variables
|
||||||
|
Environment map[string]string `json:"environment,omitempty"`
|
||||||
|
|
||||||
BackendType backends.BackendType `json:"backend_type"`
|
BackendType backends.BackendType `json:"backend_type"`
|
||||||
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
||||||
@@ -188,24 +191,75 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
|
||||||
|
|
||||||
|
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
|
||||||
|
return "docker"
|
||||||
|
}
|
||||||
|
|
||||||
|
return backendConfig.Command
|
||||||
|
}
|
||||||
|
|
||||||
// BuildCommandArgs builds command line arguments for the backend
|
// BuildCommandArgs builds command line arguments for the backend
|
||||||
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
|
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
|
||||||
|
|
||||||
|
var args []string
|
||||||
|
|
||||||
|
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
|
||||||
|
// For Docker, start with Docker args
|
||||||
|
args = append(args, backendConfig.Docker.Args...)
|
||||||
|
args = append(args, backendConfig.Docker.Image)
|
||||||
|
|
||||||
switch c.BackendType {
|
switch c.BackendType {
|
||||||
case backends.BackendTypeLlamaCpp:
|
case backends.BackendTypeLlamaCpp:
|
||||||
if c.LlamaServerOptions != nil {
|
if c.LlamaServerOptions != nil {
|
||||||
return c.LlamaServerOptions.BuildCommandArgs()
|
args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
|
||||||
}
|
}
|
||||||
case backends.BackendTypeMlxLm:
|
case backends.BackendTypeVllm:
|
||||||
if c.MlxServerOptions != nil {
|
if c.VllmServerOptions != nil {
|
||||||
return c.MlxServerOptions.BuildCommandArgs()
|
args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// For native execution, start with backend args
|
||||||
|
args = append(args, backendConfig.Args...)
|
||||||
|
|
||||||
|
switch c.BackendType {
|
||||||
|
case backends.BackendTypeLlamaCpp:
|
||||||
|
if c.LlamaServerOptions != nil {
|
||||||
|
args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
|
||||||
|
}
|
||||||
|
case backends.BackendTypeMlxLm:
|
||||||
|
if c.MlxServerOptions != nil {
|
||||||
|
args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
|
||||||
}
|
}
|
||||||
case backends.BackendTypeVllm:
|
case backends.BackendTypeVllm:
|
||||||
if c.VllmServerOptions != nil {
|
if c.VllmServerOptions != nil {
|
||||||
// Prepend "serve" as first argument
|
|
||||||
args := []string{"serve"}
|
|
||||||
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
|
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return args
|
return args
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return []string{}
|
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
|
||||||
|
env := map[string]string{}
|
||||||
|
|
||||||
|
if backendConfig.Environment != nil {
|
||||||
|
maps.Copy(env, backendConfig.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
|
||||||
|
if backendConfig.Docker.Environment != nil {
|
||||||
|
maps.Copy(env, backendConfig.Docker.Environment)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Environment != nil {
|
||||||
|
maps.Copy(env, c.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
return env
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
|
|||||||
|
|
||||||
func TestUpdateLastRequestTime(t *testing.T) {
|
func TestUpdateLastRequestTime(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) {
|
|||||||
|
|
||||||
func TestShouldTimeout_NotRunning(t *testing.T) {
|
func TestShouldTimeout_NotRunning(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
|
|||||||
|
|
||||||
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
|||||||
|
|
||||||
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
|||||||
|
|
||||||
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
@@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
|||||||
|
|
||||||
func TestTimeoutConfiguration_Validation(t *testing.T) {
|
func TestTimeoutConfiguration_Validation(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
globalSettings := &config.InstancesConfig{
|
globalSettings := &config.InstancesConfig{
|
||||||
|
|||||||
@@ -16,8 +16,12 @@ import (
|
|||||||
|
|
||||||
func TestNewInstanceManager(t *testing.T) {
|
func TestNewInstanceManager(t *testing.T) {
|
||||||
backendConfig := config.BackendConfig{
|
backendConfig := config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg := config.InstancesConfig{
|
cfg := config.InstancesConfig{
|
||||||
@@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) {
|
|||||||
tempDir := t.TempDir()
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
backendConfig := config.BackendConfig{
|
backendConfig := config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg := config.InstancesConfig{
|
cfg := config.InstancesConfig{
|
||||||
@@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) {
|
|||||||
// Helper function to create a test manager with standard config
|
// Helper function to create a test manager with standard config
|
||||||
func createTestManager() manager.InstanceManager {
|
func createTestManager() manager.InstanceManager {
|
||||||
backendConfig := config.BackendConfig{
|
backendConfig := config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg := config.InstancesConfig{
|
cfg := config.InstancesConfig{
|
||||||
|
|||||||
@@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
|
|||||||
|
|
||||||
// Test max instances limit
|
// Test max instances limit
|
||||||
backendConfig := config.BackendConfig{
|
backendConfig := config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
MLX: config.BackendSettings{
|
||||||
|
Command: "mlx_lm.server",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
cfg := config.InstancesConfig{
|
cfg := config.InstancesConfig{
|
||||||
PortRange: [2]int{8000, 9000},
|
PortRange: [2]int{8000, 9000},
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error {
|
|||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
var lruInstance *instance.Process
|
var lruInstance *instance.Process
|
||||||
|
|
||||||
for name, _ := range im.runningInstances {
|
for name := range im.runningInstances {
|
||||||
inst := im.instances[name]
|
inst := im.instances[name]
|
||||||
if inst == nil {
|
if inst == nil {
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ import (
|
|||||||
func TestTimeoutFunctionality(t *testing.T) {
|
func TestTimeoutFunctionality(t *testing.T) {
|
||||||
// Test timeout checker initialization
|
// Test timeout checker initialization
|
||||||
backendConfig := config.BackendConfig{
|
backendConfig := config.BackendConfig{
|
||||||
LlamaExecutable: "llama-server",
|
LlamaCpp: config.BackendSettings{Command: "llama-server"},
|
||||||
MLXLMExecutable: "mlx_lm.server",
|
MLX: config.BackendSettings{Command: "mlx_lm.server"},
|
||||||
}
|
}
|
||||||
cfg := config.InstancesConfig{
|
cfg := config.InstancesConfig{
|
||||||
PortRange: [2]int{8000, 9000},
|
PortRange: [2]int{8000, 9000},
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
import React from "react";
|
import React from "react";
|
||||||
import { Badge } from "@/components/ui/badge";
|
import { Badge } from "@/components/ui/badge";
|
||||||
import { BackendType, type BackendTypeValue } from "@/types/instance";
|
import { BackendType, type BackendTypeValue } from "@/types/instance";
|
||||||
import { Server } from "lucide-react";
|
import { Server, Package } from "lucide-react";
|
||||||
|
|
||||||
interface BackendBadgeProps {
|
interface BackendBadgeProps {
|
||||||
backend?: BackendTypeValue;
|
backend?: BackendTypeValue;
|
||||||
|
docker?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
|
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
|
||||||
if (!backend) {
|
if (!backend) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -39,6 +40,7 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
<div className="flex items-center gap-1">
|
||||||
<Badge
|
<Badge
|
||||||
variant="outline"
|
variant="outline"
|
||||||
className={`flex items-center gap-1.5 ${getColorClasses()}`}
|
className={`flex items-center gap-1.5 ${getColorClasses()}`}
|
||||||
@@ -46,6 +48,17 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
|
|||||||
<Server className="h-3 w-3" />
|
<Server className="h-3 w-3" />
|
||||||
<span className="text-xs">{getText()}</span>
|
<span className="text-xs">{getText()}</span>
|
||||||
</Badge>
|
</Badge>
|
||||||
|
{docker && (
|
||||||
|
<Badge
|
||||||
|
variant="outline"
|
||||||
|
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
|
||||||
|
title="Docker enabled"
|
||||||
|
>
|
||||||
|
<Package className="h-3 w-3" />
|
||||||
|
<span className="text-[10px] uppercase tracking-wide">Docker</span>
|
||||||
|
</Badge>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ function InstanceCard({
|
|||||||
|
|
||||||
{/* Badges row */}
|
{/* Badges row */}
|
||||||
<div className="flex items-center gap-2 flex-wrap">
|
<div className="flex items-center gap-2 flex-wrap">
|
||||||
<BackendBadge backend={instance.options?.backend_type} />
|
<BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
|
||||||
{running && <HealthBadge health={health} />}
|
{running && <HealthBadge health={health} />}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,144 +0,0 @@
|
|||||||
import React from 'react'
|
|
||||||
import { Input } from '@/components/ui/input'
|
|
||||||
import { Label } from '@/components/ui/label'
|
|
||||||
import { Checkbox } from '@/components/ui/checkbox'
|
|
||||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
|
||||||
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
|
|
||||||
|
|
||||||
/** Any value a single instance-option form field can hold. */
type ZodFieldValue = string | number | boolean | string[] | undefined

/** Props accepted by ZodFormField. */
interface ZodFormFieldProps {
  /** Instance option edited by this field. */
  fieldKey: keyof CreateInstanceOptions
  /** Current value of that option. */
  value: ZodFieldValue
  /** Invoked with the option key and the value the user entered. */
  onChange: (key: keyof CreateInstanceOptions, value: ZodFieldValue) => void
}

/**
 * Renders the form control for one instance option.
 *
 * `backend_type` is always rendered as a dropdown. Every other key is rendered
 * according to the type reported by `getFieldType`: checkbox for booleans,
 * numeric input for numbers, comma-separated text input for arrays, and a
 * plain text input for everything else. Cleared inputs are reported to
 * `onChange` as `undefined`.
 */
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
  // Basic fields carry label/description/placeholder metadata; other keys fall
  // back to using the key itself as the label.
  const meta = basicFieldsConfig[fieldKey as string] || { label: fieldKey }

  // Control type derived from the Zod schema.
  const kind = getFieldType(fieldKey)

  const emit = (next: ZodFieldValue) => {
    onChange(fieldKey, next)
  }

  // Pieces shared by the select, number, array and text layouts.
  const fieldLabel = <Label htmlFor={fieldKey}>{meta.label}</Label>
  const descriptionBlock = meta.description && (
    <p className="text-sm text-muted-foreground">{meta.description}</p>
  )

  let control: React.ReactNode

  if (fieldKey === 'backend_type') {
    // Backend type is a fixed choice, so it gets a dropdown regardless of schema type.
    control = (
      <div className="grid gap-2">
        {fieldLabel}
        <select
          id={fieldKey}
          value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
          onChange={(event) => emit(event.target.value || undefined)}
          className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
        >
          <option value={BackendType.LLAMA_CPP}>Llama Server</option>
          <option value={BackendType.MLX_LM}>MLX LM</option>
          <option value={BackendType.VLLM}>vLLM</option>
        </select>
        {descriptionBlock}
      </div>
    )
  } else if (kind === 'boolean') {
    control = (
      <div className="flex items-center space-x-2">
        <Checkbox
          id={fieldKey}
          checked={typeof value === 'boolean' ? value : false}
          onCheckedChange={(checked) => emit(checked)}
        />
        <Label htmlFor={fieldKey} className="text-sm font-normal">
          {meta.label}
          {meta.description && (
            <span className="text-muted-foreground ml-1">- {meta.description}</span>
          )}
        </Label>
      </div>
    )
  } else if (kind === 'number') {
    control = (
      <div className="grid gap-2">
        {fieldLabel}
        <Input
          id={fieldKey}
          type="number"
          // "any" lets the browser accept decimal numbers
          step="any"
          value={typeof value === 'string' || typeof value === 'number' ? value : ''}
          onChange={(event) => {
            const raw = event.target.value
            if (raw === '') {
              // Empty input clears the option.
              emit(undefined)
              return
            }
            // Unparseable input is ignored rather than forwarded.
            const parsed = parseFloat(raw)
            if (!isNaN(parsed)) {
              emit(parsed)
            }
          }}
          placeholder={meta.placeholder}
        />
        {descriptionBlock}
      </div>
    )
  } else if (kind === 'array') {
    control = (
      <div className="grid gap-2">
        {fieldLabel}
        <Input
          id={fieldKey}
          type="text"
          value={Array.isArray(value) ? value.join(', ') : ''}
          onChange={(event) => {
            const raw = event.target.value
            // Split on commas, trim each item and drop empty ones.
            const items = raw
              ? raw.split(',').map(part => part.trim()).filter(Boolean)
              : undefined
            emit(items)
          }}
          placeholder="item1, item2, item3"
        />
        {descriptionBlock}
        <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
      </div>
    )
  } else {
    // 'text' and any type without a dedicated control.
    control = (
      <div className="grid gap-2">
        {fieldLabel}
        <Input
          id={fieldKey}
          type="text"
          value={typeof value === 'string' || typeof value === 'number' ? value : ''}
          onChange={(event) => emit(event.target.value || undefined)}
          placeholder={meta.placeholder}
        />
        {descriptionBlock}
      </div>
    )
  }

  return <div className="space-y-2">{control}</div>
}
|
|
||||||
|
|
||||||
export default ZodFormField
|
|
||||||
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import React, { useState } from 'react'
|
||||||
|
import { Input } from '@/components/ui/input'
|
||||||
|
import { Label } from '@/components/ui/label'
|
||||||
|
import { Button } from '@/components/ui/button'
|
||||||
|
import { X, Plus } from 'lucide-react'
|
||||||
|
|
||||||
|
/** Props accepted by EnvironmentVariablesInput. */
interface EnvironmentVariablesInputProps {
  /** DOM id placed on the first variable-name input so the label targets it. */
  id: string
  /** Text shown in the field label. */
  label: string
  /** Current variables as a name-to-value map; used to seed the rows on mount. */
  value: Record<string, string> | undefined
  /** Receives the map of complete rows, or `undefined` when there are none. */
  onChange: (value: Record<string, string> | undefined) => void
  /** Optional help text rendered under the rows. */
  description?: string
  /** Disables every input and button when true. */
  disabled?: boolean
  /** Extra class names appended to the root element. */
  className?: string
}

/** One editable row: a variable name and its value. */
interface EnvVar {
  key: string
  value: string
}

/**
 * Editable list of environment variables rendered as name/value rows.
 *
 * The rows are local state seeded from `value` when the component mounts.
 * After each edit the parent receives, through `onChange`, an object built
 * from the rows whose trimmed name and trimmed value are both non-empty, or
 * `undefined` when no such row exists. At least one (possibly blank) row is
 * always displayed.
 */
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
}) => {
  // Lazy initializer: convert the incoming map to rows once instead of on
  // every render.
  const [envVars, setEnvVars] = useState<EnvVar[]>(() => {
    const initialRows = value
      ? Object.entries(value).map(([key, val]) => ({ key, value: val }))
      : []
    return initialRows.length > 0 ? initialRows : [{ key: '', value: '' }]
  })

  // Report the complete rows to the parent.
  const updateParent = (newEnvVars: EnvVar[]) => {
    // Rows missing a name or a value are still being edited; leave them out.
    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')

    if (validVars.length === 0) {
      onChange(undefined)
    } else {
      const envObject = validVars.reduce((acc, env) => {
        acc[env.key.trim()] = env.value.trim()
        return acc
      }, {} as Record<string, string>)
      onChange(envObject)
    }
  }

  // Replace the edited row with a new object. The previous state array and
  // its row objects are left untouched, as React state must not be mutated.
  const updateRow = (index: number, patch: Partial<EnvVar>) => {
    const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, ...patch } : env))
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  const handleKeyChange = (index: number, newKey: string) => {
    updateRow(index, { key: newKey })
  }

  const handleValueChange = (index: number, newValue: string) => {
    updateRow(index, { value: newValue })
  }

  const addEnvVar = () => {
    // A blank row is never reported, so the parent does not need notifying.
    setEnvVars([...envVars, { key: '', value: '' }])
  }

  const removeEnvVar = (index: number) => {
    const remaining = envVars.filter((_, i) => i !== index)
    // Removing the last row resets it to a blank one so an editor stays visible.
    const newEnvVars = remaining.length > 0 ? remaining : [{ key: '', value: '' }]
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {envVars.map((envVar, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              // The label's htmlFor needs a matching element: use the first name input.
              id={index === 0 ? id : undefined}
              placeholder="Variable name"
              value={envVar.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder="Variable value"
              value={envVar.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removeEnvVar(index)}
              disabled={disabled}
              className="shrink-0"
              // Icon-only button: give it an accessible name.
              aria-label="Remove variable"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addEnvVar}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          Add Variable
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">
        Environment variables that will be passed to the backend process
      </p>
    </div>
  )
}
|
||||||
|
|
||||||
|
export default EnvironmentVariablesInput
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
import React from 'react'
|
|
||||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
|
||||||
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
|
|
||||||
import { getFieldType } from '@/schemas/instanceOptions'
|
|
||||||
import TextInput from '@/components/form/TextInput'
|
|
||||||
import NumberInput from '@/components/form/NumberInput'
|
|
||||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
|
||||||
import SelectInput from '@/components/form/SelectInput'
|
|
||||||
|
|
||||||
/** Props accepted by BasicInstanceFields. */
interface BasicInstanceFieldsProps {
  /** Current instance options being edited. */
  formData: CreateInstanceOptions
  /** Invoked with the option key and the new value for that option. */
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

/**
 * Renders the "Basic Configuration" section of the instance form.
 *
 * One input is produced per key returned by `getBasicFields`, except the
 * auto-restart keys and `backend_options`, which are filtered out here.
 * `backend_type` is rendered as a select; all other keys are rendered as a
 * checkbox, number input or text input depending on `getFieldType`.
 */
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
  formData,
  onChange
}) => {
  // Keys that are handled separately and must not be rendered in this section.
  const excludedKeys = new Set<string>(['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'])

  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const meta = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const kind = getFieldType(fieldKey)
    const emit = (next: any) => onChange(fieldKey, next)

    // Backend type is a fixed choice and always gets a select.
    if (fieldKey === 'backend_type') {
      const backendChoices = [
        { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
        { value: BackendType.MLX_LM, label: 'MLX LM' },
        { value: BackendType.VLLM, label: 'vLLM' }
      ]
      return (
        <SelectInput
          key={fieldKey}
          id={fieldKey}
          label={meta.label}
          value={formData[fieldKey] || BackendType.LLAMA_CPP}
          onChange={emit}
          options={backendChoices}
          description={meta.description}
        />
      )
    }

    if (kind === 'boolean') {
      return (
        <CheckboxInput
          key={fieldKey}
          id={fieldKey}
          label={meta.label}
          value={formData[fieldKey] as boolean | undefined}
          onChange={emit}
          description={meta.description}
        />
      )
    }

    if (kind === 'number') {
      return (
        <NumberInput
          key={fieldKey}
          id={fieldKey}
          label={meta.label}
          value={formData[fieldKey] as number | undefined}
          onChange={emit}
          placeholder={meta.placeholder}
          description={meta.description}
        />
      )
    }

    // Every remaining type falls back to a text input.
    return (
      <TextInput
        key={fieldKey}
        id={fieldKey}
        label={meta.label}
        value={formData[fieldKey] as string | number | undefined}
        onChange={emit}
        placeholder={meta.placeholder}
        description={meta.description}
      />
    )
  }

  const visibleFields = getBasicFields().filter(
    fieldKey => !excludedKeys.has(fieldKey as string)
  )

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Basic Configuration</h3>
      {visibleFields.map(fieldKey => renderField(fieldKey))}
    </div>
  )
}
|
|
||||||
|
|
||||||
export default BasicInstanceFields
|
|
||||||
@@ -6,6 +6,7 @@ import { Input } from '@/components/ui/input'
|
|||||||
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
|
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
|
||||||
import NumberInput from '@/components/form/NumberInput'
|
import NumberInput from '@/components/form/NumberInput'
|
||||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
import CheckboxInput from '@/components/form/CheckboxInput'
|
||||||
|
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
|
||||||
|
|
||||||
interface InstanceSettingsCardProps {
|
interface InstanceSettingsCardProps {
|
||||||
instanceName: string
|
instanceName: string
|
||||||
@@ -75,6 +76,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
onChange={(value) => onChange('on_demand_start', value)}
|
onChange={(value) => onChange('on_demand_start', value)}
|
||||||
description="Start instance only when needed"
|
description="Start instance only when needed"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<EnvironmentVariablesInput
|
||||||
|
id="environment"
|
||||||
|
label="Environment Variables"
|
||||||
|
value={formData.environment}
|
||||||
|
onChange={(value) => onChange('environment', value)}
|
||||||
|
description="Custom environment variables for the instance"
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
|
|||||||
@@ -1,12 +1,10 @@
|
|||||||
import {
|
import {
|
||||||
type CreateInstanceOptions,
|
|
||||||
type LlamaCppBackendOptions,
|
type LlamaCppBackendOptions,
|
||||||
type MlxBackendOptions,
|
type MlxBackendOptions,
|
||||||
type VllmBackendOptions,
|
type VllmBackendOptions,
|
||||||
LlamaCppBackendOptionsSchema,
|
LlamaCppBackendOptionsSchema,
|
||||||
MlxBackendOptionsSchema,
|
MlxBackendOptionsSchema,
|
||||||
VllmBackendOptionsSchema,
|
VllmBackendOptionsSchema,
|
||||||
getAllFieldKeys,
|
|
||||||
getAllLlamaCppFieldKeys,
|
getAllLlamaCppFieldKeys,
|
||||||
getAllMlxFieldKeys,
|
getAllMlxFieldKeys,
|
||||||
getAllVllmFieldKeys,
|
getAllVllmFieldKeys,
|
||||||
@@ -15,41 +13,6 @@ import {
|
|||||||
getVllmFieldType
|
getVllmFieldType
|
||||||
} from '@/schemas/instanceOptions'
|
} from '@/schemas/instanceOptions'
|
||||||
|
|
||||||
// Instance-level basic fields (not backend-specific)
|
|
||||||
/** Display metadata for one instance-level form field. */
type BasicFieldMeta = {
  label: string
  description?: string
  placeholder?: string
}

/**
 * Label, help text and placeholder for each instance-level basic field,
 * keyed by option name. Key order is significant to callers that enumerate
 * the keys of this object.
 */
export const basicFieldsConfig: Record<string, BasicFieldMeta> = {
  auto_restart: {
    label: 'Auto Restart',
    description: 'Automatically restart the instance on failure'
  },
  max_restarts: {
    label: 'Max Restarts',
    placeholder: '3',
    description: 'Maximum number of restart attempts (0 = unlimited)'
  },
  restart_delay: {
    label: 'Restart Delay (seconds)',
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped'
  },
  on_demand_start: {
    label: 'On-Demand Start',
    description: 'Start instance upon receiving OpenAI-compatible API request'
  },
  backend_type: {
    label: 'Backend Type',
    description: 'Type of backend to use for this instance'
  }
}
|
|
||||||
|
|
||||||
// LlamaCpp backend-specific basic fields
|
// LlamaCpp backend-specific basic fields
|
||||||
const basicLlamaCppFieldsConfig: Record<string, {
|
const basicLlamaCppFieldsConfig: Record<string, {
|
||||||
label: string
|
label: string
|
||||||
@@ -152,18 +115,6 @@ const backendFieldGetters = {
|
|||||||
llama_cpp: getAllLlamaCppFieldKeys,
|
llama_cpp: getAllLlamaCppFieldKeys,
|
||||||
} as const
|
} as const
|
||||||
|
|
||||||
/**
 * Reports whether `key` is a property of `basicFieldsConfig`.
 * Uses the same lookup semantics as the `in` operator.
 */
function isBasicField(key: keyof CreateInstanceOptions): boolean {
  return Reflect.has(basicFieldsConfig, key)
}
|
|
||||||
|
|
||||||
/** Returns the keys of `basicFieldsConfig`, in declaration order. */
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
  const configuredKeys = Object.keys(basicFieldsConfig)
  return configuredKeys as (keyof CreateInstanceOptions)[]
}
|
|
||||||
|
|
||||||
/**
 * Returns every key from `getAllFieldKeys()` that is not a basic field,
 * preserving the order in which `getAllFieldKeys()` yields them.
 */
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
  const advancedKeys: (keyof CreateInstanceOptions)[] = []
  for (const fieldKey of getAllFieldKeys()) {
    if (!isBasicField(fieldKey)) {
      advancedKeys.push(fieldKey)
    }
  }
  return advancedKeys
}
|
|
||||||
|
|
||||||
export function getBasicBackendFields(backendType?: string): string[] {
|
export function getBasicBackendFields(backendType?: string): string[] {
|
||||||
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
|
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
|
||||||
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
|
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
|
||||||
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
|
|||||||
return 'text'
|
return 'text'
|
||||||
}
|
}
|
||||||
|
|
||||||
// Re-export the Zod-based functions
|
|
||||||
export { getFieldType } from '@/schemas/instanceOptions'
|
|
||||||
@@ -33,6 +33,9 @@ export const CreateInstanceOptionsSchema = z.object({
|
|||||||
idle_timeout: z.number().optional(),
|
idle_timeout: z.number().optional(),
|
||||||
on_demand_start: z.boolean().optional(),
|
on_demand_start: z.boolean().optional(),
|
||||||
|
|
||||||
|
// Environment variables
|
||||||
|
environment: z.record(z.string(), z.string()).optional(),
|
||||||
|
|
||||||
// Backend configuration
|
// Backend configuration
|
||||||
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
||||||
backend_options: BackendOptionsSchema.optional(),
|
backend_options: BackendOptionsSchema.optional(),
|
||||||
@@ -75,5 +78,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
|
|||||||
if (innerSchema instanceof z.ZodNumber) return 'number'
|
if (innerSchema instanceof z.ZodNumber) return 'number'
|
||||||
if (innerSchema instanceof z.ZodArray) return 'array'
|
if (innerSchema instanceof z.ZodArray) return 'array'
|
||||||
if (innerSchema instanceof z.ZodObject) return 'object'
|
if (innerSchema instanceof z.ZodObject) return 'object'
|
||||||
|
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
|
||||||
return 'text' // ZodString and others default to text
|
return 'text' // ZodString and others default to text
|
||||||
}
|
}
|
||||||
@@ -23,4 +23,5 @@ export interface Instance {
|
|||||||
name: string;
|
name: string;
|
||||||
status: InstanceStatus;
|
status: InstanceStatus;
|
||||||
options?: CreateInstanceOptions;
|
options?: CreateInstanceOptions;
|
||||||
|
docker_enabled?: boolean; // indicates backend is running via Docker
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user