24 Commits

Author SHA1 Message Date
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
28 changed files with 1075 additions and 454 deletions

View File

@@ -14,6 +14,7 @@
 ### 🔗 Universal Compatibility
 - **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
+- **Docker Support**: Run backends in containers
 ### 🌐 User-Friendly Interface
 - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
@@ -22,6 +23,7 @@
 ### ⚡ Smart Operations
 - **Instance Monitoring**: Health checks, auto-restart, log management
 - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+- **Environment Variables**: Set custom environment variables per instance for advanced configuration
 ![Dashboard Screenshot](docs/images/dashboard.png)
@@ -32,6 +34,7 @@
 # For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
 # For MLX on macOS: pip install mlx-lm
 # For vLLM: pip install vllm
+# Or use Docker - no local installation required
 # 2. Download and run llamactl
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -50,7 +53,8 @@ llamactl
 2. Click "Create Instance"
 3. Choose backend type (llama.cpp, MLX, or vLLM)
 4. Set model path and backend-specific options
-5. Start or stop the instance
+5. Configure environment variables if needed (optional)
+6. Start or stop the instance
 ### Or use the REST API:
 ```bash
@@ -64,10 +68,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
   -H "Authorization: Bearer your-key" \
   -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
-# Create vLLM instance
+# Create vLLM instance with environment variables
 curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
   -H "Authorization: Bearer your-key" \
-  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
+  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
 # Use with OpenAI SDK
 curl -X POST localhost:8080/v1/chat/completions \
@@ -112,6 +116,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)
 brew install llama.cpp
 # Or build from source - see llama.cpp docs
+# Or use Docker - no local installation required
 ```
 **For MLX backend (macOS only):**
@@ -139,9 +144,27 @@ python -m venv vllm-env
 source vllm-env/bin/activate
 pip install vllm
-# For production deployments, consider container-based installation
+# Or use Docker - no local installation required
 ```
## Docker Support
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
```yaml
backends:
llama-cpp:
docker:
enabled: true
vllm:
docker:
enabled: true
```
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
 ## Configuration
 llamactl works out of the box with sensible defaults.
@@ -154,9 +177,30 @@ server:
   enable_swagger: false # Enable Swagger UI for API docs
 backends:
-  llama_executable: llama-server # Path to llama-server executable
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
-  vllm_executable: vllm # Path to vllm executable
+  llama-cpp:
+    command: "llama-server"
+    args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} # Environment variables for the container
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} # Environment variables for the container
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
 instances:
   port_range: [8000, 9000] # Port range for instances

View File

@@ -20,9 +20,30 @@ server:
   enable_swagger: false # Enable Swagger UI for API docs
 backends:
-  llama_executable: llama-server # Path to llama-server executable
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
-  vllm_executable: vllm # Path to vllm executable
+  llama-cpp:
+    command: "llama-server"
+    args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
 instances:
   port_range: [8000, 9000] # Port range for instances
@@ -90,18 +111,69 @@ server:
 - `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
 ### Backend Configuration
 ```yaml
 backends:
-  llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
-  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
-  vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
+  llama-cpp:
+    command: "llama-server"
+    args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
 ```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
 **Environment Variables:**
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
-- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
-- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
+**LlamaCpp Backend:**
+- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
 ### Instance Configuration
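The `KEY1=value1,KEY2=value2` format above can also be exercised programmatically. A minimal, hypothetical Go sketch (mirroring the new config tests; the override values are invented):

```go
package main

import (
	"fmt"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Hypothetical overrides for the llama.cpp backend.
	os.Setenv("LLAMACTL_LLAMACPP_COMMAND", "/opt/llama.cpp/llama-server")
	os.Setenv("LLAMACTL_LLAMACPP_ENV", "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8")
	os.Setenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED", "true")

	// A missing config file is fine: defaults are used, then env overrides are applied.
	cfg, err := config.LoadConfig("nonexistent.yaml")
	if err != nil {
		panic(err)
	}

	fmt.Println(cfg.Backends.LlamaCpp.Command)                        // /opt/llama.cpp/llama-server
	fmt.Println(cfg.Backends.LlamaCpp.Environment["OMP_NUM_THREADS"]) // 8
	fmt.Println(cfg.Backends.LlamaCpp.Docker.Enabled)                 // true
}
```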

View File

@@ -88,6 +88,21 @@ Here are basic example configurations for each backend:
 }
 ```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
 ## Using the API
 You can also manage instances via the REST API:

View File

@@ -116,7 +116,18 @@ Create and start a new instance.
 POST /api/v1/instances/{name}
 ```
-**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
+**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
See [Managing Instances](managing-instances.md) for complete configuration options.
 **Response:**
 ```json
@@ -354,7 +365,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer your-api-key" \
   -d '{
-    "model": "/models/llama-2-7b.gguf"
+    "backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
   }'
 # Check instance status

View File

@@ -53,6 +53,7 @@ Each instance is displayed as a card showing:
 - **Restart Delay**: Delay in seconds between restart attempts
 - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
 - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
+- **Environment Variables**: Set custom environment variables for the instance process
 6. Configure backend-specific options:
    - **llama.cpp**: Threads, context size, GPU layers, port, etc.
    - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
@@ -101,7 +102,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
     "gpu_memory_utilization": 0.9
   },
   "auto_restart": true,
-  "on_demand_start": true
+  "on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
 }'
 # Create llama.cpp instance with HuggingFace model

View File

@@ -1,6 +1,8 @@
 package backends
 import (
+	"fmt"
+	"llamactl/pkg/config"
 	"reflect"
 	"strconv"
 	"strings"
@@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
 	return args
 }
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}
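A usage sketch for `BuildDockerCommand` (not from the repository; the settings below are illustrative, and the resulting argument order follows the function above: Docker args, `-e` env flags, image, backend args, instance args):

```go
package main

import (
	"log"
	"os/exec"

	"llamactl/pkg/backends"
	"llamactl/pkg/config"
)

func main() {
	settings := &config.BackendSettings{
		Command: "llama-server",
		Args:    []string{},
		Docker: &config.DockerSettings{
			Enabled:     true,
			Image:       "ghcr.io/ggml-org/llama.cpp:server",
			Args:        []string{"run", "--rm", "--network", "host", "--gpus", "all"},
			Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "0"},
		},
	}

	// Instance-level arguments would normally come from the instance options.
	cmd, args, err := backends.BuildDockerCommand(settings, []string{"--model", "/models/llama-2-7b.gguf", "--port", "8001"})
	if err != nil {
		log.Fatal(err)
	}

	// cmd == "docker"; args start with "run --rm --network host --gpus all",
	// then "-e CUDA_VISIBLE_DEVICES=0", the image, and finally the instance args.
	log.Println(exec.Command(cmd, args...).String())
}
```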

View File

@@ -7,6 +7,28 @@ import (
"strconv" "strconv"
) )
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
 type LlamaServerOptions struct {
 	// Common params
 	VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 // BuildCommandArgs converts InstanceOptions to command line arguments
 func (o *LlamaServerOptions) BuildCommandArgs() []string {
 	// Llama uses multiple flags for arrays by default (not comma-separated)
-	multipleFlags := map[string]bool{
-		"override-tensor":       true,
-		"override-kv":           true,
-		"lora":                  true,
-		"lora-scaled":           true,
-		"control-vector":        true,
-		"control-vector-scaled": true,
-		"dry-sequence-breaker":  true,
-		"logit-bias":            true,
-	}
-	return backends.BuildCommandArgs(o, multipleFlags)
+	// Use package-level multiValuedFlags variable
+	return backends.BuildCommandArgs(o, multiValuedFlags)
+}
+
+func (o *LlamaServerOptions) BuildDockerArgs() []string {
+	// For llama, Docker args are the same as normal args
+	return o.BuildCommandArgs()
 }
 // ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
@@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
 func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
 	executableNames := []string{"llama-server"}
 	var subcommandNames []string // Llama has no subcommands
-	multiValuedFlags := map[string]bool{
-		"override_tensor":       true,
-		"override_kv":           true,
-		"lora":                  true,
-		"lora_scaled":           true,
-		"control_vector":        true,
-		"control_vector_scaled": true,
-		"dry_sequence_breaker":  true,
-		"logit_bias":            true,
-	}
+	// Use package-level multiValuedFlags variable
 	var llamaOptions LlamaServerOptions
 	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {

View File

@@ -4,6 +4,15 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
) )
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
 type VllmServerOptions struct {
 	// Basic connection options (auto-assigned by llamactl)
 	Host string `json:"host,omitempty"`
@@ -131,30 +140,32 @@
 }
 // BuildCommandArgs converts VllmServerOptions to command line arguments
-// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
-// For vLLM, the model parameter is passed as a positional argument, not a --model flag
+// For vLLM native, model is a positional argument after "serve"
 func (o *VllmServerOptions) BuildCommandArgs() []string {
 	var args []string
-	// Add model as positional argument if specified
+	// Add model as positional argument if specified (for native execution)
 	if o.Model != "" {
 		args = append(args, o.Model)
 	}
-	// Create a copy of the options without the Model field to avoid including it as --model flag
+	// Create a copy without Model field to avoid --model flag
 	optionsCopy := *o
-	optionsCopy.Model = "" // Clear model field so it won't be included as a flag
-	multipleFlags := map[string]bool{
-		"api-key":         true,
-		"allowed-origins": true,
-		"allowed-methods": true,
-		"allowed-headers": true,
-		"middleware":      true,
-	}
-	// Build the rest of the arguments as flags
-	flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
+	optionsCopy.Model = ""
+	// Use package-level multiValuedFlags variable
+	flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
+	args = append(args, flagArgs...)
+	return args
+}
+
+func (o *VllmServerOptions) BuildDockerArgs() []string {
+	var args []string
+	// Use package-level multiValuedFlags variable
+	flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
 	args = append(args, flagArgs...)
 	return args
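A quick, hedged illustration of the positional-model behavior above (field values invented; exact flag order depends on the struct's field layout, so treat the expected output as indicative only):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm"
)

func main() {
	opts := vllm.VllmServerOptions{
		Model: "Qwen/Qwen2-0.5B-Instruct", // hypothetical example value
		Host:  "127.0.0.1",
	}

	// The model comes first as a positional argument, followed by the remaining flags,
	// e.g. [Qwen/Qwen2-0.5B-Instruct --host 127.0.0.1 ...]
	fmt.Println(opts.BuildCommandArgs())

	// BuildDockerArgs feeds the full options (model included) through the generic flag builder.
	fmt.Println(opts.BuildDockerArgs())
}
```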

View File

@@ -1,6 +1,7 @@
 package config
 import (
+	"log"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -10,16 +11,27 @@ import (
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
}
 // BackendConfig contains backend executable configurations
 type BackendConfig struct {
-	// Path to llama-server executable (llama.cpp backend)
-	LlamaExecutable string `yaml:"llama_executable"`
-	// Path to mlx_lm executable (MLX-LM backend)
-	MLXLMExecutable string `yaml:"mlx_lm_executable"`
-	// Path to vllm executable (vLLM backend)
-	VllmExecutable string `yaml:"vllm_executable"`
+	LlamaCpp BackendSettings `yaml:"llama-cpp"`
+	VLLM     BackendSettings `yaml:"vllm"`
+	MLX      BackendSettings `yaml:"mlx"`
 }
 // AppConfig represents the configuration for llamactl
@@ -123,15 +135,45 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			EnableSwagger: false,
 		},
 		Backends: BackendConfig{
-			LlamaExecutable: "llama-server",
-			MLXLMExecutable: "mlx_lm.server",
-			VllmExecutable:  "vllm",
+			LlamaCpp: BackendSettings{
+				Command: "llama-server",
+				Args:    []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
 		},
 		Instances: InstancesConfig{
 			PortRange: [2]int{8000, 9000},
 			DataDir:   getDefaultDataDirectory(),
-			InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
-			LogsDir:      filepath.Join(getDefaultDataDirectory(), "logs"),
+			// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
+			// should be relative path to DataDir if not explicitly set.
+			InstancesDir: "",
+			LogsDir:      "",
 			AutoCreateDirs:      true,
 			MaxInstances:        -1, // -1 means unlimited
 			MaxRunningInstances: -1, // -1 means unlimited
@@ -159,6 +201,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
 	// 3. Override with environment variables
 	loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
 	return cfg, nil
 }
@@ -179,6 +229,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
 		if err := yaml.Unmarshal(data, cfg); err != nil {
 			return err
 		}
+		log.Printf("Read config at %s", path)
 		return nil
 	}
 }
@@ -244,15 +295,107 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 	}
-	// Backend config
-	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-		cfg.Backends.LlamaExecutable = llamaExec
-	}
-	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
-		cfg.Backends.MLXLMExecutable = mlxLMExec
-	}
-	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
-		cfg.Backends.VllmExecutable = vllmExec
-	}
+	// LlamaCpp backend
+	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
+		cfg.Backends.LlamaCpp.Command = llamaCmd
+	}
+	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
+		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
+	}
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
+		if cfg.Backends.LlamaCpp.Environment == nil {
+			cfg.Backends.LlamaCpp.Environment = make(map[string]string)
+		}
+		parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
+	}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
 			cfg.Instances.DefaultAutoRestart = b
@@ -325,6 +468,19 @@ func ParsePortRange(s string) [2]int {
 	return [2]int{0, 0} // Invalid format
 }
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
 	switch runtime.GOOS {
@@ -357,6 +513,10 @@ func getDefaultDataDirectory() string {
 // getDefaultConfigLocations returns platform-specific config file locations
 func getDefaultConfigLocations() []string {
 	var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
 	homeDir, _ := os.UserHomeDir()
 	switch runtime.GOOS {
@@ -386,3 +546,17 @@ func getDefaultConfigLocations() []string {
 	return locations
 }
// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return BackendSettings{}
}
}
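Two of the behaviors introduced above — the DataDir-relative defaults and `GetBackendSettings` — in one small, hypothetical sketch:

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
)

func main() {
	// A missing config file is not an error: defaults are used, then env overrides.
	cfg, err := config.LoadConfig("nonexistent.yaml")
	if err != nil {
		panic(err)
	}

	// InstancesDir and LogsDir default to empty strings and are resolved
	// relative to DataDir after all config sources are merged.
	fmt.Println(cfg.Instances.DataDir)      // platform-specific data directory
	fmt.Println(cfg.Instances.InstancesDir) // <DataDir>/instances
	fmt.Println(cfg.Instances.LogsDir)      // <DataDir>/logs

	// Backend settings are looked up by name; unknown names return zero-value settings.
	fmt.Println(cfg.Backends.GetBackendSettings("llama-cpp").Command) // llama-server
	fmt.Println(cfg.Backends.GetBackendSettings("unknown").Command)   // ""
}
```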

View File

@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000", "LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOGS_DIR": "/env/logs", "LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20", "LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false", "LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7", "LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15", "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.MaxInstances != 20 { if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances) t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
} }
if cfg.Backends.LlamaExecutable != "/env/llama-server" { if cfg.Backends.LlamaCpp.Command != "llama-server" {
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable) t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
} }
if cfg.Instances.DefaultAutoRestart { if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false") t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances) t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
} }
} }
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
bc := &config.BackendConfig{}
// Test empty llama-cpp
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty vLLM
settings = bc.GetBackendSettings("vllm")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}

View File

@@ -221,14 +221,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
 	i.mu.RLock()
 	defer i.mu.RUnlock()
// Determine if docker is enabled for this instance's backend
var dockerEnabled bool
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeVllm:
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeMlxLm:
// MLX does not support docker currently
}
}
 	// Use anonymous struct to avoid recursion
 	type Alias Process
 	return json.Marshal(&struct {
 		*Alias
 		Options *CreateInstanceOptions `json:"options,omitempty"`
+		DockerEnabled bool             `json:"docker_enabled,omitempty"`
 	}{
 		Alias:   (*Alias)(i),
 		Options: i.options,
+		DockerEnabled: dockerEnabled,
 	})
 }

View File

@@ -12,8 +12,18 @@ import (
 func TestNewInstance(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) {
 func TestNewInstance_WithRestartOptions(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -112,8 +132,18 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 func TestSetOptions(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) {
 func TestGetProxy(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) {
 func TestMarshalJSON(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 	}
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
} }
 	globalSettings := &config.InstancesConfig{

View File

@@ -11,6 +11,7 @@ import (
"time" "time"
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/config"
) )
// Start starts the llama server instance and returns an error if it fails. // Start starts the llama server instance and returns an error if it fails.
@@ -41,24 +42,14 @@ func (i *Process) Start() error {
return fmt.Errorf("failed to create log files: %w", err) return fmt.Errorf("failed to create log files: %w", err)
} }
args := i.options.BuildCommandArgs() // Build command using backend-specific methods
i.ctx, i.cancel = context.WithCancel(context.Background()) cmd, cmdErr := i.buildCommand()
if cmdErr != nil {
var executable string return fmt.Errorf("failed to build command: %w", cmdErr)
// Get executable from global configuration
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
case backends.BackendTypeVllm:
executable = i.globalBackendSettings.VllmExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
} }
i.cmd = exec.CommandContext(i.ctx, executable, args...) i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = cmd
if runtime.GOOS != "windows" { if runtime.GOOS != "windows" {
setProcAttrs(i.cmd) setProcAttrs(i.cmd)
@@ -372,3 +363,49 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i
 	return true, maxRestarts, restartDelay
 }
// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
// Get backend configuration
backendConfig, err := i.getBackendConfig()
if err != nil {
return nil, err
}
// Build the environment variables
env := i.options.BuildEnvironment(backendConfig)
// Get the command to execute
command := i.options.GetCommand(backendConfig)
// Build command arguments
args := i.options.BuildCommandArgs(backendConfig)
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
cmd.Env = []string{}
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
}
// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
var backendTypeStr string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
backendTypeStr = "llama-cpp"
case backends.BackendTypeMlxLm:
backendTypeStr = "mlx"
case backends.BackendTypeVllm:
backendTypeStr = "vllm"
default:
return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
return &settings, nil
}

View File

@@ -9,6 +9,7 @@ import (
"llamactl/pkg/backends/vllm" "llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
"maps"
) )
type CreateInstanceOptions struct { type CreateInstanceOptions struct {
@@ -20,6 +21,8 @@ type CreateInstanceOptions struct {
 	OnDemandStart *bool `json:"on_demand_start,omitempty"`
 	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
+	// Environment variables
+	Environment map[string]string `json:"environment,omitempty"`
 	BackendType    backends.BackendType `json:"backend_type"`
 	BackendOptions map[string]any       `json:"backend_options,omitempty"`
@@ -188,24 +191,75 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
 	}
 }
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
return "docker"
}
return backendConfig.Command
}
 // BuildCommandArgs builds command line arguments for the backend
-func (c *CreateInstanceOptions) BuildCommandArgs() []string {
-	switch c.BackendType {
-	case backends.BackendTypeLlamaCpp:
-		if c.LlamaServerOptions != nil {
-			return c.LlamaServerOptions.BuildCommandArgs()
-		}
-	case backends.BackendTypeMlxLm:
-		if c.MlxServerOptions != nil {
-			return c.MlxServerOptions.BuildCommandArgs()
-		}
-	case backends.BackendTypeVllm:
-		if c.VllmServerOptions != nil {
-			// Prepend "serve" as first argument
-			args := []string{"serve"}
-			args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
-			return args
-		}
-	}
-	return []string{}
+func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
+	var args []string
+
+	if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
+		// For Docker, start with Docker args
+		args = append(args, backendConfig.Docker.Args...)
+		args = append(args, backendConfig.Docker.Image)
+
+		switch c.BackendType {
+		case backends.BackendTypeLlamaCpp:
+			if c.LlamaServerOptions != nil {
+				args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
+			}
+		case backends.BackendTypeVllm:
+			if c.VllmServerOptions != nil {
+				args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
+			}
+		}
+	} else {
+		// For native execution, start with backend args
+		args = append(args, backendConfig.Args...)
+
+		switch c.BackendType {
+		case backends.BackendTypeLlamaCpp:
+			if c.LlamaServerOptions != nil {
+				args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
+			}
+		case backends.BackendTypeMlxLm:
+			if c.MlxServerOptions != nil {
+				args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
+			}
+		case backends.BackendTypeVllm:
+			if c.VllmServerOptions != nil {
+				args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
+			}
+		}
+	}
+
+	return args
 }
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
env := map[string]string{}
if backendConfig.Environment != nil {
maps.Copy(env, backendConfig.Environment)
}
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
if backendConfig.Docker.Environment != nil {
maps.Copy(env, backendConfig.Docker.Environment)
}
}
if c.Environment != nil {
maps.Copy(env, c.Environment)
}
return env
} }
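The merge order in `BuildEnvironment` means backend-level variables are applied first, Docker-level variables (when Docker is enabled for a non-MLX backend) second, and per-instance variables last, so instance values win. A sketch of that precedence, written as an in-package test with invented values (assumes the usual `testing`, `config`, and `backends` imports):

```go
func TestBuildEnvironmentPrecedence(t *testing.T) {
	backendCfg := &config.BackendSettings{
		Command:     "llama-server",
		Environment: map[string]string{"OMP_NUM_THREADS": "4", "LOG_LEVEL": "info"},
		Docker: &config.DockerSettings{
			Enabled:     true,
			Environment: map[string]string{"LOG_LEVEL": "debug"},
		},
	}

	opts := &CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		Environment: map[string]string{"OMP_NUM_THREADS": "8"},
	}

	env := opts.BuildEnvironment(backendCfg)
	if env["LOG_LEVEL"] != "debug" { // Docker overrides the backend value
		t.Errorf("expected LOG_LEVEL=debug, got %q", env["LOG_LEVEL"])
	}
	if env["OMP_NUM_THREADS"] != "8" { // instance value overrides both
		t.Errorf("expected OMP_NUM_THREADS=8, got %q", env["OMP_NUM_THREADS"])
	}
}
```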

View File

@@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
 func TestUpdateLastRequestTime(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) {
 func TestShouldTimeout_NotRunning(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{
@@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 func TestTimeoutConfiguration_Validation(t *testing.T) {
 	backendConfig := &config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	globalSettings := &config.InstancesConfig{

View File

@@ -16,8 +16,12 @@ import (
 func TestNewInstanceManager(t *testing.T) {
 	backendConfig := config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	cfg := config.InstancesConfig{
@@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()
 	backendConfig := config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	cfg := config.InstancesConfig{
@@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) {
 // Helper function to create a test manager with standard config
 func createTestManager() manager.InstanceManager {
 	backendConfig := config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
 	cfg := config.InstancesConfig{

View File

@@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	// Test max instances limit
 	backendConfig := config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
} }
cfg := config.InstancesConfig{ cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000}, PortRange: [2]int{8000, 9000},

View File

@@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error {
 	im.mu.RLock()
 	var lruInstance *instance.Process
-	for name, _ := range im.runningInstances {
+	for name := range im.runningInstances {
 		inst := im.instances[name]
 		if inst == nil {
 			continue

View File

@@ -14,8 +14,8 @@ import (
 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
 	backendConfig := config.BackendConfig{
-		LlamaExecutable: "llama-server",
-		MLXLMExecutable: "mlx_lm.server",
+		LlamaCpp: config.BackendSettings{Command: "llama-server"},
+		MLX: config.BackendSettings{Command: "mlx_lm.server"},
 	}
 	cfg := config.InstancesConfig{
 		PortRange: [2]int{8000, 9000},

View File

@@ -1,13 +1,14 @@
import React from "react"; import React from "react";
import { Badge } from "@/components/ui/badge"; import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance"; import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server } from "lucide-react"; import { Server, Package } from "lucide-react";
interface BackendBadgeProps { interface BackendBadgeProps {
backend?: BackendTypeValue; backend?: BackendTypeValue;
docker?: boolean;
} }
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => { const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
if (!backend) { if (!backend) {
return null; return null;
} }
@@ -39,13 +40,25 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
}; };
return ( return (
<Badge <div className="flex items-center gap-1">
variant="outline" <Badge
className={`flex items-center gap-1.5 ${getColorClasses()}`} variant="outline"
> className={`flex items-center gap-1.5 ${getColorClasses()}`}
<Server className="h-3 w-3" /> >
<span className="text-xs">{getText()}</span> <Server className="h-3 w-3" />
</Badge> <span className="text-xs">{getText()}</span>
</Badge>
{docker && (
<Badge
variant="outline"
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
title="Docker enabled"
>
<Package className="h-3 w-3" />
<span className="text-[10px] uppercase tracking-wide">Docker</span>
</Badge>
)}
</div>
); );
}; };

View File

@@ -66,7 +66,7 @@ function InstanceCard({
         {/* Badges row */}
         <div className="flex items-center gap-2 flex-wrap">
-          <BackendBadge backend={instance.options?.backend_type} />
+          <BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
           {running && <HealthBadge health={health} />}
         </div>
       </div>

View File

@@ -1,144 +0,0 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
value: string | number | boolean | string[] | undefined
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey, newValue)
}
const renderField = () => {
// Special handling for backend_type field - render as dropdown
if (fieldKey === 'backend_type') {
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<select
id={fieldKey}
value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
onChange={(e) => handleChange(e.target.value || undefined)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default ZodFormField

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
// Convert the value object to an array of key-value pairs for editing
const envVarsFromValue = value
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
: []
const [envVars, setEnvVars] = useState<EnvVar[]>(
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
)
// Update parent component when env vars change
const updateParent = (newEnvVars: EnvVar[]) => {
// Filter out empty entries
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
if (validVars.length === 0) {
onChange(undefined)
} else {
const envObject = validVars.reduce((acc, env) => {
acc[env.key.trim()] = env.value.trim()
return acc
}, {} as Record<string, string>)
onChange(envObject)
}
}
const handleKeyChange = (index: number, newKey: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].key = newKey
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const handleValueChange = (index: number, newValue: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].value = newValue
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const addEnvVar = () => {
const newEnvVars = [...envVars, { key: '', value: '' }]
setEnvVars(newEnvVars)
}
const removeEnvVar = (index: number) => {
if (envVars.length === 1) {
// Reset to empty if it's the last one
const newEnvVars = [{ key: '', value: '' }]
setEnvVars(newEnvVars)
updateParent(newEnvVars)
} else {
const newEnvVars = envVars.filter((_, i) => i !== index)
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
}
return (
<div className={`grid gap-2 ${className || ''}`}>
<Label htmlFor={id}>
{label}
</Label>
<div className="space-y-2">
{envVars.map((envVar, index) => (
<div key={index} className="flex gap-2 items-center">
<Input
placeholder="Variable name"
value={envVar.key}
onChange={(e) => handleKeyChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Input
placeholder="Variable value"
value={envVar.value}
onChange={(e) => handleValueChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => removeEnvVar(index)}
disabled={disabled}
className="shrink-0"
>
<X className="h-4 w-4" />
</Button>
</div>
))}
<Button
type="button"
variant="outline"
size="sm"
onClick={addEnvVar}
disabled={disabled}
className="w-fit"
>
<Plus className="h-4 w-4 mr-2" />
Add Variable
</Button>
</div>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">
Environment variables that will be passed to the backend process
</p>
</div>
)
}
export default EnvironmentVariablesInput
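
A minimal usage sketch of the new component (assumed wiring, not part of this change set; the real integration is the InstanceSettingsCard change further down). It illustrates the onChange contract implemented above: `undefined` when every row is empty, a trimmed key/value record otherwise.

```tsx
// Usage sketch only; the component and its import path are taken from this change set.
import React, { useState } from 'react'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'

const EnvVarsDemo: React.FC = () => {
  const [env, setEnv] = useState<Record<string, string> | undefined>(undefined)

  return (
    <EnvironmentVariablesInput
      id="environment"
      label="Environment Variables"
      value={env}
      onChange={setEnv} // undefined when all rows are empty, trimmed record otherwise
      description="Passed through to the backend process"
    />
  )
}

export default EnvVarsDemo
```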

View File

@@ -1,99 +0,0 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
formData,
onChange
}) => {
const basicFields = getBasicFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
// Special handling for backend_type field
if (fieldKey === 'backend_type') {
return (
<SelectInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] || BackendType.LLAMA_CPP}
onChange={(value) => onChange(fieldKey, value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description={config.description}
/>
)
}
// Render based on field type
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out auto restart fields and backend_options (handled separately)
const fieldsToRender = basicFields.filter(
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{fieldsToRender.map(renderField)}
</div>
)
}
export default BasicInstanceFields

View File

@@ -6,6 +6,7 @@ import { Input } from '@/components/ui/input'
 import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
+import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
 
 interface InstanceSettingsCardProps {
   instanceName: string
@@ -75,6 +76,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
             onChange={(value) => onChange('on_demand_start', value)}
             description="Start instance only when needed"
           />
+          <EnvironmentVariablesInput
+            id="environment"
+            label="Environment Variables"
+            value={formData.environment}
+            onChange={(value) => onChange('environment', value)}
+            description="Custom environment variables for the instance"
+          />
         </div>
       </CardContent>
     </Card>

View File

@@ -1,12 +1,10 @@
 import {
-  type CreateInstanceOptions,
   type LlamaCppBackendOptions,
   type MlxBackendOptions,
   type VllmBackendOptions,
   LlamaCppBackendOptionsSchema,
   MlxBackendOptionsSchema,
   VllmBackendOptionsSchema,
-  getAllFieldKeys,
   getAllLlamaCppFieldKeys,
   getAllMlxFieldKeys,
   getAllVllmFieldKeys,
@@ -15,41 +13,6 @@
   getVllmFieldType
 } from '@/schemas/instanceOptions'
 
-// Instance-level basic fields (not backend-specific)
-export const basicFieldsConfig: Record<string, {
-  label: string
-  description?: string
-  placeholder?: string
-}> = {
-  auto_restart: {
-    label: 'Auto Restart',
-    description: 'Automatically restart the instance on failure'
-  },
-  max_restarts: {
-    label: 'Max Restarts',
-    placeholder: '3',
-    description: 'Maximum number of restart attempts (0 = unlimited)'
-  },
-  restart_delay: {
-    label: 'Restart Delay (seconds)',
-    placeholder: '5',
-    description: 'Delay in seconds before attempting restart'
-  },
-  idle_timeout: {
-    label: 'Idle Timeout (minutes)',
-    placeholder: '60',
-    description: 'Time in minutes before instance is considered idle and stopped'
-  },
-  on_demand_start: {
-    label: 'On-Demand Start',
-    description: 'Start instance upon receiving OpenAI-compatible API request'
-  },
-  backend_type: {
-    label: 'Backend Type',
-    description: 'Type of backend to use for this instance'
-  }
-}
-
 // LlamaCpp backend-specific basic fields
 const basicLlamaCppFieldsConfig: Record<string, {
   label: string
@@ -152,18 +115,6 @@ const backendFieldGetters = {
   llama_cpp: getAllLlamaCppFieldKeys,
 } as const
 
-function isBasicField(key: keyof CreateInstanceOptions): boolean {
-  return key in basicFieldsConfig
-}
-
-export function getBasicFields(): (keyof CreateInstanceOptions)[] {
-  return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
-}
-
-export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
-  return getAllFieldKeys().filter(key => !isBasicField(key))
-}
-
 export function getBasicBackendFields(backendType?: string): string[] {
   const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
   const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
   return 'text'
 }
 
-// Re-export the Zod-based functions
-export { getFieldType } from '@/schemas/instanceOptions'
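
For orientation, a short usage sketch of the helpers that remain after this cleanup (assumptions: this file is the '@/lib/zodFormUtils' module imported elsewhere in the change set, and 'llama_cpp' is a valid backend key; unknown keys fall back to the llama.cpp config as shown above):

```ts
// Sketch under the assumptions stated above; not part of the diff.
import { getBasicBackendFields, getBackendFieldType } from '@/lib/zodFormUtils'

const fields = getBasicBackendFields('llama_cpp') // basic llama.cpp option keys
for (const key of fields) {
  // Maps each key to a UI input type such as 'text', 'number' or 'boolean'
  console.log(key, getBackendFieldType(key))
}
```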

View File

@@ -33,6 +33,9 @@ export const CreateInstanceOptionsSchema = z.object({
   idle_timeout: z.number().optional(),
   on_demand_start: z.boolean().optional(),
 
+  // Environment variables
+  environment: z.record(z.string(), z.string()).optional(),
+
   // Backend configuration
   backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
@@ -75,5 +78,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
   if (innerSchema instanceof z.ZodObject) return 'object'
+  if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
   return 'text' // ZodString and others default to text
 }
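
A hedged example of the new `environment` field in use (placeholder values; the exports are the ones visible in this file's hunks):

```ts
// Placeholder values only; schema and helpers are those shown above.
import { CreateInstanceOptionsSchema, getFieldType } from '@/schemas/instanceOptions'
import { BackendType } from '@/types/instance'

const options = CreateInstanceOptionsSchema.parse({
  backend_type: BackendType.LLAMA_CPP,
  on_demand_start: true,
  // New: string-to-string map passed through to the backend process
  environment: { CUDA_VISIBLE_DEVICES: '0' },
})

console.log(getFieldType('environment')) // 'object', via the ZodRecord branch above
```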

View File

@@ -23,4 +23,5 @@ export interface Instance {
   name: string;
   status: InstanceStatus;
   options?: CreateInstanceOptions;
+  docker_enabled?: boolean; // indicates backend is running via Docker
 }
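
A short sketch (hypothetical helper, not part of the diff) of consuming the new flag, mirroring the BackendBadge/InstanceCard wiring above:

```ts
// Hypothetical helper; `Instance` and `docker_enabled` come from the hunk above.
import type { Instance } from '@/types/instance'

export function instanceLabel(instance: Instance): string {
  return instance.docker_enabled ? `${instance.name} (Docker)` : instance.name
}
```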