43 Commits

Author SHA1 Message Date
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configuration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
38 changed files with 1452 additions and 462 deletions

.dockerignore (new file, 45 lines)

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files
.vscode/
.idea/
# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

README.md (modified)

@@ -14,6 +14,7 @@
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers

### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)

@@ -22,6 +23,7 @@
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration

![Dashboard Screenshot](docs/images/dashboard.png)

@@ -32,6 +34,7 @@
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required

# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')

@@ -50,7 +53,8 @@ llamactl
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
-5. Start or stop the instance
5. Configure environment variables if needed (optional)
6. Start or stop the instance

### Or use the REST API:
```bash

@@ -64,10 +68,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'

-# Create vLLM instance
# Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
  -H "Authorization: Bearer your-key" \
-  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'

# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \

@@ -91,7 +95,30 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```

-### Option 2: Build from Source
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models

# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d

# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d

# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```

**Features:** CUDA support, automatic latest release installation, no backend dependencies.
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).

For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).

### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git

@@ -112,6 +139,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)
brew install llama.cpp
# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```

**For MLX backend (macOS only):**

@@ -139,9 +167,27 @@ python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
-# For production deployments, consider container-based installation
# Or use Docker - no local installation required
```

## Backend Docker Support

llamactl can run backends in Docker containers:

```yaml
backends:
  llama-cpp:
    docker:
      enabled: true
  vllm:
    docker:
      enabled: true
```

**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.

For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).

## Configuration
llamactl works out of the box with sensible defaults.

@@ -154,9 +200,30 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
-  llama_executable: llama-server # Path to llama-server executable
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
-  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {} # Environment variables for the container
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {} # Environment variables for the container
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process

instances:
  port_range: [8000, 9000] # Port range for instances

docker/Dockerfile.llamacpp (new file)

@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.source (new file, 64 lines)

@@ -0,0 +1,64 @@
# WebUI build stage
FROM node:20-alpine AS webui-builder
WORKDIR /webui
# Copy webui package files
COPY webui/package*.json ./
# Install dependencies
RUN npm ci
# Copy webui source
COPY webui/ ./
# Build webui
RUN npm run build
# Go build stage
FROM golang:1.24-alpine AS builder
# Install build dependencies
RUN apk add --no-cache git ca-certificates
# Set working directory
WORKDIR /build
# Copy go mod files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/
# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
# Final stage
FROM alpine:latest
# Install runtime dependencies
RUN apk --no-cache add ca-certificates
# Create data directory
RUN mkdir -p /data
# Set working directory
WORKDIR /data
# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
# Expose the default port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.vllm (new file, 20 lines)

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/docker-compose.yml (new file, 56 lines)

@@ -0,0 +1,56 @@
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080" # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # vLLM specific environment variables
      - CUDA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

docs/getting-started/configuration.md (modified)

@@ -20,9 +20,33 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
-  llama_executable: llama-server # Path to llama-server executable
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
-  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process
    response_headers: {} # Additional response headers to send with responses

instances:
  port_range: [8000, 9000] # Port range for instances
@@ -90,18 +114,78 @@ server:
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

### Backend Configuration

```yaml
backends:
-  llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
-  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
-  vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
  llama-cpp:
    command: "llama-server"
    args: []
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false # Enable Docker runtime (default: false)
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false # Enable Docker runtime (default: false)
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process
    # MLX does not support Docker
    response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
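For example, to apply that header to the llama.cpp backend, the `response_headers` field could be set like this (illustrative snippet; the header name and value come from the note above):

```yaml
backends:
  llama-cpp:
    command: "llama-server"
    response_headers:
      X-Accel-Buffering: "no" # disable NGINX buffering for streamed responses
```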
**Environment Variables:**
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
-- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
-- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable

**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
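As an illustration, the Docker-enabled llama.cpp settings shown in the YAML above could also be supplied entirely through these variables (values here are placeholders):

```bash
export LLAMACTL_LLAMACPP_DOCKER_ENABLED=true
export LLAMACTL_LLAMACPP_DOCKER_IMAGE="ghcr.io/ggml-org/llama.cpp:server"
export LLAMACTL_LLAMACPP_DOCKER_ARGS="run --rm --network host --gpus all"
export LLAMACTL_LLAMACPP_DOCKER_ENV="CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4"
export LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no"
```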
### Instance Configuration

docs/getting-started/installation.md (modified)

@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```

-### Option 2: Build from Source
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
#### Using Docker Compose
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
#### Using Docker Build and Run
**llamactl with llama.cpp CUDA:**
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
**llamactl with vLLM CUDA:**
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
**llamactl built from source:**
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later

(modified docs file)

@@ -88,6 +88,21 @@ Here are basic example configurations for each backend:
}
```

## Docker Support

Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:

```yaml
backends:
  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```

## Using the API

You can also manage instances via the REST API:

(modified docs file: API reference)

@@ -116,7 +116,18 @@ Create and start a new instance.

POST /api/v1/instances/{name}
```

-**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
See [Managing Instances](managing-instances.md) for complete configuration options.
**Response:**
```json
@@ -354,7 +365,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \ -H "Authorization: Bearer your-api-key" \
-d '{ -d '{
"model": "/models/llama-2-7b.gguf" "backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}' }'
# Check instance status # Check instance status

(modified docs file: managing instances)

@@ -53,6 +53,7 @@ Each instance is displayed as a card showing:
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
6. Configure backend-specific options:
   - **llama.cpp**: Threads, context size, GPU layers, port, etc.
   - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
@@ -101,7 +102,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
"gpu_memory_utilization": 0.9 "gpu_memory_utilization": 0.9
}, },
"auto_restart": true, "auto_restart": true,
"on_demand_start": true "on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
}' }'
# Create llama.cpp instance with HuggingFace model # Create llama.cpp instance with HuggingFace model

(modified Go file: package backends)

@@ -1,6 +1,8 @@
package backends

import (
    "fmt"
    "llamactl/pkg/config"
    "reflect"
    "strconv"
    "strings"
@@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
    return args
}
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}
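For reference, with the default vLLM Docker settings above, the command assembled by this helper would look roughly like the following (illustrative; the model name, flag, and environment variable are example values, and the default volume mount is omitted):

```bash
docker run --rm --network host --gpus all --shm-size 1g \
  -e CUDA_VISIBLE_DEVICES=0,1 \
  vllm/vllm-openai:latest \
  serve microsoft/DialoGPT-medium --tensor-parallel-size 2
```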

(modified Go file: llama.cpp backend options)

@@ -7,6 +7,28 @@ import (
"strconv" "strconv"
) )
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
type LlamaServerOptions struct {
    // Common params
    VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    // Llama uses multiple flags for arrays by default (not comma-separated)
-    multipleFlags := map[string]bool{
-        "override-tensor":       true,
-        "override-kv":           true,
-        "lora":                  true,
-        "lora-scaled":           true,
-        "control-vector":        true,
-        "control-vector-scaled": true,
-        "dry-sequence-breaker":  true,
-        "logit-bias":            true,
-    }
-    return backends.BuildCommandArgs(o, multipleFlags)
    // Use package-level multiValuedFlags variable
    return backends.BuildCommandArgs(o, multiValuedFlags)
}

func (o *LlamaServerOptions) BuildDockerArgs() []string {
    // For llama, Docker args are the same as normal args
    return o.BuildCommandArgs()
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
@@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    executableNames := []string{"llama-server"}
    var subcommandNames []string // Llama has no subcommands
-    multiValuedFlags := map[string]bool{
-        "override_tensor":       true,
-        "override_kv":           true,
-        "lora":                  true,
-        "lora_scaled":           true,
-        "control_vector":        true,
-        "control_vector_scaled": true,
-        "dry_sequence_breaker":  true,
-        "logit_bias":            true,
-    }
    // Use package-level multiValuedFlags variable

    var llamaOptions LlamaServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {

(modified Go file: vLLM backend options)

@@ -4,6 +4,15 @@ import (
"llamactl/pkg/backends" "llamactl/pkg/backends"
) )
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
type VllmServerOptions struct {
    // Basic connection options (auto-assigned by llamactl)
    Host string `json:"host,omitempty"`

@@ -131,30 +140,32 @@
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
-// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
-// For vLLM, the model parameter is passed as a positional argument, not a --model flag
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
    var args []string

-    // Add model as positional argument if specified
    // Add model as positional argument if specified (for native execution)
    if o.Model != "" {
        args = append(args, o.Model)
    }

-    // Create a copy of the options without the Model field to avoid including it as --model flag
    // Create a copy without Model field to avoid --model flag
    optionsCopy := *o
-    optionsCopy.Model = "" // Clear model field so it won't be included as a flag
    optionsCopy.Model = ""

-    multipleFlags := map[string]bool{
-        "api-key":         true,
-        "allowed-origins": true,
-        "allowed-methods": true,
-        "allowed-headers": true,
-        "middleware":      true,
-    }
-    // Build the rest of the arguments as flags
-    flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
    // Use package-level multipleFlags variable
    flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
    args = append(args, flagArgs...)

    return args
}

func (o *VllmServerOptions) BuildDockerArgs() []string {
    var args []string

    // Use package-level multipleFlags variable
    flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
    args = append(args, flagArgs...)

    return args

(modified Go file: package config)

@@ -1,6 +1,7 @@
package config

import (
    "log"
    "os"
    "path/filepath"
    "runtime"

@@ -10,16 +11,28 @@ import (
    "gopkg.in/yaml.v3"
)
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
-    // Path to llama-server executable (llama.cpp backend)
-    LlamaExecutable string `yaml:"llama_executable"`
-    // Path to mlx_lm executable (MLX-LM backend)
-    MLXLMExecutable string `yaml:"mlx_lm_executable"`
-    // Path to vllm executable (vLLM backend)
-    VllmExecutable string `yaml:"vllm_executable"`
    LlamaCpp BackendSettings `yaml:"llama-cpp"`
    VLLM     BackendSettings `yaml:"vllm"`
    MLX      BackendSettings `yaml:"mlx"`
}

// AppConfig represents the configuration for llamactl
@@ -46,6 +59,9 @@ type ServerConfig struct {
    // Enable Swagger UI for API documentation
    EnableSwagger bool `yaml:"enable_swagger"`

    // Response headers to send with responses
    ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}

// InstancesConfig contains instance management configuration
@@ -123,15 +139,45 @@ func LoadConfig(configPath string) (AppConfig, error) {
            EnableSwagger: false,
        },
        Backends: BackendConfig{
-            LlamaExecutable: "llama-server",
-            MLXLMExecutable: "mlx_lm.server",
-            VllmExecutable:  "vllm",
            LlamaCpp: BackendSettings{
                Command:     "llama-server",
                Args:        []string{},
                Environment: map[string]string{},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "ghcr.io/ggml-org/llama.cpp:server",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all",
                        "-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
                    Environment: map[string]string{},
                },
            },
            VLLM: BackendSettings{
                Command: "vllm",
                Args:    []string{"serve"},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "vllm/vllm-openai:latest",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
                        "-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
                    },
                    Environment: map[string]string{},
                },
            },
            MLX: BackendSettings{
                Command: "mlx_lm.server",
                Args:    []string{},
                // No Docker section for MLX - not supported
            },
        },
        Instances: InstancesConfig{
            PortRange: [2]int{8000, 9000},
            DataDir:   getDefaultDataDirectory(),
-            InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
-            LogsDir:      filepath.Join(getDefaultDataDirectory(), "logs"),
            // NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
            // should be relative path to DataDir if not explicitly set.
            InstancesDir:        "",
            LogsDir:             "",
            AutoCreateDirs:      true,
            MaxInstances:        -1, // -1 means unlimited
            MaxRunningInstances: -1, // -1 means unlimited
@@ -159,6 +205,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
    // 3. Override with environment variables
    loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
    return cfg, nil
}
@@ -179,6 +233,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
        if err := yaml.Unmarshal(data, cfg); err != nil {
            return err
        }
        log.Printf("Read config at %s", path)
        return nil
    }
}
@@ -244,15 +299,125 @@ func loadEnvVars(cfg *AppConfig) {
        }
    }

    // Backend config
-    if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-        cfg.Backends.LlamaExecutable = llamaExec
-    }
-    if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
-        cfg.Backends.MLXLMExecutable = mlxLMExec
-    }
-    if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
-        cfg.Backends.VllmExecutable = vllmExec
-    }
    // LlamaCpp backend
    if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
        cfg.Backends.LlamaCpp.Command = llamaCmd
    }
    if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
        cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
    }
    if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
        if cfg.Backends.LlamaCpp.Environment == nil {
            cfg.Backends.LlamaCpp.Environment = make(map[string]string)
        }
        parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
    }
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
    if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
        if b, err := strconv.ParseBool(autoRestart); err == nil {
            cfg.Instances.DefaultAutoRestart = b
@@ -325,6 +490,32 @@ func ParsePortRange(s string) [2]int {
    return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
    switch runtime.GOOS {
@@ -357,6 +548,10 @@ func getDefaultDataDirectory() string {
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
    var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
    homeDir, _ := os.UserHomeDir()

    switch runtime.GOOS {
@@ -386,3 +581,17 @@ func getDefaultConfigLocations() []string {
    return locations
}
// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return BackendSettings{}
}
}

(modified Go file: config tests)

@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000", "LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOGS_DIR": "/env/logs", "LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20", "LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false", "LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7", "LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15", "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
    if cfg.Instances.MaxInstances != 20 {
        t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
    }
-    if cfg.Backends.LlamaExecutable != "/env/llama-server" {
-        t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
    if cfg.Backends.LlamaCpp.Command != "llama-server" {
        t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
    }
    if cfg.Instances.DefaultAutoRestart {
        t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances) t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
} }
} }
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
bc := &config.BackendConfig{}
// Test empty llama-cpp
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty vLLM
settings = bc.GetBackendSettings("vllm")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}

(modified Go file: instance proxy)

@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
    proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
    proxy.ModifyResponse = func(resp *http.Response) error {
        // Remove CORS headers from llama-server response to avoid conflicts
        // llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials") resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age") resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers") resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
        return nil
    }
@@ -221,14 +234,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
    i.mu.RLock()
    defer i.mu.RUnlock()
// Determine if docker is enabled for this instance's backend
var dockerEnabled bool
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeVllm:
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeMlxLm:
// MLX does not support docker currently
}
}
    // Use anonymous struct to avoid recursion
    type Alias Process
    return json.Marshal(&struct {
        *Alias
        Options       *CreateInstanceOptions `json:"options,omitempty"`
        DockerEnabled bool                   `json:"docker_enabled,omitempty"`
    }{
        Alias:         (*Alias)(i),
        Options:       i.options,
        DockerEnabled: dockerEnabled,
    })
}

(modified Go file: instance tests)

@@ -12,8 +12,18 @@ import (
func TestNewInstance(t *testing.T) {
    backendConfig := &config.BackendConfig{
-        LlamaExecutable: "llama-server",
-        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
@@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) {
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
@@ -112,8 +132,18 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
@@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) {
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
@@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) {
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
@@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
}
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{

View File

@@ -5,12 +5,14 @@ import (
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"os"
"os/exec" "os/exec"
"runtime" "runtime"
"syscall" "syscall"
"time" "time"
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/config"
) )
// Start starts the llama server instance and returns an error if it fails. // Start starts the llama server instance and returns an error if it fails.
@@ -36,29 +38,20 @@ func (i *Process) Start() error {
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create context before building command (needed for CommandContext)
i.ctx, i.cancel = context.WithCancel(context.Background())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
-args := i.options.BuildCommandArgs()
-i.ctx, i.cancel = context.WithCancel(context.Background())
-var executable string
-// Get executable from global configuration
-switch i.options.BackendType {
-case backends.BackendTypeLlamaCpp:
-executable = i.globalBackendSettings.LlamaExecutable
-case backends.BackendTypeMlxLm:
-executable = i.globalBackendSettings.MLXLMExecutable
-case backends.BackendTypeVllm:
-executable = i.globalBackendSettings.VllmExecutable
-default:
-return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
-}
-i.cmd = exec.CommandContext(i.ctx, executable, args...)
// Build command using backend-specific methods
cmd, cmdErr := i.buildCommand()
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
i.cmd = cmd
if runtime.GOOS != "windows" {
setProcAttrs(i.cmd)
@@ -372,3 +365,53 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i
return true, maxRestarts, restartDelay
}
// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
// Get backend configuration
backendConfig, err := i.getBackendConfig()
if err != nil {
return nil, err
}
// Build the environment variables
env := i.options.BuildEnvironment(backendConfig)
// Get the command to execute
command := i.options.GetCommand(backendConfig)
// Build command arguments
args := i.options.BuildCommandArgs(backendConfig)
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
}
// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
var backendTypeStr string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
backendTypeStr = "llama-cpp"
case backends.BackendTypeMlxLm:
backendTypeStr = "mlx"
case backends.BackendTypeVllm:
backendTypeStr = "vllm"
default:
return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
return &settings, nil
}
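
The buildCommand helper above starts from the host environment and appends instance-specific variables, so host settings such as PATH or CUDA_VISIBLE_DEVICES survive while explicit overrides win. A reduced sketch of that layering follows; the command and variable name are assumptions for illustration, and exec.Cmd keeps the last value when a key appears twice in Env.

package main

import (
	"context"
	"fmt"
	"os"
	"os/exec"
)

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	cmd := exec.CommandContext(ctx, "llama-server", "--port", "8081")
	// Start from the host environment, then append overrides;
	// for duplicate keys the later entry is the one that takes effect.
	cmd.Env = os.Environ()
	for k, v := range map[string]string{"EXAMPLE_THREADS": "8"} {
		cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
	}
	if err := cmd.Start(); err != nil {
		fmt.Println("start failed:", err)
	}
}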

View File

@@ -9,6 +9,7 @@ import (
"llamactl/pkg/backends/vllm" "llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
"maps"
) )
type CreateInstanceOptions struct { type CreateInstanceOptions struct {
@@ -20,6 +21,8 @@ type CreateInstanceOptions struct {
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
//Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
@@ -188,24 +191,75 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
}
}
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
return "docker"
}
return backendConfig.Command
}
// BuildCommandArgs builds command line arguments for the backend
-func (c *CreateInstanceOptions) BuildCommandArgs() []string {
-switch c.BackendType {
-case backends.BackendTypeLlamaCpp:
-if c.LlamaServerOptions != nil {
-return c.LlamaServerOptions.BuildCommandArgs()
-}
-case backends.BackendTypeMlxLm:
-if c.MlxServerOptions != nil {
-return c.MlxServerOptions.BuildCommandArgs()
-}
-case backends.BackendTypeVllm:
-if c.VllmServerOptions != nil {
-// Prepend "serve" as first argument
-args := []string{"serve"}
-args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
-return args
-}
-}
-return []string{}
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
var args []string
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
// For Docker, start with Docker args
args = append(args, backendConfig.Docker.Args...)
args = append(args, backendConfig.Docker.Image)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
}
}
} else {
// For native execution, start with backend args
args = append(args, backendConfig.Args...)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
}
}
}
return args
}
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
env := map[string]string{}
if backendConfig.Environment != nil {
maps.Copy(env, backendConfig.Environment)
}
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
if backendConfig.Docker.Environment != nil {
maps.Copy(env, backendConfig.Docker.Environment)
}
}
if c.Environment != nil {
maps.Copy(env, c.Environment)
}
return env
}
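
BuildEnvironment merges three maps in a fixed order, so instance-level variables override Docker-level ones, which in turn override the plain backend defaults. A small sketch of that precedence using maps.Copy; the keys and values here are invented for illustration.

package main

import (
	"fmt"
	"maps"
)

func main() {
	backendEnv := map[string]string{"LOG_LEVEL": "info", "THREADS": "4"}
	dockerEnv := map[string]string{"LOG_LEVEL": "debug"}
	instanceEnv := map[string]string{"THREADS": "16"}

	env := map[string]string{}
	maps.Copy(env, backendEnv)  // backend defaults
	maps.Copy(env, dockerEnv)   // Docker settings override defaults when Docker is enabled
	maps.Copy(env, instanceEnv) // per-instance values win last
	fmt.Println(env) // map[LOG_LEVEL:debug THREADS:16]
}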

View File

@@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
@@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) {
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
@@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
@@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
@@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
@@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
func TestTimeoutConfiguration_Validation(t *testing.T) {
backendConfig := &config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{

View File

@@ -16,8 +16,12 @@ import (
func TestNewInstanceManager(t *testing.T) {
backendConfig := config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
@@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
@@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) {
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{

View File

@@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test max instances limit
backendConfig := config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},

View File

@@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
-for name, _ := range im.runningInstances {
for name := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue

View File

@@ -14,8 +14,8 @@ import (
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
backendConfig := config.BackendConfig{
-LlamaExecutable: "llama-server",
-MLXLMExecutable: "mlx_lm.server",
LlamaCpp: config.BackendSettings{Command: "llama-server"},
MLX: config.BackendSettings{Command: "mlx_lm.server"},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},

View File

@@ -1,13 +1,14 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
-import { Server } from "lucide-react";
import { Server, Package } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
docker?: boolean;
}
-const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
if (!backend) {
return null;
}
@@ -39,13 +40,25 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
};
return (
-<Badge
-variant="outline"
-className={`flex items-center gap-1.5 ${getColorClasses()}`}
->
-<Server className="h-3 w-3" />
-<span className="text-xs">{getText()}</span>
-</Badge>
<div className="flex items-center gap-1">
<Badge
variant="outline"
className={`flex items-center gap-1.5 ${getColorClasses()}`}
>
<Server className="h-3 w-3" />
<span className="text-xs">{getText()}</span>
</Badge>
{docker && (
<Badge
variant="outline"
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
title="Docker enabled"
>
<Package className="h-3 w-3" />
<span className="text-[10px] uppercase tracking-wide">Docker</span>
</Badge>
)}
</div>
);
};

View File

@@ -66,7 +66,7 @@ function InstanceCard({
{/* Badges row */}
<div className="flex items-center gap-2 flex-wrap">
-<BackendBadge backend={instance.options?.backend_type} />
<BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
{running && <HealthBadge health={health} />}
</div>
</div>

View File

@@ -1,144 +0,0 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
value: string | number | boolean | string[] | undefined
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey, newValue)
}
const renderField = () => {
// Special handling for backend_type field - render as dropdown
if (fieldKey === 'backend_type') {
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<select
id={fieldKey}
value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
onChange={(e) => handleChange(e.target.value || undefined)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
<option value={BackendType.MLX_LM}>MLX LM</option>
<option value={BackendType.VLLM}>vLLM</option>
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default ZodFormField

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
// Convert the value object to an array of key-value pairs for editing
const envVarsFromValue = value
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
: []
const [envVars, setEnvVars] = useState<EnvVar[]>(
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
)
// Update parent component when env vars change
const updateParent = (newEnvVars: EnvVar[]) => {
// Filter out empty entries
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
if (validVars.length === 0) {
onChange(undefined)
} else {
const envObject = validVars.reduce((acc, env) => {
acc[env.key.trim()] = env.value.trim()
return acc
}, {} as Record<string, string>)
onChange(envObject)
}
}
const handleKeyChange = (index: number, newKey: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].key = newKey
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const handleValueChange = (index: number, newValue: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].value = newValue
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const addEnvVar = () => {
const newEnvVars = [...envVars, { key: '', value: '' }]
setEnvVars(newEnvVars)
}
const removeEnvVar = (index: number) => {
if (envVars.length === 1) {
// Reset to empty if it's the last one
const newEnvVars = [{ key: '', value: '' }]
setEnvVars(newEnvVars)
updateParent(newEnvVars)
} else {
const newEnvVars = envVars.filter((_, i) => i !== index)
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
}
return (
<div className={`grid gap-2 ${className || ''}`}>
<Label htmlFor={id}>
{label}
</Label>
<div className="space-y-2">
{envVars.map((envVar, index) => (
<div key={index} className="flex gap-2 items-center">
<Input
placeholder="Variable name"
value={envVar.key}
onChange={(e) => handleKeyChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Input
placeholder="Variable value"
value={envVar.value}
onChange={(e) => handleValueChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => removeEnvVar(index)}
disabled={disabled}
className="shrink-0"
>
<X className="h-4 w-4" />
</Button>
</div>
))}
<Button
type="button"
variant="outline"
size="sm"
onClick={addEnvVar}
disabled={disabled}
className="w-fit"
>
<Plus className="h-4 w-4 mr-2" />
Add Variable
</Button>
</div>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">
Environment variables that will be passed to the backend process
</p>
</div>
)
}
export default EnvironmentVariablesInput

View File

@@ -1,99 +0,0 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
formData,
onChange
}) => {
const basicFields = getBasicFields()
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const fieldType = getFieldType(fieldKey)
// Special handling for backend_type field
if (fieldKey === 'backend_type') {
return (
<SelectInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] || BackendType.LLAMA_CPP}
onChange={(value) => onChange(fieldKey, value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description={config.description}
/>
)
}
// Render based on field type
switch (fieldType) {
case 'boolean':
return (
<CheckboxInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as boolean | undefined}
onChange={(value) => onChange(fieldKey, value)}
description={config.description}
/>
)
case 'number':
return (
<NumberInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
default:
return (
<TextInput
key={fieldKey}
id={fieldKey}
label={config.label}
value={formData[fieldKey] as string | number | undefined}
onChange={(value) => onChange(fieldKey, value)}
placeholder={config.placeholder}
description={config.description}
/>
)
}
}
// Filter out auto restart fields and backend_options (handled separately)
const fieldsToRender = basicFields.filter(
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{fieldsToRender.map(renderField)}
</div>
)
}
export default BasicInstanceFields

View File

@@ -6,6 +6,7 @@ import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
interface InstanceSettingsCardProps {
instanceName: string
@@ -75,6 +76,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div>
</CardContent>
</Card>

View File

@@ -1,4 +1,4 @@
-import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
-const response = await fetch('/api/v1/instances', {
const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -1,5 +1,5 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally
const mockFetch = vi.fn()
@@ -53,7 +53,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith(
-'/api/v1/instances/test-instance/logs?lines=100',
expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object)
)
})

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
-const API_BASE = "/api/v1";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(

View File

@@ -1,12 +1,10 @@
import {
-type CreateInstanceOptions,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
-getAllFieldKeys,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
@@ -15,41 +13,6 @@ import {
getVllmFieldType
} from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific)
export const basicFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
auto_restart: {
label: 'Auto Restart',
description: 'Automatically restart the instance on failure'
},
max_restarts: {
label: 'Max Restarts',
placeholder: '3',
description: 'Maximum number of restart attempts (0 = unlimited)'
},
restart_delay: {
label: 'Restart Delay (seconds)',
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
idle_timeout: {
label: 'Idle Timeout (minutes)',
placeholder: '60',
description: 'Time in minutes before instance is considered idle and stopped'
},
on_demand_start: {
label: 'On-Demand Start',
description: 'Start instance upon receiving OpenAI-compatible API request'
},
backend_type: {
label: 'Backend Type',
description: 'Type of backend to use for this instance'
}
}
// LlamaCpp backend-specific basic fields
const basicLlamaCppFieldsConfig: Record<string, {
label: string
@@ -152,18 +115,6 @@ const backendFieldGetters = {
llama_cpp: getAllLlamaCppFieldKeys,
} as const
function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
}
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
}
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
}
export function getBasicBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
return 'text'
}
// Re-export the Zod-based functions
export { getFieldType } from '@/schemas/instanceOptions'

View File

@@ -33,6 +33,9 @@ export const CreateInstanceOptionsSchema = z.object({
idle_timeout: z.number().optional(),
on_demand_start: z.boolean().optional(),
// Environment variables
environment: z.record(z.string(), z.string()).optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
@@ -75,5 +78,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodObject) return 'object'
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
return 'text' // ZodString and others default to text
}

View File

@@ -23,4 +23,5 @@ export interface Instance {
name: string;
status: InstanceStatus;
options?: CreateInstanceOptions;
docker_enabled?: boolean; // indicates backend is running via Docker
}

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'],
css: true,
},
// ensures relative asset paths to support being served behind a subpath
base: "./"
})