mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 00:54:23 +00:00
Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1892dc8315 | |||
|
|
997bd1b063 | ||
|
|
fa43f9e967 | ||
| db9eebeb8b | |||
| bd062f8ca0 | |||
| 8ebdb1a183 | |||
| 7272212081 | |||
| 035e184789 | |||
| d15976e7aa | |||
| 4fa75d9801 | |||
|
|
0e1bc8a352 | ||
| 1e5e86d2c3 | |||
| 25d3d70707 | |||
| e54cfd006d | |||
| 7d39e7ee86 | |||
| 222d913b4a | |||
|
|
03a7a5d139 | ||
|
|
e50660c379 | ||
|
|
5906d89f8d | ||
| cb2d95139f | |||
| 889a8707e7 | |||
| 070c91787d | |||
| 169ee422ec | |||
| bb0176b7f5 | |||
| 291ec7995f | |||
| b940b38e46 | |||
| 92cb57e816 | |||
| 0ecd55c354 | |||
| b4c17194eb | |||
| 808092decf | |||
| 12bbf34236 | |||
| 9a7255a52d | |||
| 97a7c9a4e3 | |||
| fa9335663a | |||
| d092518114 | |||
| ffa0a0c161 | |||
| 1fbf809a2d | |||
| c984d95723 | |||
| 50e1355205 | |||
| 7994fd05b3 | |||
|
|
f496a28f04 | ||
| f9371e876d | |||
|
|
3a979da815 |
45
.dockerignore
Normal file
45
.dockerignore
Normal file
@@ -0,0 +1,45 @@
|
||||
# Patterns are matched relative to the build-context root. A pattern without a
# leading "**/" only matches at the top level, so file-type patterns that
# should apply anywhere in the tree use "**/".

# Git and version control
.git/
.gitignore

# Documentation (top-level only)
*.md
docs/

# Development files
.vscode/
.idea/

# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
**/*.log
**/*.tmp

# Data directories
data/
models/
logs/

# Test files ("**/" also matches zero directories, i.e. the root)
**/*_test.go

# CI/CD
.github/

# Local configuration
llamactl.yaml
config.yaml
.env
.env.local

# OS files
**/.DS_Store
**/Thumbs.db

# Backup files
**/*.bak
**/*.backup
**/*~
|
||||
75
README.md
75
README.md
@@ -22,7 +22,8 @@
|
||||
|
||||
### ⚡ Smart Operations
|
||||
- **Instance Monitoring**: Health checks, auto-restart, log management
|
||||
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
||||
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
||||
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
|
||||
|
||||

|
||||
|
||||
@@ -52,7 +53,8 @@ llamactl
|
||||
2. Click "Create Instance"
|
||||
3. Choose backend type (llama.cpp, MLX, or vLLM)
|
||||
4. Set model path and backend-specific options
|
||||
5. Start or stop the instance
|
||||
5. Configure environment variables if needed (optional)
|
||||
6. Start or stop the instance
|
||||
|
||||
### Or use the REST API:
|
||||
```bash
|
||||
@@ -66,10 +68,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
|
||||
-H "Authorization: Bearer your-key" \
|
||||
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
|
||||
|
||||
# Create vLLM instance
|
||||
# Create vLLM instance with environment variables
|
||||
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
|
||||
-H "Authorization: Bearer your-key" \
|
||||
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
|
||||
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
|
||||
|
||||
# Use with OpenAI SDK
|
||||
curl -X POST localhost:8080/v1/chat/completions \
|
||||
@@ -93,7 +95,30 @@ sudo mv llamactl /usr/local/bin/
|
||||
# Windows - Download from releases page
|
||||
```
|
||||
|
||||
### Option 2: Build from Source
|
||||
### Option 2: Docker (No local backend installation required)
|
||||
|
||||
```bash
|
||||
# Clone repository and build Docker images
|
||||
git clone https://github.com/lordmathis/llamactl.git
|
||||
cd llamactl
|
||||
mkdir -p data/llamacpp data/vllm models
|
||||
|
||||
# Build and start llamactl with llama.cpp CUDA backend
|
||||
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
|
||||
|
||||
# Build and start llamactl with vLLM CUDA backend
|
||||
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
|
||||
|
||||
# Build from source using multi-stage build
|
||||
docker build -f docker/Dockerfile.source -t llamactl:source .
|
||||
```
|
||||
|
||||
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
|
||||
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
|
||||
|
||||
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
|
||||
|
||||
### Option 3: Build from Source
|
||||
Requires Go 1.24+ and Node.js 22+
|
||||
```bash
|
||||
git clone https://github.com/lordmathis/llamactl.git
|
||||
@@ -145,47 +170,23 @@ pip install vllm
|
||||
# Or use Docker - no local installation required
|
||||
```
|
||||
|
||||
## Docker Support
|
||||
## Backend Docker Support
|
||||
|
||||
llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for:
|
||||
- Production deployments without local backend installation
|
||||
- Isolating backend dependencies
|
||||
- GPU-accelerated inference using official Docker images
|
||||
|
||||
### Docker Configuration
|
||||
|
||||
Enable Docker support using the new structured backend configuration:
|
||||
llamactl can run backends in Docker containers:
|
||||
|
||||
```yaml
|
||||
backends:
|
||||
llama-cpp:
|
||||
command: "llama-server"
|
||||
docker:
|
||||
enabled: true
|
||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||
|
||||
vllm:
|
||||
command: "vllm"
|
||||
args: ["serve"]
|
||||
docker:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||
```
|
||||
|
||||
### Key Features
|
||||
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
|
||||
|
||||
- **Host Networking**: Uses `--network host` for seamless port management
|
||||
- **GPU Support**: Includes `--gpus all` for GPU acceleration
|
||||
- **Environment Variables**: Configure container environment as needed
|
||||
- **Flexible Configuration**: Per-backend Docker settings with sensible defaults
|
||||
|
||||
### Requirements
|
||||
|
||||
- Docker installed and running
|
||||
- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
|
||||
- No local backend installation required when using Docker
|
||||
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
|
||||
|
||||
## Configuration
|
||||
|
||||
@@ -196,30 +197,34 @@ server:
|
||||
host: "0.0.0.0" # Server host to bind to
|
||||
port: 8080 # Server port to bind to
|
||||
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
||||
allowed_headers: ["*"] # Allowed CORS headers (default: all)
|
||||
enable_swagger: false # Enable Swagger UI for API docs
|
||||
|
||||
backends:
|
||||
llama-cpp:
|
||||
command: "llama-server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false
|
||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||
environment: {}
|
||||
environment: {} # Environment variables for the container
|
||||
|
||||
vllm:
|
||||
command: "vllm"
|
||||
args: ["serve"]
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false
|
||||
image: "vllm/vllm-openai:latest"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||
environment: {}
|
||||
environment: {} # Environment variables for the container
|
||||
|
||||
mlx:
|
||||
command: "mlx_lm.server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
|
||||
instances:
|
||||
port_range: [8000, 9000] # Port range for instances
|
||||
|
||||
23
docker/Dockerfile.llamacpp
Normal file
23
docker/Dockerfile.llamacpp
Normal file
@@ -0,0 +1,23 @@
|
||||
# llamactl on top of the llama.cpp CUDA server image.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Install curl (and CA certificates for HTTPS) for downloading llamactl
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# -f makes curl fail on HTTP errors (e.g. GitHub API rate limiting) and the
# explicit test aborts the build when the release tag could not be resolved,
# instead of continuing with a malformed download URL.
RUN set -eu; \
    LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'); \
    test -n "${LATEST_VERSION}" || { echo "could not determine latest llamactl release" >&2; exit 1; }; \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz; \
    mv llamactl /usr/local/bin/; \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Point llamactl at the llama-server binary and shared libraries in /app
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]
|
||||
64
docker/Dockerfile.source
Normal file
64
docker/Dockerfile.source
Normal file
@@ -0,0 +1,64 @@
|
||||
# Multi-stage build: webui (Node) -> Go binary -> minimal Alpine runtime image.

# WebUI build stage
# Node 22 matches the documented "Node.js 22+" requirement for building from source.
FROM node:22-alpine AS webui-builder

WORKDIR /webui

# Copy webui package files first so the dependency layer is cached
COPY webui/package*.json ./

# Install dependencies
RUN npm ci

# Copy webui source
COPY webui/ ./

# Build webui
RUN npm run build

# Go build stage
FROM golang:1.24-alpine AS builder

# Install build dependencies
RUN apk add --no-cache git ca-certificates

# Set working directory
WORKDIR /build

# Copy go mod files
COPY go.mod go.sum ./

# Download dependencies
RUN go mod download

# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/

# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist

# Build the application as a static, stripped binary
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server

# Final stage
FROM alpine:latest

# Install runtime dependencies
RUN apk --no-cache add ca-certificates

# Create data directory
RUN mkdir -p /data

# Set working directory
WORKDIR /data

# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl

# Expose the default port
EXPOSE 8080

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]
|
||||
20
docker/Dockerfile.vllm
Normal file
20
docker/Dockerfile.vllm
Normal file
@@ -0,0 +1,20 @@
|
||||
# llamactl on top of the vLLM OpenAI-compatible server image.
FROM vllm/vllm-openai:latest

# Install curl (and CA certificates for HTTPS) for downloading llamactl
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# -f makes curl fail on HTTP errors (e.g. GitHub API rate limiting) and the
# explicit test aborts the build when the release tag could not be resolved,
# instead of continuing with a malformed download URL.
RUN set -eu; \
    LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'); \
    test -n "${LATEST_VERSION}" || { echo "could not determine latest llamactl release" >&2; exit 1; }; \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz; \
    mv llamactl /usr/local/bin/; \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Set llamactl as the entrypoint (replaces the base image's entrypoint)
ENTRYPOINT ["llamactl"]
|
||||
56
docker/docker-compose.yml
Normal file
56
docker/docker-compose.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
version: '3.8'

# Two alternative llamactl services, one per CUDA backend image.
# Relative host paths below are resolved from this file's directory (docker/),
# so "../" is needed to reach the data/ and models/ directories created in the
# repository root.
services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ../data/llamacpp:/data
      - ../models:/models  # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Docker runtime for the backend is disabled here; set to true only if
      # nested containers are needed
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ../data/vllm:/data
      - ../models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Docker runtime for the backend is disabled here; set to true only if
      # nested containers are needed
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # NOTE: CUDA_VISIBLE_DEVICES is intentionally not set. It only accepts
      # device indices/UUIDs ("all" is not a valid value and hides every GPU);
      # the device reservation below already exposes all GPUs.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
|
||||
@@ -17,30 +17,37 @@ server:
|
||||
host: "0.0.0.0" # Server host to bind to
|
||||
port: 8080 # Server port to bind to
|
||||
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
||||
allowed_headers: ["*"] # Allowed CORS headers (default: all)
|
||||
enable_swagger: false # Enable Swagger UI for API docs
|
||||
|
||||
backends:
|
||||
llama-cpp:
|
||||
command: "llama-server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false
|
||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||
environment: {}
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
|
||||
vllm:
|
||||
command: "vllm"
|
||||
args: ["serve"]
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false
|
||||
image: "vllm/vllm-openai:latest"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||
environment: {}
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
|
||||
mlx:
|
||||
command: "mlx_lm.server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
|
||||
instances:
|
||||
port_range: [8000, 9000] # Port range for instances
|
||||
@@ -98,6 +105,7 @@ server:
|
||||
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
|
||||
port: 8080 # Server port to bind to (default: 8080)
|
||||
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
|
||||
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
|
||||
enable_swagger: false # Enable Swagger UI (default: false)
|
||||
```
|
||||
|
||||
@@ -113,36 +121,74 @@ backends:
|
||||
llama-cpp:
|
||||
command: "llama-server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false # Enable Docker runtime (default: false)
|
||||
enabled: false # Enable Docker runtime (default: false)
|
||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||
environment: {}
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
|
||||
vllm:
|
||||
command: "vllm"
|
||||
args: ["serve"]
|
||||
environment: {} # Environment variables for the backend process
|
||||
docker:
|
||||
enabled: false
|
||||
enabled: false # Enable Docker runtime (default: false)
|
||||
image: "vllm/vllm-openai:latest"
|
||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||
environment: {}
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
|
||||
mlx:
|
||||
command: "mlx_lm.server"
|
||||
args: []
|
||||
environment: {} # Environment variables for the backend process
|
||||
# MLX does not support Docker
|
||||
response_headers: {} # Additional response headers to send with responses
|
||||
```
|
||||
|
||||
**Backend Configuration Fields:**
|
||||
- `command`: Executable name/path for the backend
|
||||
- `args`: Default arguments prepended to all instances
|
||||
- `environment`: Environment variables for the backend process (optional)
|
||||
- `response_headers`: Additional response headers to send with responses (optional)
|
||||
- `docker`: Docker-specific configuration (optional)
|
||||
- `enabled`: Boolean flag to enable Docker runtime
|
||||
- `image`: Docker image to use
|
||||
- `args`: Additional arguments passed to `docker run`
|
||||
- `environment`: Environment variables for the container (optional)
|
||||
|
||||
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
|
||||
|
||||
**Environment Variables:**
|
||||
|
||||
**LlamaCpp Backend:**
|
||||
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
|
||||
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
|
||||
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
|
||||
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
|
||||
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||
|
||||
**VLLM Backend:**
|
||||
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
|
||||
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
|
||||
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
|
||||
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
|
||||
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||
|
||||
**MLX Backend:**
|
||||
- `LLAMACTL_MLX_COMMAND` - MLX executable command
|
||||
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
|
||||
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||
|
||||
### Instance Configuration
|
||||
|
||||
```yaml
|
||||
|
||||
@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
|
||||
# Windows - Download from releases page
|
||||
```
|
||||
|
||||
### Option 2: Build from Source
|
||||
### Option 2: Docker
|
||||
|
||||
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
|
||||
|
||||
**Available Dockerfiles (CUDA):**
|
||||
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
|
||||
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
|
||||
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
|
||||
|
||||
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
|
||||
|
||||
#### Using Docker Compose
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/lordmathis/llamactl.git
|
||||
cd llamactl
|
||||
|
||||
# Create directories for data and models
|
||||
mkdir -p data/llamacpp data/vllm models
|
||||
|
||||
# Start llamactl with llama.cpp backend
|
||||
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
|
||||
|
||||
# Or start llamactl with vLLM backend
|
||||
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
|
||||
```
|
||||
|
||||
Access the dashboard at:
|
||||
- llamactl with llama.cpp: http://localhost:8080
|
||||
- llamactl with vLLM: http://localhost:8081
|
||||
|
||||
#### Using Docker Build and Run
|
||||
|
||||
**llamactl with llama.cpp CUDA:**
|
||||
```bash
|
||||
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
|
||||
docker run -d \
|
||||
--name llamactl-llamacpp \
|
||||
--gpus all \
|
||||
-p 8080:8080 \
|
||||
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
|
||||
llamactl:llamacpp-cuda
|
||||
```
|
||||
|
||||
**llamactl with vLLM CUDA:**
|
||||
```bash
|
||||
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
|
||||
docker run -d \
|
||||
--name llamactl-vllm \
|
||||
--gpus all \
|
||||
-p 8080:8080 \
|
||||
-v ~/.cache/huggingface:/root/.cache/huggingface \
|
||||
llamactl:vllm-cuda
|
||||
```
|
||||
|
||||
**llamactl built from source:**
|
||||
```bash
|
||||
docker build -f docker/Dockerfile.source -t llamactl:source .
|
||||
docker run -d \
|
||||
--name llamactl \
|
||||
-p 8080:8080 \
|
||||
llamactl:source
|
||||
```
|
||||
|
||||
### Option 3: Build from Source
|
||||
|
||||
Requirements:
|
||||
- Go 1.24 or later
|
||||
|
||||
@@ -116,7 +116,18 @@ Create and start a new instance.
|
||||
POST /api/v1/instances/{name}
|
||||
```
|
||||
|
||||
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
|
||||
**Request Body:** JSON object with instance configuration. Common fields include:
|
||||
|
||||
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
|
||||
- `backend_options`: Backend-specific configuration
|
||||
- `auto_restart`: Enable automatic restart on failure
|
||||
- `max_restarts`: Maximum restart attempts
|
||||
- `restart_delay`: Delay between restarts in seconds
|
||||
- `on_demand_start`: Start instance when receiving requests
|
||||
- `idle_timeout`: Idle timeout in minutes
|
||||
- `environment`: Environment variables as key-value pairs
|
||||
|
||||
See [Managing Instances](managing-instances.md) for complete configuration options.
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
@@ -354,7 +365,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer your-api-key" \
|
||||
-d '{
|
||||
"model": "/models/llama-2-7b.gguf"
|
||||
"backend_type": "llama_cpp",
|
||||
"backend_options": {
|
||||
"model": "/models/llama-2-7b.gguf",
|
||||
"gpu_layers": 32
|
||||
},
|
||||
"environment": {
|
||||
"CUDA_VISIBLE_DEVICES": "0",
|
||||
"OMP_NUM_THREADS": "8"
|
||||
}
|
||||
}'
|
||||
|
||||
# Check instance status
|
||||
|
||||
@@ -53,6 +53,7 @@ Each instance is displayed as a card showing:
|
||||
- **Restart Delay**: Delay in seconds between restart attempts
|
||||
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
||||
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
||||
- **Environment Variables**: Set custom environment variables for the instance process
|
||||
6. Configure backend-specific options:
|
||||
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
||||
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
||||
@@ -101,7 +102,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
|
||||
"gpu_memory_utilization": 0.9
|
||||
},
|
||||
"auto_restart": true,
|
||||
"on_demand_start": true
|
||||
"on_demand_start": true,
|
||||
"environment": {
|
||||
"CUDA_VISIBLE_DEVICES": "0,1",
|
||||
"NCCL_DEBUG": "INFO",
|
||||
"PYTHONPATH": "/custom/path"
|
||||
}
|
||||
}'
|
||||
|
||||
# Create llama.cpp instance with HuggingFace model
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
@@ -12,9 +13,11 @@ import (
|
||||
|
||||
// BackendSettings contains structured backend configuration
|
||||
type BackendSettings struct {
|
||||
Command string `yaml:"command"`
|
||||
Args []string `yaml:"args"`
|
||||
Docker *DockerSettings `yaml:"docker,omitempty"`
|
||||
Command string `yaml:"command"`
|
||||
Args []string `yaml:"args"`
|
||||
Environment map[string]string `yaml:"environment,omitempty"`
|
||||
Docker *DockerSettings `yaml:"docker,omitempty"`
|
||||
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
|
||||
}
|
||||
|
||||
// DockerSettings contains Docker-specific configuration
|
||||
@@ -54,8 +57,14 @@ type ServerConfig struct {
|
||||
// Allowed origins for CORS (e.g., "http://localhost:3000")
|
||||
AllowedOrigins []string `yaml:"allowed_origins"`
|
||||
|
||||
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
|
||||
AllowedHeaders []string `yaml:"allowed_headers"`
|
||||
|
||||
// Enable Swagger UI for API documentation
|
||||
EnableSwagger bool `yaml:"enable_swagger"`
|
||||
|
||||
// Response headers to send with responses
|
||||
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
|
||||
}
|
||||
|
||||
// InstancesConfig contains instance management configuration
|
||||
@@ -130,12 +139,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
||||
Host: "0.0.0.0",
|
||||
Port: 8080,
|
||||
AllowedOrigins: []string{"*"}, // Default to allow all origins
|
||||
AllowedHeaders: []string{"*"}, // Default to allow all headers
|
||||
EnableSwagger: false,
|
||||
},
|
||||
Backends: BackendConfig{
|
||||
LlamaCpp: BackendSettings{
|
||||
Command: "llama-server",
|
||||
Args: []string{},
|
||||
Command: "llama-server",
|
||||
Args: []string{},
|
||||
Environment: map[string]string{},
|
||||
Docker: &DockerSettings{
|
||||
Enabled: false,
|
||||
Image: "ghcr.io/ggml-org/llama.cpp:server",
|
||||
@@ -165,10 +176,12 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
||||
},
|
||||
},
|
||||
Instances: InstancesConfig{
|
||||
PortRange: [2]int{8000, 9000},
|
||||
DataDir: getDefaultDataDirectory(),
|
||||
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
|
||||
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
|
||||
PortRange: [2]int{8000, 9000},
|
||||
DataDir: getDefaultDataDirectory(),
|
||||
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
|
||||
// should be relative path to DataDir if not explicitly set.
|
||||
InstancesDir: "",
|
||||
LogsDir: "",
|
||||
AutoCreateDirs: true,
|
||||
MaxInstances: -1, // -1 means unlimited
|
||||
MaxRunningInstances: -1, // -1 means unlimited
|
||||
@@ -196,6 +209,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
||||
// 3. Override with environment variables
|
||||
loadEnvVars(&cfg)
|
||||
|
||||
// If InstancesDir or LogsDir is not set, default it to a subdirectory of DataDir
|
||||
if cfg.Instances.InstancesDir == "" {
|
||||
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
|
||||
}
|
||||
if cfg.Instances.LogsDir == "" {
|
||||
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
@@ -216,6 +237,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
|
||||
if err := yaml.Unmarshal(data, cfg); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Printf("Read config at %s", path)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -288,6 +310,12 @@ func loadEnvVars(cfg *AppConfig) {
|
||||
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
|
||||
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
|
||||
}
|
||||
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
|
||||
if cfg.Backends.LlamaCpp.Environment == nil {
|
||||
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
|
||||
}
|
||||
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
|
||||
}
|
||||
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
|
||||
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
|
||||
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||
@@ -315,18 +343,28 @@ func loadEnvVars(cfg *AppConfig) {
|
||||
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
|
||||
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
|
||||
}
|
||||
// Parse env vars in format "KEY1=value1,KEY2=value2"
|
||||
for _, envPair := range strings.Split(llamaDockerEnv, ",") {
|
||||
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
||||
cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
|
||||
}
|
||||
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
|
||||
}
|
||||
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
|
||||
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
|
||||
}
|
||||
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
|
||||
}
|
||||
|
||||
// vLLM backend
|
||||
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
|
||||
cfg.Backends.VLLM.Command = vllmCmd
|
||||
}
|
||||
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
|
||||
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
|
||||
}
|
||||
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
|
||||
if cfg.Backends.VLLM.Environment == nil {
|
||||
cfg.Backends.VLLM.Environment = make(map[string]string)
|
||||
}
|
||||
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
|
||||
}
|
||||
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
|
||||
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
|
||||
if cfg.Backends.VLLM.Docker == nil {
|
||||
@@ -354,12 +392,13 @@ func loadEnvVars(cfg *AppConfig) {
|
||||
if cfg.Backends.VLLM.Docker.Environment == nil {
|
||||
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
|
||||
}
|
||||
// Parse env vars in format "KEY1=value1,KEY2=value2"
|
||||
for _, envPair := range strings.Split(vllmDockerEnv, ",") {
|
||||
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
||||
cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
|
||||
}
|
||||
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
|
||||
}
|
||||
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||
if cfg.Backends.VLLM.ResponseHeaders == nil {
|
||||
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
|
||||
}
|
||||
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
|
||||
}
|
||||
|
||||
// MLX backend
|
||||
@@ -369,6 +408,18 @@ func loadEnvVars(cfg *AppConfig) {
|
||||
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
|
||||
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
|
||||
}
|
||||
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
|
||||
if cfg.Backends.MLX.Environment == nil {
|
||||
cfg.Backends.MLX.Environment = make(map[string]string)
|
||||
}
|
||||
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
|
||||
}
|
||||
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||
if cfg.Backends.MLX.ResponseHeaders == nil {
|
||||
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
|
||||
}
|
||||
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
|
||||
}
|
||||
|
||||
// Instance defaults
|
||||
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
|
||||
@@ -443,6 +494,32 @@ func ParsePortRange(s string) [2]int {
|
||||
return [2]int{0, 0} // Invalid format
|
||||
}
|
||||
|
||||
// parseEnvVars parses environment variables in the format
// "KEY1=value1,KEY2=value2" and stores every pair in envMap.
//
// Whitespace around each comma-separated pair is trimmed. Pairs without an
// "=" or with an empty key are skipped. Only the first "=" separates key from
// value, so values may themselves contain "=". An empty envString or a nil
// envMap is a no-op.
func parseEnvVars(envString string, envMap map[string]string) {
	parseKeyValuePairs(envString, ",", envMap)
}

// parseHeaders parses HTTP headers in the format "KEY1=value1;KEY2=value2"
// and stores every pair in envMap (the destination header map).
//
// Pairs are separated by ";" rather than "," so that header values may
// contain commas. Parsing rules are otherwise the same as parseEnvVars.
func parseHeaders(envString string, envMap map[string]string) {
	parseKeyValuePairs(envString, ";", envMap)
}

// parseKeyValuePairs splits input on separator and writes each "key=value"
// pair into dst, skipping malformed pairs and pairs with an empty key.
func parseKeyValuePairs(input, separator string, dst map[string]string) {
	// Writing to a nil map panics, so treat it like empty input.
	if input == "" || dst == nil {
		return
	}
	for _, pair := range strings.Split(input, separator) {
		parts := strings.SplitN(strings.TrimSpace(pair), "=", 2)
		// An empty key ("=value") is never a valid variable or header name.
		if len(parts) != 2 || parts[0] == "" {
			continue
		}
		dst[parts[0]] = parts[1]
	}
}
|
||||
|
||||
// getDefaultDataDirectory returns platform-specific default data directory
|
||||
func getDefaultDataDirectory() string {
|
||||
switch runtime.GOOS {
|
||||
@@ -475,6 +552,10 @@ func getDefaultDataDirectory() string {
|
||||
// getDefaultConfigLocations returns platform-specific config file locations
|
||||
func getDefaultConfigLocations() []string {
|
||||
var locations []string
|
||||
// Use ./llamactl.yaml and ./config.yaml as the default config file
|
||||
locations = append(locations, "llamactl.yaml")
|
||||
locations = append(locations, "config.yaml")
|
||||
|
||||
homeDir, _ := os.UserHomeDir()
|
||||
|
||||
switch runtime.GOOS {
|
||||
|
||||
@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
||||
|
||||
proxy := httputil.NewSingleHostReverseProxy(targetURL)
|
||||
|
||||
var responseHeaders map[string]string
|
||||
switch i.options.BackendType {
|
||||
case backends.BackendTypeLlamaCpp:
|
||||
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
|
||||
case backends.BackendTypeVllm:
|
||||
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
|
||||
case backends.BackendTypeMlxLm:
|
||||
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
|
||||
}
|
||||
proxy.ModifyResponse = func(resp *http.Response) error {
|
||||
// Remove CORS headers from llama-server response to avoid conflicts
|
||||
// llamactl will add its own CORS headers
|
||||
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
||||
resp.Header.Del("Access-Control-Allow-Credentials")
|
||||
resp.Header.Del("Access-Control-Max-Age")
|
||||
resp.Header.Del("Access-Control-Expose-Headers")
|
||||
|
||||
for key, value := range responseHeaders {
|
||||
resp.Header.Set(key, value)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall"
|
||||
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
|
||||
// Initialize last request time to current time when starting
|
||||
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
|
||||
|
||||
// Create context before building command (needed for CommandContext)
|
||||
i.ctx, i.cancel = context.WithCancel(context.Background())
|
||||
|
||||
// Create log files
|
||||
if err := i.logger.Create(); err != nil {
|
||||
return fmt.Errorf("failed to create log files: %w", err)
|
||||
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
|
||||
if cmdErr != nil {
|
||||
return fmt.Errorf("failed to build command: %w", cmdErr)
|
||||
}
|
||||
|
||||
i.ctx, i.cancel = context.WithCancel(context.Background())
|
||||
i.cmd = cmd
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
@@ -372,13 +374,27 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Build the environment variables
|
||||
env := i.options.BuildEnvironment(backendConfig)
|
||||
|
||||
// Get the command to execute
|
||||
cmd := i.options.GetCommand(backendConfig)
|
||||
command := i.options.GetCommand(backendConfig)
|
||||
|
||||
// Build command arguments
|
||||
args := i.options.BuildCommandArgs(backendConfig)
|
||||
|
||||
return exec.Command(cmd, args...), nil
|
||||
// Create the exec.Cmd
|
||||
cmd := exec.CommandContext(i.ctx, command, args...)
|
||||
|
||||
// Start with host environment variables
|
||||
cmd.Env = os.Environ()
|
||||
|
||||
// Add/override with backend-specific environment variables
|
||||
for k, v := range env {
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
// getBackendConfig resolves the backend configuration for the current instance
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"llamactl/pkg/backends/vllm"
|
||||
"llamactl/pkg/config"
|
||||
"log"
|
||||
"maps"
|
||||
)
|
||||
|
||||
type CreateInstanceOptions struct {
|
||||
@@ -20,6 +21,8 @@ type CreateInstanceOptions struct {
|
||||
OnDemandStart *bool `json:"on_demand_start,omitempty"`
|
||||
// Idle timeout
|
||||
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
|
||||
//Environment variables
|
||||
Environment map[string]string `json:"environment,omitempty"`
|
||||
|
||||
BackendType backends.BackendType `json:"backend_type"`
|
||||
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
||||
@@ -240,3 +243,23 @@ func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSe
|
||||
|
||||
return args
|
||||
}
|
||||
|
||||
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
|
||||
env := map[string]string{}
|
||||
|
||||
if backendConfig.Environment != nil {
|
||||
maps.Copy(env, backendConfig.Environment)
|
||||
}
|
||||
|
||||
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
|
||||
if backendConfig.Docker.Environment != nil {
|
||||
maps.Copy(env, backendConfig.Docker.Environment)
|
||||
}
|
||||
}
|
||||
|
||||
if c.Environment != nil {
|
||||
maps.Copy(env, c.Environment)
|
||||
}
|
||||
|
||||
return env
|
||||
}
|
||||
|
||||
@@ -263,19 +263,32 @@ func (im *instanceManager) loadInstance(name, path string) error {
|
||||
}
|
||||
|
||||
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
|
||||
// For instances with auto-restart disabled, it sets their status to Stopped
|
||||
func (im *instanceManager) autoStartInstances() {
|
||||
im.mu.RLock()
|
||||
var instancesToStart []*instance.Process
|
||||
var instancesToStop []*instance.Process
|
||||
for _, inst := range im.instances {
|
||||
if inst.IsRunning() && // Was running when persisted
|
||||
inst.GetOptions() != nil &&
|
||||
inst.GetOptions().AutoRestart != nil &&
|
||||
*inst.GetOptions().AutoRestart {
|
||||
instancesToStart = append(instancesToStart, inst)
|
||||
inst.GetOptions().AutoRestart != nil {
|
||||
if *inst.GetOptions().AutoRestart {
|
||||
instancesToStart = append(instancesToStart, inst)
|
||||
} else {
|
||||
// Instance was running but auto-restart is disabled, mark as stopped
|
||||
instancesToStop = append(instancesToStop, inst)
|
||||
}
|
||||
}
|
||||
}
|
||||
im.mu.RUnlock()
|
||||
|
||||
// Stop instances that have auto-restart disabled
|
||||
for _, inst := range instancesToStop {
|
||||
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
|
||||
inst.SetStatus(instance.Stopped)
|
||||
}
|
||||
|
||||
// Start instances that have auto-restart enabled
|
||||
for _, inst := range instancesToStart {
|
||||
log.Printf("Auto-starting instance %s", inst.Name)
|
||||
// Reset running state before starting (since Start() expects stopped instance)
|
||||
|
||||
@@ -209,3 +209,66 @@ func createTestManager() manager.InstanceManager {
|
||||
}
|
||||
return manager.NewInstanceManager(backendConfig, cfg)
|
||||
}
|
||||
|
||||
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
backendConfig := config.BackendConfig{
|
||||
LlamaCpp: config.BackendSettings{
|
||||
Command: "llama-server",
|
||||
},
|
||||
}
|
||||
|
||||
cfg := config.InstancesConfig{
|
||||
PortRange: [2]int{8000, 9000},
|
||||
InstancesDir: tempDir,
|
||||
MaxInstances: 10,
|
||||
TimeoutCheckInterval: 5,
|
||||
}
|
||||
|
||||
// Create first manager and instance with auto-restart disabled
|
||||
manager1 := manager.NewInstanceManager(backendConfig, cfg)
|
||||
|
||||
autoRestart := false
|
||||
options := &instance.CreateInstanceOptions{
|
||||
BackendType: backends.BackendTypeLlamaCpp,
|
||||
AutoRestart: &autoRestart,
|
||||
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||
Model: "/path/to/model.gguf",
|
||||
Port: 8080,
|
||||
},
|
||||
}
|
||||
|
||||
inst, err := manager1.CreateInstance("test-instance", options)
|
||||
if err != nil {
|
||||
t.Fatalf("CreateInstance failed: %v", err)
|
||||
}
|
||||
|
||||
// Simulate instance being in running state when persisted
|
||||
// (this would happen if the instance was running when llamactl was stopped)
|
||||
inst.SetStatus(instance.Running)
|
||||
|
||||
// Shutdown first manager
|
||||
manager1.Shutdown()
|
||||
|
||||
// Create second manager (simulating restart of llamactl)
|
||||
manager2 := manager.NewInstanceManager(backendConfig, cfg)
|
||||
|
||||
// Get the loaded instance
|
||||
loadedInst, err := manager2.GetInstance("test-instance")
|
||||
if err != nil {
|
||||
t.Fatalf("GetInstance failed: %v", err)
|
||||
}
|
||||
|
||||
// The instance should be marked as Stopped, not Running
|
||||
// because auto-restart is disabled
|
||||
if loadedInst.IsRunning() {
|
||||
t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
|
||||
}
|
||||
|
||||
if loadedInst.GetStatus() != instance.Stopped {
|
||||
t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
|
||||
}
|
||||
|
||||
manager2.Shutdown()
|
||||
}
|
||||
|
||||
@@ -131,11 +131,16 @@ func (h *Handler) ListInstances() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(w).Encode(instances); err != nil {
|
||||
// Marshal to bytes first to set Content-Length header
|
||||
data, err := json.Marshal(instances)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("Content-Length", strconv.Itoa(len(data)))
|
||||
w.Write(data)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,7 +207,7 @@ func (h *Handler) GetInstance() http.HandlerFunc {
|
||||
|
||||
inst, err := h.InstanceManager.GetInstance(name)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -475,29 +480,15 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
||||
|
||||
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
|
||||
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
|
||||
proxyPath := r.URL.Path[len(prefix):]
|
||||
|
||||
// Ensure the proxy path starts with "/"
|
||||
if !strings.HasPrefix(proxyPath, "/") {
|
||||
proxyPath = "/" + proxyPath
|
||||
}
|
||||
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||
|
||||
// Update the last request time for the instance
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
// Modify the request to remove the proxy prefix
|
||||
originalPath := r.URL.Path
|
||||
r.URL.Path = proxyPath
|
||||
|
||||
// Set forwarded headers
|
||||
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
|
||||
r.Header.Set("X-Forwarded-Proto", "http")
|
||||
|
||||
// Restore original path for logging purposes
|
||||
defer func() {
|
||||
r.URL.Path = originalPath
|
||||
}()
|
||||
|
||||
// Forward the request using the cached proxy
|
||||
proxy.ServeHTTP(w, r)
|
||||
}
|
||||
@@ -580,12 +571,13 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
||||
// Route to the appropriate inst based on instance name
|
||||
inst, err := h.InstanceManager.GetInstance(modelName)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if !inst.IsRunning() {
|
||||
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
|
||||
options := inst.GetOptions()
|
||||
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
|
||||
if !allowOnDemand {
|
||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||
return
|
||||
@@ -634,6 +626,84 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// Get the instance name from the URL parameter
|
||||
name := chi.URLParam(r, "name")
|
||||
if name == "" {
|
||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Route to the appropriate inst based on instance name
|
||||
inst, err := h.InstanceManager.GetInstance(name)
|
||||
if err != nil {
|
||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
options := inst.GetOptions()
|
||||
if options == nil {
|
||||
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if options.BackendType != backends.BackendTypeLlamaCpp {
|
||||
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if !inst.IsRunning() {
|
||||
|
||||
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
|
||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
|
||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||
if h.cfg.Instances.EnableLRUEviction {
|
||||
err := h.InstanceManager.EvictLRUInstance()
|
||||
if err != nil {
|
||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// If on-demand start is enabled, start the instance
|
||||
if _, err := h.InstanceManager.StartInstance(name); err != nil {
|
||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Wait for the instance to become healthy before proceeding
|
||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
proxy, err := inst.GetProxy()
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Strip the "/llama-cpp/<name>" prefix from the request URL
|
||||
prefix := fmt.Sprintf("/llama-cpp/%s", name)
|
||||
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||
|
||||
// Update the last request time for the instance
|
||||
inst.UpdateLastRequestTime()
|
||||
|
||||
proxy.ServeHTTP(w, r)
|
||||
}
|
||||
}
|
||||
|
||||
// ParseCommandRequest represents the request body for command parsing
|
||||
type ParseCommandRequest struct {
|
||||
Command string `json:"command"`
|
||||
@@ -714,21 +784,21 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc {
|
||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
if strings.TrimSpace(req.Command) == "" {
|
||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
|
||||
if err != nil {
|
||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Currently only support mlx_lm backend type
|
||||
backendType := backends.BackendTypeMlxLm
|
||||
|
||||
|
||||
options := &instance.CreateInstanceOptions{
|
||||
BackendType: backendType,
|
||||
MlxServerOptions: mlxOptions,
|
||||
|
||||
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
||||
r.Use(cors.Handler(cors.Options{
|
||||
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
|
||||
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
|
||||
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
|
||||
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
|
||||
ExposedHeaders: []string{"Link"},
|
||||
AllowCredentials: false,
|
||||
MaxAge: 300,
|
||||
@@ -103,6 +103,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
||||
|
||||
})
|
||||
|
||||
r.Route("/llama-cpp/{name}", func(r chi.Router) {
|
||||
|
||||
// Public Routes
|
||||
// Allow llama-cpp server to serve its own WebUI if it is running.
|
||||
// Don't auto start the server since it can be accessed without an API key
|
||||
r.Get("/", handler.LlamaCppProxy(false))
|
||||
|
||||
// Private Routes
|
||||
r.Group(func(r chi.Router) {
|
||||
|
||||
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
|
||||
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
|
||||
}
|
||||
|
||||
// This handler auto start the server if it's not running
|
||||
llamaCppHandler := handler.LlamaCppProxy(true)
|
||||
|
||||
// llama.cpp server specific proxy endpoints
|
||||
r.Get("/props", llamaCppHandler)
|
||||
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
|
||||
r.Get("/slots", llamaCppHandler)
|
||||
r.Post("/apply-template", llamaCppHandler)
|
||||
r.Post("/completion", llamaCppHandler)
|
||||
r.Post("/detokenize", llamaCppHandler)
|
||||
r.Post("/embeddings", llamaCppHandler)
|
||||
r.Post("/infill", llamaCppHandler)
|
||||
r.Post("/metrics", llamaCppHandler)
|
||||
r.Post("/props", llamaCppHandler)
|
||||
r.Post("/reranking", llamaCppHandler)
|
||||
r.Post("/tokenize", llamaCppHandler)
|
||||
|
||||
// OpenAI-compatible proxy endpoint
|
||||
// Handles all POST requests to /v1/*, including:
|
||||
// - /v1/completions
|
||||
// - /v1/chat/completions
|
||||
// - /v1/embeddings
|
||||
// - /v1/rerank
|
||||
// - /v1/reranking
|
||||
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
|
||||
// on "model" field being optional, and handler.OpenAIProxy requires it.
|
||||
r.Post("/v1/*", llamaCppHandler)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
// Serve WebUI files
|
||||
if err := webui.SetupWebUI(r); err != nil {
|
||||
fmt.Printf("Failed to set up WebUI: %v\n", err)
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
import React from 'react'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import { Checkbox } from '@/components/ui/checkbox'
|
||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
||||
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
|
||||
|
||||
/** Every value shape a single form field can hold. */
type ZodFormFieldValue = string | number | boolean | string[] | undefined

/** Props for {@link ZodFormField}. */
interface ZodFormFieldProps {
  /** Key of the instance option edited by this field. */
  fieldKey: keyof CreateInstanceOptions
  /** Current value of the field. */
  value: ZodFormFieldValue
  /** Called with the field key and the new value whenever the user edits the field. */
  onChange: (key: keyof CreateInstanceOptions, value: ZodFormFieldValue) => void
}

/**
 * Renders one instance-option form field.
 *
 * `backend_type` is always rendered as a dropdown. Every other key is
 * rendered according to the type reported by `getFieldType`: a checkbox for
 * booleans, a numeric input for numbers, a comma-separated text input for
 * arrays, and a plain text input otherwise.
 */
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
  // Get configuration for basic fields, or use field name for advanced fields
  const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }

  // Get type from Zod schema
  const fieldType = getFieldType(fieldKey)

  const handleChange = (newValue: ZodFormFieldValue) => {
    onChange(fieldKey, newValue)
  }

  const renderField = () => {
    // Special handling for backend_type field - render as dropdown
    if (fieldKey === 'backend_type') {
      return (
        <div className="grid gap-2">
          <Label htmlFor={fieldKey}>
            {config.label}
          </Label>
          <select
            id={fieldKey}
            value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
            onChange={(e) => handleChange(e.target.value || undefined)}
            className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
          >
            <option value={BackendType.LLAMA_CPP}>Llama Server</option>
            <option value={BackendType.MLX_LM}>MLX LM</option>
            <option value={BackendType.VLLM}>vLLM</option>
          </select>
          {config.description && (
            <p className="text-sm text-muted-foreground">{config.description}</p>
          )}
        </div>
      )
    }

    switch (fieldType) {
      case 'boolean':
        return (
          <div className="flex items-center space-x-2">
            <Checkbox
              id={fieldKey}
              checked={typeof value === 'boolean' ? value : false}
              // NOTE(review): if Checkbox follows the Radix API, `checked` may be
              // 'indeterminate'; forward a strict boolean so a string never
              // ends up in a boolean option. Confirm against ui/checkbox.
              onCheckedChange={(checked) => handleChange(checked === true)}
            />
            <Label htmlFor={fieldKey} className="text-sm font-normal">
              {config.label}
              {config.description && (
                <span className="text-muted-foreground ml-1">- {config.description}</span>
              )}
            </Label>
          </div>
        )

      case 'number':
        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="number"
              step="any" // This allows decimal numbers
              value={typeof value === 'string' || typeof value === 'number' ? value : ''}
              onChange={(e) => {
                const numValue = e.target.value ? parseFloat(e.target.value) : undefined
                // Only update if the parsed value is valid or the input is empty
                if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
                  handleChange(numValue)
                }
              }}
              placeholder={config.placeholder}
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
          </div>
        )

      case 'array':
        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="text"
              value={Array.isArray(value) ? value.join(', ') : ''}
              onChange={(e) => {
                // Split on commas, trim each item and drop empty ones;
                // an empty input clears the field.
                const arrayValue = e.target.value
                  ? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
                  : undefined
                handleChange(arrayValue)
              }}
              placeholder="item1, item2, item3"
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
            <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
          </div>
        )

      case 'text':
      default:
        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="text"
              value={typeof value === 'string' || typeof value === 'number' ? value : ''}
              onChange={(e) => handleChange(e.target.value || undefined)}
              placeholder={config.placeholder}
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
          </div>
        )
    }
  }

  return <div className="space-y-2">{renderField()}</div>
}
|
||||
|
||||
export default ZodFormField
|
||||
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
@@ -0,0 +1,144 @@
|
||||
import React, { useState } from 'react'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { X, Plus } from 'lucide-react'
|
||||
|
||||
/** Props for {@link EnvironmentVariablesInput}. */
interface EnvironmentVariablesInputProps {
  /** DOM id; attached to the first variable-name input so the label targets it. */
  id: string
  /** Text shown in the label above the rows. */
  label: string
  /** Initial variables as a name-to-value map; read once when the component mounts. */
  value: Record<string, string> | undefined
  /** Receives the current non-empty variables, or `undefined` when there are none. */
  onChange: (value: Record<string, string> | undefined) => void
  /** Optional help text rendered below the rows. */
  description?: string
  /** Disables every input and button when true. */
  disabled?: boolean
  /** Extra class names appended to the root element. */
  className?: string
}

/** One editable row: a variable name and its value. */
interface EnvVar {
  key: string
  value: string
}

/**
 * Editable list of environment variables, one name/value row per variable.
 *
 * The rows are kept in local state so that partially filled rows survive
 * while the user types. The parent only ever receives rows whose name and
 * value are both non-blank after trimming.
 */
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
}) => {
  // Lazy initializer: the rows are derived from `value` only on first render.
  const [envVars, setEnvVars] = useState<EnvVar[]>(() => {
    const initialRows = Object.entries(value ?? {}).map(([key, val]) => ({ key, value: val }))
    return initialRows.length > 0 ? initialRows : [{ key: '', value: '' }]
  })

  // Update parent component when env vars change
  const updateParent = (newEnvVars: EnvVar[]) => {
    // Filter out empty entries
    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')

    if (validVars.length === 0) {
      onChange(undefined)
      return
    }

    const envObject = validVars.reduce((acc, env) => {
      acc[env.key.trim()] = env.value.trim()
      return acc
    }, {} as Record<string, string>)
    onChange(envObject)
  }

  // Replace the row at `index` with a patched copy. The existing row object
  // is never mutated, so previous state snapshots stay intact.
  const patchEnvVar = (index: number, patch: Partial<EnvVar>) => {
    const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, ...patch } : env))
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  const handleKeyChange = (index: number, newKey: string) => {
    patchEnvVar(index, { key: newKey })
  }

  const handleValueChange = (index: number, newValue: string) => {
    patchEnvVar(index, { value: newValue })
  }

  // A new blank row holds nothing valid yet, so the parent is not notified.
  const addEnvVar = () => {
    setEnvVars([...envVars, { key: '', value: '' }])
  }

  const removeEnvVar = (index: number) => {
    const remaining = envVars.filter((_, i) => i !== index)
    // Always keep one (blank) row so the user has something to type into.
    const newEnvVars = remaining.length > 0 ? remaining : [{ key: '', value: '' }]
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {envVars.map((envVar, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              // Only the first row carries the id, keeping it unique in the DOM.
              id={index === 0 ? id : undefined}
              placeholder="Variable name"
              value={envVar.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder="Variable value"
              value={envVar.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removeEnvVar(index)}
              disabled={disabled}
              className="shrink-0"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addEnvVar}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          Add Variable
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">
        Environment variables that will be passed to the backend process
      </p>
    </div>
  )
}
|
||||
|
||||
export default EnvironmentVariablesInput
|
||||
@@ -1,99 +0,0 @@
|
||||
import React from 'react'
|
||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
||||
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
|
||||
import { getFieldType } from '@/schemas/instanceOptions'
|
||||
import TextInput from '@/components/form/TextInput'
|
||||
import NumberInput from '@/components/form/NumberInput'
|
||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
||||
import SelectInput from '@/components/form/SelectInput'
|
||||
|
||||
/** Props for {@link BasicInstanceFields}. */
interface BasicInstanceFieldsProps {
  /** Current values of the instance options being edited. */
  formData: CreateInstanceOptions
  /** Called with the option key and its new value when a field changes. */
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

/**
 * Renders the "Basic Configuration" section of the instance form.
 *
 * One input is produced per key returned by `getBasicFields`, except the
 * auto-restart keys and `backend_options`, which are filtered out here.
 * The input component is chosen from the field's type: a select for
 * `backend_type`, a checkbox for booleans, a number input for numbers and a
 * text input for everything else.
 */
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
  formData,
  onChange
}) => {
  // Keys excluded from this section (auto restart fields and backend_options).
  const excludedKeys = ['auto_restart', 'max_restarts', 'restart_delay', 'backend_options']

  const visibleFields = getBasicFields().filter(
    key => !excludedKeys.includes(key as string)
  )

  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const fieldType = getFieldType(fieldKey)

    // backend_type is a fixed choice, so it always gets a select
    if (fieldKey === 'backend_type') {
      const backendChoices = [
        { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
        { value: BackendType.MLX_LM, label: 'MLX LM' },
        { value: BackendType.VLLM, label: 'vLLM' }
      ]
      return (
        <SelectInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] || BackendType.LLAMA_CPP}
          onChange={(value) => onChange(fieldKey, value)}
          options={backendChoices}
          description={config.description}
        />
      )
    }

    if (fieldType === 'boolean') {
      return (
        <CheckboxInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as boolean | undefined}
          onChange={(value) => onChange(fieldKey, value)}
          description={config.description}
        />
      )
    }

    if (fieldType === 'number') {
      return (
        <NumberInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as number | undefined}
          onChange={(value) => onChange(fieldKey, value)}
          placeholder={config.placeholder}
          description={config.description}
        />
      )
    }

    // Any other field type falls back to a plain text input
    return (
      <TextInput
        key={fieldKey}
        id={fieldKey}
        label={config.label}
        value={formData[fieldKey] as string | number | undefined}
        onChange={(value) => onChange(fieldKey, value)}
        placeholder={config.placeholder}
        description={config.description}
      />
    )
  }

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Basic Configuration</h3>
      {visibleFields.map((fieldKey) => renderField(fieldKey))}
    </div>
  )
}
|
||||
|
||||
export default BasicInstanceFields
|
||||
@@ -6,6 +6,7 @@ import { Input } from '@/components/ui/input'
|
||||
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
|
||||
import NumberInput from '@/components/form/NumberInput'
|
||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
||||
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
|
||||
|
||||
interface InstanceSettingsCardProps {
|
||||
instanceName: string
|
||||
@@ -75,6 +76,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
||||
onChange={(value) => onChange('on_demand_start', value)}
|
||||
description="Start instance only when needed"
|
||||
/>
|
||||
|
||||
<EnvironmentVariablesInput
|
||||
id="environment"
|
||||
label="Environment Variables"
|
||||
value={formData.environment}
|
||||
onChange={(value) => onChange('environment', value)}
|
||||
description="Custom environment variables for the instance"
|
||||
/>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
|
||||
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
|
||||
|
||||
interface AuthContextState {
|
||||
isAuthenticated: boolean
|
||||
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
|
||||
// Validate API key by making a test request
|
||||
const validateApiKey = async (key: string): Promise<boolean> => {
|
||||
try {
|
||||
const response = await fetch('/api/v1/instances', {
|
||||
const response = await fetch(document.baseURI + 'api/v1/instances', {
|
||||
headers: {
|
||||
'Authorization': `Bearer ${key}`,
|
||||
'Content-Type': 'application/json'
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
||||
import { instancesApi } from '@/lib/api'
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
// Mock fetch globally
|
||||
const mockFetch = vi.fn()
|
||||
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
|
||||
})
|
||||
|
||||
it('converts HTTP errors to meaningful messages', async () => {
|
||||
mockFetch.mockResolvedValue({
|
||||
const mockResponse = {
|
||||
ok: false,
|
||||
status: 409,
|
||||
text: () => Promise.resolve('Instance already exists')
|
||||
})
|
||||
text: () => Promise.resolve('Instance already exists'),
|
||||
clone: function() { return this }
|
||||
}
|
||||
mockFetch.mockResolvedValue(mockResponse)
|
||||
|
||||
await expect(instancesApi.create('existing', {}))
|
||||
.rejects
|
||||
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
|
||||
})
|
||||
|
||||
it('handles empty error responses gracefully', async () => {
|
||||
mockFetch.mockResolvedValue({
|
||||
const mockResponse = {
|
||||
ok: false,
|
||||
status: 500,
|
||||
text: () => Promise.resolve('')
|
||||
})
|
||||
text: () => Promise.resolve(''),
|
||||
clone: function() { return this }
|
||||
}
|
||||
mockFetch.mockResolvedValue(mockResponse)
|
||||
|
||||
await expect(instancesApi.list())
|
||||
.rejects
|
||||
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
|
||||
await instancesApi.getLogs('test-instance', 100)
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledWith(
|
||||
'/api/v1/instances/test-instance/logs?lines=100',
|
||||
expect.stringMatching(
|
||||
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
|
||||
),
|
||||
expect.any(Object)
|
||||
)
|
||||
})
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import type { CreateInstanceOptions, Instance } from "@/types/instance";
|
||||
import { handleApiError } from "./errorUtils";
|
||||
|
||||
const API_BASE = "/api/v1";
|
||||
// Adding baseURI as a prefix to support being served behind a subpath
|
||||
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
|
||||
// the baseURI will end in `/proxy/` (it is an absolute URL) and the API calls will be made to `<origin>/proxy/api/v1/<endpoint>`
|
||||
export const API_BASE = document.baseURI + "api/v1";
|
||||
|
||||
// Base API call function with error handling
|
||||
async function apiCall<T>(
|
||||
@@ -46,11 +49,8 @@ async function apiCall<T>(
|
||||
} else {
|
||||
// Handle empty responses for JSON endpoints
|
||||
const contentLength = response.headers.get('content-length');
|
||||
if (contentLength === '0' || contentLength === null) {
|
||||
const text = await response.text();
|
||||
if (text.trim() === '') {
|
||||
return {} as T; // Return empty object for empty JSON responses
|
||||
}
|
||||
if (contentLength === '0') {
|
||||
return {} as T; // Return empty object for empty JSON responses
|
||||
}
|
||||
const data = await response.json() as T;
|
||||
return data;
|
||||
|
||||
@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const errorMessage = await parseErrorResponse(response)
|
||||
// Clone the response before reading to avoid consuming the body stream
|
||||
const errorMessage = await parseErrorResponse(response.clone())
|
||||
throw new Error(errorMessage)
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,10 @@
|
||||
import {
|
||||
type CreateInstanceOptions,
|
||||
type LlamaCppBackendOptions,
|
||||
type MlxBackendOptions,
|
||||
type VllmBackendOptions,
|
||||
LlamaCppBackendOptionsSchema,
|
||||
MlxBackendOptionsSchema,
|
||||
VllmBackendOptionsSchema,
|
||||
getAllFieldKeys,
|
||||
getAllLlamaCppFieldKeys,
|
||||
getAllMlxFieldKeys,
|
||||
getAllVllmFieldKeys,
|
||||
@@ -15,41 +13,6 @@ import {
|
||||
getVllmFieldType
|
||||
} from '@/schemas/instanceOptions'
|
||||
|
||||
/**
 * Display metadata for the instance-level basic fields (not backend-specific).
 *
 * Keys are instance option names; each entry carries the label shown next to
 * the input plus an optional description and placeholder. The key order is
 * the order in which `Object.keys` enumerates the fields.
 */
export const basicFieldsConfig: Record<
  string,
  {
    label: string
    description?: string
    placeholder?: string
  }
> = {
  // Restart behaviour
  auto_restart: {
    label: 'Auto Restart',
    description: 'Automatically restart the instance on failure',
  },
  max_restarts: {
    label: 'Max Restarts',
    placeholder: '3',
    description: 'Maximum number of restart attempts (0 = unlimited)',
  },
  restart_delay: {
    label: 'Restart Delay (seconds)',
    placeholder: '5',
    description: 'Delay in seconds before attempting restart',
  },

  // Idle handling and on-demand start
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped',
  },
  on_demand_start: {
    label: 'On-Demand Start',
    description: 'Start instance upon receiving OpenAI-compatible API request',
  },

  // Backend selection
  backend_type: {
    label: 'Backend Type',
    description: 'Type of backend to use for this instance',
  },
}
|
||||
|
||||
// LlamaCpp backend-specific basic fields
|
||||
const basicLlamaCppFieldsConfig: Record<string, {
|
||||
label: string
|
||||
@@ -152,18 +115,6 @@ const backendFieldGetters = {
|
||||
llama_cpp: getAllLlamaCppFieldKeys,
|
||||
} as const
|
||||
|
||||
/** Reports whether `key` has an entry in `basicFieldsConfig`. */
function isBasicField(key: keyof CreateInstanceOptions): boolean {
  const listedAsBasic = key in basicFieldsConfig
  return listedAsBasic
}
|
||||
|
||||
/** Returns the keys of `basicFieldsConfig`, in the order they are declared there. */
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
  const configuredKeys = Object.keys(basicFieldsConfig)
  return configuredKeys as (keyof CreateInstanceOptions)[]
}
|
||||
|
||||
/**
 * Returns every key from `getAllFieldKeys()` that is not a basic field,
 * keeping the order in which `getAllFieldKeys()` yields them.
 */
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
  const advanced: (keyof CreateInstanceOptions)[] = []
  for (const candidate of getAllFieldKeys()) {
    if (!isBasicField(candidate)) {
      advanced.push(candidate)
    }
  }
  return advanced
}
|
||||
|
||||
export function getBasicBackendFields(backendType?: string): string[] {
|
||||
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
|
||||
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
|
||||
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
|
||||
return 'text'
|
||||
}
|
||||
|
||||
// Re-export the Zod-based functions
|
||||
export { getFieldType } from '@/schemas/instanceOptions'
|
||||
@@ -33,6 +33,9 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
idle_timeout: z.number().optional(),
|
||||
on_demand_start: z.boolean().optional(),
|
||||
|
||||
// Environment variables
|
||||
environment: z.record(z.string(), z.string()).optional(),
|
||||
|
||||
// Backend configuration
|
||||
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
||||
backend_options: BackendOptionsSchema.optional(),
|
||||
@@ -75,5 +78,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
|
||||
if (innerSchema instanceof z.ZodNumber) return 'number'
|
||||
if (innerSchema instanceof z.ZodArray) return 'array'
|
||||
if (innerSchema instanceof z.ZodObject) return 'object'
|
||||
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
|
||||
return 'text' // ZodString and others default to text
|
||||
}
|
||||
@@ -21,4 +21,6 @@ export default defineConfig({
|
||||
setupFiles: ['./src/test/setup.ts'],
|
||||
css: true,
|
||||
},
|
||||
// ensures relative asset paths to support being served behind a subpath
|
||||
base: "./"
|
||||
})
|
||||
Reference in New Issue
Block a user