30 Commits

Author SHA1 Message Date
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) if requested invalid model name. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
20 changed files with 633 additions and 64 deletions

45
.dockerignore Normal file
View File

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files
.vscode/
.idea/
# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

View File

@@ -95,7 +95,30 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Build from Source
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
@@ -147,9 +170,9 @@ pip install vllm
# Or use Docker - no local installation required
```
## Docker Support
## Backend Docker Support
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
llamactl can run backends in Docker containers:
```yaml
backends:
@@ -174,6 +197,7 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:

View File

@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

64
docker/Dockerfile.source Normal file
View File

@@ -0,0 +1,64 @@
# WebUI build stage
FROM node:20-alpine AS webui-builder
WORKDIR /webui
# Copy webui package files
COPY webui/package*.json ./
# Install dependencies
RUN npm ci
# Copy webui source
COPY webui/ ./
# Build webui
RUN npm run build
# Go build stage
FROM golang:1.24-alpine AS builder
# Install build dependencies
RUN apk add --no-cache git ca-certificates
# Set working directory
WORKDIR /build
# Copy go mod files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/
# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
# Final stage
FROM alpine:latest
# Install runtime dependencies
RUN apk --no-cache add ca-certificates
# Create data directory
RUN mkdir -p /data
# Set working directory
WORKDIR /data
# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
# Expose the default port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

20
docker/Dockerfile.vllm Normal file
View File

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

56
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,56 @@
version: '3.8'
services:
llamactl-llamacpp:
build:
context: ..
dockerfile: docker/Dockerfile.llamacpp
image: llamactl:llamacpp-cuda
container_name: llamactl-llamacpp
ports:
- "8080:8080"
volumes:
- ./data/llamacpp:/data
- ./models:/models # Mount models directory
- ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped
llamactl-vllm:
build:
context: ..
dockerfile: docker/Dockerfile.vllm
image: llamactl:vllm-cuda
container_name: llamactl-vllm
ports:
- "8081:8080" # Use different port to avoid conflicts
volumes:
- ./data/vllm:/data
- ./models:/models # Mount models directory
- ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_VLLM_DOCKER_ENABLED=false
# vLLM specific environment variables
- CUDA_VISIBLE_DEVICES=all
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped

View File

@@ -17,33 +17,37 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
instances:
port_range: [8000, 9000] # Port range for instances
@@ -101,6 +105,7 @@ server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
@@ -116,40 +121,46 @@ backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
# MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
**Environment Variables:**
**LlamaCpp Backend:**
@@ -160,6 +171,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -169,11 +181,13 @@ backends:
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
### Instance Configuration

View File

@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Build from Source
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
#### Using Docker Compose
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
#### Using Docker Build and Run
**llamactl with llama.cpp CUDA:**
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
**llamactl with vLLM CUDA:**
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
**llamactl built from source:**
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later

View File

@@ -13,10 +13,11 @@ import (
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
@@ -56,8 +57,14 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -132,6 +139,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
Backends: BackendConfig{
@@ -337,6 +345,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +394,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +414,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -481,6 +507,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {

View File

@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
return nil
}

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"log"
"net/http"
"os"
"os/exec"
"runtime"
"syscall"
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create context before building command (needed for CommandContext)
i.ctx, i.cancel = context.WithCancel(context.Background())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = cmd
if runtime.GOOS != "windows" {
@@ -383,7 +385,11 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
cmd.Env = []string{}
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}

View File

@@ -263,19 +263,32 @@ func (im *instanceManager) loadInstance(name, path string) error {
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
// For instances with auto-restart disabled, it sets their status to Stopped
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
var instancesToStop []*instance.Process
for _, inst := range im.instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
inst.GetOptions().AutoRestart != nil {
if *inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
} else {
// Instance was running but auto-restart is disabled, mark as stopped
instancesToStop = append(instancesToStop, inst)
}
}
}
im.mu.RUnlock()
// Stop instances that have auto-restart disabled
for _, inst := range instancesToStop {
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
inst.SetStatus(instance.Stopped)
}
// Start instances that have auto-restart enabled
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)

View File

@@ -209,3 +209,66 @@ func createTestManager() manager.InstanceManager {
}
return manager.NewInstanceManager(backendConfig, cfg)
}
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Create first manager and instance with auto-restart disabled
manager1 := manager.NewInstanceManager(backendConfig, cfg)
autoRestart := false
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
AutoRestart: &autoRestart,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Simulate instance being in running state when persisted
// (this would happen if the instance was running when llamactl was stopped)
inst.SetStatus(instance.Running)
// Shutdown first manager
manager1.Shutdown()
// Create second manager (simulating restart of llamactl)
manager2 := manager.NewInstanceManager(backendConfig, cfg)
// Get the loaded instance
loadedInst, err := manager2.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
// The instance should be marked as Stopped, not Running
// because auto-restart is disabled
if loadedInst.IsRunning() {
t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
}
if loadedInst.GetStatus() != instance.Stopped {
t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
}
manager2.Shutdown()
}

View File

@@ -131,11 +131,16 @@ func (h *Handler) ListInstances() http.HandlerFunc {
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
// Marshal to bytes first to set Content-Length header
data, err := json.Marshal(instances)
if err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.Header().Set("Content-Length", strconv.Itoa(len(data)))
w.Write(data)
}
}
@@ -202,7 +207,7 @@ func (h *Handler) GetInstance() http.HandlerFunc {
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
@@ -475,29 +480,15 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
proxyPath := r.URL.Path[len(prefix):]
// Ensure the proxy path starts with "/"
if !strings.HasPrefix(proxyPath, "/") {
proxyPath = "/" + proxyPath
}
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Restore original path for logging purposes
defer func() {
r.URL.Path = originalPath
}()
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
@@ -580,12 +571,13 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
options := inst.GetOptions()
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
@@ -634,6 +626,84 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
}
}
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Get the instance name from the URL parameter
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
options := inst.GetOptions()
if options == nil {
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
return
}
if options.BackendType != backends.BackendTypeLlamaCpp {
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
return
}
if !inst.IsRunning() {
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(name); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/llama-cpp/<name>" prefix from the request URL
prefix := fmt.Sprintf("/llama-cpp/%s", name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
proxy.ServeHTTP(w, r)
}
}
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`

View File

@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
ExposedHeaders: []string{"Link"},
AllowCredentials: false,
MaxAge: 300,
@@ -103,6 +103,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
r.Route("/llama-cpp/{name}", func(r chi.Router) {
// Public Routes
// Allow llama-cpp server to serve its own WebUI if it is running.
// Don't auto start the server since it can be accessed without an API key
r.Get("/", handler.LlamaCppProxy(false))
// Private Routes
r.Group(func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
// This handler auto start the server if it's not running
llamaCppHandler := handler.LlamaCppProxy(true)
// llama.cpp server specific proxy endpoints
r.Get("/props", llamaCppHandler)
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
r.Get("/slots", llamaCppHandler)
r.Post("/apply-template", llamaCppHandler)
r.Post("/completion", llamaCppHandler)
r.Post("/detokenize", llamaCppHandler)
r.Post("/embeddings", llamaCppHandler)
r.Post("/infill", llamaCppHandler)
r.Post("/metrics", llamaCppHandler)
r.Post("/props", llamaCppHandler)
r.Post("/reranking", llamaCppHandler)
r.Post("/tokenize", llamaCppHandler)
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
// - /v1/completions
// - /v1/chat/completions
// - /v1/embeddings
// - /v1/rerank
// - /v1/reranking
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
// on "model" field being optional, and handler.OpenAIProxy requires it.
r.Post("/v1/*", llamaCppHandler)
})
})
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err)

View File

@@ -1,4 +1,4 @@
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch('/api/v1/instances', {
const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally
const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
})
it('converts HTTP errors to meaningful messages', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 409,
text: () => Promise.resolve('Instance already exists')
})
text: () => Promise.resolve('Instance already exists'),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.create('existing', {}))
.rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
})
it('handles empty error responses gracefully', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 500,
text: () => Promise.resolve('')
})
text: () => Promise.resolve(''),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.list())
.rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith(
'/api/v1/instances/test-instance/logs?lines=100',
expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object)
)
})

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llmamctl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
@@ -46,11 +49,8 @@ async function apiCall<T>(
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
if (contentLength === '0') {
return {} as T; // Return empty object for empty JSON responses
}
const data = await response.json() as T;
return data;

View File

@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
// Clone the response before reading to avoid consuming the body stream
const errorMessage = await parseErrorResponse(response.clone())
throw new Error(errorMessage)
}
}

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'],
css: true,
},
// ensures relative asset paths to support being served behind a subpath
base: "./"
})