68 Commits

Author SHA1 Message Date
c3037f914d Merge pull request #60 from lordmathis/lordmathis-patch-1
Update docs.yaml
2025-10-09 22:31:38 +02:00
81266b4bc4 Update docs.yaml 2025-10-09 22:29:23 +02:00
a31af94e7b Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
2025-10-09 22:23:27 +02:00
9ee0a184b3 Re-validate instance name in DeleteInstance for improved security 2025-10-09 22:18:53 +02:00
5436c28a1f Add instance name validation before deletion for security 2025-10-09 22:10:40 +02:00
73b9dd5bc7 Rename workflows for consistency 2025-10-09 21:53:14 +02:00
f61e8dad5c Add User Docs badge to README 2025-10-09 21:51:38 +02:00
ab2770bdd9 Add documentation for remote node deployment and configuration 2025-10-09 21:50:39 +02:00
e7a6a7003e Skip remote instances in checkAllTimeouts and EvictLRUInstance methods 2025-10-09 21:13:38 +02:00
2b950ee649 Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data 2025-10-09 20:39:21 +02:00
b965b77c18 Prevent remote instances from using local proxy in GetProxy method 2025-10-09 20:24:54 +02:00
8a16a195de Fix getting remote instance logs 2025-10-09 20:22:32 +02:00
9684a8a09b Enhance instance management to preserve local state for remote instances 2025-10-09 19:34:52 +02:00
9d5f01d4ae Auto-select first node in InstanceSettingsCard if none is selected 2025-10-09 19:13:58 +02:00
e281708b20 Enhance auto-start logic to differentiate between remote and local instances 2025-10-09 18:56:23 +02:00
8d9b0c0621 Initialize timeProvider and logger in UnmarshalJSON for Process 2025-10-09 18:56:12 +02:00
6c1a76691d Improve cleanup of options in InstanceDialog to skip empty strings and arrays 2025-10-09 18:49:36 +02:00
5d958ed283 Fix backend_options cleanup to exclude empty arrays in InstanceDialog 2025-10-09 18:38:33 +02:00
56b95d1243 Refactor InstanceSettingsCard and API types to use NodesMap 2025-10-08 19:52:39 +02:00
688b815ca7 Add LocalNode configuration 2025-10-08 19:43:53 +02:00
7f6725da96 Refactor NodeConfig handling to use a map 2025-10-08 19:24:24 +02:00
3418735204 Add stripNodesFromOptions function to prevent routing loops in remote requests 2025-10-07 20:27:31 +02:00
2f1cf5acdc Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests 2025-10-07 19:57:21 +02:00
01380e6641 Update instance manager tests to use empty NodeConfig slice 2025-10-07 19:18:13 +02:00
6298b03636 Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling 2025-10-07 18:57:08 +02:00
aae3f84d49 Implement caching for remote instance proxies and enhance proxy request handling 2025-10-07 18:44:23 +02:00
554796391b Remove test config file 2025-10-07 18:05:30 +02:00
16b28bac05 Merge branch 'main' into feat/multi-host 2025-10-07 18:04:24 +02:00
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) if requested invalid model name. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
b728a7c6b2 Fix fetchNodes call to ensure proper handling of promise 2025-10-03 10:53:29 +02:00
a491f29483 Add node selection functionality to InstanceSettingsCard and define Node API 2025-10-02 23:18:33 +02:00
670f8ff81b Split up handlers 2025-10-02 23:11:20 +02:00
da56456504 Add node management endpoints to handle listing and retrieving node details 2025-10-02 22:51:41 +02:00
c30053e51c Enhance instance loading to support remote instances and handle node configuration 2025-10-01 22:59:45 +02:00
347c58e15f Enhance instance manager to persist remote instances and update tracking on modifications 2025-10-01 22:58:57 +02:00
2ed67eb672 Add remote instance proxying functionality to handler 2025-10-01 22:17:19 +02:00
0188f82306 Implement remote instance creation and deletion in instance manager 2025-10-01 22:05:18 +02:00
e0f176de10 Enhance instance manager to support remote instance management and update related tests 2025-10-01 20:25:06 +02:00
2759be65a5 Add remote instance management functionality and configuration support 2025-09-30 21:09:05 +02:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
42 changed files with 2680 additions and 912 deletions

45
.dockerignore Normal file
View File

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files
.vscode/
.idea/
# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

View File

@@ -1,4 +1,4 @@
name: Build and Deploy Documentation
name: User Docs
on:
push:

View File

@@ -1,6 +1,6 @@
# llamactl
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
@@ -25,6 +25,11 @@
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
### 🔗 Remote Instance Deployment
- **Remote Node Support**: Deploy instances on remote hosts
- **Central Management**: Manage remote instances from a single dashboard
- **Seamless Routing**: Automatic request routing to remote instances
![Dashboard Screenshot](docs/images/dashboard.png)
## Quick Start
@@ -95,7 +100,30 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Build from Source
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
@@ -147,9 +175,9 @@ pip install vllm
# Or use Docker - no local installation required
```
## Docker Support
## Backend Docker Support
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
llamactl can run backends in Docker containers:
```yaml
backends:
@@ -174,6 +202,7 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:

View File

@@ -58,7 +58,7 @@ func main() {
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)

View File

@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

64
docker/Dockerfile.source Normal file
View File

@@ -0,0 +1,64 @@
# WebUI build stage
FROM node:20-alpine AS webui-builder
WORKDIR /webui
# Copy webui package files
COPY webui/package*.json ./
# Install dependencies
RUN npm ci
# Copy webui source
COPY webui/ ./
# Build webui
RUN npm run build
# Go build stage
FROM golang:1.24-alpine AS builder
# Install build dependencies
RUN apk add --no-cache git ca-certificates
# Set working directory
WORKDIR /build
# Copy go mod files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/
# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
# Final stage
FROM alpine:latest
# Install runtime dependencies
RUN apk --no-cache add ca-certificates
# Create data directory
RUN mkdir -p /data
# Set working directory
WORKDIR /data
# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
# Expose the default port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

20
docker/Dockerfile.vllm Normal file
View File

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

56
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,56 @@
version: '3.8'
services:
llamactl-llamacpp:
build:
context: ..
dockerfile: docker/Dockerfile.llamacpp
image: llamactl:llamacpp-cuda
container_name: llamactl-llamacpp
ports:
- "8080:8080"
volumes:
- ./data/llamacpp:/data
- ./models:/models # Mount models directory
- ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped
llamactl-vllm:
build:
context: ..
dockerfile: docker/Dockerfile.vllm
image: llamactl:vllm-cuda
container_name: llamactl-vllm
ports:
- "8081:8080" # Use different port to avoid conflicts
volumes:
- ./data/vllm:/data
- ./models:/models # Mount models directory
- ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_VLLM_DOCKER_ENABLED=false
# vLLM specific environment variables
- CUDA_VISIBLE_DEVICES=all
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped

View File

@@ -17,6 +17,7 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
@@ -29,6 +30,7 @@ backends:
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
@@ -39,11 +41,13 @@ backends:
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
instances:
port_range: [8000, 9000] # Port range for instances
@@ -66,6 +70,10 @@ auth:
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration for multi-node deployment
main: # Default local node (empty config)
```
## Configuration Files
@@ -101,6 +109,7 @@ server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
@@ -122,34 +131,40 @@ backends:
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
**Environment Variables:**
**LlamaCpp Backend:**
@@ -160,6 +175,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -169,11 +185,13 @@ backends:
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
### Instance Configuration
@@ -227,12 +245,26 @@ auth:
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
### Remote Node Configuration
View all available command line options:
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
```bash
llamactl --help
```yaml
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration map
main: # Local node (empty address means local)
address: "" # Not used for local node
api_key: "" # Not used for local node
worker1: # Remote worker node
address: "http://192.168.1.10:8080"
api_key: "worker1-api-key" # Management API key for authentication
```
You can also override configuration using command line flags when starting llamactl.
**Node Configuration Fields:**
- `local_node`: Specifies which node in the `nodes` map represents the local node
- `nodes`: Map of node configurations
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
- `api_key`: Management API key for authenticating with the remote node
**Environment Variables:**
- `LLAMACTL_LOCAL_NODE` - Name of the local node

View File

@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Build from Source
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
#### Using Docker Compose
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
#### Using Docker Build and Run
**llamactl with llama.cpp CUDA:**
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
**llamactl with vLLM CUDA:**
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
**llamactl built from source:**
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later
@@ -92,6 +157,12 @@ cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
## Remote Node Installation
For deployments with remote nodes:
- Install llamactl on each node using any of the methods above
- Configure API keys for authentication between nodes
## Verification
Verify your installation by checking the version:
@@ -103,3 +174,5 @@ llamactl --version
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.

View File

@@ -126,6 +126,7 @@ POST /api/v1/instances/{name}
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
- `nodes`: Array with single node name to deploy the instance to (for remote deployments)
See [Managing Instances](managing-instances.md) for complete configuration options.
@@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Remote Node Instance Example
```bash
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
# Check status of remote instance
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/remote-model
# Use remote instance with OpenAI-compatible API
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "remote-model",
"messages": [
{"role": "user", "content": "Hello from remote node!"}
]
}'
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:

View File

@@ -39,26 +39,27 @@ Each instance is displayed as a card showing:
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**:
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
4. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
5. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
6. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
6. Configure backend-specific options:
7. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
8. Click **"Create"** to save the instance
### Via API
@@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
"gpu_layers": 32
}
}'
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/instances/remote-llama \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
```
## Start Instance
@@ -227,3 +240,4 @@ Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```

View File

@@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama.
http://localhost:8080/api/v1/instances
```
## Remote Node Issues
### Node Configuration
**Problem:** Remote instances not appearing or cannot be managed
**Solutions:**
1. **Verify node configuration:**
```yaml
local_node: "main" # Must match a key in nodes map
nodes:
main:
address: "" # Empty for local node
worker1:
address: "http://worker1.internal:8080"
api_key: "secure-key" # Must match worker1's management key
```
2. **Test remote node connectivity:**
```bash
curl -H "Authorization: Bearer remote-node-key" \
http://remote-node:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs

View File

@@ -17,6 +17,7 @@ type BackendSettings struct {
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
@@ -40,6 +41,8 @@ type AppConfig struct {
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
LocalNode string `yaml:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
@@ -56,8 +59,14 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -121,6 +130,11 @@ type AuthConfig struct {
ManagementKeys []string `yaml:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address"`
APIKey string `yaml:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
@@ -132,8 +146,13 @@ func LoadConfig(configPath string) (AppConfig, error) {
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{
"main": {}, // Local node with empty config
},
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
@@ -337,6 +356,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +405,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +425,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -443,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -481,6 +523,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {

View File

@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}
func TestLoadConfig_LocalNode(t *testing.T) {
t.Run("default local node", func(t *testing.T) {
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "main" {
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
}
})
t.Run("local node from file", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "worker1"
nodes:
worker1:
address: ""
worker2:
address: "http://192.168.1.10:8080"
api_key: "test-key"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "worker1" {
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
}
// Verify nodes map (includes default "main" + worker1 + worker2)
if len(cfg.Nodes) != 3 {
t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
}
// Verify local node exists and is empty
localNode, exists := cfg.Nodes["worker1"]
if !exists {
t.Error("Expected local node 'worker1' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
if localNode.APIKey != "" {
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
}
// Verify remote node
remoteNode, exists := cfg.Nodes["worker2"]
if !exists {
t.Error("Expected remote node 'worker2' to exist in nodes map")
}
if remoteNode.Address != "http://192.168.1.10:8080" {
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
}
// Verify default main node still exists
_, exists = cfg.Nodes["main"]
if !exists {
t.Error("Expected default 'main' node to still exist in nodes map")
}
})
t.Run("custom local node name in config", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "primary"
nodes:
primary:
address: ""
worker1:
address: "http://192.168.1.10:8080"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "primary" {
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
}
// Verify nodes map includes default "main" + primary + worker1
if len(cfg.Nodes) != 3 {
t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
}
localNode, exists := cfg.Nodes["primary"]
if !exists {
t.Error("Expected local node 'primary' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
})
t.Run("local node from environment variable", func(t *testing.T) {
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "custom-node" {
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
}
})
}

View File

@@ -171,6 +171,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
if len(i.options.Nodes) > 0 {
return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
}
var host string
var port int
switch i.options.BackendType {
@@ -198,6 +203,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
@@ -207,6 +221,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
return nil
}
@@ -272,5 +290,24 @@ func (i *Process) UnmarshalJSON(data []byte) error {
i.options = aux.Options
}
// Initialize fields that are not serialized
if i.timeProvider == nil {
i.timeProvider = realTimeProvider{}
}
if i.logger == nil && i.globalInstanceSettings != nil {
i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
}
return nil
}
func (i *Process) IsRemote() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options == nil {
return false
}
return len(i.options.Nodes) > 0
}

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"log"
"net/http"
"os"
"os/exec"
"runtime"
"syscall"
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create context before building command (needed for CommandContext)
i.ctx, i.cancel = context.WithCancel(context.Background())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = cmd
if runtime.GOOS != "windows" {
@@ -383,7 +385,11 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
cmd.Env = []string{}
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}

View File

@@ -27,6 +27,8 @@ type CreateInstanceOptions struct {
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
Nodes []string `json:"nodes,omitempty"`
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`

View File

@@ -6,6 +6,7 @@ import (
"llamactl/pkg/config"
"llamactl/pkg/instance"
"log"
"net/http"
"os"
"path/filepath"
"strings"
@@ -25,10 +26,22 @@ type InstanceManager interface {
StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
GetInstanceLogs(name string, numLines int) (string, error)
Shutdown()
}
type RemoteManager interface {
ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteRemoteInstance(node *config.NodeConfig, name string) error
StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
@@ -42,13 +55,26 @@ type instanceManager struct {
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
// Remote instance management
httpClient *http.Client
instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config
nodeConfigMap map[string]*config.NodeConfig // Maps node name to node config for quick lookup
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
// Build node config map for quick lookup
nodeConfigMap := make(map[string]*config.NodeConfig)
for name := range nodesConfig {
nodeCopy := nodesConfig[name]
nodeConfigMap[name] = &nodeCopy
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
@@ -59,6 +85,13 @@ func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig con
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
instanceNodeMap: make(map[string]*config.NodeConfig),
nodeConfigMap: nodeConfigMap,
}
// Load existing instances from disk
@@ -238,18 +271,36 @@ func (im *instanceManager) loadInstance(name, path string) error {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
options := persistedInstance.GetOptions()
// Check if this is a remote instance
isRemote := options != nil && len(options.Nodes) > 0
var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
if !isRemote {
// Only set status callback for local instances
statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps
// Handle remote instance mapping
if isRemote {
nodeName := options.Nodes[0]
nodeConfig, exists := im.nodeConfigMap[nodeName]
if !exists {
return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
}
im.instanceNodeMap[name] = nodeConfig
} else {
// Check for port conflicts only for local instances
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
@@ -257,34 +308,58 @@ func (im *instanceManager) loadInstance(name, path string) error {
}
im.ports[port] = true
}
}
im.instances[name] = inst
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
// For instances with auto-restart disabled, it sets their status to Stopped
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
var instancesToStop []*instance.Process
for _, inst := range im.instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
inst.GetOptions().AutoRestart != nil {
if *inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
} else {
// Instance was running but auto-restart is disabled, mark as stopped
instancesToStop = append(instancesToStop, inst)
}
}
}
im.mu.RUnlock()
// Stop instances that have auto-restart disabled
for _, inst := range instancesToStop {
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
inst.SetStatus(instance.Stopped)
}
// Start instances that have auto-restart enabled
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.SetStatus(instance.Stopped)
// Check if this is a remote instance
if node := im.getNodeForInstance(inst); node != nil {
// Remote instance - use StartRemoteInstance
if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
}
} else {
// Local instance - call Start() directly
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
@@ -296,3 +371,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
delete(im.runningInstances, name)
}
}
// getNodeForInstance returns the node configuration for a remote instance
// Returns nil if the instance is not remote or the node is not found
func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
if !inst.IsRemote() {
return nil
}
// Check if we have a cached mapping
if nodeConfig, exists := im.instanceNodeMap[inst.Name]; exists {
return nodeConfig
}
return nil
}

View File

@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
TimeoutCheckInterval: 5,
}
mgr := manager.NewInstanceManager(backendConfig, cfg)
mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(backendConfig, cfg)
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(backendConfig, cfg)
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -207,5 +207,68 @@ func createTestManager() manager.InstanceManager {
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(backendConfig, cfg)
return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
}
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Create first manager and instance with auto-restart disabled
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
autoRestart := false
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
AutoRestart: &autoRestart,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Simulate instance being in running state when persisted
// (this would happen if the instance was running when llamactl was stopped)
inst.SetStatus(instance.Running)
// Shutdown first manager
manager1.Shutdown()
// Create second manager (simulating restart of llamactl)
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
// Get the loaded instance
loadedInst, err := manager2.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
// The instance should be marked as Stopped, not Running
// because auto-restart is disabled
if loadedInst.IsRunning() {
t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
}
if loadedInst.GetStatus() != instance.Stopped {
t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
}
manager2.Shutdown()
}

View File

@@ -3,6 +3,7 @@ package manager
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
@@ -11,16 +12,65 @@ import (
type MaxRunningInstancesError error
// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
// while preserving the Nodes field to maintain remote instance tracking
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
if localInst == nil || remoteInst == nil {
return
}
// Get the remote instance options
remoteOptions := remoteInst.GetOptions()
if remoteOptions == nil {
return
}
// Preserve the Nodes field from the local instance
localOptions := localInst.GetOptions()
var preservedNodes []string
if localOptions != nil && len(localOptions.Nodes) > 0 {
preservedNodes = make([]string, len(localOptions.Nodes))
copy(preservedNodes, localOptions.Nodes)
}
// Create a copy of remote options and restore the Nodes field
updatedOptions := *remoteOptions
updatedOptions.Nodes = preservedNodes
// Update the local instance with all remote data
localInst.SetOptions(&updatedOptions)
localInst.Status = remoteInst.Status
localInst.Created = remoteInst.Created
}
// ListInstances returns a list of all instances managed by the instance manager.
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*instance.Process, 0, len(im.instances))
localInstances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
localInstances = append(localInstances, inst)
}
return instances, nil
im.mu.RUnlock()
// Update remote instances with live state
for _, inst := range localInstances {
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.GetRemoteInstance(node, inst.Name)
if err != nil {
// Log error but continue with stale data
// Don't fail the entire list operation due to one remote failure
continue
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
}
}
return localInstances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
@@ -43,16 +93,56 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
// Check if instance with this name already exists (must be globally unique)
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Check if this is a remote instance
isRemote := len(options.Nodes) > 0
var nodeConfig *config.NodeConfig
if isRemote {
// Validate that the node exists
nodeName := options.Nodes[0] // Use first node for now
var exists bool
nodeConfig, exists = im.nodeConfigMap[nodeName]
if !exists {
return nil, fmt.Errorf("node %s not found", nodeName)
}
// Create the remote instance on the remote node
remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
if err != nil {
return nil, err
}
// Create a local stub that preserves the Nodes field for tracking
// We keep the original options (with Nodes) so IsRemote() works correctly
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, nil)
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Add to local tracking maps (but don't count towards limits)
im.instances[name] = inst
im.instanceNodeMap[name] = nodeConfig
// Persist the remote instance locally for tracking across restarts
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
}
return inst, nil
}
// Local instance creation
// Check max instances limit for local instances only
localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
@@ -73,28 +163,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
}
// GetInstance retrieves an instance by its name.
// For remote instances, this fetches the live state from the remote node and updates the local stub.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
inst, exists := im.instances[name]
im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
// Check if instance is remote and fetch live state
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.GetRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
// Return the local stub (preserving Nodes field)
return inst, nil
}
return inst, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.UpdateRemoteInstance(node, name, options)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
// Persist the updated remote instance locally
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
}
return inst, nil
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
@@ -105,55 +235,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
wasRunning := inst.IsRunning()
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
inst.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
return inst, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
inst, exists := im.instances[name]
im.mu.Unlock()
instance, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
err := im.DeleteRemoteInstance(node, name)
if err != nil {
return err
}
// Clean up local tracking
im.mu.Lock()
defer im.mu.Unlock()
delete(im.instances, name)
delete(im.instanceNodeMap, name)
// Delete the instance's config file if persistence is enabled
// Re-validate instance name for security (defense in depth)
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
return fmt.Errorf("invalid instance name for file deletion: %w", err)
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
}
return nil
}
if inst.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetPort())
im.mu.Lock()
defer im.mu.Unlock()
delete(im.ports, inst.GetPort())
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
// Re-validate instance name for security (defense in depth)
validatedName, err := validation.ValidateInstanceName(inst.Name)
if err != nil {
return fmt.Errorf("invalid instance name for file deletion: %w", err)
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
}
return nil
@@ -163,33 +328,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.StartRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
}
if inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already running", name)
}
// Check max running instances limit for local instances only
im.mu.RLock()
localRunningCount := 0
for instName := range im.runningInstances {
if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
localRunningCount++
}
}
maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
}
if err := instance.Start(); err != nil {
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
err := im.persistInstance(inst)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
return inst, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
@@ -206,51 +397,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.StopRemoteInstance(node, name)
if err != nil {
return nil, err
}
if err := instance.Stop(); err != nil {
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
}
if !inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
err := im.persistInstance(inst)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
return inst, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name)
im.mu.RLock()
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.RestartRemoteInstance(node, name)
if err != nil {
return nil, err
}
return im.StartInstance(instance.Name)
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
}
inst, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(inst.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
return im.GetRemoteInstanceLogs(node, name, numLines)
}
// Get logs from the local instance
return inst.GetLogs(numLines)
}
// getPortFromOptions extracts the port from backend-specific options

View File

@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {

243
pkg/manager/remote_ops.go Normal file
View File

@@ -0,0 +1,243 @@
package manager
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"net/http"
)
// stripNodesFromOptions creates a copy of the instance options without the Nodes field
// to prevent routing loops when sending requests to remote nodes
func (im *instanceManager) stripNodesFromOptions(options *instance.CreateInstanceOptions) *instance.CreateInstanceOptions {
if options == nil {
return nil
}
// Create a copy of the options struct
optionsCopy := *options
// Clear the Nodes field to prevent the remote node from trying to route further
optionsCopy.Nodes = nil
return &optionsCopy
}
// makeRemoteRequest is a helper function to make HTTP requests to a remote node
func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
var reqBody io.Reader
if body != nil {
// Strip nodes from CreateInstanceOptions to prevent routing loops
if options, ok := body.(*instance.CreateInstanceOptions); ok {
body = im.stripNodesFromOptions(options)
}
jsonData, err := json.Marshal(body)
if err != nil {
return nil, fmt.Errorf("failed to marshal request body: %w", err)
}
reqBody = bytes.NewBuffer(jsonData)
}
url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
req, err := http.NewRequest(method, url, reqBody)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
if nodeConfig.APIKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
}
resp, err := im.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to execute request: %w", err)
}
return resp, nil
}
// parseRemoteResponse is a helper function to parse API responses
func parseRemoteResponse(resp *http.Response, result any) error {
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
if result != nil {
if err := json.Unmarshal(body, result); err != nil {
return fmt.Errorf("failed to unmarshal response: %w", err)
}
}
return nil
}
// ListRemoteInstances lists all instances on the remote node
func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil)
if err != nil {
return nil, err
}
var instances []*instance.Process
if err := parseRemoteResponse(resp, &instances); err != nil {
return nil, err
}
return instances, nil
}
// CreateRemoteInstance creates a new instance on the remote node
func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// GetRemoteInstance retrieves an instance by name from the remote node
func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// UpdateRemoteInstance updates an existing instance on the remote node
func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// DeleteRemoteInstance deletes an instance from the remote node
func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
if err != nil {
return err
}
return parseRemoteResponse(resp, nil)
}
// StartRemoteInstance starts an instance on the remote node
func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/start", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// StopRemoteInstance stops an instance on the remote node
func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/stop", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// RestartRemoteInstance restarts an instance on the remote node
func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/restart", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// GetRemoteInstanceLogs retrieves logs for an instance from the remote node
func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// Logs endpoint might return plain text or JSON
// Try to parse as JSON first (in case it's wrapped in a response object)
var logResponse struct {
Logs string `json:"logs"`
}
if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" {
return logResponse.Logs, nil
}
// Otherwise, return as plain text
return string(body), nil
}

View File

@@ -0,0 +1,39 @@
package manager
import (
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"testing"
)
func TestStripNodesFromOptions(t *testing.T) {
im := &instanceManager{}
// Test nil case
if result := im.stripNodesFromOptions(nil); result != nil {
t.Errorf("Expected nil, got %+v", result)
}
// Test main case: nodes should be stripped, other fields preserved
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
Nodes: []string{"node1", "node2"},
Environment: map[string]string{"TEST": "value"},
}
result := im.stripNodesFromOptions(options)
if result.Nodes != nil {
t.Errorf("Expected Nodes to be nil, got %+v", result.Nodes)
}
if result.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected BackendType preserved")
}
if result.Environment["TEST"] != "value" {
t.Errorf("Expected Environment preserved")
}
// Original should not be modified
if len(options.Nodes) != 2 {
t.Errorf("Original options should not be modified")
}
}

View File

@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
// Identify instances that should timeout
for _, inst := range im.instances {
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
continue
}
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances without idle timeout
}

View File

@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
MaxInstances: 5,
}
manager := manager.NewInstanceManager(backendConfig, cfg)
manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}

View File

@@ -1,795 +1,29 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"os/exec"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
"net/http/httputil"
"sync"
"time"
)
type Handler struct {
InstanceManager manager.InstanceManager
cfg config.AppConfig
httpClient *http.Client
remoteProxies map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
remoteProxiesMu sync.RWMutex
}
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{
InstanceManager: im,
cfg: cfg,
}
}
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
if lines == "" {
lines = "-1"
}
num_lines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := inst.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
proxyPath := r.URL.Path[len(prefix):]
// Ensure the proxy path starts with "/"
if !strings.HasPrefix(proxyPath, "/") {
proxyPath = "/" + proxyPath
}
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Restore original path for logging purposes
defer func() {
r.URL.Path = originalPath
}()
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest)
return
}
r.Body.Close()
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
}
}
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
remoteProxies: make(map[string]*httputil.ReverseProxy),
}
}

View File

@@ -0,0 +1,320 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/instance"
"net/http"
"os/exec"
"strings"
"github.com/go-chi/chi/v5"
)
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Get the instance name from the URL parameter
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
options := inst.GetOptions()
if options == nil {
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
return
}
if options.BackendType != backends.BackendTypeLlamaCpp {
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
return
}
if !inst.IsRunning() {
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(name); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/llama-cpp/<name>" prefix from the request URL
prefix := fmt.Sprintf("/llama-cpp/%s", name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
proxy.ServeHTTP(w, r)
}
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}

View File

@@ -0,0 +1,445 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"net/http/httputil"
"net/url"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
)
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
numLines := -1 // Default to all lines
if lines != "" {
parsedLines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
numLines = parsedLines
}
// Use the instance manager which handles both local and remote instances
logs, err := h.InstanceManager.GetInstanceLogs(name, numLines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Check if this is a remote instance
if inst.IsRemote() {
h.RemoteInstanceProxy(w, r, name, inst)
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
// RemoteInstanceProxy proxies requests to a remote instance
func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
// Get the node name from instance options
options := inst.GetOptions()
if options == nil || len(options.Nodes) == 0 {
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
return
}
nodeName := options.Nodes[0]
// Check if we have a cached proxy for this node
h.remoteProxiesMu.RLock()
proxy, exists := h.remoteProxies[nodeName]
h.remoteProxiesMu.RUnlock()
if !exists {
// Find node configuration
nodeConfig, exists := h.cfg.Nodes[nodeName]
if !exists {
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
return
}
// Create reverse proxy to remote node
targetURL, err := url.Parse(nodeConfig.Address)
if err != nil {
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
return
}
proxy = httputil.NewSingleHostReverseProxy(targetURL)
// Modify request before forwarding
originalDirector := proxy.Director
apiKey := nodeConfig.APIKey // Capture for closure
proxy.Director = func(req *http.Request) {
originalDirector(req)
// Add API key if configured
if apiKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
}
}
// Cache the proxy by node name
h.remoteProxiesMu.Lock()
h.remoteProxies[nodeName] = proxy
h.remoteProxiesMu.Unlock()
}
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}

View File

@@ -0,0 +1,79 @@
package server
import (
"encoding/json"
"net/http"
"github.com/go-chi/chi/v5"
)
// NodeResponse represents a sanitized node configuration for API responses
type NodeResponse struct {
Address string `json:"address"`
}
// ListNodes godoc
// @Summary List all configured nodes
// @Description Returns a map of all nodes configured in the server (node name -> node config)
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes [get]
func (h *Handler) ListNodes() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Convert to sanitized response format (map of name -> NodeResponse)
nodeResponses := make(map[string]NodeResponse, len(h.cfg.Nodes))
for name, node := range h.cfg.Nodes {
nodeResponses[name] = NodeResponse{
Address: node.Address,
}
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(nodeResponses); err != nil {
http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetNode godoc
// @Summary Get details of a specific node
// @Description Returns the details of a specific node by name
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Node Name"
// @Success 200 {object} NodeResponse "Node details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 404 {string} string "Node not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes/{name} [get]
func (h *Handler) GetNode() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
return
}
nodeConfig, exists := h.cfg.Nodes[name]
if !exists {
http.Error(w, "Node not found", http.StatusNotFound)
return
}
// Convert to sanitized response format
nodeResponse := NodeResponse{
Address: nodeConfig.Address,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(nodeResponse); err != nil {
http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError)
return
}
}
}

View File

@@ -0,0 +1,206 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/instance"
"net/http"
"net/http/httputil"
"net/url"
)
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest)
return
}
r.Body.Close()
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
// Check if this is a remote instance
if inst.IsRemote() {
// Restore the body for the remote proxy
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
h.RemoteOpenAIProxy(w, r, modelName, inst)
return
}
if !inst.IsRunning() {
options := inst.GetOptions()
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
}
}
// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance
func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
// Get the node name from instance options
options := inst.GetOptions()
if options == nil || len(options.Nodes) == 0 {
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
return
}
nodeName := options.Nodes[0]
// Check if we have a cached proxy for this node
h.remoteProxiesMu.RLock()
proxy, exists := h.remoteProxies[nodeName]
h.remoteProxiesMu.RUnlock()
if !exists {
// Find node configuration
nodeConfig, exists := h.cfg.Nodes[nodeName]
if !exists {
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
return
}
// Create reverse proxy to remote node
targetURL, err := url.Parse(nodeConfig.Address)
if err != nil {
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
return
}
proxy = httputil.NewSingleHostReverseProxy(targetURL)
// Modify request before forwarding
originalDirector := proxy.Director
apiKey := nodeConfig.APIKey // Capture for closure
proxy.Director = func(req *http.Request) {
originalDirector(req)
// Add API key if configured
if apiKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
}
}
// Cache the proxy
h.remoteProxiesMu.Lock()
h.remoteProxies[nodeName] = proxy
h.remoteProxiesMu.Unlock()
}
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}

View File

@@ -0,0 +1,22 @@
package server
import (
"fmt"
"net/http"
)
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}

View File

@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
ExposedHeaders: []string{"Link"},
AllowCredentials: false,
MaxAge: 300,
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
})
// Node management endpoints
r.Route("/nodes", func(r chi.Router) {
r.Get("/", handler.ListNodes()) // List all nodes
r.Route("/{name}", func(r chi.Router) {
r.Get("/", handler.GetNode())
})
})
// Instance management endpoints
r.Route("/instances", func(r chi.Router) {
r.Get("/", handler.ListInstances()) // List all instances
@@ -103,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
r.Route("/llama-cpp/{name}", func(r chi.Router) {
// Public Routes
// Allow llama-cpp server to serve its own WebUI if it is running.
// Don't auto start the server since it can be accessed without an API key
r.Get("/", handler.LlamaCppProxy(false))
// Private Routes
r.Group(func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
// This handler auto start the server if it's not running
llamaCppHandler := handler.LlamaCppProxy(true)
// llama.cpp server specific proxy endpoints
r.Get("/props", llamaCppHandler)
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
r.Get("/slots", llamaCppHandler)
r.Post("/apply-template", llamaCppHandler)
r.Post("/completion", llamaCppHandler)
r.Post("/detokenize", llamaCppHandler)
r.Post("/embeddings", llamaCppHandler)
r.Post("/infill", llamaCppHandler)
r.Post("/metrics", llamaCppHandler)
r.Post("/props", llamaCppHandler)
r.Post("/reranking", llamaCppHandler)
r.Post("/tokenize", llamaCppHandler)
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
// - /v1/completions
// - /v1/chat/completions
// - /v1/embeddings
// - /v1/rerank
// - /v1/reranking
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
// on "model" field being optional, and handler.OpenAIProxy requires it.
r.Post("/v1/*", llamaCppHandler)
})
})
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err)

View File

@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => {
if (key === 'backend_options' && value && typeof value === 'object') {
if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -123,8 +123,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays
} else if (value !== undefined && value !== null) {
// Skip empty strings
if (typeof value === 'string' && value.trim() === "") {
return;
}
// Skip empty arrays
if (Array.isArray(value) && value.length === 0) {
return;
}

View File

@@ -1,4 +1,4 @@
import React from 'react'
import React, { useState, useEffect } from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
@@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
interface InstanceSettingsCardProps {
instanceName: string
@@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onNameChange,
onChange
}) => {
const [nodes, setNodes] = useState<NodesMap>({})
const [loadingNodes, setLoadingNodes] = useState(true)
useEffect(() => {
const fetchNodes = async () => {
try {
const fetchedNodes = await nodesApi.list()
setNodes(fetchedNodes)
// Auto-select first node if none selected
const nodeNames = Object.keys(fetchedNodes)
if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
onChange('nodes', [nodeNames[0]])
}
} catch (error) {
console.error('Failed to fetch nodes:', error)
} finally {
setLoadingNodes(false)
}
}
void fetchNodes()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const nodeOptions = Object.keys(nodes).map(nodeName => ({
value: nodeName,
label: nodeName
}))
const handleNodeChange = (value: string | undefined) => {
if (value) {
onChange('nodes', [value])
} else {
onChange('nodes', undefined)
}
}
const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
return (
<Card>
<CardHeader>
@@ -50,6 +92,18 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
</p>
</div>
{/* Node Selection */}
{!loadingNodes && Object.keys(nodes).length > 0 && (
<SelectInput
id="node"
label="Node"
value={selectedNode}
onChange={handleNodeChange}
options={nodeOptions}
description="Select the node where the instance will run (default: main node)"
/>
)}
{/* Auto Restart Configuration */}
<AutoRestartConfiguration
formData={formData}

View File

@@ -1,4 +1,4 @@
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch('/api/v1/instances', {
const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally
const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
})
it('converts HTTP errors to meaningful messages', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 409,
text: () => Promise.resolve('Instance already exists')
})
text: () => Promise.resolve('Instance already exists'),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.create('existing', {}))
.rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
})
it('handles empty error responses gracefully', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 500,
text: () => Promise.resolve('')
})
text: () => Promise.resolve(''),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.list())
.rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith(
'/api/v1/instances/test-instance/logs?lines=100',
expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object)
)
})

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llmamctl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
@@ -46,12 +49,9 @@ async function apiCall<T>(
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
if (contentLength === '0') {
return {} as T; // Return empty object for empty JSON responses
}
}
const data = await response.json() as T;
return data;
}
@@ -103,6 +103,22 @@ export const backendsApi = {
},
};
// Node API types
export interface NodeResponse {
address: string;
}
export type NodesMap = Record<string, NodeResponse>;
// Node API functions
export const nodesApi = {
// GET /nodes - returns map of node name to NodeResponse
list: () => apiCall<NodesMap>("/nodes"),
// GET /nodes/{name}
get: (name: string) => apiCall<NodeResponse>(`/nodes/${name}`),
};
// Instance API functions
export const instancesApi = {
// GET /instances

View File

@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
// Clone the response before reading to avoid consuming the body stream
const errorMessage = await parseErrorResponse(response.clone())
throw new Error(errorMessage)
}
}

View File

@@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
// Node configuration
nodes: z.array(z.string()).optional(),
})
// Re-export types and schemas from backend files

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'],
css: true,
},
// ensures relative asset paths to support being served behind a subpath
base: "./"
})