90 Commits

Author SHA1 Message Date
eb5abae173 Merge pull request #66 from lordmathis/fix/disable-node-edit
fix: Prevent node change on update
2025-10-16 22:37:59 +02:00
696a2cb18b Prevent node change on update 2025-10-16 22:35:29 +02:00
e7402f0029 Merge pull request #65 from lordmathis/fix/local-node
fix: Detect local instances based on local node in nodes array
2025-10-16 22:28:01 +02:00
5c9a397746 Fix get local proxy 2025-10-16 22:11:29 +02:00
e97ca727d1 Clarify node configuration in docs 2025-10-16 21:50:06 +02:00
9f3c01384b Remove stripNodesFromOptions function 2025-10-16 21:29:27 +02:00
c5097e59be Fix local instance detection 2025-10-16 21:26:04 +02:00
cf20f304b3 Merge pull request #61 from lordmathis/fix/docs-formatting
fix: Add MkDocs hook to fix line endings in markdown files
2025-10-09 23:28:09 +02:00
72eba48b80 Add MkDocs hook to fix line endings in markdown files 2025-10-09 23:23:17 +02:00
c3037f914d Merge pull request #60 from lordmathis/lordmathis-patch-1
Update docs.yaml
2025-10-09 22:31:38 +02:00
81266b4bc4 Update docs.yaml 2025-10-09 22:29:23 +02:00
a31af94e7b Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
2025-10-09 22:23:27 +02:00
9ee0a184b3 Re-validate instance name in DeleteInstance for improved security 2025-10-09 22:18:53 +02:00
5436c28a1f Add instance name validation before deletion for security 2025-10-09 22:10:40 +02:00
73b9dd5bc7 Rename workflows for consistency 2025-10-09 21:53:14 +02:00
f61e8dad5c Add User Docs badge to README 2025-10-09 21:51:38 +02:00
ab2770bdd9 Add documentation for remote node deployment and configuration 2025-10-09 21:50:39 +02:00
e7a6a7003e Skip remote instances in checkAllTimeouts and EvictLRUInstance methods 2025-10-09 21:13:38 +02:00
2b950ee649 Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data 2025-10-09 20:39:21 +02:00
b965b77c18 Prevent remote instances from using local proxy in GetProxy method 2025-10-09 20:24:54 +02:00
8a16a195de Fix getting remote instance logs 2025-10-09 20:22:32 +02:00
9684a8a09b Enhance instance management to preserve local state for remote instances 2025-10-09 19:34:52 +02:00
9d5f01d4ae Auto-select first node in InstanceSettingsCard if none is selected 2025-10-09 19:13:58 +02:00
e281708b20 Enhance auto-start logic to differentiate between remote and local instances 2025-10-09 18:56:23 +02:00
8d9b0c0621 Initialize timeProvider and logger in UnmarshalJSON for Process 2025-10-09 18:56:12 +02:00
6c1a76691d Improve cleanup of options in InstanceDialog to skip empty strings and arrays 2025-10-09 18:49:36 +02:00
5d958ed283 Fix backend_options cleanup to exclude empty arrays in InstanceDialog 2025-10-09 18:38:33 +02:00
56b95d1243 Refactor InstanceSettingsCard and API types to use NodesMap 2025-10-08 19:52:39 +02:00
688b815ca7 Add LocalNode configuration 2025-10-08 19:43:53 +02:00
7f6725da96 Refactor NodeConfig handling to use a map 2025-10-08 19:24:24 +02:00
3418735204 Add stripNodesFromOptions function to prevent routing loops in remote requests 2025-10-07 20:27:31 +02:00
2f1cf5acdc Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests 2025-10-07 19:57:21 +02:00
01380e6641 Update instance manager tests to use empty NodeConfig slice 2025-10-07 19:18:13 +02:00
6298b03636 Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling 2025-10-07 18:57:08 +02:00
aae3f84d49 Implement caching for remote instance proxies and enhance proxy request handling 2025-10-07 18:44:23 +02:00
554796391b Remove test config file 2025-10-07 18:05:30 +02:00
16b28bac05 Merge branch 'main' into feat/multi-host 2025-10-07 18:04:24 +02:00
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) if requested invalid model name. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
b728a7c6b2 Fix fetchNodes call to ensure proper handling of promise 2025-10-03 10:53:29 +02:00
a491f29483 Add node selection functionality to InstanceSettingsCard and define Node API 2025-10-02 23:18:33 +02:00
670f8ff81b Split up handlers 2025-10-02 23:11:20 +02:00
da56456504 Add node management endpoints to handle listing and retrieving node details 2025-10-02 22:51:41 +02:00
c30053e51c Enhance instance loading to support remote instances and handle node configuration 2025-10-01 22:59:45 +02:00
347c58e15f Enhance instance manager to persist remote instances and update tracking on modifications 2025-10-01 22:58:57 +02:00
2ed67eb672 Add remote instance proxying functionality to handler 2025-10-01 22:17:19 +02:00
0188f82306 Implement remote instance creation and deletion in instance manager 2025-10-01 22:05:18 +02:00
e0f176de10 Enhance instance manager to support remote instance management and update related tests 2025-10-01 20:25:06 +02:00
2759be65a5 Add remote instance management functionality and configuration support 2025-09-30 21:09:05 +02:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
50 changed files with 3092 additions and 1265 deletions

45
.dockerignore Normal file
View File

@@ -0,0 +1,45 @@
# Paths listed here are excluded from the Docker build context.
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files (editor/IDE settings)
.vscode/
.idea/
# Build artifacts (the WebUI is rebuilt inside the image, see docker/Dockerfile.source)
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories (runtime state; mounted as volumes rather than baked into images)
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration (may contain API keys; keep out of images)
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

View File

@@ -1,4 +1,4 @@
name: Build and Deploy Documentation name: User Docs
on: on:
push: push:

2
.gitignore vendored
View File

@@ -35,3 +35,5 @@ node_modules/
dist/ dist/
__pycache__/ __pycache__/
site/

View File

@@ -1,6 +1,6 @@
# llamactl # llamactl
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.** **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
@@ -23,6 +23,12 @@
### ⚡ Smart Operations ### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management - **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
### 🔗 Remote Instance Deployment
- **Remote Node Support**: Deploy instances on remote hosts
- **Central Management**: Manage remote instances from a single dashboard
- **Seamless Routing**: Automatic request routing to remote instances
![Dashboard Screenshot](docs/images/dashboard.png) ![Dashboard Screenshot](docs/images/dashboard.png)
@@ -52,7 +58,8 @@ llamactl
2. Click "Create Instance" 2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM) 3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options 4. Set model path and backend-specific options
5. Start or stop the instance 5. Configure environment variables if needed (optional)
6. Start or stop the instance
### Or use the REST API: ### Or use the REST API:
```bash ```bash
@@ -66,10 +73,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance # Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \ curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \ -H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}' -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
# Use with OpenAI SDK # Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \ curl -X POST localhost:8080/v1/chat/completions \
@@ -93,7 +100,30 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page # Windows - Download from releases page
``` ```
### Option 2: Build from Source ### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+ Requires Go 1.24+ and Node.js 22+
```bash ```bash
git clone https://github.com/lordmathis/llamactl.git git clone https://github.com/lordmathis/llamactl.git
@@ -145,47 +175,23 @@ pip install vllm
# Or use Docker - no local installation required # Or use Docker - no local installation required
``` ```
## Docker Support ## Backend Docker Support
llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for: llamactl can run backends in Docker containers:
- Production deployments without local backend installation
- Isolating backend dependencies
- GPU-accelerated inference using official Docker images
### Docker Configuration
Enable Docker support using the new structured backend configuration:
```yaml ```yaml
backends: backends:
llama-cpp: llama-cpp:
command: "llama-server"
docker: docker:
enabled: true enabled: true
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
vllm: vllm:
command: "vllm"
args: ["serve"]
docker: docker:
enabled: true enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
``` ```
### Key Features **Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
- **Host Networking**: Uses `--network host` for seamless port management For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
- **GPU Support**: Includes `--gpus all` for GPU acceleration
- **Environment Variables**: Configure container environment as needed
- **Flexible Configuration**: Per-backend Docker settings with sensible defaults
### Requirements
- Docker installed and running
- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
- No local backend installation required when using Docker
## Configuration ## Configuration
@@ -196,30 +202,34 @@ server:
host: "0.0.0.0" # Server host to bind to host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all) allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs enable_swagger: false # Enable Swagger UI for API docs
backends: backends:
llama-cpp: llama-cpp:
command: "llama-server" command: "llama-server"
args: [] args: []
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server" image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"] args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} environment: {} # Environment variables for the container
vllm: vllm:
command: "vllm" command: "vllm"
args: ["serve"] args: ["serve"]
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false enabled: false
image: "vllm/vllm-openai:latest" image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} environment: {} # Environment variables for the container
mlx: mlx:
command: "mlx_lm.server" command: "mlx_lm.server"
args: [] args: []
environment: {} # Environment variables for the backend process
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances

View File

@@ -58,7 +58,7 @@ func main() {
} }
// Initialize the instance manager // Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances) instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes, cfg.LocalNode)
// Create a new handler with the instance manager // Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg) handler := server.NewHandler(instanceManager, cfg)

View File

@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Install curl for downloading llamactl.
# ca-certificates is listed explicitly because recommended packages are skipped
# and curl needs a CA bundle for the HTTPS downloads below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# - `curl -f` turns HTTP errors (rate limiting, 404) into a non-zero exit
#   instead of piping an error page into grep/tar.
# - The explicit emptiness check is needed because the exit status of the
#   lookup pipeline is that of `sed`, which succeeds even on empty input.
RUN set -eu; \
    LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'); \
    if [ -z "${LATEST_VERSION}" ]; then \
        echo "ERROR: could not determine the latest llamactl release tag" >&2; \
        exit 1; \
    fi; \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz; \
    mv llamactl /usr/local/bin/; \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Point llamactl at the llama-server binary under /app and make the libraries
# in /app resolvable at runtime.
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

64
docker/Dockerfile.source Normal file
View File

@@ -0,0 +1,64 @@
# WebUI build stage
# Node 22 matches the "Node.js 22+" requirement documented for source builds.
FROM node:22-alpine AS webui-builder

WORKDIR /webui

# Copy webui package files first so the dependency layer is cached
# independently of source changes
COPY webui/package*.json ./

# Install dependencies
RUN npm ci

# Copy webui source
COPY webui/ ./

# Build webui
RUN npm run build

# Go build stage
FROM golang:1.24-alpine AS builder

# Install build dependencies
RUN apk add --no-cache git ca-certificates

# Set working directory
WORKDIR /build

# Copy go mod files
COPY go.mod go.sum ./

# Download dependencies
RUN go mod download

# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/

# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist

# Build the application (static binary, CGO disabled, so it runs on plain Alpine)
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server

# Final stage
FROM alpine:latest

# Install runtime dependencies
RUN apk --no-cache add ca-certificates

# Create data directory
RUN mkdir -p /data

# Set working directory
WORKDIR /data

# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl

# Expose the default port
EXPOSE 8080

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

20
docker/Dockerfile.vllm Normal file
View File

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest

# Install curl for downloading llamactl.
# ca-certificates is listed explicitly because recommended packages are skipped
# and curl needs a CA bundle for the HTTPS downloads below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# - `curl -f` turns HTTP errors (rate limiting, 404) into a non-zero exit
#   instead of piping an error page into grep/tar.
# - The explicit emptiness check is needed because the exit status of the
#   lookup pipeline is that of `sed`, which succeeds even on empty input.
RUN set -eu; \
    LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'); \
    if [ -z "${LATEST_VERSION}" ]; then \
        echo "ERROR: could not determine the latest llamactl release tag" >&2; \
        exit 1; \
    fi; \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz; \
    mv llamactl /usr/local/bin/; \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Set llamactl as the entrypoint (replaces the base image's entrypoint)
ENTRYPOINT ["llamactl"]

56
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,56 @@
version: '3.8'

# NOTE(review): relative host paths below (./data, ./models) resolve against the
# Compose project directory — confirm they point at the directories created by
# the documented `mkdir -p data/llamacpp data/vllm models` step.

services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose every GPU to the container. CUDA_VISIBLE_DEVICES only accepts
      # device indices or UUIDs, so the value "all" would hide all GPUs from
      # CUDA; "all" is a valid value for the NVIDIA container runtime variable.
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

60
docs/fix_line_endings.py Normal file
View File

@@ -0,0 +1,60 @@
"""
MkDocs hook to fix line endings for proper rendering.
Automatically adds two spaces at the end of lines that need line breaks.
"""
import re
def on_page_markdown(markdown, page, config, **kwargs):
    """
    Fix line endings in markdown content for proper MkDocs rendering.

    Appends two trailing spaces (a Markdown hard line break) to every line of
    regular text so consecutive lines are not merged into one paragraph.

    Args:
        markdown: Raw markdown source of the page as a single string.
        page: Page object supplied by the hook caller (unused).
        config: Configuration object supplied by the hook caller (unused).
        **kwargs: Additional hook arguments (unused).

    Returns:
        The markdown text with hard line breaks added. Lines inside fenced
        code blocks, empty lines, headers, blockquotes, table rows, HTML-like
        lines, horizontal rules / front matter delimiters, standalone
        punctuation lines and lines that already end with two spaces are
        returned unchanged.
    """
    # A Markdown hard line break is *two* trailing spaces; a single trailing
    # space is ignored by the renderer.
    hard_break = '  '

    processed_lines = []
    in_code_block = False

    for line in markdown.split('\n'):
        stripped = line.strip()

        # Track code blocks: every fence line toggles the state and is itself
        # emitted unchanged.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            processed_lines.append(line)
            continue

        # Skip processing inside code blocks and on empty lines
        if in_code_block or not stripped:
            processed_lines.append(line)
            continue

        # Skip lines that shouldn't have line breaks:
        # - Headers (# ## ###)
        # - Blockquotes (>)
        # - Table rows (|)
        # - Lines already ending with two spaces
        # - YAML front matter and HTML tags
        # - Standalone punctuation lines
        if (stripped.startswith(('#', '>', '---', '<')) or
                '|' in stripped or
                line.endswith(hard_break) or
                stripped.endswith('>') or
                stripped in ('.', '!', '?', ':', ';', '```', '---', ',')):
            processed_lines.append(line)
            continue

        # Regular text: normalise trailing whitespace to exactly two spaces
        processed_lines.append(line.rstrip() + hard_break)

    return '\n'.join(processed_lines)

View File

@@ -17,30 +17,37 @@ server:
host: "0.0.0.0" # Server host to bind to host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all) allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs enable_swagger: false # Enable Swagger UI for API docs
backends: backends:
llama-cpp: llama-cpp:
command: "llama-server" command: "llama-server"
args: [] args: []
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server" image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"] args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
vllm: vllm:
command: "vllm" command: "vllm"
args: ["serve"] args: ["serve"]
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false enabled: false
image: "vllm/vllm-openai:latest" image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
mlx: mlx:
command: "mlx_lm.server" command: "mlx_lm.server"
args: [] args: []
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
instances: instances:
port_range: [8000, 9000] # Port range for instances port_range: [8000, 9000] # Port range for instances
@@ -63,6 +70,10 @@ auth:
inference_keys: [] # Keys for inference endpoints inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints management_keys: [] # Keys for management endpoints
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration for multi-node deployment
main: # Default local node (empty config)
``` ```
## Configuration Files ## Configuration Files
@@ -98,6 +109,7 @@ server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0") host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080) port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"]) allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false) enable_swagger: false # Enable Swagger UI (default: false)
``` ```
@@ -113,36 +125,74 @@ backends:
llama-cpp: llama-cpp:
command: "llama-server" command: "llama-server"
args: [] args: []
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false # Enable Docker runtime (default: false) enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server" image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"] args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
vllm: vllm:
command: "vllm" command: "vllm"
args: ["serve"] args: ["serve"]
environment: {} # Environment variables for the backend process
docker: docker:
enabled: false enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest" image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
mlx: mlx:
command: "mlx_lm.server" command: "mlx_lm.server"
args: [] args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker # MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
``` ```
**Backend Configuration Fields:** **Backend Configuration Fields:**
- `command`: Executable name/path for the backend - `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances - `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional) - `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime - `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use - `image`: Docker image to use
- `args`: Additional arguments passed to `docker run` - `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional) - `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required so that NGINX streams responses without buffering them.
**Environment Variables:**
**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
### Instance Configuration ### Instance Configuration
```yaml ```yaml
@@ -195,12 +245,26 @@ auth:
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false) - `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys - `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options ### Remote Node Configuration
View all available command line options: llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
```bash ```yaml
llamactl --help local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration map
main: # Local node (empty address means local)
address: "" # Not used for local node
api_key: "" # Not used for local node
worker1: # Remote worker node
address: "http://192.168.1.10:8080"
api_key: "worker1-api-key" # Management API key for authentication
``` ```
You can also override configuration using command line flags when starting llamactl. **Node Configuration Fields:**
- `local_node`: Specifies which node in the `nodes` map represents the local node. Must match exactly what other nodes call this node.
- `nodes`: Map of node configurations
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
- `api_key`: Management API key for authenticating with the remote node
**Environment Variables:**
- `LLAMACTL_LOCAL_NODE` - Name of the local node

View File

@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page # Windows - Download from releases page
``` ```
### Option 2: Build from Source ### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
#### Using Docker Compose
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
#### Using Docker Build and Run
**llamactl with llama.cpp CUDA:**
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
**llamactl with vLLM CUDA:**
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
**llamactl built from source:**
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements: Requirements:
- Go 1.24 or later - Go 1.24 or later
@@ -92,6 +157,13 @@ cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server go build -o llamactl ./cmd/server
``` ```
## Remote Node Installation
For deployments with remote nodes:
- Install llamactl on each node using any of the methods above
- Configure API keys for authentication between nodes
- Ensure node names are consistent across all configurations
## Verification ## Verification
Verify your installation by checking the version: Verify your installation by checking the version:
@@ -103,3 +175,5 @@ llamactl --version
## Next Steps ## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running! Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.

View File

@@ -116,7 +116,19 @@ Create and start a new instance.
POST /api/v1/instances/{name} POST /api/v1/instances/{name}
``` ```
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options. **Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
- `nodes`: Array containing a single node name to deploy the instance to (for remote deployments)
See [Managing Instances](managing-instances.md) for complete configuration options.
**Response:** **Response:**
```json ```json
@@ -354,7 +366,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \ -H "Authorization: Bearer your-api-key" \
-d '{ -d '{
"model": "/models/llama-2-7b.gguf" "backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}' }'
# Check instance status # Check instance status
@@ -386,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model http://localhost:8080/api/v1/instances/my-model
``` ```
### Remote Node Instance Example
```bash
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
# Check status of remote instance
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/remote-model
# Use remote instance with OpenAI-compatible API
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "remote-model",
"messages": [
{"role": "user", "content": "Hello from remote node!"}
]
}'
```
### Using the Proxy Endpoint ### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance: You can also directly proxy requests to the llama-server instance:

View File

@@ -39,25 +39,27 @@ Each instance is displayed as a card showing:
1. Click the **"Create Instance"** button on the dashboard 1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field) 2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**: 3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
4. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server - **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only) - **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference - **vLLM**: For distributed serving and high-throughput inference
4. Configure model source: 5. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo - **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`) - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings: 6. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure - **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts - **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts - **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable) - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
6. Configure backend-specific options: - **Environment Variables**: Set custom environment variables for the instance process
7. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc. - **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc. - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance 8. Click **"Create"** to save the instance
### Via API ### Via API
@@ -101,7 +103,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
"gpu_memory_utilization": 0.9 "gpu_memory_utilization": 0.9
}, },
"auto_restart": true, "auto_restart": true,
"on_demand_start": true "on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
}' }'
# Create llama.cpp instance with HuggingFace model # Create llama.cpp instance with HuggingFace model
@@ -115,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
"gpu_layers": 32 "gpu_layers": 32
} }
}' }'
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-llama \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
``` ```
## Start Instance ## Start Instance
@@ -221,3 +240,4 @@ Check the health status of your instances:
```bash ```bash
curl http://localhost:8080/api/instances/{name}/proxy/health curl http://localhost:8080/api/instances/{name}/proxy/health
``` ```

View File

@@ -125,6 +125,34 @@ This helps determine if the issue is with llamactl or with the underlying llama.
http://localhost:8080/api/v1/instances http://localhost:8080/api/v1/instances
``` ```
## Remote Node Issues
### Node Configuration
**Problem:** Remote instances not appearing or cannot be managed
**Solutions:**
1. **Verify node configuration:**
```yaml
local_node: "main" # Must match a key in nodes map
nodes:
main:
address: "" # Empty for local node
worker1:
address: "http://worker1.internal:8080"
api_key: "secure-key" # Must match worker1's management key
```
2. **Check node name consistency:**
- `local_node` on each node must match what other nodes call it
- Node names are case-sensitive
3. **Test remote node connectivity:**
```bash
curl -H "Authorization: Bearer remote-node-key" \
http://remote-node:8080/api/v1/instances
```
## Debugging and Logs ## Debugging and Logs
### Viewing Instance Logs ### Viewing Instance Logs

View File

@@ -69,6 +69,7 @@ plugins:
hooks: hooks:
- docs/readme_sync.py - docs/readme_sync.py
- docs/fix_line_endings.py
extra: extra:
version: version:

View File

@@ -1,6 +1,7 @@
package config package config
import ( import (
"log"
"os" "os"
"path/filepath" "path/filepath"
"runtime" "runtime"
@@ -12,9 +13,11 @@ import (
// BackendSettings contains structured backend configuration // BackendSettings contains structured backend configuration
type BackendSettings struct { type BackendSettings struct {
Command string `yaml:"command"` Command string `yaml:"command"`
Args []string `yaml:"args"` Args []string `yaml:"args"`
Docker *DockerSettings `yaml:"docker,omitempty"` Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
} }
// DockerSettings contains Docker-specific configuration // DockerSettings contains Docker-specific configuration
@@ -34,13 +37,15 @@ type BackendConfig struct {
// AppConfig represents the configuration for llamactl // AppConfig represents the configuration for llamactl
type AppConfig struct { type AppConfig struct {
Server ServerConfig `yaml:"server"` Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"` Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"` Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"` Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"` LocalNode string `yaml:"local_node,omitempty"`
CommitHash string `yaml:"-"` Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
BuildTime string `yaml:"-"` Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
} }
// ServerConfig contains HTTP server configuration // ServerConfig contains HTTP server configuration
@@ -54,8 +59,14 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000") // Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"` AllowedOrigins []string `yaml:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
// Enable Swagger UI for API documentation // Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"` EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
} }
// InstancesConfig contains instance management configuration // InstancesConfig contains instance management configuration
@@ -119,6 +130,11 @@ type AuthConfig struct {
ManagementKeys []string `yaml:"management_keys"` ManagementKeys []string `yaml:"management_keys"`
} }
type NodeConfig struct {
Address string `yaml:"address"`
APIKey string `yaml:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence: // LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults // 1. Hardcoded defaults
// 2. Config file // 2. Config file
@@ -130,12 +146,18 @@ func LoadConfig(configPath string) (AppConfig, error) {
Host: "0.0.0.0", Host: "0.0.0.0",
Port: 8080, Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false, EnableSwagger: false,
}, },
LocalNode: "main",
Nodes: map[string]NodeConfig{
"main": {}, // Local node with empty config
},
Backends: BackendConfig{ Backends: BackendConfig{
LlamaCpp: BackendSettings{ LlamaCpp: BackendSettings{
Command: "llama-server", Command: "llama-server",
Args: []string{}, Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{ Docker: &DockerSettings{
Enabled: false, Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server", Image: "ghcr.io/ggml-org/llama.cpp:server",
@@ -165,10 +187,12 @@ func LoadConfig(configPath string) (AppConfig, error) {
}, },
}, },
Instances: InstancesConfig{ Instances: InstancesConfig{
PortRange: [2]int{8000, 9000}, PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(), DataDir: getDefaultDataDirectory(),
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"), // NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"), // should be relative path to DataDir if not explicitly set.
InstancesDir: "",
LogsDir: "",
AutoCreateDirs: true, AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited MaxRunningInstances: -1, // -1 means unlimited
@@ -196,6 +220,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
// 3. Override with environment variables // 3. Override with environment variables
loadEnvVars(&cfg) loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
return cfg, nil return cfg, nil
} }
@@ -216,6 +248,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
if err := yaml.Unmarshal(data, cfg); err != nil { if err := yaml.Unmarshal(data, cfg); err != nil {
return err return err
} }
log.Printf("Read config at %s", path)
return nil return nil
} }
} }
@@ -288,6 +321,12 @@ func loadEnvVars(cfg *AppConfig) {
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" { if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ") cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
} }
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" { if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil { if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil { if cfg.Backends.LlamaCpp.Docker == nil {
@@ -315,18 +354,28 @@ func loadEnvVars(cfg *AppConfig) {
if cfg.Backends.LlamaCpp.Docker.Environment == nil { if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string) cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
} }
// Parse env vars in format "KEY1=value1,KEY2=value2" parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
for _, envPair := range strings.Split(llamaDockerEnv, ",") { }
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 { if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1] if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
} cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
} }
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
} }
// vLLM backend // vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" { if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd cfg.Backends.VLLM.Command = vllmCmd
} }
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" { if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil { if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil { if cfg.Backends.VLLM.Docker == nil {
@@ -354,12 +403,13 @@ func loadEnvVars(cfg *AppConfig) {
if cfg.Backends.VLLM.Docker.Environment == nil { if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string) cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
} }
// Parse env vars in format "KEY1=value1,KEY2=value2" parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
for _, envPair := range strings.Split(vllmDockerEnv, ",") { }
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 { if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1] if cfg.Backends.VLLM.ResponseHeaders == nil {
} cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
} }
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
} }
// MLX backend // MLX backend
@@ -369,6 +419,18 @@ func loadEnvVars(cfg *AppConfig) {
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" { if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ") cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
} }
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults // Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" { if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -418,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" { if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",") cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
} }
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
} }
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000" // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -443,6 +510,32 @@ func ParsePortRange(s string) [2]int {
return [2]int{0, 0} // Invalid format return [2]int{0, 0} // Invalid format
} }
// parseKeyValuePairs splits input on sep into "KEY=value" pairs and stores
// each pair in dst.
//
// Whitespace around every pair and around the key is trimmed. Only the first
// "=" separates key from value, so values may themselves contain "=".
// Pairs without "=" or with an empty key are skipped. A nil dst is treated as
// a no-op instead of panicking on the map write.
func parseKeyValuePairs(input, sep string, dst map[string]string) {
	if input == "" || dst == nil {
		return
	}

	for _, pair := range strings.Split(input, sep) {
		parts := strings.SplitN(strings.TrimSpace(pair), "=", 2)
		if len(parts) != 2 {
			continue
		}
		// Trim the key so "KEY = value" does not produce the key "KEY ".
		key := strings.TrimSpace(parts[0])
		if key == "" {
			continue
		}
		dst[key] = parts[1]
	}
}

// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map. Existing entries with the same
// key are overwritten; malformed pairs are ignored.
func parseEnvVars(envString string, envMap map[string]string) {
	parseKeyValuePairs(envString, ",", envMap)
}

// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided header map. Pairs are separated by ";" (not ",")
// so that header values may contain commas. Existing entries with the same
// key are overwritten; malformed pairs are ignored.
func parseHeaders(envString string, envMap map[string]string) {
	parseKeyValuePairs(envString, ";", envMap)
}
// getDefaultDataDirectory returns platform-specific default data directory // getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string { func getDefaultDataDirectory() string {
switch runtime.GOOS { switch runtime.GOOS {
@@ -475,6 +568,10 @@ func getDefaultDataDirectory() string {
// getDefaultConfigLocations returns platform-specific config file locations // getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string { func getDefaultConfigLocations() []string {
var locations []string var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir() homeDir, _ := os.UserHomeDir()
switch runtime.GOOS { switch runtime.GOOS {

View File

@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command) t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
} }
} }
// TestLoadConfig_LocalNode verifies how LoadConfig resolves the local node
// name and the nodes map from the built-in defaults, from a YAML config file,
// and from the LLAMACTL_LOCAL_NODE environment variable.
func TestLoadConfig_LocalNode(t *testing.T) {
	// Isolate every subtest from an ambient LLAMACTL_LOCAL_NODE: the loader
	// ignores an empty value, and t.Setenv restores the previous value when
	// the test finishes.
	t.Setenv("LLAMACTL_LOCAL_NODE", "")

	t.Run("default local node", func(t *testing.T) {
		cfg, err := config.LoadConfig("nonexistent-file.yaml")
		if err != nil {
			t.Fatalf("LoadConfig failed: %v", err)
		}

		if cfg.LocalNode != "main" {
			t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
		}
	})

	t.Run("local node from file", func(t *testing.T) {
		tempDir := t.TempDir()
		configFile := filepath.Join(tempDir, "test-config.yaml")

		configContent := `
local_node: "worker1"
nodes:
  worker1:
    address: ""
  worker2:
    address: "http://192.168.1.10:8080"
    api_key: "test-key"
`

		err := os.WriteFile(configFile, []byte(configContent), 0644)
		if err != nil {
			t.Fatalf("Failed to write test config file: %v", err)
		}

		cfg, err := config.LoadConfig(configFile)
		if err != nil {
			t.Fatalf("LoadConfig failed: %v", err)
		}

		if cfg.LocalNode != "worker1" {
			t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
		}

		// Verify nodes map (includes default "main" + worker1 + worker2)
		if len(cfg.Nodes) != 3 {
			t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
		}

		// Verify local node exists and is empty
		localNode, exists := cfg.Nodes["worker1"]
		if !exists {
			t.Error("Expected local node 'worker1' to exist in nodes map")
		}
		if localNode.Address != "" {
			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
		}
		if localNode.APIKey != "" {
			t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
		}

		// Verify remote node
		remoteNode, exists := cfg.Nodes["worker2"]
		if !exists {
			t.Error("Expected remote node 'worker2' to exist in nodes map")
		}
		if remoteNode.Address != "http://192.168.1.10:8080" {
			t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
		}

		// Verify default main node still exists
		_, exists = cfg.Nodes["main"]
		if !exists {
			t.Error("Expected default 'main' node to still exist in nodes map")
		}
	})

	t.Run("custom local node name in config", func(t *testing.T) {
		tempDir := t.TempDir()
		configFile := filepath.Join(tempDir, "test-config.yaml")

		configContent := `
local_node: "primary"
nodes:
  primary:
    address: ""
  worker1:
    address: "http://192.168.1.10:8080"
`

		err := os.WriteFile(configFile, []byte(configContent), 0644)
		if err != nil {
			t.Fatalf("Failed to write test config file: %v", err)
		}

		cfg, err := config.LoadConfig(configFile)
		if err != nil {
			t.Fatalf("LoadConfig failed: %v", err)
		}

		if cfg.LocalNode != "primary" {
			t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
		}

		// Verify nodes map includes default "main" + primary + worker1
		if len(cfg.Nodes) != 3 {
			t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
		}

		localNode, exists := cfg.Nodes["primary"]
		if !exists {
			t.Error("Expected local node 'primary' to exist in nodes map")
		}
		if localNode.Address != "" {
			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
		}
	})

	t.Run("local node from environment variable", func(t *testing.T) {
		// t.Setenv restores the prior value on cleanup instead of
		// unconditionally unsetting the variable.
		t.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")

		cfg, err := config.LoadConfig("nonexistent-file.yaml")
		if err != nil {
			t.Fatalf("LoadConfig failed: %v", err)
		}

		if cfg.LocalNode != "custom-node" {
			t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
		}
	})
}

View File

@@ -35,6 +35,7 @@ type Process struct {
options *CreateInstanceOptions `json:"-"` options *CreateInstanceOptions `json:"-"`
globalInstanceSettings *config.InstancesConfig globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig globalBackendSettings *config.BackendConfig
localNodeName string `json:"-"` // Name of the local node for remote detection
// Status // Status
Status InstanceStatus `json:"status"` Status InstanceStatus `json:"status"`
@@ -66,7 +67,7 @@ type Process struct {
} }
// NewInstance creates a new instance with the given name, log path, and options // NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process { func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, localNodeName string, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// Validate and copy options // Validate and copy options
options.ValidateAndApplyDefaults(name, globalInstanceSettings) options.ValidateAndApplyDefaults(name, globalInstanceSettings)
@@ -78,6 +79,7 @@ func NewInstance(name string, globalBackendSettings *config.BackendConfig, globa
options: options, options: options,
globalInstanceSettings: globalInstanceSettings, globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings, globalBackendSettings: globalBackendSettings,
localNodeName: localNodeName,
logger: logger, logger: logger,
timeProvider: realTimeProvider{}, timeProvider: realTimeProvider{},
Created: time.Now().Unix(), Created: time.Now().Unix(),
@@ -145,6 +147,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
return return
} }
// Preserve the original nodes to prevent changing instance location
if i.options != nil && i.options.Nodes != nil {
options.Nodes = i.options.Nodes
}
// Validate and copy options // Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings) options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
@@ -171,6 +178,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
return nil, fmt.Errorf("instance %s has no options set", i.Name) return nil, fmt.Errorf("instance %s has no options set", i.Name)
} }
// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
if len(i.options.Nodes) > 0 && i.options.Nodes[0] != i.localNodeName {
return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
}
var host string var host string
var port int var port int
switch i.options.BackendType { switch i.options.BackendType {
@@ -198,6 +210,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(targetURL) proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
proxy.ModifyResponse = func(resp *http.Response) error { proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts // Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers // llamactl will add its own CORS headers
@@ -207,6 +228,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials") resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age") resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers") resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
return nil return nil
} }
@@ -272,5 +297,35 @@ func (i *Process) UnmarshalJSON(data []byte) error {
i.options = aux.Options i.options = aux.Options
} }
// Initialize fields that are not serialized
if i.timeProvider == nil {
i.timeProvider = realTimeProvider{}
}
if i.logger == nil && i.globalInstanceSettings != nil {
i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
}
return nil return nil
} }
// IsRemote reports whether this instance is placed on a node other than the
// local one. It returns false when the instance has no options, when its Nodes
// list is empty, or when the first entry of Nodes equals localNodeName; in
// every other case it returns true. The read lock on i.mu is held for the
// duration of the check.
func (i *Process) IsRemote() bool {
	i.mu.RLock()
	defer i.mu.RUnlock()

	opts := i.options
	if opts == nil || len(opts.Nodes) == 0 {
		// Nothing pins the instance to a node, so it is treated as local.
		return false
	}

	// Only the first listed node is compared against the local node name.
	return opts.Nodes[0] != i.localNodeName
}

View File

@@ -44,7 +44,7 @@ func TestNewInstance(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
if inst.Name != "test-instance" { if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name) t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -115,7 +115,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
opts := instance.GetOptions() opts := instance.GetOptions()
// Check that explicit values override defaults // Check that explicit values override defaults
@@ -164,7 +164,7 @@ func TestSetOptions(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, "main", mockOnStatusChange)
// Update options // Update options
newOptions := &instance.CreateInstanceOptions{ newOptions := &instance.CreateInstanceOptions{
@@ -191,6 +191,58 @@ func TestSetOptions(t *testing.T) {
} }
} }
// TestSetOptions_PreservesNodes verifies that SetOptions keeps the Nodes list
// an instance was created with, even when the update carries a different list,
// while the remaining updated options (here the model path) are applied.
func TestSetOptions_PreservesNodes(t *testing.T) {
	backendCfg := &config.BackendConfig{
		LlamaCpp: config.BackendSettings{
			Command: "llama-server",
			Args:    []string{},
		},
	}
	instancesCfg := &config.InstancesConfig{
		LogsDir:             "/tmp/test",
		DefaultAutoRestart:  true,
		DefaultMaxRestarts:  3,
		DefaultRestartDelay: 5,
	}

	// The instance is created pinned to "worker1"; the local node is "main".
	createOpts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		Nodes:       []string{"worker1"},
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf",
			Port:  8080,
		},
	}
	noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
	inst := instance.NewInstance("test-instance", backendCfg, instancesCfg, createOpts, "main", noopStatusChange)

	// The update asks for another node together with a new model and port.
	inst.SetOptions(&instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		Nodes:       []string{"worker2"},
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/new-model.gguf",
			Port:  8081,
		},
	})

	got := inst.GetOptions()

	// The node assignment must be untouched by the update.
	if nodes := got.Nodes; len(nodes) != 1 || nodes[0] != "worker1" {
		t.Errorf("Expected nodes to remain ['worker1'], got %v", got.Nodes)
	}

	// Everything else from the update must have been applied.
	if model := got.LlamaServerOptions.Model; model != "/path/to/new-model.gguf" {
		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", model)
	}
}
func TestGetProxy(t *testing.T) { func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{ backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{ LlamaCpp: config.BackendSettings{
@@ -222,7 +274,7 @@ func TestGetProxy(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
// Get proxy for the first time // Get proxy for the first time
proxy1, err := inst.GetProxy() proxy1, err := inst.GetProxy()
@@ -277,7 +329,7 @@ func TestMarshalJSON(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
data, err := json.Marshal(instance) data, err := json.Marshal(instance)
if err != nil { if err != nil {
@@ -446,7 +498,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange) instance := instance.NewInstance("test", backendConfig, globalSettings, options, "main", mockOnStatusChange)
opts := instance.GetOptions() opts := instance.GetOptions()
if opts.MaxRestarts == nil { if opts.MaxRestarts == nil {

View File

@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"os"
"os/exec" "os/exec"
"runtime" "runtime"
"syscall" "syscall"
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
// Initialize last request time to current time when starting // Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix()) i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create context before building command (needed for CommandContext)
i.ctx, i.cancel = context.WithCancel(context.Background())
// Create log files // Create log files
if err := i.logger.Create(); err != nil { if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err) return fmt.Errorf("failed to create log files: %w", err)
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
if cmdErr != nil { if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr) return fmt.Errorf("failed to build command: %w", cmdErr)
} }
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = cmd i.cmd = cmd
if runtime.GOOS != "windows" { if runtime.GOOS != "windows" {
@@ -372,13 +374,27 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
return nil, err return nil, err
} }
// Build the environment variables
env := i.options.BuildEnvironment(backendConfig)
// Get the command to execute // Get the command to execute
cmd := i.options.GetCommand(backendConfig) command := i.options.GetCommand(backendConfig)
// Build command arguments // Build command arguments
args := i.options.BuildCommandArgs(backendConfig) args := i.options.BuildCommandArgs(backendConfig)
return exec.Command(cmd, args...), nil // Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
} }
// getBackendConfig resolves the backend configuration for the current instance // getBackendConfig resolves the backend configuration for the current instance

View File

@@ -9,6 +9,7 @@ import (
"llamactl/pkg/backends/vllm" "llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
"maps"
) )
type CreateInstanceOptions struct { type CreateInstanceOptions struct {
@@ -20,10 +21,14 @@ type CreateInstanceOptions struct {
OnDemandStart *bool `json:"on_demand_start,omitempty"` OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout // Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
//Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"` BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"` BackendOptions map[string]any `json:"backend_options,omitempty"`
Nodes []string `json:"nodes,omitempty"`
// Backend-specific options // Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"` LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"` MlxServerOptions *mlx.MlxServerOptions `json:"-"`
@@ -240,3 +245,23 @@ func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSe
return args return args
} }
// BuildEnvironment merges the environment variables that apply to this
// instance into a freshly allocated map. Sources are applied in this order, so
// later ones override earlier ones on key collisions:
//
//  1. backendConfig.Environment
//  2. backendConfig.Docker.Environment, only when Docker is configured and
//     enabled and the backend type is not BackendTypeMlxLm
//  3. c.Environment
//
// The returned map is never nil.
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
	merged := make(map[string]string)

	// maps.Copy does nothing for a nil source map, so no nil guards are needed.
	maps.Copy(merged, backendConfig.Environment)

	docker := backendConfig.Docker
	if docker != nil && docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
		maps.Copy(merged, docker.Environment)
	}

	maps.Copy(merged, c.Environment)
	return merged
}

View File

@@ -56,7 +56,7 @@ func TestUpdateLastRequestTime(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic // Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime() inst.UpdateLastRequestTime()
@@ -88,7 +88,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration // Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() { if inst.ShouldTimeout() {
@@ -132,7 +132,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
}, },
} }
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
// Simulate running state // Simulate running state
inst.SetStatus(instance.Running) inst.SetStatus(instance.Running)
@@ -169,7 +169,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
inst.SetStatus(instance.Running) inst.SetStatus(instance.Running)
// Update last request time to now // Update last request time to now
@@ -207,7 +207,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
inst.SetStatus(instance.Running) inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time // Use MockTimeProvider to simulate old last request time
@@ -263,7 +263,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
// Mock onStatusChange function // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {} mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange) inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
opts := inst.GetOptions() opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout { if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {

View File

@@ -6,6 +6,7 @@ import (
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"log" "log"
"net/http"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@@ -25,10 +26,22 @@ type InstanceManager interface {
StopInstance(name string) (*instance.Process, error) StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error) RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error) GetInstanceLogs(name string, numLines int) (string, error)
Shutdown() Shutdown()
} }
// RemoteManager declares the per-node counterparts of the instance operations.
// Every method takes the configuration of the target node as its first
// argument; all methods except ListRemoteInstances also take the instance name.
// NOTE(review): the transport used to reach the node is not visible from this
// declaration — confirm against the implementation.
type RemoteManager interface {
// ListRemoteInstances returns a slice of instances for the given node.
ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
// CreateRemoteInstance takes the instance name and its creation options and
// returns the resulting instance.
CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
// GetRemoteInstance returns the named instance as seen by the given node.
GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
// UpdateRemoteInstance takes new options for the named instance and returns
// the resulting instance.
UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
// DeleteRemoteInstance reports only an error; no instance is returned.
DeleteRemoteInstance(node *config.NodeConfig, name string) error
// StartRemoteInstance, StopRemoteInstance and RestartRemoteInstance each
// return the affected instance.
StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
// GetRemoteInstanceLogs returns log text for the named instance.
// presumably numLines limits how many lines are returned — verify the
// meaning of zero/negative values against the implementation.
GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
}
type instanceManager struct { type instanceManager struct {
mu sync.RWMutex mu sync.RWMutex
instances map[string]*instance.Process instances map[string]*instance.Process
@@ -36,29 +49,51 @@ type instanceManager struct {
ports map[int]bool ports map[int]bool
instancesConfig config.InstancesConfig instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig backendsConfig config.BackendConfig
localNodeName string // Name of the local node
// Timeout checker // Timeout checker
timeoutChecker *time.Ticker timeoutChecker *time.Ticker
shutdownChan chan struct{} shutdownChan chan struct{}
shutdownDone chan struct{} shutdownDone chan struct{}
isShutdown bool isShutdown bool
// Remote instance management
httpClient *http.Client
instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config
nodeConfigMap map[string]*config.NodeConfig // Maps node name to node config for quick lookup
} }
// NewInstanceManager creates a new instance of InstanceManager. // NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager { func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig, localNodeName string) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 { if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
} }
// Build node config map for quick lookup
nodeConfigMap := make(map[string]*config.NodeConfig)
for name := range nodesConfig {
nodeCopy := nodesConfig[name]
nodeConfigMap[name] = &nodeCopy
}
im := &instanceManager{ im := &instanceManager{
instances: make(map[string]*instance.Process), instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}), runningInstances: make(map[string]struct{}),
ports: make(map[int]bool), ports: make(map[int]bool),
instancesConfig: instancesConfig, instancesConfig: instancesConfig,
backendsConfig: backendsConfig, backendsConfig: backendsConfig,
localNodeName: localNodeName,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}), shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}), shutdownDone: make(chan struct{}),
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
instanceNodeMap: make(map[string]*config.NodeConfig),
nodeConfigMap: nodeConfigMap,
} }
// Load existing instances from disk // Load existing instances from disk
@@ -238,24 +273,44 @@ func (im *instanceManager) loadInstance(name, path string) error {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name) return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
} }
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) { options := persistedInstance.GetOptions()
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
// Check if this is a remote instance
// An instance is remote if Nodes is specified AND the first node is not the local node
isRemote := options != nil && len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
if !isRemote {
// Only set status callback for local instances
statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
} }
// Create new inst using NewInstance (handles validation, defaults, setup) // Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback) inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)
// Restore persisted fields that NewInstance doesn't set // Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status) inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps // Handle remote instance mapping
if inst.GetPort() > 0 { if isRemote {
port := inst.GetPort() nodeName := options.Nodes[0]
if im.ports[port] { nodeConfig, exists := im.nodeConfigMap[nodeName]
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port) if !exists {
return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
}
im.instanceNodeMap[name] = nodeConfig
} else {
// Check for port conflicts only for local instances
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
im.ports[port] = true
} }
im.ports[port] = true
} }
im.instances[name] = inst im.instances[name] = inst
@@ -263,25 +318,48 @@ func (im *instanceManager) loadInstance(name, path string) error {
} }
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled // autoStartInstances starts instances that were running when persisted and have auto-restart enabled
// For instances with auto-restart disabled, it sets their status to Stopped
func (im *instanceManager) autoStartInstances() { func (im *instanceManager) autoStartInstances() {
im.mu.RLock() im.mu.RLock()
var instancesToStart []*instance.Process var instancesToStart []*instance.Process
var instancesToStop []*instance.Process
for _, inst := range im.instances { for _, inst := range im.instances {
if inst.IsRunning() && // Was running when persisted if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil && inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil && inst.GetOptions().AutoRestart != nil {
*inst.GetOptions().AutoRestart { if *inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst) instancesToStart = append(instancesToStart, inst)
} else {
// Instance was running but auto-restart is disabled, mark as stopped
instancesToStop = append(instancesToStop, inst)
}
} }
} }
im.mu.RUnlock() im.mu.RUnlock()
// Stop instances that have auto-restart disabled
for _, inst := range instancesToStop {
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
inst.SetStatus(instance.Stopped)
}
// Start instances that have auto-restart enabled
for _, inst := range instancesToStart { for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name) log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance) // Reset running state before starting (since Start() expects stopped instance)
inst.SetStatus(instance.Stopped) inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err) // Check if this is a remote instance
if node := im.getNodeForInstance(inst); node != nil {
// Remote instance - use StartRemoteInstance
if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
}
} else {
// Local instance - call Start() directly
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
} }
} }
} }
@@ -296,3 +374,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
delete(im.runningInstances, name) delete(im.runningInstances, name)
} }
} }
// getNodeForInstance looks up the node configuration recorded for a remote
// instance. It yields nil when inst.IsRemote() is false, and also when no entry
// for the instance name exists in instanceNodeMap.
//
// NOTE(review): instanceNodeMap is read here without acquiring im.mu; confirm
// that every caller provides the synchronization this lookup needs.
func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
	if inst.IsRemote() {
		// Indexing a missing key yields the nil zero value, which is exactly
		// the "not found" result.
		return im.instanceNodeMap[inst.Name]
	}
	return nil
}

View File

@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
TimeoutCheckInterval: 5, TimeoutCheckInterval: 5,
} }
mgr := manager.NewInstanceManager(backendConfig, cfg) mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
if mgr == nil { if mgr == nil {
t.Fatal("NewInstanceManager returned nil") t.Fatal("NewInstanceManager returned nil")
} }
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
} }
// Test instance persistence on creation // Test instance persistence on creation
manager1 := manager.NewInstanceManager(backendConfig, cfg) manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp, BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{ LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
} }
// Test loading instances from disk // Test loading instances from disk
manager2 := manager.NewInstanceManager(backendConfig, cfg) manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
instances, err := manager2.ListInstances() instances, err := manager2.ListInstances()
if err != nil { if err != nil {
t.Fatalf("ListInstances failed: %v", err) t.Fatalf("ListInstances failed: %v", err)
@@ -207,5 +207,68 @@ func createTestManager() manager.InstanceManager {
DefaultRestartDelay: 5, DefaultRestartDelay: 5,
TimeoutCheckInterval: 5, TimeoutCheckInterval: 5,
} }
return manager.NewInstanceManager(backendConfig, cfg) return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
}
// TestAutoRestartDisabledInstanceStatus creates an instance with AutoRestart
// set to false, marks it Running, shuts the manager down, and then builds a
// second manager over the same InstancesDir. The instance returned by the
// second manager is expected to report Stopped rather than Running.
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
// Both managers share this directory as InstancesDir.
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Create first manager and instance with auto-restart disabled
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
autoRestart := false
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
AutoRestart: &autoRestart,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Simulate instance being in running state when persisted
// (this would happen if the instance was running when llamactl was stopped)
// No process is started here; only the status field is changed.
inst.SetStatus(instance.Running)
// Shutdown first manager
manager1.Shutdown()
// Create second manager (simulating restart of llamactl)
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
// Get the loaded instance
loadedInst, err := manager2.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
// The instance should be marked as Stopped, not Running
// because auto-restart is disabled
if loadedInst.IsRunning() {
t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
}
if loadedInst.GetStatus() != instance.Stopped {
t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
}
// Release the second manager before the test returns.
manager2.Shutdown()
} }

View File

@@ -3,6 +3,7 @@ package manager
import ( import (
"fmt" "fmt"
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/validation" "llamactl/pkg/validation"
"os" "os"
@@ -11,16 +12,65 @@ import (
type MaxRunningInstancesError error type MaxRunningInstancesError error
// updateLocalInstanceFromRemote overwrites the local stub's options, Status and
// Created with the values carried by remoteInst, except that the stub's own
// Nodes list is kept so the stub continues to be tracked as remote.
//
// The call is a no-op when either instance is nil or when the remote instance
// has no options.
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
	if localInst == nil || remoteInst == nil {
		return
	}

	remoteOpts := remoteInst.GetOptions()
	if remoteOpts == nil {
		return
	}

	// Snapshot the stub's current Nodes into a fresh slice so the value written
	// back does not alias the slice held by the existing options.
	var nodes []string
	if current := localInst.GetOptions(); current != nil && len(current.Nodes) > 0 {
		nodes = make([]string, len(current.Nodes))
		copy(nodes, current.Nodes)
	}

	// Shallow copy of the remote options, with Nodes swapped for the snapshot.
	merged := *remoteOpts
	merged.Nodes = nodes

	localInst.SetOptions(&merged)
	localInst.Status = remoteInst.Status
	localInst.Created = remoteInst.Created
}
// ListInstances returns a list of all instances managed by the instance manager. // ListInstances returns a list of all instances managed by the instance manager.
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) { func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
defer im.mu.RUnlock() localInstances := make([]*instance.Process, 0, len(im.instances))
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances { for _, inst := range im.instances {
instances = append(instances, inst) localInstances = append(localInstances, inst)
} }
return instances, nil im.mu.RUnlock()
// Update remote instances with live state
for _, inst := range localInstances {
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.GetRemoteInstance(node, inst.Name)
if err != nil {
// Log error but continue with stale data
// Don't fail the entire list operation due to one remote failure
continue
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
}
}
return localInstances, nil
} }
// CreateInstance creates a new instance with the given options and returns it. // CreateInstance creates a new instance with the given options and returns it.
@@ -43,16 +93,57 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock() defer im.mu.Unlock()
// Check max instances limit after acquiring the lock // Check if instance with this name already exists (must be globally unique)
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil { if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name) return nil, fmt.Errorf("instance with name %s already exists", name)
} }
// Check if this is a remote instance
// An instance is remote if Nodes is specified AND the first node is not the local node
isRemote := len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
var nodeConfig *config.NodeConfig
if isRemote {
// Validate that the node exists
nodeName := options.Nodes[0] // Use first node for now
var exists bool
nodeConfig, exists = im.nodeConfigMap[nodeName]
if !exists {
return nil, fmt.Errorf("node %s not found", nodeName)
}
// Create the remote instance on the remote node
remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
if err != nil {
return nil, err
}
// Create a local stub that preserves the Nodes field for tracking
// We keep the original options (with Nodes) so IsRemote() works correctly
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, nil)
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Add to local tracking maps (but don't count towards limits)
im.instances[name] = inst
im.instanceNodeMap[name] = nodeConfig
// Persist the remote instance locally for tracking across restarts
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
}
return inst, nil
}
// Local instance creation
// Check max instances limit for local instances only
localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Assign and validate port for backend-specific options // Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil { if err := im.assignAndValidatePort(options); err != nil {
return nil, err return nil, err
@@ -62,7 +153,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
im.onStatusChange(name, oldStatus, newStatus) im.onStatusChange(name, oldStatus, newStatus)
} }
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback) inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)
im.instances[inst.Name] = inst im.instances[inst.Name] = inst
if err := im.persistInstance(inst); err != nil { if err := im.persistInstance(inst); err != nil {
@@ -73,28 +164,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
} }
// GetInstance retrieves an instance by its name. // GetInstance retrieves an instance by its name.
// For remote instances, this fetches the live state from the remote node and updates the local stub.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) { func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
defer im.mu.RUnlock() inst, exists := im.instances[name]
im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists { if !exists {
return nil, fmt.Errorf("instance with name %s not found", name) return nil, fmt.Errorf("instance with name %s not found", name)
} }
return instance, nil
// Check if instance is remote and fetch live state
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.GetRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
// Return the local stub (preserving Nodes field)
return inst, nil
}
return inst, nil
} }
// UpdateInstance updates the options of an existing instance and returns it. // UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options. // If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) { func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
instance, exists := im.instances[name] inst, exists := im.instances[name]
im.mu.RUnlock() im.mu.RUnlock()
if !exists { if !exists {
return nil, fmt.Errorf("instance with name %s not found", name) return nil, fmt.Errorf("instance with name %s not found", name)
} }
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.UpdateRemoteInstance(node, name, options)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
// Persist the updated remote instance locally
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
}
return inst, nil
}
if options == nil { if options == nil {
return nil, fmt.Errorf("instance options cannot be nil") return nil, fmt.Errorf("instance options cannot be nil")
} }
@@ -105,55 +236,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
} }
// Check if instance is running before updating options // Check if instance is running before updating options
wasRunning := instance.IsRunning() wasRunning := inst.IsRunning()
// If the instance is running, stop it first // If the instance is running, stop it first
if wasRunning { if wasRunning {
if err := instance.Stop(); err != nil { if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err) return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
} }
} }
// Now update the options while the instance is stopped // Now update the options while the instance is stopped
instance.SetOptions(options) inst.SetOptions(options)
// If it was running before, start it again with the new options // If it was running before, start it again with the new options
if wasRunning { if wasRunning {
if err := instance.Start(); err != nil { if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err) return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
} }
} }
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock() defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil { if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err) return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
} }
return instance, nil return inst, nil
} }
// DeleteInstance removes stopped instance by its name. // DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error { func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock() inst, exists := im.instances[name]
im.mu.Unlock()
instance, exists := im.instances[name]
if !exists { if !exists {
return fmt.Errorf("instance with name %s not found", name) return fmt.Errorf("instance with name %s not found", name)
} }
if instance.IsRunning() { // Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
err := im.DeleteRemoteInstance(node, name)
if err != nil {
return err
}
// Clean up local tracking
im.mu.Lock()
defer im.mu.Unlock()
delete(im.instances, name)
delete(im.instanceNodeMap, name)
// Delete the instance's config file if persistence is enabled
// Re-validate instance name for security (defense in depth)
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
return fmt.Errorf("invalid instance name for file deletion: %w", err)
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
}
return nil
}
if inst.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name) return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
} }
delete(im.ports, instance.GetPort()) im.mu.Lock()
defer im.mu.Unlock()
delete(im.ports, inst.GetPort())
delete(im.instances, name) delete(im.instances, name)
// Delete the instance's config file if persistence is enabled // Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json") // Re-validate instance name for security (defense in depth)
validatedName, err := validation.ValidateInstanceName(inst.Name)
if err != nil {
return fmt.Errorf("invalid instance name for file deletion: %w", err)
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) { if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err) return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
} }
return nil return nil
@@ -163,33 +329,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
// If the instance is already running, it returns an error. // If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) { func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
instance, exists := im.instances[name] inst, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock() im.mu.RUnlock()
if !exists { if !exists {
return nil, fmt.Errorf("instance with name %s not found", name) return nil, fmt.Errorf("instance with name %s not found", name)
} }
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name) // Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.StartRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
} }
if inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already running", name)
}
// Check max running instances limit for local instances only
im.mu.RLock()
localRunningCount := 0
for instName := range im.runningInstances {
if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
localRunningCount++
}
}
maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if maxRunningExceeded { if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances)) return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
} }
if err := instance.Start(); err != nil { if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err) return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
} }
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock() defer im.mu.Unlock()
err := im.persistInstance(instance) err := im.persistInstance(inst)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err) return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
} }
return instance, nil return inst, nil
} }
func (im *instanceManager) IsMaxRunningInstancesReached() bool { func (im *instanceManager) IsMaxRunningInstancesReached() bool {
@@ -206,51 +398,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
// StopInstance stops a running instance and returns it. // StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) { func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
instance, exists := im.instances[name] inst, exists := im.instances[name]
im.mu.RUnlock() im.mu.RUnlock()
if !exists { if !exists {
return nil, fmt.Errorf("instance with name %s not found", name) return nil, fmt.Errorf("instance with name %s not found", name)
} }
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name) // Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.StopRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
} }
if err := instance.Stop(); err != nil { if !inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err) return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
} }
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock() defer im.mu.Unlock()
err := im.persistInstance(instance) err := im.persistInstance(inst)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err) return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
} }
return instance, nil return inst, nil
} }
// RestartInstance stops and then starts an instance, returning the updated instance. // RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) { func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name) im.mu.RLock()
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.RestartRemoteInstance(node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
return inst, nil
}
inst, err := im.StopInstance(name)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return im.StartInstance(instance.Name) return im.StartInstance(inst.Name)
} }
// GetInstanceLogs retrieves the logs for a specific instance by its name. // GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) { func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
im.mu.RLock() im.mu.RLock()
_, exists := im.instances[name] inst, exists := im.instances[name]
im.mu.RUnlock() im.mu.RUnlock()
if !exists { if !exists {
return "", fmt.Errorf("instance with name %s not found", name) return "", fmt.Errorf("instance with name %s not found", name)
} }
// TODO: Implement actual log retrieval logic // Check if instance is remote and delegate to remote operation
return fmt.Sprintf("Logs for instance %s", name), nil if node := im.getNodeForInstance(inst); node != nil {
return im.GetRemoteInstanceLogs(node, name, numLines)
}
// Get logs from the local instance
return inst.GetLogs(numLines)
} }
// getPortFromOptions extracts the port from backend-specific options // getPortFromOptions extracts the port from backend-specific options

View File

@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
MaxInstances: 1, // Very low limit for testing MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5, TimeoutCheckInterval: 5,
} }
limitedManager := manager.NewInstanceManager(backendConfig, cfg) limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
_, err = limitedManager.CreateInstance("instance1", options) _, err = limitedManager.CreateInstance("instance1", options)
if err != nil { if err != nil {

222
pkg/manager/remote_ops.go Normal file
View File

@@ -0,0 +1,222 @@
package manager
import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
	"net/http"
	"net/url"
)
// makeRemoteRequest sends an HTTP request to the node described by nodeConfig
// and returns the raw response; the caller is responsible for closing its body.
//
// When body is non-nil it is JSON-encoded and sent with a JSON Content-Type.
// When the node has an API key configured it is sent as a Bearer token.
func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
	hasBody := body != nil

	// Encode the payload up front so a marshalling failure never reaches the network.
	var payload io.Reader
	if hasBody {
		encoded, marshalErr := json.Marshal(body)
		if marshalErr != nil {
			return nil, fmt.Errorf("failed to marshal request body: %w", marshalErr)
		}
		payload = bytes.NewBuffer(encoded)
	}

	// The target is the node's base address followed directly by the API path.
	target := fmt.Sprintf("%s%s", nodeConfig.Address, path)
	req, buildErr := http.NewRequest(method, target, payload)
	if buildErr != nil {
		return nil, fmt.Errorf("failed to create request: %w", buildErr)
	}

	if hasBody {
		req.Header.Set("Content-Type", "application/json")
	}
	if nodeConfig.APIKey != "" {
		bearer := fmt.Sprintf("Bearer %s", nodeConfig.APIKey)
		req.Header.Set("Authorization", bearer)
	}

	resp, doErr := im.httpClient.Do(req)
	if doErr != nil {
		return nil, fmt.Errorf("failed to execute request: %w", doErr)
	}
	return resp, nil
}
// parseRemoteResponse is a helper function to parse API responses
func parseRemoteResponse(resp *http.Response, result any) error {
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
if result != nil {
if err := json.Unmarshal(body, result); err != nil {
return fmt.Errorf("failed to unmarshal response: %w", err)
}
}
return nil
}
// ListRemoteInstances fetches every instance reported by the remote node
// described by nodeConfig and returns them as decoded from its JSON reply.
func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
	const listPath = "/api/v1/instances/"

	resp, reqErr := im.makeRemoteRequest(nodeConfig, "GET", listPath, nil)
	if reqErr != nil {
		return nil, reqErr
	}

	// parseRemoteResponse closes the body and rejects non-2xx replies.
	var remote []*instance.Process
	if parseErr := parseRemoteResponse(resp, &remote); parseErr != nil {
		return nil, parseErr
	}
	return remote, nil
}
// CreateRemoteInstance creates a new instance called name on the remote node
// described by nodeConfig by POSTing options to the node's instances API.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// GetRemoteInstance retrieves the instance called name from the remote node
// described by nodeConfig.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// UpdateRemoteInstance updates the instance called name on the remote node
// described by nodeConfig by PUTting options to the node's instances API.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// DeleteRemoteInstance deletes the instance called name from the remote node
// described by nodeConfig.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated DELETE
// to a different endpoint on the node.
//
// It returns an error if the request fails or the node answers with a non-2xx
// status; any response body on success is discarded.
func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
	path := fmt.Sprintf("/api/v1/instances/%s/", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
	if err != nil {
		return err
	}
	return parseRemoteResponse(resp, nil)
}
// StartRemoteInstance starts the instance called name on the remote node
// described by nodeConfig.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/start", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// StopRemoteInstance stops the instance called name on the remote node
// described by nodeConfig.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/stop", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// RestartRemoteInstance restarts the instance called name on the remote node
// described by nodeConfig.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot redirect the authenticated request
// to a different endpoint on the node.
//
// It returns the instance decoded from the node's JSON reply, or an error if
// the request fails, the node answers with a non-2xx status, or the reply
// cannot be decoded.
func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/restart", url.PathEscape(name))
	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
	if err != nil {
		return nil, err
	}

	var inst instance.Process
	if err := parseRemoteResponse(resp, &inst); err != nil {
		return nil, err
	}
	return &inst, nil
}
// GetRemoteInstanceLogs retrieves logs for the instance called name from the
// remote node described by nodeConfig, asking for numLines lines through the
// "lines" query parameter.
//
// The name is path-escaped before it is placed in the URL so that reserved
// characters such as "/", "?" or "#" cannot alter the request path or query.
//
// The reply may be either a JSON object with a "logs" field or plain text.
// When a "logs" field is present its value is returned, even if it is empty;
// otherwise the raw body is returned as text. An error is returned if the
// request fails, the body cannot be read, or the node answers with a non-2xx
// status.
func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
	path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", url.PathEscape(name), numLines)
	resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response body: %w", err)
	}

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
	}

	// Logs endpoint might return plain text or JSON.
	// A pointer field distinguishes "logs key absent" from "logs is empty", so
	// a reply of {"logs":""} yields an empty log rather than the raw JSON text.
	var logResponse struct {
		Logs *string `json:"logs"`
	}
	if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != nil {
		return *logResponse.Logs, nil
	}

	// Otherwise, return as plain text
	return string(body), nil
}

View File

@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
// Identify instances that should timeout // Identify instances that should timeout
for _, inst := range im.instances { for _, inst := range im.instances {
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
if inst.ShouldTimeout() { if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name) timeoutInstances = append(timeoutInstances, inst.Name)
} }
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
continue continue
} }
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 { if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances without idle timeout continue // Skip instances without idle timeout
} }

View File

@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
MaxInstances: 5, MaxInstances: 5,
} }
manager := manager.NewInstanceManager(backendConfig, cfg) manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
if manager == nil { if manager == nil {
t.Fatal("Manager should be initialized with timeout checker") t.Fatal("Manager should be initialized with timeout checker")
} }

View File

@@ -1,795 +1,29 @@
package server package server
import ( import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager" "llamactl/pkg/manager"
"net/http" "net/http"
"os/exec" "net/http/httputil"
"strconv" "sync"
"strings" "time"
"github.com/go-chi/chi/v5"
) )
type Handler struct { type Handler struct {
InstanceManager manager.InstanceManager InstanceManager manager.InstanceManager
cfg config.AppConfig cfg config.AppConfig
httpClient *http.Client
remoteProxies map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
remoteProxiesMu sync.RWMutex
} }
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler { func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{ return &Handler{
InstanceManager: im, InstanceManager: im,
cfg: cfg, cfg: cfg,
} httpClient: &http.Client{
} Timeout: 30 * time.Second,
},
// VersionHandler godoc remoteProxies: make(map[string]*httputil.ReverseProxy),
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The build metadata is read from the handler's config on each request.
		report := fmt.Sprintf(
			"Version: %s\nCommit: %s\nBuild Time: %s\n",
			h.cfg.Version,
			h.cfg.CommitHash,
			h.cfg.BuildTime,
		)

		w.Header().Set("Content-Type", "text/plain")
		w.Write([]byte(report))
	})
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Tie the subprocess to the request context so it is killed when the
		// request is cancelled (for example, the client disconnects) instead of
		// continuing to run with nobody waiting for its output.
		helpCmd := exec.CommandContext(r.Context(), "llama-server", "--help")

		// Combined stdout and stderr is returned as the response body.
		output, err := helpCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Tie the subprocess to the request context so it is killed when the
		// request is cancelled (for example, the client disconnects) instead of
		// continuing to run with nobody waiting for its output.
		versionCmd := exec.CommandContext(r.Context(), "llama-server", "--version")

		// Combined stdout and stderr is returned as the response body.
		output, err := versionCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Tie the subprocess to the request context so it is killed when the
		// request is cancelled (for example, the client disconnects) instead of
		// continuing to run with nobody waiting for its output.
		listCmd := exec.CommandContext(r.Context(), "llama-server", "--list-devices")

		// Combined stdout and stderr is returned as the response body.
		output, err := listCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Ask the manager for every instance it tracks.
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Stream the list back to the client as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(all)
		if encodeErr == nil {
			return
		}
		http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
	})
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The instance name comes from the URL path and must be present.
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// The request body carries the creation options as JSON.
		opts := new(instance.CreateInstanceOptions)
		if decodeErr := json.NewDecoder(r.Body).Decode(opts); decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		created, createErr := h.InstanceManager.CreateInstance(instanceName, opts)
		if createErr != nil {
			http.Error(w, "Failed to create instance: "+createErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply 201 Created with the new instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		encodeErr := json.NewEncoder(w).Encode(created)
		if encodeErr == nil {
			return
		}
		http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
	})
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The instance name comes from the URL path and must be present.
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(found)
		if encodeErr == nil {
			return
		}
		http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
	})
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// Decode the replacement configuration from the JSON body.
		var opts instance.CreateInstanceOptions
		decodeErr := json.NewDecoder(r.Body).Decode(&opts)
		if decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, &opts)
		if updateErr != nil {
			http.Error(w, "Failed to update instance: "+updateErr.Error(), http.StatusInternalServerError)
			return
		}

		// Serialize the updated instance as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(updated)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 409 {string} string "Maximum number of running instances reached"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		started, startErr := h.InstanceManager.StartInstance(instanceName)
		if startErr != nil {
			switch startErr.(type) {
			case manager.MaxRunningInstancesError:
				// The running-instance limit is reported as a conflict, with
				// the manager's own message passed through unchanged.
				http.Error(w, startErr.Error(), http.StatusConflict)
			default:
				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
			}
			return
		}

		// Serialize the started instance as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(started)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
		if stopErr != nil {
			http.Error(w, "Failed to stop instance: "+stopErr.Error(), http.StatusInternalServerError)
			return
		}

		// Serialize the stopped instance as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encoder := json.NewEncoder(w)
		if encodeErr := encoder.Encode(stopped); encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
		if restartErr != nil {
			http.Error(w, "Failed to restart instance: "+restartErr.Error(), http.StatusInternalServerError)
			return
		}

		// Serialize the restarted instance as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(restarted)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
		if deleteErr != nil {
			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
			return
		}

		// Successful deletion carries no response body.
		w.WriteHeader(http.StatusNoContent)
	}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// A missing "lines" query parameter is passed to GetLogs as -1.
		lineCount := -1
		if raw := r.URL.Query().Get("lines"); raw != "" {
			parsed, parseErr := strconv.Atoi(raw)
			if parseErr != nil {
				http.Error(w, "Invalid lines parameter: "+parseErr.Error(), http.StatusBadRequest)
				return
			}
			lineCount = parsed
		}

		target, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
			return
		}

		logText, logErr := target.GetLogs(lineCount)
		if logErr != nil {
			http.Error(w, "Failed to get logs: "+logErr.Error(), http.StatusInternalServerError)
			return
		}

		// Logs are returned verbatim as plain text.
		w.Header().Set("Content-Type", "text/plain")
		w.Write([]byte(logText))
	}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		if name == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		inst, err := h.InstanceManager.GetInstance(name)
		if err != nil {
			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// This endpoint never starts an instance; a stopped one yields 503.
		if !inst.IsRunning() {
			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
			return
		}

		// Get the cached proxy for this instance
		proxy, err := inst.GetProxy()
		if err != nil {
			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL.
		// TrimPrefix is used instead of slicing by len(prefix) so that a path
		// that does not match the prefix byte-for-byte (for example when the
		// name was percent-encoded in the URL) can neither panic with an
		// out-of-range slice nor cut the path at an arbitrary offset.
		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
		proxyPath := strings.TrimPrefix(r.URL.Path, prefix)

		// Ensure the proxy path starts with "/"
		if !strings.HasPrefix(proxyPath, "/") {
			proxyPath = "/" + proxyPath
		}

		// Update the last request time for the instance
		inst.UpdateLastRequestTime()

		// Modify the request to remove the proxy prefix
		originalPath := r.URL.Path
		r.URL.Path = proxyPath

		// Set forwarded headers. On the server side net/http moves the Host
		// header into r.Host and removes it from r.Header, so r.Host is the
		// only place the original host can be read from.
		forwardedProto := "http"
		if r.TLS != nil {
			forwardedProto = "https"
		}
		r.Header.Set("X-Forwarded-Host", r.Host)
		r.Header.Set("X-Forwarded-Proto", forwardedProto)

		// Restore original path for logging purposes
		defer func() {
			r.URL.Path = originalPath
		}()

		// Forward the request using the cached proxy
		proxy.ServeHTTP(w, r)
	}
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Map every managed instance onto an OpenAI "model" entry.
		models := make([]OpenAIInstance, 0, len(all))
		for _, item := range all {
			models = append(models, OpenAIInstance{
				ID:      item.Name,
				Object:  "model",
				Created: item.Created,
				OwnedBy: "llamactl",
			})
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(OpenAIListInstancesResponse{
			Object: "list",
			Data:   models,
		})
		if encodeErr != nil {
			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 409 {string} string "Maximum number of running instances reached"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running or failed to become healthy"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Buffer the whole body: it is inspected here and replayed to the
		// proxy further down.
		rawBody, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			http.Error(w, "Failed to read request body", http.StatusBadRequest)
			return
		}
		r.Body.Close()

		// The "model" field of the JSON body names the target instance.
		var payload map[string]any
		if parseErr := json.Unmarshal(rawBody, &payload); parseErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}
		// A missing or non-string "model" leaves modelName empty.
		modelName, _ := payload["model"].(string)
		if modelName == "" {
			http.Error(w, "Instance name is required", http.StatusBadRequest)
			return
		}

		inst, lookupErr := h.InstanceManager.GetInstance(modelName)
		if lookupErr != nil {
			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
			return
		}

		if !inst.IsRunning() {
			// A stopped instance is only started when its options opt in.
			opts := inst.GetOptions()
			if opts == nil || opts.OnDemandStart == nil || !*opts.OnDemandStart {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}

			// At the running-instance limit, either evict via
			// EvictLRUInstance (when enabled in the config) or refuse.
			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if !h.cfg.Instances.EnableLRUEviction {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
				if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
					http.Error(w, "Cannot start Instance, failed to evict instance "+evictErr.Error(), http.StatusInternalServerError)
					return
				}
			}

			_, startErr := h.InstanceManager.StartInstance(modelName)
			if startErr != nil {
				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
				return
			}

			// Block until healthy, bounded by the configured on-demand
			// start timeout.
			healthErr := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout)
			if healthErr != nil {
				http.Error(w, "Instance failed to become healthy: "+healthErr.Error(), http.StatusServiceUnavailable)
				return
			}
		}

		proxy, proxyErr := inst.GetProxy()
		if proxyErr != nil {
			http.Error(w, "Failed to get proxy: "+proxyErr.Error(), http.StatusInternalServerError)
			return
		}

		inst.UpdateLastRequestTime()

		// Hand the proxy a fresh reader over the bytes consumed above.
		r.Body = io.NopCloser(bytes.NewReader(rawBody))
		r.ContentLength = int64(len(rawBody))
		proxy.ServeHTTP(w, r)
	}
}
// ParseCommandRequest is the JSON request body decoded by the backend
// parse-command handlers (ParseLlamaCommand, ParseMlxCommand, ParseVllmCommand).
type ParseCommandRequest struct {
// Command is the raw command string to parse, read from the "command" JSON key.
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
	// errorBody is the JSON shape written for every failure response.
	type errorBody struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// respondError writes a JSON error with the given status; an encoding
	// failure is deliberately ignored.
	respondError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		body := errorBody{Error: code, Details: details}
		_ = json.NewEncoder(w).Encode(body)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		switch {
		case decodeErr != nil:
			respondError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		case strings.TrimSpace(payload.Command) == "":
			respondError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := llamacpp.ParseLlamaCommand(payload.Command)
		if parseErr != nil {
			respondError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// Wrap the backend-specific options in the generic instance options.
		result := instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(&result)
		if encodeErr != nil {
			respondError(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
	// errorBody is the JSON shape written for every failure response.
	type errorBody struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// respondError writes a JSON error with the given status; an encoding
	// failure is deliberately ignored.
	respondError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		body := errorBody{Error: code, Details: details}
		_ = json.NewEncoder(w).Encode(body)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		switch {
		case decodeErr != nil:
			respondError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		case strings.TrimSpace(payload.Command) == "":
			respondError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := mlx.ParseMlxCommand(payload.Command)
		if parseErr != nil {
			respondError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// Currently only support mlx_lm backend type
		result := instance.CreateInstanceOptions{
			BackendType:      backends.BackendTypeMlxLm,
			MlxServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(&result)
		if encodeErr != nil {
			respondError(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
} }
} }

View File

@@ -0,0 +1,320 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/instance"
"net/http"
"os/exec"
"strings"
"github.com/go-chi/chi/v5"
)
// ParseCommandRequest is the JSON request body decoded by the backend
// parse-command handlers (ParseLlamaCommand, ParseMlxCommand, ParseVllmCommand).
type ParseCommandRequest struct {
// Command is the raw command string to parse, read from the "command" JSON key.
Command string `json:"command"`
}
// LlamaCppProxy returns a handler that forwards requests under
// "/llama-cpp/<name>" to the named instance, which must use the llama.cpp
// backend. When onDemandStart is true and the instance's own OnDemandStart
// option is set, a stopped instance is started and awaited until healthy
// before the request is proxied; otherwise a stopped instance yields 503.
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		inst, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Invalid instance: "+lookupErr.Error(), http.StatusBadRequest)
			return
		}

		opts := inst.GetOptions()
		if opts == nil {
			http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
			return
		}

		// Only llama.cpp backed instances may be reached through this route.
		if opts.BackendType != backends.BackendTypeLlamaCpp {
			http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
			return
		}

		if !inst.IsRunning() {
			// Both the route-level flag and the instance option must allow
			// on-demand start.
			if !onDemandStart || opts.OnDemandStart == nil || !*opts.OnDemandStart {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}

			// At the running-instance limit, either evict via
			// EvictLRUInstance (when enabled in the config) or refuse.
			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if !h.cfg.Instances.EnableLRUEviction {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
				if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
					http.Error(w, "Cannot start Instance, failed to evict instance "+evictErr.Error(), http.StatusInternalServerError)
					return
				}
			}

			_, startErr := h.InstanceManager.StartInstance(instanceName)
			if startErr != nil {
				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
				return
			}

			// Block until healthy, bounded by the configured on-demand
			// start timeout.
			healthErr := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout)
			if healthErr != nil {
				http.Error(w, "Instance failed to become healthy: "+healthErr.Error(), http.StatusServiceUnavailable)
				return
			}
		}

		proxy, proxyErr := inst.GetProxy()
		if proxyErr != nil {
			http.Error(w, "Failed to get proxy: "+proxyErr.Error(), http.StatusInternalServerError)
			return
		}

		// Drop the "/llama-cpp/<name>" prefix so the backend sees its own path.
		r.URL.Path = strings.TrimPrefix(r.URL.Path, fmt.Sprintf("/llama-cpp/%s", instanceName))

		inst.UpdateLastRequestTime()
		proxy.ServeHTTP(w, r)
	}
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
	// errorBody is the JSON shape written for every failure response.
	type errorBody struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// respondError writes a JSON error with the given status; an encoding
	// failure is deliberately ignored.
	respondError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		body := errorBody{Error: code, Details: details}
		_ = json.NewEncoder(w).Encode(body)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		switch {
		case decodeErr != nil:
			respondError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		case strings.TrimSpace(payload.Command) == "":
			respondError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := llamacpp.ParseLlamaCommand(payload.Command)
		if parseErr != nil {
			respondError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// Wrap the backend-specific options in the generic instance options.
		result := instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(&result)
		if encodeErr != nil {
			respondError(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
	// errorBody is the JSON shape written for every failure response.
	type errorBody struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// respondError writes a JSON error with the given status; an encoding
	// failure is deliberately ignored.
	respondError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		body := errorBody{Error: code, Details: details}
		_ = json.NewEncoder(w).Encode(body)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		switch {
		case decodeErr != nil:
			respondError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		case strings.TrimSpace(payload.Command) == "":
			respondError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := mlx.ParseMlxCommand(payload.Command)
		if parseErr != nil {
			respondError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// Currently only support mlx_lm backend type
		result := instance.CreateInstanceOptions{
			BackendType:      backends.BackendTypeMlxLm,
			MlxServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(&result)
		if encodeErr != nil {
			respondError(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
	// errorBody is the JSON shape written for every failure response.
	type errorBody struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// respondError writes a JSON error with the given status; an encoding
	// failure is deliberately ignored.
	respondError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		body := errorBody{Error: code, Details: details}
		_ = json.NewEncoder(w).Encode(body)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		switch {
		case decodeErr != nil:
			respondError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		case strings.TrimSpace(payload.Command) == "":
			respondError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := vllm.ParseVllmCommand(payload.Command)
		if parseErr != nil {
			respondError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// Wrap the backend-specific options in the generic instance options.
		result := instance.CreateInstanceOptions{
			BackendType:       backends.BackendTypeVllm,
			VllmServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(&result)
		if encodeErr != nil {
			respondError(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Bind the child process to the request context so that it is killed
		// when the request is cancelled instead of outliving the handler.
		helpCmd := exec.CommandContext(r.Context(), "llama-server", "--help")

		// Stdout and stderr are captured together and returned as one body.
		output, err := helpCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Bind the child process to the request context so that it is killed
		// when the request is cancelled instead of outliving the handler.
		versionCmd := exec.CommandContext(r.Context(), "llama-server", "--version")

		// Stdout and stderr are captured together and returned as one body.
		output, err := versionCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Bind the child process to the request context so that it is killed
		// when the request is cancelled instead of outliving the handler.
		listCmd := exec.CommandContext(r.Context(), "llama-server", "--list-devices")

		// Stdout and stderr are captured together and returned as one body.
		output, err := listCmd.CombinedOutput()
		if err != nil {
			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "text/plain")
		w.Write(output)
	}
}

View File

@@ -0,0 +1,445 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"net/http/httputil"
"net/url"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
)
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Serialize the manager's list as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(all)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// Decode the JSON payload into the creation options.
		opts := new(instance.CreateInstanceOptions)
		decodeErr := json.NewDecoder(r.Body).Decode(opts)
		if decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		created, createErr := h.InstanceManager.CreateInstance(instanceName, opts)
		if createErr != nil {
			http.Error(w, "Failed to create instance: "+createErr.Error(), http.StatusInternalServerError)
			return
		}

		// Answer with 201 Created and the JSON form of the new instance.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		encodeErr := json.NewEncoder(w).Encode(created)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// The instance name is taken from the {name} route segment.
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// A failed lookup is reported to the client as a bad request.
		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Invalid instance: "+lookupErr.Error(), http.StatusBadRequest)
			return
		}

		// Serialize the instance as the JSON response body.
		w.Header().Set("Content-Type", "application/json")
		encoder := json.NewEncoder(w)
		if encodeErr := encoder.Encode(found); encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// The new configuration arrives as a JSON body.
		var opts instance.CreateInstanceOptions
		decodeErr := json.NewDecoder(r.Body).Decode(&opts)
		if decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, &opts)
		if updateErr != nil {
			http.Error(w, "Failed to update instance: "+updateErr.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(updated)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		started, startErr := h.InstanceManager.StartInstance(instanceName)
		if startErr != nil {
			// Hitting the running-instance limit is a conflict; everything else is a server error.
			switch startErr.(type) {
			case manager.MaxRunningInstancesError:
				http.Error(w, startErr.Error(), http.StatusConflict)
			default:
				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
			}
			return
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(started)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
		if stopErr != nil {
			http.Error(w, "Failed to stop instance: "+stopErr.Error(), http.StatusInternalServerError)
			return
		}

		// Return the instance as reported by the manager after the stop call.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(stopped)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
		if restartErr != nil {
			http.Error(w, "Failed to restart instance: "+restartErr.Error(), http.StatusInternalServerError)
			return
		}

		// Return the instance as reported by the manager after the restart call.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(restarted)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
		if deleteErr != nil {
			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
			return
		}

		// Successful deletion has no response body.
		w.WriteHeader(http.StatusNoContent)
	}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// Default to all lines (-1) unless the "lines" query parameter supplies a count.
		lineCount := -1
		if rawLines := r.URL.Query().Get("lines"); rawLines != "" {
			requested, convErr := strconv.Atoi(rawLines)
			if convErr != nil {
				http.Error(w, "Invalid lines parameter: "+convErr.Error(), http.StatusBadRequest)
				return
			}
			lineCount = requested
		}

		// The instance manager handles both local and remote instances.
		logText, logErr := h.InstanceManager.GetInstanceLogs(instanceName, lineCount)
		if logErr != nil {
			http.Error(w, "Failed to get logs: "+logErr.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/plain")
		w.Write([]byte(logText))
	}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Flow: resolve the instance named in the route, hand remote instances to
		// RemoteInstanceProxy, and otherwise forward the request through the
		// instance's own proxy after stripping the management API prefix.
		name := chi.URLParam(r, "name")
		if name == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		inst, err := h.InstanceManager.GetInstance(name)
		if err != nil {
			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Check if this is a remote instance
		if inst.IsRemote() {
			h.RemoteInstanceProxy(w, r, name, inst)
			return
		}

		if !inst.IsRunning() {
			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
			return
		}

		// Get the cached proxy for this instance
		proxy, err := inst.GetProxy()
		if err != nil {
			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)

		// Update the last request time for the instance
		inst.UpdateLastRequestTime()

		// Set forwarded headers.
		// net/http promotes the Host header of an incoming request to r.Host and
		// removes it from r.Header, so r.Header.Get("Host") is always empty here;
		// r.Host carries the host the client actually addressed.
		r.Header.Set("X-Forwarded-Host", r.Host)
		r.Header.Set("X-Forwarded-Proto", "http")

		// Forward the request using the cached proxy
		proxy.ServeHTTP(w, r)
	}
}
// RemoteInstanceProxy proxies requests to a remote instance
func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
	// The target node is the first entry of the instance's Nodes option.
	opts := inst.GetOptions()
	if opts == nil || len(opts.Nodes) == 0 {
		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
		return
	}
	node := opts.Nodes[0]

	// Fast path: reuse the reverse proxy already cached for this node.
	h.remoteProxiesMu.RLock()
	cached, found := h.remoteProxies[node]
	h.remoteProxiesMu.RUnlock()
	if found {
		cached.ServeHTTP(w, r)
		return
	}

	// Slow path: build a reverse proxy from the node's configuration.
	nodeCfg, known := h.cfg.Nodes[node]
	if !known {
		http.Error(w, fmt.Sprintf("Node %s not found", node), http.StatusInternalServerError)
		return
	}

	target, parseErr := url.Parse(nodeCfg.Address)
	if parseErr != nil {
		http.Error(w, "Failed to parse node address: "+parseErr.Error(), http.StatusInternalServerError)
		return
	}

	remote := httputil.NewSingleHostReverseProxy(target)

	// Wrap the default director so outgoing requests carry the node's API key, when one is set.
	baseDirector := remote.Director
	token := nodeCfg.APIKey
	remote.Director = func(out *http.Request) {
		baseDirector(out)
		if token != "" {
			out.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
		}
	}

	// Store the proxy under the node name for later requests.
	h.remoteProxiesMu.Lock()
	h.remoteProxies[node] = remote
	h.remoteProxiesMu.Unlock()

	remote.ServeHTTP(w, r)
}

View File

@@ -0,0 +1,79 @@
package server
import (
"encoding/json"
"net/http"
"github.com/go-chi/chi/v5"
)
// NodeResponse represents a sanitized node configuration for API responses.
// It carries only the node's address; no other node configuration field is
// included in the JSON sent to clients.
type NodeResponse struct {
// Address is copied from the Address field of the node's configuration.
Address string `json:"address"`
}
// ListNodes godoc
// @Summary List all configured nodes
// @Description Returns a map of all nodes configured in the server (node name -> node config)
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes [get]
func (h *Handler) ListNodes() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Build a name -> NodeResponse map that exposes only each node's address.
		configured := h.cfg.Nodes
		sanitized := make(map[string]NodeResponse, len(configured))
		for nodeName, nodeCfg := range configured {
			sanitized[nodeName] = NodeResponse{Address: nodeCfg.Address}
		}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(sanitized)
		if encodeErr != nil {
			http.Error(w, "Failed to encode nodes: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// GetNode godoc
// @Summary Get details of a specific node
// @Description Returns the details of a specific node by name
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Node Name"
// @Success 200 {object} NodeResponse "Node details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 404 {string} string "Node not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes/{name} [get]
func (h *Handler) GetNode() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		nodeName := chi.URLParam(r, "name")
		if len(nodeName) == 0 {
			http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
			return
		}

		nodeCfg, known := h.cfg.Nodes[nodeName]
		if !known {
			http.Error(w, "Node not found", http.StatusNotFound)
			return
		}

		// Only the address is exposed in the response.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(NodeResponse{Address: nodeCfg.Address})
		if encodeErr != nil {
			http.Error(w, "Failed to encode node: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}

View File

@@ -0,0 +1,206 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/instance"
"net/http"
"net/http/httputil"
"net/url"
)
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Map every instance to a "model" entry of the OpenAI-style listing.
		models := make([]OpenAIInstance, 0, len(all))
		for _, item := range all {
			models = append(models, OpenAIInstance{
				ID:      item.Name,
				Object:  "model",
				Created: item.Created,
				OwnedBy: "llamactl",
			})
		}

		payload := OpenAIListInstancesResponse{Object: "list", Data: models}

		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(payload)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Buffer the whole body: it is inspected here and replayed to the backend afterwards.
		payload, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			http.Error(w, "Failed to read request body", http.StatusBadRequest)
			return
		}
		r.Body.Close()

		// The "model" field of the JSON body names the target instance.
		var parsed map[string]any
		if json.Unmarshal(payload, &parsed) != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}
		model, isString := parsed["model"].(string)
		if !(isString && model != "") {
			http.Error(w, "Instance name is required", http.StatusBadRequest)
			return
		}

		target, lookupErr := h.InstanceManager.GetInstance(model)
		if lookupErr != nil {
			http.Error(w, "Invalid instance: "+lookupErr.Error(), http.StatusBadRequest)
			return
		}

		// Remote instances are served by the remote proxy, which needs the body restored.
		if target.IsRemote() {
			r.Body = io.NopCloser(bytes.NewReader(payload))
			h.RemoteOpenAIProxy(w, r, model, target)
			return
		}

		if !target.IsRunning() {
			// A stopped instance is only started here when its OnDemandStart option is set and true.
			opts := target.GetOptions()
			if opts == nil || opts.OnDemandStart == nil || !*opts.OnDemandStart {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}

			// At the running-instance limit, either evict via LRU (when enabled) or refuse.
			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if !h.cfg.Instances.EnableLRUEviction {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
				if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
					http.Error(w, "Cannot start Instance, failed to evict instance "+evictErr.Error(), http.StatusInternalServerError)
					return
				}
			}

			_, startErr := h.InstanceManager.StartInstance(model)
			if startErr != nil {
				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
				return
			}

			// Block until the instance reports healthy, bounded by the configured on-demand start timeout.
			healthErr := target.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout)
			if healthErr != nil {
				http.Error(w, "Instance failed to become healthy: "+healthErr.Error(), http.StatusServiceUnavailable)
				return
			}
		}

		backend, proxyErr := target.GetProxy()
		if proxyErr != nil {
			http.Error(w, "Failed to get proxy: "+proxyErr.Error(), http.StatusInternalServerError)
			return
		}

		// Record the request time on the instance.
		target.UpdateLastRequestTime()

		// Replay the buffered body to the backend with a matching content length.
		r.Body = io.NopCloser(bytes.NewReader(payload))
		r.ContentLength = int64(len(payload))

		backend.ServeHTTP(w, r)
	}
}
// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance
func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
	// The target node is the first entry of the instance's Nodes option.
	opts := inst.GetOptions()
	if opts == nil || len(opts.Nodes) == 0 {
		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
		return
	}
	node := opts.Nodes[0]

	// Fast path: reuse the reverse proxy already cached for this node.
	h.remoteProxiesMu.RLock()
	cached, found := h.remoteProxies[node]
	h.remoteProxiesMu.RUnlock()
	if found {
		cached.ServeHTTP(w, r)
		return
	}

	// Slow path: build a reverse proxy from the node's configuration.
	nodeCfg, known := h.cfg.Nodes[node]
	if !known {
		http.Error(w, fmt.Sprintf("Node %s not found", node), http.StatusInternalServerError)
		return
	}

	target, parseErr := url.Parse(nodeCfg.Address)
	if parseErr != nil {
		http.Error(w, "Failed to parse node address: "+parseErr.Error(), http.StatusInternalServerError)
		return
	}

	remote := httputil.NewSingleHostReverseProxy(target)

	// Wrap the default director so outgoing requests carry the node's API key, when one is set.
	baseDirector := remote.Director
	token := nodeCfg.APIKey
	remote.Director = func(out *http.Request) {
		baseDirector(out)
		if token != "" {
			out.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
		}
	}

	// Store the proxy under the node name for later requests.
	h.remoteProxiesMu.Lock()
	h.remoteProxies[node] = remote
	h.remoteProxiesMu.Unlock()

	remote.ServeHTTP(w, r)
}

View File

@@ -0,0 +1,22 @@
package server
import (
"fmt"
"net/http"
)
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Version, commit hash and build time are read from the handler configuration.
		version := h.cfg.Version
		commit := h.cfg.CommitHash
		builtAt := h.cfg.BuildTime

		w.Header().Set("Content-Type", "text/plain")
		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", version, commit, builtAt)
	}
}

View File

@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Use(cors.Handler(cors.Options{ r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.cfg.Server.AllowedOrigins, AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"}, AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"}, AllowedHeaders: handler.cfg.Server.AllowedHeaders,
ExposedHeaders: []string{"Link"}, ExposedHeaders: []string{"Link"},
AllowCredentials: false, AllowCredentials: false,
MaxAge: 300, MaxAge: 300,
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
}) })
}) })
// Node management endpoints
r.Route("/nodes", func(r chi.Router) {
r.Get("/", handler.ListNodes()) // List all nodes
r.Route("/{name}", func(r chi.Router) {
r.Get("/", handler.GetNode())
})
})
// Instance management endpoints // Instance management endpoints
r.Route("/instances", func(r chi.Router) { r.Route("/instances", func(r chi.Router) {
r.Get("/", handler.ListInstances()) // List all instances r.Get("/", handler.ListInstances()) // List all instances
@@ -103,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
}) })
r.Route("/llama-cpp/{name}", func(r chi.Router) {
// Public Routes
// Allow llama-cpp server to serve its own WebUI if it is running.
// Don't auto start the server since it can be accessed without an API key
r.Get("/", handler.LlamaCppProxy(false))
// Private Routes
r.Group(func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
// This handler auto start the server if it's not running
llamaCppHandler := handler.LlamaCppProxy(true)
// llama.cpp server specific proxy endpoints
r.Get("/props", llamaCppHandler)
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
r.Get("/slots", llamaCppHandler)
r.Post("/apply-template", llamaCppHandler)
r.Post("/completion", llamaCppHandler)
r.Post("/detokenize", llamaCppHandler)
r.Post("/embeddings", llamaCppHandler)
r.Post("/infill", llamaCppHandler)
r.Post("/metrics", llamaCppHandler)
r.Post("/props", llamaCppHandler)
r.Post("/reranking", llamaCppHandler)
r.Post("/tokenize", llamaCppHandler)
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
// - /v1/completions
// - /v1/chat/completions
// - /v1/embeddings
// - /v1/rerank
// - /v1/reranking
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
// on "model" field being optional, and handler.OpenAIProxy requires it.
r.Post("/v1/*", llamaCppHandler)
})
})
// Serve WebUI files // Serve WebUI files
if err := webui.SetupWebUI(r); err != nil { if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err) fmt.Printf("Failed to set up WebUI: %v\n", err)

View File

@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields // Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {}; const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => { Object.entries(formData).forEach(([key, value]) => {
if (key === 'backend_options' && value && typeof value === 'object') { if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
// Handle backend_options specially - clean nested object // Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {}; const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => { Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -123,8 +123,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
if (Object.keys(cleanBackendOptions).length > 0) { if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions; (cleanOptions as any)[key] = cleanBackendOptions;
} }
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) { } else if (value !== undefined && value !== null) {
// Handle arrays - don't include empty arrays // Skip empty strings
if (typeof value === 'string' && value.trim() === "") {
return;
}
// Skip empty arrays
if (Array.isArray(value) && value.length === 0) { if (Array.isArray(value) && value.length === 0) {
return; return;
} }

View File

@@ -1,144 +0,0 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
  fieldKey: keyof CreateInstanceOptions
  value: string | number | boolean | string[] | undefined
  onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
}

/** Splits comma-separated text into trimmed, non-empty items. */
const parseCommaList = (text: string): string[] =>
  text.split(',').map(s => s.trim()).filter(Boolean)

/**
 * Renders one form control for a `CreateInstanceOptions` field.
 *
 * The control is chosen from the type reported by `getFieldType`:
 * checkbox (boolean), numeric input (number), comma-separated text input
 * (array) or plain text input (anything else). `backend_type` is always
 * rendered as a dropdown. Changes are reported through `onChange` together
 * with the field key; cleared inputs are reported as `undefined`.
 */
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
  // Get configuration for basic fields, or use field name for advanced fields
  const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }

  // Get type from Zod schema
  const fieldType = getFieldType(fieldKey)

  // Raw text of the array input. The parsed array handed to the parent drops
  // trailing commas and whitespace, so re-rendering from `value.join(', ')`
  // on every keystroke would erase a comma the moment it is typed and make
  // it impossible to enter a second item.
  const [arrayDraft, setArrayDraft] = React.useState('')

  const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
    onChange(fieldKey, newValue)
  }

  const renderField = () => {
    // Special handling for backend_type field - render as dropdown
    if (fieldKey === 'backend_type') {
      return (
        <div className="grid gap-2">
          <Label htmlFor={fieldKey}>
            {config.label}
          </Label>
          <select
            id={fieldKey}
            value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
            onChange={(e) => handleChange(e.target.value || undefined)}
            className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
          >
            <option value={BackendType.LLAMA_CPP}>Llama Server</option>
            <option value={BackendType.MLX_LM}>MLX LM</option>
            <option value={BackendType.VLLM}>vLLM</option>
          </select>
          {config.description && (
            <p className="text-sm text-muted-foreground">{config.description}</p>
          )}
        </div>
      )
    }

    switch (fieldType) {
      case 'boolean':
        return (
          <div className="flex items-center space-x-2">
            <Checkbox
              id={fieldKey}
              checked={typeof value === 'boolean' ? value : false}
              onCheckedChange={(checked) => handleChange(checked)}
            />
            <Label htmlFor={fieldKey} className="text-sm font-normal">
              {config.label}
              {config.description && (
                <span className="text-muted-foreground ml-1">- {config.description}</span>
              )}
            </Label>
          </div>
        )

      case 'number':
        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="number"
              step="any" // This allows decimal numbers
              value={typeof value === 'string' || typeof value === 'number' ? value : ''}
              onChange={(e) => {
                const numValue = e.target.value ? parseFloat(e.target.value) : undefined
                // Only update if the parsed value is valid or the input is empty
                if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
                  handleChange(numValue)
                }
              }}
              placeholder={config.placeholder}
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
          </div>
        )

      case 'array': {
        const items = Array.isArray(value) ? value : []
        // Show what the user typed for as long as it still parses to the
        // current value; otherwise (initial render, external reset) fall back
        // to the canonical joined form.
        const draftItems = parseCommaList(arrayDraft)
        const draftIsCurrent =
          draftItems.length === items.length &&
          draftItems.every((item, i) => item === items[i])
        const displayedText = draftIsCurrent ? arrayDraft : items.join(', ')

        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="text"
              value={displayedText}
              onChange={(e) => {
                setArrayDraft(e.target.value)
                const arrayValue = e.target.value
                  ? parseCommaList(e.target.value)
                  : undefined
                handleChange(arrayValue)
              }}
              placeholder="item1, item2, item3"
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
            <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
          </div>
        )
      }

      case 'text':
      default:
        return (
          <div className="grid gap-2">
            <Label htmlFor={fieldKey}>
              {config.label}
            </Label>
            <Input
              id={fieldKey}
              type="text"
              value={typeof value === 'string' || typeof value === 'number' ? value : ''}
              onChange={(e) => handleChange(e.target.value || undefined)}
              placeholder={config.placeholder}
            />
            {config.description && (
              <p className="text-sm text-muted-foreground">{config.description}</p>
            )}
          </div>
        )
    }
  }

  return <div className="space-y-2">{renderField()}</div>
}

export default ZodFormField

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
  id: string
  label: string
  value: Record<string, string> | undefined
  onChange: (value: Record<string, string> | undefined) => void
  description?: string
  disabled?: boolean
  className?: string
}

interface EnvVar {
  key: string
  value: string
}

/**
 * Editable list of environment variables shown as name/value rows.
 *
 * The rows are kept in local state, seeded once from `value`; at least one
 * (possibly empty) row is always shown. After each edit or removal the parent
 * receives, through `onChange`, an object built from the rows whose trimmed
 * name and trimmed value are both non-empty, or `undefined` when no such row
 * exists. Adding an empty row does not notify the parent.
 */
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
}) => {
  // Convert the value object to an array of key-value pairs for editing
  const envVarsFromValue = value
    ? Object.entries(value).map(([key, val]) => ({ key, value: val }))
    : []

  const [envVars, setEnvVars] = useState<EnvVar[]>(
    envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
  )

  // Update parent component when env vars change
  const updateParent = (newEnvVars: EnvVar[]) => {
    // Filter out empty entries
    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')

    if (validVars.length === 0) {
      onChange(undefined)
    } else {
      const envObject = validVars.reduce((acc, env) => {
        acc[env.key.trim()] = env.value.trim()
        return acc
      }, {} as Record<string, string>)
      onChange(envObject)
    }
  }

  const handleKeyChange = (index: number, newKey: string) => {
    // Replace the edited row with a new object instead of mutating the one
    // held in state: a shallow array copy still shares its row objects.
    const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, key: newKey } : env))
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  const handleValueChange = (index: number, newValue: string) => {
    // Same immutable replacement as in handleKeyChange.
    const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, value: newValue } : env))
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  const addEnvVar = () => {
    const newEnvVars = [...envVars, { key: '', value: '' }]
    setEnvVars(newEnvVars)
  }

  const removeEnvVar = (index: number) => {
    if (envVars.length === 1) {
      // Reset to empty if it's the last one
      const newEnvVars = [{ key: '', value: '' }]
      setEnvVars(newEnvVars)
      updateParent(newEnvVars)
    } else {
      const newEnvVars = envVars.filter((_, i) => i !== index)
      setEnvVars(newEnvVars)
      updateParent(newEnvVars)
    }
  }

  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {envVars.map((envVar, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              placeholder="Variable name"
              value={envVar.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder="Variable value"
              value={envVar.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removeEnvVar(index)}
              disabled={disabled}
              className="shrink-0"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addEnvVar}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          Add Variable
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">
        Environment variables that will be passed to the backend process
      </p>
    </div>
  )
}

export default EnvironmentVariablesInput

View File

@@ -1,99 +0,0 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getFieldType } from '@/schemas/instanceOptions'
import TextInput from '@/components/form/TextInput'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import SelectInput from '@/components/form/SelectInput'
interface BasicInstanceFieldsProps {
  formData: CreateInstanceOptions
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
}

/**
 * Renders the "Basic Configuration" section of the instance form.
 *
 * Every key returned by `getBasicFields()` gets an input chosen from its
 * schema type, except the auto-restart fields and `backend_options`, which
 * are left out of this section. `backend_type` is always rendered as a select.
 */
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
  formData,
  onChange
}) => {
  // Keys that are part of the basic field list but are not rendered here.
  const excludedKeys = new Set<string>(['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'])
  const visibleKeys = getBasicFields().filter(
    candidate => !excludedKeys.has(candidate as string)
  )

  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const emit = (next: any) => onChange(fieldKey, next)

    // The backend type is always a dropdown, regardless of its schema type.
    if (fieldKey === 'backend_type') {
      return (
        <SelectInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] || BackendType.LLAMA_CPP}
          onChange={(value) => emit(value)}
          options={[
            { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
            { value: BackendType.MLX_LM, label: 'MLX LM' },
            { value: BackendType.VLLM, label: 'vLLM' }
          ]}
          description={config.description}
        />
      )
    }

    const fieldType = getFieldType(fieldKey)

    if (fieldType === 'boolean') {
      return (
        <CheckboxInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as boolean | undefined}
          onChange={(value) => emit(value)}
          description={config.description}
        />
      )
    }

    if (fieldType === 'number') {
      return (
        <NumberInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as number | undefined}
          onChange={(value) => emit(value)}
          placeholder={config.placeholder}
          description={config.description}
        />
      )
    }

    // Every other schema type falls back to a plain text input.
    return (
      <TextInput
        key={fieldKey}
        id={fieldKey}
        label={config.label}
        value={formData[fieldKey] as string | number | undefined}
        onChange={(value) => emit(value)}
        placeholder={config.placeholder}
        description={config.description}
      />
    )
  }

  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Basic Configuration</h3>
      {visibleKeys.map((fieldKey) => renderField(fieldKey))}
    </div>
  )
}

export default BasicInstanceFields

View File

@@ -1,4 +1,4 @@
import React from 'react' import React, { useState, useEffect } from 'react'
import type { CreateInstanceOptions } from '@/types/instance' import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label' import { Label } from '@/components/ui/label'
@@ -6,6 +6,9 @@ import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration' import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput' import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput' import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
interface InstanceSettingsCardProps { interface InstanceSettingsCardProps {
instanceName: string instanceName: string
@@ -24,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onNameChange, onNameChange,
onChange onChange
}) => { }) => {
// Nodes returned by the API, keyed by node name
const [nodes, setNodes] = useState<NodesMap>({})
// True until the initial node fetch has settled (successfully or not)
const [loadingNodes, setLoadingNodes] = useState(true)

// Fetch the node list once on mount and, when the form has no node yet,
// pre-select the first one returned.
useEffect(() => {
  // Flipped by the cleanup below so that a response arriving after unmount
  // neither updates local state nor calls the parent's onChange.
  let cancelled = false

  const fetchNodes = async () => {
    try {
      const fetchedNodes = await nodesApi.list()
      if (cancelled) {
        return
      }
      setNodes(fetchedNodes)
      // Auto-select first node if none selected
      const nodeNames = Object.keys(fetchedNodes)
      if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
        onChange('nodes', [nodeNames[0]])
      }
    } catch (error) {
      // Still log after unmount: the failure is worth surfacing either way
      console.error('Failed to fetch nodes:', error)
    } finally {
      if (!cancelled) {
        setLoadingNodes(false)
      }
    }
  }

  void fetchNodes()

  return () => {
    cancelled = true
  }
  // Intentionally run only on mount; formData/onChange are read once for the initial auto-select.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
// One select option per fetched node; the node name is used as both value and label
const nodeOptions = Object.keys(nodes).map((name) => {
  return { value: name, label: name }
})
// Stores the chosen node as a single-element `nodes` array; an empty or
// missing selection resets `nodes` to undefined.
const handleNodeChange = (value: string | undefined) => {
  onChange('nodes', value ? [value] : undefined)
}
// Value shown in the node select: the first entry of formData.nodes, or '' when none is set
const hasNodeSelection = Boolean(formData.nodes && formData.nodes.length > 0)
const selectedNode = hasNodeSelection && formData.nodes ? formData.nodes[0] : ''
return ( return (
<Card> <Card>
<CardHeader> <CardHeader>
@@ -49,6 +92,19 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
</p> </p>
</div> </div>
{/* Node Selection */}
{!loadingNodes && Object.keys(nodes).length > 0 && (
<SelectInput
id="node"
label="Node"
value={selectedNode}
onChange={handleNodeChange}
options={nodeOptions}
description={isEditing ? "Node cannot be changed after instance creation" : "Select the node where the instance will run"}
disabled={isEditing}
/>
)}
{/* Auto Restart Configuration */} {/* Auto Restart Configuration */}
<AutoRestartConfiguration <AutoRestartConfiguration
formData={formData} formData={formData}
@@ -75,6 +131,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onChange={(value) => onChange('on_demand_start', value)} onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed" description="Start instance only when needed"
/> />
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div> </div>
</CardContent> </CardContent>
</Card> </Card>

View File

@@ -1,4 +1,4 @@
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react' import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState { interface AuthContextState {
isAuthenticated: boolean isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request // Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => { const validateApiKey = async (key: string): Promise<boolean> => {
try { try {
const response = await fetch('/api/v1/instances', { const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: { headers: {
'Authorization': `Bearer ${key}`, 'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json' 'Content-Type': 'application/json'

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { instancesApi } from '@/lib/api' import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally // Mock fetch globally
const mockFetch = vi.fn() const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
}) })
it('converts HTTP errors to meaningful messages', async () => { it('converts HTTP errors to meaningful messages', async () => {
mockFetch.mockResolvedValue({ const mockResponse = {
ok: false, ok: false,
status: 409, status: 409,
text: () => Promise.resolve('Instance already exists') text: () => Promise.resolve('Instance already exists'),
}) clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.create('existing', {})) await expect(instancesApi.create('existing', {}))
.rejects .rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
}) })
it('handles empty error responses gracefully', async () => { it('handles empty error responses gracefully', async () => {
mockFetch.mockResolvedValue({ const mockResponse = {
ok: false, ok: false,
status: 500, status: 500,
text: () => Promise.resolve('') text: () => Promise.resolve(''),
}) clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.list()) await expect(instancesApi.list())
.rejects .rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100) await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith( expect(mockFetch).toHaveBeenCalledWith(
'/api/v1/instances/test-instance/logs?lines=100', expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object) expect.any(Object)
) )
}) })

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance"; import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils"; import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1"; // Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `https://<host>/proxy/` and the API calls will be made to
// `https://<host>/proxy/api/v1/<endpoint>`.
// document.baseURI is an absolute URL, so the path is resolved with the URL
// constructor instead of string concatenation: a base that does not end in `/`
// (e.g. `.../proxy/index.html`) or that carries a query string or fragment
// still resolves to `.../proxy/api/v1`.
export const API_BASE = new URL("api/v1", document.baseURI).href;
// Base API call function with error handling // Base API call function with error handling
async function apiCall<T>( async function apiCall<T>(
@@ -46,11 +49,8 @@ async function apiCall<T>(
} else { } else {
// Handle empty responses for JSON endpoints // Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length'); const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) { if (contentLength === '0') {
const text = await response.text(); return {} as T; // Return empty object for empty JSON responses
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
} }
const data = await response.json() as T; const data = await response.json() as T;
return data; return data;
@@ -103,6 +103,22 @@ export const backendsApi = {
}, },
}; };
// Node API types

/** Payload describing a single node, as returned by the nodes endpoints. */
export interface NodeResponse {
  /** Address reported for the node. */
  address: string;
}

/** Map of node name to its NodeResponse, as returned by GET /nodes. */
export type NodesMap = Record<string, NodeResponse>;

// Node API functions
export const nodesApi = {
  // GET /nodes - returns map of node name to NodeResponse
  list: () => apiCall<NodesMap>("/nodes"),

  // GET /nodes/{name}
  // The name is percent-encoded so that characters such as `/`, `?` or `#`
  // stay inside the path segment instead of changing the requested route.
  get: (name: string) =>
    apiCall<NodeResponse>(`/nodes/${encodeURIComponent(name)}`),
};
// Instance API functions // Instance API functions
export const instancesApi = { export const instancesApi = {
// GET /instances // GET /instances

View File

@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
} }
if (!response.ok) { if (!response.ok) {
const errorMessage = await parseErrorResponse(response) // Clone the response before reading to avoid consuming the body stream
const errorMessage = await parseErrorResponse(response.clone())
throw new Error(errorMessage) throw new Error(errorMessage)
} }
} }

View File

@@ -1,12 +1,10 @@
import { import {
type CreateInstanceOptions,
type LlamaCppBackendOptions, type LlamaCppBackendOptions,
type MlxBackendOptions, type MlxBackendOptions,
type VllmBackendOptions, type VllmBackendOptions,
LlamaCppBackendOptionsSchema, LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema, MlxBackendOptionsSchema,
VllmBackendOptionsSchema, VllmBackendOptionsSchema,
getAllFieldKeys,
getAllLlamaCppFieldKeys, getAllLlamaCppFieldKeys,
getAllMlxFieldKeys, getAllMlxFieldKeys,
getAllVllmFieldKeys, getAllVllmFieldKeys,
@@ -15,41 +13,6 @@ import {
getVllmFieldType getVllmFieldType
} from '@/schemas/instanceOptions' } from '@/schemas/instanceOptions'
/** Display metadata for one instance-level form field. */
type BasicFieldMeta = {
  /** Text shown as the field's label. */
  label: string
  /** Optional help text for the field. */
  description?: string
  /** Optional placeholder text for the field's input. */
  placeholder?: string
}

/**
 * Instance-level basic fields (not backend-specific), keyed by option name.
 * The key order here is the order in which Object.keys() lists the fields.
 */
export const basicFieldsConfig: Record<string, BasicFieldMeta> = {
  auto_restart: {
    label: 'Auto Restart',
    description: 'Automatically restart the instance on failure'
  },
  max_restarts: {
    label: 'Max Restarts',
    placeholder: '3',
    description: 'Maximum number of restart attempts (0 = unlimited)'
  },
  restart_delay: {
    label: 'Restart Delay (seconds)',
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped'
  },
  on_demand_start: {
    label: 'On-Demand Start',
    description: 'Start instance upon receiving OpenAI-compatible API request'
  },
  backend_type: {
    label: 'Backend Type',
    description: 'Type of backend to use for this instance'
  }
}
// LlamaCpp backend-specific basic fields // LlamaCpp backend-specific basic fields
const basicLlamaCppFieldsConfig: Record<string, { const basicLlamaCppFieldsConfig: Record<string, {
label: string label: string
@@ -152,18 +115,6 @@ const backendFieldGetters = {
llama_cpp: getAllLlamaCppFieldKeys, llama_cpp: getAllLlamaCppFieldKeys,
} as const } as const
/** Reports whether `key` has an entry in basicFieldsConfig. */
function isBasicField(key: keyof CreateInstanceOptions): boolean {
  const hasConfigEntry = key in basicFieldsConfig
  return hasConfigEntry
}
/** Lists the option keys configured in basicFieldsConfig, in declaration order. */
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
  const configuredKeys = Object.keys(basicFieldsConfig)
  return configuredKeys as (keyof CreateInstanceOptions)[]
}
/** Lists every key from getAllFieldKeys() that is not a basic field. */
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
  const advancedKeys: (keyof CreateInstanceOptions)[] = []
  for (const candidate of getAllFieldKeys()) {
    if (isBasicField(candidate)) {
      continue
    }
    advancedKeys.push(candidate)
  }
  return advancedKeys
}
export function getBasicBackendFields(backendType?: string): string[] { export function getBasicBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
return 'text' return 'text'
} }
// Re-export the Zod-based functions
export { getFieldType } from '@/schemas/instanceOptions'

View File

@@ -33,9 +33,15 @@ export const CreateInstanceOptionsSchema = z.object({
idle_timeout: z.number().optional(), idle_timeout: z.number().optional(),
on_demand_start: z.boolean().optional(), on_demand_start: z.boolean().optional(),
// Environment variables
environment: z.record(z.string(), z.string()).optional(),
// Backend configuration // Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(), backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(), backend_options: BackendOptionsSchema.optional(),
// Node configuration
nodes: z.array(z.string()).optional(),
}) })
// Re-export types and schemas from backend files // Re-export types and schemas from backend files
@@ -75,5 +81,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
if (innerSchema instanceof z.ZodNumber) return 'number' if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array' if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodObject) return 'object' if (innerSchema instanceof z.ZodObject) return 'object'
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
return 'text' // ZodString and others default to text return 'text' // ZodString and others default to text
} }

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'], setupFiles: ['./src/test/setup.ts'],
css: true, css: true,
}, },
// ensures relative asset paths to support being served behind a subpath
base: "./"
}) })