mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 17:14:28 +00:00
Compare commits
90 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| eb5abae173 | |||
| 696a2cb18b | |||
| e7402f0029 | |||
| 5c9a397746 | |||
| e97ca727d1 | |||
| 9f3c01384b | |||
| c5097e59be | |||
| cf20f304b3 | |||
| 72eba48b80 | |||
| c3037f914d | |||
| 81266b4bc4 | |||
| a31af94e7b | |||
| 9ee0a184b3 | |||
| 5436c28a1f | |||
| 73b9dd5bc7 | |||
| f61e8dad5c | |||
| ab2770bdd9 | |||
| e7a6a7003e | |||
| 2b950ee649 | |||
| b965b77c18 | |||
| 8a16a195de | |||
| 9684a8a09b | |||
| 9d5f01d4ae | |||
| e281708b20 | |||
| 8d9b0c0621 | |||
| 6c1a76691d | |||
| 5d958ed283 | |||
| 56b95d1243 | |||
| 688b815ca7 | |||
| 7f6725da96 | |||
| 3418735204 | |||
| 2f1cf5acdc | |||
| 01380e6641 | |||
| 6298b03636 | |||
| aae3f84d49 | |||
| 554796391b | |||
| 16b28bac05 | |||
| 1892dc8315 | |||
|
|
997bd1b063 | ||
|
|
fa43f9e967 | ||
| db9eebeb8b | |||
| bd062f8ca0 | |||
| 8ebdb1a183 | |||
| 7272212081 | |||
| 035e184789 | |||
| d15976e7aa | |||
| 4fa75d9801 | |||
|
|
0e1bc8a352 | ||
| b728a7c6b2 | |||
| a491f29483 | |||
| 670f8ff81b | |||
| da56456504 | |||
| c30053e51c | |||
| 347c58e15f | |||
| 2ed67eb672 | |||
| 0188f82306 | |||
| e0f176de10 | |||
| 2759be65a5 | |||
| 1e5e86d2c3 | |||
| 25d3d70707 | |||
| e54cfd006d | |||
| 7d39e7ee86 | |||
| 222d913b4a | |||
|
|
03a7a5d139 | ||
|
|
e50660c379 | ||
|
|
5906d89f8d | ||
| cb2d95139f | |||
| 889a8707e7 | |||
| 070c91787d | |||
| 169ee422ec | |||
| bb0176b7f5 | |||
| 291ec7995f | |||
| b940b38e46 | |||
| 92cb57e816 | |||
| 0ecd55c354 | |||
| b4c17194eb | |||
| 808092decf | |||
| 12bbf34236 | |||
| 9a7255a52d | |||
| 97a7c9a4e3 | |||
| fa9335663a | |||
| d092518114 | |||
| ffa0a0c161 | |||
| 1fbf809a2d | |||
| c984d95723 | |||
| 50e1355205 | |||
| 7994fd05b3 | |||
|
|
f496a28f04 | ||
| f9371e876d | |||
|
|
3a979da815 |
45
.dockerignore
Normal file
45
.dockerignore
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Git and version control
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
|
||||||
|
# Documentation
|
||||||
|
*.md
|
||||||
|
docs/
|
||||||
|
|
||||||
|
# Development files
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Build artifacts
|
||||||
|
webui/node_modules/
|
||||||
|
webui/dist/
|
||||||
|
webui/.next/
|
||||||
|
*.log
|
||||||
|
*.tmp
|
||||||
|
|
||||||
|
# Data directories
|
||||||
|
data/
|
||||||
|
models/
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# Test files
|
||||||
|
*_test.go
|
||||||
|
**/*_test.go
|
||||||
|
|
||||||
|
# CI/CD
|
||||||
|
.github/
|
||||||
|
|
||||||
|
# Local configuration
|
||||||
|
llamactl.yaml
|
||||||
|
config.yaml
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
|
||||||
|
# OS files
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Backup files
|
||||||
|
*.bak
|
||||||
|
*.backup
|
||||||
|
*~
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
name: Build and Deploy Documentation
|
name: User Docs
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -35,3 +35,5 @@ node_modules/
|
|||||||
dist/
|
dist/
|
||||||
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
||||||
|
site/
|
||||||
80
README.md
80
README.md
@@ -1,6 +1,6 @@
|
|||||||
# llamactl
|
# llamactl
|
||||||
|
|
||||||
  
|
   
|
||||||
|
|
||||||
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
|
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
|
||||||
|
|
||||||
@@ -23,6 +23,12 @@
|
|||||||
### ⚡ Smart Operations
|
### ⚡ Smart Operations
|
||||||
- **Instance Monitoring**: Health checks, auto-restart, log management
|
- **Instance Monitoring**: Health checks, auto-restart, log management
|
||||||
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
||||||
|
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
|
||||||
|
|
||||||
|
### 🔗 Remote Instance Deployment
|
||||||
|
- **Remote Node Support**: Deploy instances on remote hosts
|
||||||
|
- **Central Management**: Manage remote instances from a single dashboard
|
||||||
|
- **Seamless Routing**: Automatic request routing to remote instances
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -52,7 +58,8 @@ llamactl
|
|||||||
2. Click "Create Instance"
|
2. Click "Create Instance"
|
||||||
3. Choose backend type (llama.cpp, MLX, or vLLM)
|
3. Choose backend type (llama.cpp, MLX, or vLLM)
|
||||||
4. Set model path and backend-specific options
|
4. Set model path and backend-specific options
|
||||||
5. Start or stop the instance
|
5. Configure environment variables if needed (optional)
|
||||||
|
6. Start or stop the instance
|
||||||
|
|
||||||
### Or use the REST API:
|
### Or use the REST API:
|
||||||
```bash
|
```bash
|
||||||
@@ -66,10 +73,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
|
|||||||
-H "Authorization: Bearer your-key" \
|
-H "Authorization: Bearer your-key" \
|
||||||
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
|
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
|
||||||
|
|
||||||
# Create vLLM instance
|
# Create vLLM instance with environment variables
|
||||||
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
|
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
|
||||||
-H "Authorization: Bearer your-key" \
|
-H "Authorization: Bearer your-key" \
|
||||||
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
|
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
|
||||||
|
|
||||||
# Use with OpenAI SDK
|
# Use with OpenAI SDK
|
||||||
curl -X POST localhost:8080/v1/chat/completions \
|
curl -X POST localhost:8080/v1/chat/completions \
|
||||||
@@ -93,7 +100,30 @@ sudo mv llamactl /usr/local/bin/
|
|||||||
# Windows - Download from releases page
|
# Windows - Download from releases page
|
||||||
```
|
```
|
||||||
|
|
||||||
### Option 2: Build from Source
|
### Option 2: Docker (No local backend installation required)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone repository and build Docker images
|
||||||
|
git clone https://github.com/lordmathis/llamactl.git
|
||||||
|
cd llamactl
|
||||||
|
mkdir -p data/llamacpp data/vllm models
|
||||||
|
|
||||||
|
# Build and start llamactl with llama.cpp CUDA backend
|
||||||
|
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
|
||||||
|
|
||||||
|
# Build and start llamactl with vLLM CUDA backend
|
||||||
|
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
|
||||||
|
|
||||||
|
# Build from source using multi-stage build
|
||||||
|
docker build -f docker/Dockerfile.source -t llamactl:source .
|
||||||
|
```
|
||||||
|
|
||||||
|
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
|
||||||
|
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
|
||||||
|
|
||||||
|
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
|
||||||
|
|
||||||
|
### Option 3: Build from Source
|
||||||
Requires Go 1.24+ and Node.js 22+
|
Requires Go 1.24+ and Node.js 22+
|
||||||
```bash
|
```bash
|
||||||
git clone https://github.com/lordmathis/llamactl.git
|
git clone https://github.com/lordmathis/llamactl.git
|
||||||
@@ -145,47 +175,23 @@ pip install vllm
|
|||||||
# Or use Docker - no local installation required
|
# Or use Docker - no local installation required
|
||||||
```
|
```
|
||||||
|
|
||||||
## Docker Support
|
## Backend Docker Support
|
||||||
|
|
||||||
llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for:
|
llamactl can run backends in Docker containers:
|
||||||
- Production deployments without local backend installation
|
|
||||||
- Isolating backend dependencies
|
|
||||||
- GPU-accelerated inference using official Docker images
|
|
||||||
|
|
||||||
### Docker Configuration
|
|
||||||
|
|
||||||
Enable Docker support using the new structured backend configuration:
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
backends:
|
backends:
|
||||||
llama-cpp:
|
llama-cpp:
|
||||||
command: "llama-server"
|
|
||||||
docker:
|
docker:
|
||||||
enabled: true
|
enabled: true
|
||||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
command: "vllm"
|
|
||||||
args: ["serve"]
|
|
||||||
docker:
|
docker:
|
||||||
enabled: true
|
enabled: true
|
||||||
image: "vllm/vllm-openai:latest"
|
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Key Features
|
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
|
||||||
|
|
||||||
- **Host Networking**: Uses `--network host` for seamless port management
|
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
|
||||||
- **GPU Support**: Includes `--gpus all` for GPU acceleration
|
|
||||||
- **Environment Variables**: Configure container environment as needed
|
|
||||||
- **Flexible Configuration**: Per-backend Docker settings with sensible defaults
|
|
||||||
|
|
||||||
### Requirements
|
|
||||||
|
|
||||||
- Docker installed and running
|
|
||||||
- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
|
|
||||||
- No local backend installation required when using Docker
|
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
@@ -196,30 +202,34 @@ server:
|
|||||||
host: "0.0.0.0" # Server host to bind to
|
host: "0.0.0.0" # Server host to bind to
|
||||||
port: 8080 # Server port to bind to
|
port: 8080 # Server port to bind to
|
||||||
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
||||||
|
allowed_headers: ["*"] # Allowed CORS headers (default: all)
|
||||||
enable_swagger: false # Enable Swagger UI for API docs
|
enable_swagger: false # Enable Swagger UI for API docs
|
||||||
|
|
||||||
backends:
|
backends:
|
||||||
llama-cpp:
|
llama-cpp:
|
||||||
command: "llama-server"
|
command: "llama-server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false
|
enabled: false
|
||||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
environment: {}
|
environment: {} # Environment variables for the container
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
command: "vllm"
|
command: "vllm"
|
||||||
args: ["serve"]
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false
|
enabled: false
|
||||||
image: "vllm/vllm-openai:latest"
|
image: "vllm/vllm-openai:latest"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
environment: {}
|
environment: {} # Environment variables for the container
|
||||||
|
|
||||||
mlx:
|
mlx:
|
||||||
command: "mlx_lm.server"
|
command: "mlx_lm.server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
|
||||||
instances:
|
instances:
|
||||||
port_range: [8000, 9000] # Port range for instances
|
port_range: [8000, 9000] # Port range for instances
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the instance manager
|
// Initialize the instance manager
|
||||||
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
|
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes, cfg.LocalNode)
|
||||||
|
|
||||||
// Create a new handler with the instance manager
|
// Create a new handler with the instance manager
|
||||||
handler := server.NewHandler(instanceManager, cfg)
|
handler := server.NewHandler(instanceManager, cfg)
|
||||||
|
|||||||
23
docker/Dockerfile.llamacpp
Normal file
23
docker/Dockerfile.llamacpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
|
||||||
|
|
||||||
|
# Install curl for downloading llamactl
|
||||||
|
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Download and install the latest llamactl release
|
||||||
|
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
|
||||||
|
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
|
||||||
|
mv llamactl /usr/local/bin/ && \
|
||||||
|
chmod +x /usr/local/bin/llamactl
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
RUN mkdir -p /data
|
||||||
|
WORKDIR /data
|
||||||
|
|
||||||
|
# Expose the default llamactl port
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
|
||||||
|
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
|
||||||
|
|
||||||
|
# Set llamactl as the entrypoint
|
||||||
|
ENTRYPOINT ["llamactl"]
|
||||||
64
docker/Dockerfile.source
Normal file
64
docker/Dockerfile.source
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
# WebUI build stage
|
||||||
|
FROM node:20-alpine AS webui-builder
|
||||||
|
|
||||||
|
WORKDIR /webui
|
||||||
|
|
||||||
|
# Copy webui package files
|
||||||
|
COPY webui/package*.json ./
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
# Copy webui source
|
||||||
|
COPY webui/ ./
|
||||||
|
|
||||||
|
# Build webui
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Go build stage
|
||||||
|
FROM golang:1.24-alpine AS builder
|
||||||
|
|
||||||
|
# Install build dependencies
|
||||||
|
RUN apk add --no-cache git ca-certificates
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
|
# Copy go mod files
|
||||||
|
COPY go.mod go.sum ./
|
||||||
|
|
||||||
|
# Download dependencies
|
||||||
|
RUN go mod download
|
||||||
|
|
||||||
|
# Copy source code
|
||||||
|
COPY cmd/ ./cmd/
|
||||||
|
COPY pkg/ ./pkg/
|
||||||
|
COPY apidocs/ ./apidocs/
|
||||||
|
COPY webui/webui.go ./webui/
|
||||||
|
|
||||||
|
# Copy built webui from webui-builder
|
||||||
|
COPY --from=webui-builder /webui/dist ./webui/dist
|
||||||
|
|
||||||
|
# Build the application
|
||||||
|
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
|
||||||
|
|
||||||
|
# Final stage
|
||||||
|
FROM alpine:latest
|
||||||
|
|
||||||
|
# Install runtime dependencies
|
||||||
|
RUN apk --no-cache add ca-certificates
|
||||||
|
|
||||||
|
# Create data directory
|
||||||
|
RUN mkdir -p /data
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
WORKDIR /data
|
||||||
|
|
||||||
|
# Copy binary from builder
|
||||||
|
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
|
||||||
|
|
||||||
|
# Expose the default port
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
# Set llamactl as the entrypoint
|
||||||
|
ENTRYPOINT ["llamactl"]
|
||||||
20
docker/Dockerfile.vllm
Normal file
20
docker/Dockerfile.vllm
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
FROM vllm/vllm-openai:latest
|
||||||
|
|
||||||
|
# Install curl for downloading llamactl
|
||||||
|
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Download and install the latest llamactl release
|
||||||
|
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
|
||||||
|
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
|
||||||
|
mv llamactl /usr/local/bin/ && \
|
||||||
|
chmod +x /usr/local/bin/llamactl
|
||||||
|
|
||||||
|
# Set working directory
|
||||||
|
RUN mkdir -p /data
|
||||||
|
WORKDIR /data
|
||||||
|
|
||||||
|
# Expose the default llamactl port
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
# Set llamactl as the entrypoint
|
||||||
|
ENTRYPOINT ["llamactl"]
|
||||||
56
docker/docker-compose.yml
Normal file
56
docker/docker-compose.yml
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
version: '3.8'
|
||||||
|
|
||||||
|
services:
|
||||||
|
llamactl-llamacpp:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: docker/Dockerfile.llamacpp
|
||||||
|
image: llamactl:llamacpp-cuda
|
||||||
|
container_name: llamactl-llamacpp
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
|
volumes:
|
||||||
|
- ./data/llamacpp:/data
|
||||||
|
- ./models:/models # Mount models directory
|
||||||
|
- ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
|
||||||
|
environment:
|
||||||
|
# Set data directory for persistence
|
||||||
|
- LLAMACTL_DATA_DIR=/data
|
||||||
|
# Enable Docker mode for nested containers (if needed)
|
||||||
|
- LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
llamactl-vllm:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: docker/Dockerfile.vllm
|
||||||
|
image: llamactl:vllm-cuda
|
||||||
|
container_name: llamactl-vllm
|
||||||
|
ports:
|
||||||
|
- "8081:8080" # Use different port to avoid conflicts
|
||||||
|
volumes:
|
||||||
|
- ./data/vllm:/data
|
||||||
|
- ./models:/models # Mount models directory
|
||||||
|
- ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
|
||||||
|
environment:
|
||||||
|
# Set data directory for persistence
|
||||||
|
- LLAMACTL_DATA_DIR=/data
|
||||||
|
# Enable Docker mode for nested containers (if needed)
|
||||||
|
- LLAMACTL_VLLM_DOCKER_ENABLED=false
|
||||||
|
# vLLM specific environment variables
|
||||||
|
- CUDA_VISIBLE_DEVICES=all
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
restart: unless-stopped
|
||||||
60
docs/fix_line_endings.py
Normal file
60
docs/fix_line_endings.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
MkDocs hook to fix line endings for proper rendering.
|
||||||
|
Automatically adds two spaces at the end of lines that need line breaks.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def on_page_markdown(markdown, page, config, **kwargs):
|
||||||
|
"""
|
||||||
|
Fix line endings in markdown content for proper MkDocs rendering.
|
||||||
|
Adds two spaces at the end of lines that need line breaks.
|
||||||
|
"""
|
||||||
|
lines = markdown.split('\n')
|
||||||
|
processed_lines = []
|
||||||
|
in_code_block = False
|
||||||
|
|
||||||
|
for i, line in enumerate(lines):
|
||||||
|
stripped = line.strip()
|
||||||
|
|
||||||
|
# Track code blocks
|
||||||
|
if stripped.startswith('```'):
|
||||||
|
in_code_block = not in_code_block
|
||||||
|
processed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip processing inside code blocks
|
||||||
|
if in_code_block:
|
||||||
|
processed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip empty lines
|
||||||
|
if not stripped:
|
||||||
|
processed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip lines that shouldn't have line breaks:
|
||||||
|
# - Headers (# ## ###)
|
||||||
|
# - Blockquotes (>)
|
||||||
|
# - Table rows (|)
|
||||||
|
# - Lines already ending with two spaces
|
||||||
|
# - YAML front matter and HTML tags
|
||||||
|
# - Standalone punctuation lines
|
||||||
|
if (stripped.startswith('#') or
|
||||||
|
stripped.startswith('>') or
|
||||||
|
'|' in stripped or
|
||||||
|
line.endswith(' ') or
|
||||||
|
stripped.startswith('---') or
|
||||||
|
stripped.startswith('<') or
|
||||||
|
stripped.endswith('>') or
|
||||||
|
stripped in ('.', '!', '?', ':', ';', '```', '---', ',')):
|
||||||
|
processed_lines.append(line)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Add two spaces to lines that end with regular text or most punctuation
|
||||||
|
if stripped and not in_code_block:
|
||||||
|
processed_lines.append(line.rstrip() + ' ')
|
||||||
|
else:
|
||||||
|
processed_lines.append(line)
|
||||||
|
|
||||||
|
return '\n'.join(processed_lines)
|
||||||
@@ -17,30 +17,37 @@ server:
|
|||||||
host: "0.0.0.0" # Server host to bind to
|
host: "0.0.0.0" # Server host to bind to
|
||||||
port: 8080 # Server port to bind to
|
port: 8080 # Server port to bind to
|
||||||
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
allowed_origins: ["*"] # Allowed CORS origins (default: all)
|
||||||
|
allowed_headers: ["*"] # Allowed CORS headers (default: all)
|
||||||
enable_swagger: false # Enable Swagger UI for API docs
|
enable_swagger: false # Enable Swagger UI for API docs
|
||||||
|
|
||||||
backends:
|
backends:
|
||||||
llama-cpp:
|
llama-cpp:
|
||||||
command: "llama-server"
|
command: "llama-server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false
|
enabled: false
|
||||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
environment: {}
|
environment: {}
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
command: "vllm"
|
command: "vllm"
|
||||||
args: ["serve"]
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false
|
enabled: false
|
||||||
image: "vllm/vllm-openai:latest"
|
image: "vllm/vllm-openai:latest"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
environment: {}
|
environment: {}
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
|
|
||||||
mlx:
|
mlx:
|
||||||
command: "mlx_lm.server"
|
command: "mlx_lm.server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
|
|
||||||
instances:
|
instances:
|
||||||
port_range: [8000, 9000] # Port range for instances
|
port_range: [8000, 9000] # Port range for instances
|
||||||
@@ -63,6 +70,10 @@ auth:
|
|||||||
inference_keys: [] # Keys for inference endpoints
|
inference_keys: [] # Keys for inference endpoints
|
||||||
require_management_auth: true # Require auth for management endpoints
|
require_management_auth: true # Require auth for management endpoints
|
||||||
management_keys: [] # Keys for management endpoints
|
management_keys: [] # Keys for management endpoints
|
||||||
|
|
||||||
|
local_node: "main" # Name of the local node (default: "main")
|
||||||
|
nodes: # Node configuration for multi-node deployment
|
||||||
|
main: # Default local node (empty config)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Configuration Files
|
## Configuration Files
|
||||||
@@ -98,6 +109,7 @@ server:
|
|||||||
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
|
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
|
||||||
port: 8080 # Server port to bind to (default: 8080)
|
port: 8080 # Server port to bind to (default: 8080)
|
||||||
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
|
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
|
||||||
|
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
|
||||||
enable_swagger: false # Enable Swagger UI (default: false)
|
enable_swagger: false # Enable Swagger UI (default: false)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -113,36 +125,74 @@ backends:
|
|||||||
llama-cpp:
|
llama-cpp:
|
||||||
command: "llama-server"
|
command: "llama-server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false # Enable Docker runtime (default: false)
|
enabled: false # Enable Docker runtime (default: false)
|
||||||
image: "ghcr.io/ggml-org/llama.cpp:server"
|
image: "ghcr.io/ggml-org/llama.cpp:server"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
|
||||||
environment: {}
|
environment: {}
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
|
|
||||||
vllm:
|
vllm:
|
||||||
command: "vllm"
|
command: "vllm"
|
||||||
args: ["serve"]
|
args: ["serve"]
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
docker:
|
docker:
|
||||||
enabled: false
|
enabled: false # Enable Docker runtime (default: false)
|
||||||
image: "vllm/vllm-openai:latest"
|
image: "vllm/vllm-openai:latest"
|
||||||
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
|
||||||
environment: {}
|
environment: {}
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
|
|
||||||
mlx:
|
mlx:
|
||||||
command: "mlx_lm.server"
|
command: "mlx_lm.server"
|
||||||
args: []
|
args: []
|
||||||
|
environment: {} # Environment variables for the backend process
|
||||||
# MLX does not support Docker
|
# MLX does not support Docker
|
||||||
|
response_headers: {} # Additional response headers to send with responses
|
||||||
```
|
```
|
||||||
|
|
||||||
**Backend Configuration Fields:**
|
**Backend Configuration Fields:**
|
||||||
- `command`: Executable name/path for the backend
|
- `command`: Executable name/path for the backend
|
||||||
- `args`: Default arguments prepended to all instances
|
- `args`: Default arguments prepended to all instances
|
||||||
|
- `environment`: Environment variables for the backend process (optional)
|
||||||
|
- `response_headers`: Additional response headers to send with responses (optional)
|
||||||
- `docker`: Docker-specific configuration (optional)
|
- `docker`: Docker-specific configuration (optional)
|
||||||
- `enabled`: Boolean flag to enable Docker runtime
|
- `enabled`: Boolean flag to enable Docker runtime
|
||||||
- `image`: Docker image to use
|
- `image`: Docker image to use
|
||||||
- `args`: Additional arguments passed to `docker run`
|
- `args`: Additional arguments passed to `docker run`
|
||||||
- `environment`: Environment variables for the container (optional)
|
- `environment`: Environment variables for the container (optional)
|
||||||
|
|
||||||
|
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
|
||||||
|
|
||||||
|
**Environment Variables:**
|
||||||
|
|
||||||
|
**LlamaCpp Backend:**
|
||||||
|
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
|
||||||
|
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
|
||||||
|
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||||
|
|
||||||
|
**VLLM Backend:**
|
||||||
|
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
|
||||||
|
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
|
||||||
|
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||||
|
|
||||||
|
**MLX Backend:**
|
||||||
|
- `LLAMACTL_MLX_COMMAND` - MLX executable command
|
||||||
|
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
|
||||||
|
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
|
||||||
|
|
||||||
### Instance Configuration
|
### Instance Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
@@ -195,12 +245,26 @@ auth:
|
|||||||
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
|
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
|
||||||
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
|
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
|
||||||
|
|
||||||
## Command Line Options
|
### Remote Node Configuration
|
||||||
|
|
||||||
View all available command line options:
|
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
|
||||||
|
|
||||||
```bash
|
```yaml
|
||||||
llamactl --help
|
local_node: "main" # Name of the local node (default: "main")
|
||||||
|
nodes: # Node configuration map
|
||||||
|
main: # Local node (empty address means local)
|
||||||
|
address: "" # Not used for local node
|
||||||
|
api_key: "" # Not used for local node
|
||||||
|
worker1: # Remote worker node
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
api_key: "worker1-api-key" # Management API key for authentication
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also override configuration using command line flags when starting llamactl.
|
**Node Configuration Fields:**
|
||||||
|
- `local_node`: Specifies which node in the `nodes` map represents the local node. Must match exactly what other nodes call this node.
|
||||||
|
- `nodes`: Map of node configurations
|
||||||
|
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
|
||||||
|
- `api_key`: Management API key for authenticating with the remote node
|
||||||
|
|
||||||
|
**Environment Variables:**
|
||||||
|
- `LLAMACTL_LOCAL_NODE` - Name of the local node
|
||||||
|
|||||||
@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
|
|||||||
# Windows - Download from releases page
|
# Windows - Download from releases page
|
||||||
```
|
```
|
||||||
|
|
||||||
### Option 2: Build from Source
|
### Option 2: Docker
|
||||||
|
|
||||||
|
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
|
||||||
|
|
||||||
|
**Available Dockerfiles (CUDA):**
|
||||||
|
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
|
||||||
|
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
|
||||||
|
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
|
||||||
|
|
||||||
|
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
|
||||||
|
|
||||||
|
#### Using Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone the repository
|
||||||
|
git clone https://github.com/lordmathis/llamactl.git
|
||||||
|
cd llamactl
|
||||||
|
|
||||||
|
# Create directories for data and models
|
||||||
|
mkdir -p data/llamacpp data/vllm models
|
||||||
|
|
||||||
|
# Start llamactl with llama.cpp backend
|
||||||
|
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
|
||||||
|
|
||||||
|
# Or start llamactl with vLLM backend
|
||||||
|
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
|
||||||
|
```
|
||||||
|
|
||||||
|
Access the dashboard at:
|
||||||
|
- llamactl with llama.cpp: http://localhost:8080
|
||||||
|
- llamactl with vLLM: http://localhost:8081
|
||||||
|
|
||||||
|
#### Using Docker Build and Run
|
||||||
|
|
||||||
|
**llamactl with llama.cpp CUDA:**
|
||||||
|
```bash
|
||||||
|
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
|
||||||
|
docker run -d \
|
||||||
|
--name llamactl-llamacpp \
|
||||||
|
--gpus all \
|
||||||
|
-p 8080:8080 \
|
||||||
|
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
|
||||||
|
llamactl:llamacpp-cuda
|
||||||
|
```
|
||||||
|
|
||||||
|
**llamactl with vLLM CUDA:**
|
||||||
|
```bash
|
||||||
|
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
|
||||||
|
docker run -d \
|
||||||
|
--name llamactl-vllm \
|
||||||
|
--gpus all \
|
||||||
|
-p 8080:8080 \
|
||||||
|
-v ~/.cache/huggingface:/root/.cache/huggingface \
|
||||||
|
llamactl:vllm-cuda
|
||||||
|
```
|
||||||
|
|
||||||
|
**llamactl built from source:**
|
||||||
|
```bash
|
||||||
|
docker build -f docker/Dockerfile.source -t llamactl:source .
|
||||||
|
docker run -d \
|
||||||
|
--name llamactl \
|
||||||
|
-p 8080:8080 \
|
||||||
|
llamactl:source
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option 3: Build from Source
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
- Go 1.24 or later
|
- Go 1.24 or later
|
||||||
@@ -92,6 +157,13 @@ cd webui && npm ci && npm run build && cd ..
|
|||||||
go build -o llamactl ./cmd/server
|
go build -o llamactl ./cmd/server
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Remote Node Installation
|
||||||
|
|
||||||
|
For deployments with remote nodes:
|
||||||
|
- Install llamactl on each node using any of the methods above
|
||||||
|
- Configure API keys for authentication between nodes
|
||||||
|
- Ensure node names are consistent across all configurations
|
||||||
|
|
||||||
## Verification
|
## Verification
|
||||||
|
|
||||||
Verify your installation by checking the version:
|
Verify your installation by checking the version:
|
||||||
@@ -103,3 +175,5 @@ llamactl --version
|
|||||||
## Next Steps
|
## Next Steps
|
||||||
|
|
||||||
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
|
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
|
||||||
|
|
||||||
|
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
|
||||||
|
|||||||
@@ -116,7 +116,19 @@ Create and start a new instance.
|
|||||||
POST /api/v1/instances/{name}
|
POST /api/v1/instances/{name}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
|
**Request Body:** JSON object with instance configuration. Common fields include:
|
||||||
|
|
||||||
|
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
|
||||||
|
- `backend_options`: Backend-specific configuration
|
||||||
|
- `auto_restart`: Enable automatic restart on failure
|
||||||
|
- `max_restarts`: Maximum restart attempts
|
||||||
|
- `restart_delay`: Delay between restarts in seconds
|
||||||
|
- `on_demand_start`: Start instance when receiving requests
|
||||||
|
- `idle_timeout`: Idle timeout in minutes
|
||||||
|
- `environment`: Environment variables as key-value pairs
|
||||||
|
- `nodes`: Array with single node name to deploy the instance to (for remote deployments)
|
||||||
|
|
||||||
|
See [Managing Instances](managing-instances.md) for complete configuration options.
|
||||||
|
|
||||||
**Response:**
|
**Response:**
|
||||||
```json
|
```json
|
||||||
@@ -354,7 +366,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
|
|||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-H "Authorization: Bearer your-api-key" \
|
-H "Authorization: Bearer your-api-key" \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "/models/llama-2-7b.gguf"
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-2-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"environment": {
|
||||||
|
"CUDA_VISIBLE_DEVICES": "0",
|
||||||
|
"OMP_NUM_THREADS": "8"
|
||||||
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
# Check instance status
|
# Check instance status
|
||||||
@@ -386,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
|
|||||||
http://localhost:8080/api/v1/instances/my-model
|
http://localhost:8080/api/v1/instances/my-model
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Remote Node Instance Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create instance on specific remote node
|
||||||
|
curl -X POST http://localhost:8080/api/v1/instances/remote-model \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer your-api-key" \
|
||||||
|
-d '{
|
||||||
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-2-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"nodes": ["worker1"]
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Check status of remote instance
|
||||||
|
curl -H "Authorization: Bearer your-api-key" \
|
||||||
|
http://localhost:8080/api/v1/instances/remote-model
|
||||||
|
|
||||||
|
# Use remote instance with OpenAI-compatible API
|
||||||
|
curl -X POST http://localhost:8080/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer your-inference-api-key" \
|
||||||
|
-d '{
|
||||||
|
"model": "remote-model",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello from remote node!"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
### Using the Proxy Endpoint
|
### Using the Proxy Endpoint
|
||||||
|
|
||||||
You can also directly proxy requests to the llama-server instance:
|
You can also directly proxy requests to the llama-server instance:
|
||||||
|
|||||||
@@ -39,25 +39,27 @@ Each instance is displayed as a card showing:
|
|||||||
|
|
||||||
1. Click the **"Create Instance"** button on the dashboard
|
1. Click the **"Create Instance"** button on the dashboard
|
||||||
2. Enter a unique **Name** for your instance (only required field)
|
2. Enter a unique **Name** for your instance (only required field)
|
||||||
3. **Choose Backend Type**:
|
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
|
||||||
|
4. **Choose Backend Type**:
|
||||||
- **llama.cpp**: For GGUF models using llama-server
|
- **llama.cpp**: For GGUF models using llama-server
|
||||||
- **MLX**: For MLX-optimized models (macOS only)
|
- **MLX**: For MLX-optimized models (macOS only)
|
||||||
- **vLLM**: For distributed serving and high-throughput inference
|
- **vLLM**: For distributed serving and high-throughput inference
|
||||||
4. Configure model source:
|
5. Configure model source:
|
||||||
- **For llama.cpp**: GGUF model path or HuggingFace repo
|
- **For llama.cpp**: GGUF model path or HuggingFace repo
|
||||||
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
|
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
|
||||||
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
|
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
|
||||||
5. Configure optional instance management settings:
|
6. Configure optional instance management settings:
|
||||||
- **Auto Restart**: Automatically restart instance on failure
|
- **Auto Restart**: Automatically restart instance on failure
|
||||||
- **Max Restarts**: Maximum number of restart attempts
|
- **Max Restarts**: Maximum number of restart attempts
|
||||||
- **Restart Delay**: Delay in seconds between restart attempts
|
- **Restart Delay**: Delay in seconds between restart attempts
|
||||||
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
||||||
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
||||||
6. Configure backend-specific options:
|
- **Environment Variables**: Set custom environment variables for the instance process
|
||||||
|
7. Configure backend-specific options:
|
||||||
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
||||||
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
||||||
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
|
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
|
||||||
7. Click **"Create"** to save the instance
|
8. Click **"Create"** to save the instance
|
||||||
|
|
||||||
### Via API
|
### Via API
|
||||||
|
|
||||||
@@ -101,7 +103,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
|
|||||||
"gpu_memory_utilization": 0.9
|
"gpu_memory_utilization": 0.9
|
||||||
},
|
},
|
||||||
"auto_restart": true,
|
"auto_restart": true,
|
||||||
"on_demand_start": true
|
"on_demand_start": true,
|
||||||
|
"environment": {
|
||||||
|
"CUDA_VISIBLE_DEVICES": "0,1",
|
||||||
|
"NCCL_DEBUG": "INFO",
|
||||||
|
"PYTHONPATH": "/custom/path"
|
||||||
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
# Create llama.cpp instance with HuggingFace model
|
# Create llama.cpp instance with HuggingFace model
|
||||||
@@ -115,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
|
|||||||
"gpu_layers": 32
|
"gpu_layers": 32
|
||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
|
# Create instance on specific remote node
|
||||||
|
curl -X POST http://localhost:8080/api/instances/remote-llama \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"nodes": ["worker1"]
|
||||||
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
## Start Instance
|
## Start Instance
|
||||||
@@ -221,3 +240,4 @@ Check the health status of your instances:
|
|||||||
```bash
|
```bash
|
||||||
curl http://localhost:8080/api/instances/{name}/proxy/health
|
curl http://localhost:8080/api/instances/{name}/proxy/health
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -125,6 +125,34 @@ This helps determine if the issue is with llamactl or with the underlying llama.
|
|||||||
http://localhost:8080/api/v1/instances
|
http://localhost:8080/api/v1/instances
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Remote Node Issues
|
||||||
|
|
||||||
|
### Node Configuration
|
||||||
|
|
||||||
|
**Problem:** Remote instances not appearing or cannot be managed
|
||||||
|
|
||||||
|
**Solutions:**
|
||||||
|
1. **Verify node configuration:**
|
||||||
|
```yaml
|
||||||
|
local_node: "main" # Must match a key in nodes map
|
||||||
|
nodes:
|
||||||
|
main:
|
||||||
|
address: "" # Empty for local node
|
||||||
|
worker1:
|
||||||
|
address: "http://worker1.internal:8080"
|
||||||
|
api_key: "secure-key" # Must match worker1's management key
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Check node name consistency:**
|
||||||
|
- `local_node` on each node must match what other nodes call it
|
||||||
|
- Node names are case-sensitive
|
||||||
|
|
||||||
|
3. **Test remote node connectivity:**
|
||||||
|
```bash
|
||||||
|
curl -H "Authorization: Bearer remote-node-key" \
|
||||||
|
http://remote-node:8080/api/v1/instances
|
||||||
|
```
|
||||||
|
|
||||||
## Debugging and Logs
|
## Debugging and Logs
|
||||||
|
|
||||||
### Viewing Instance Logs
|
### Viewing Instance Logs
|
||||||
|
|||||||
@@ -69,6 +69,7 @@ plugins:
|
|||||||
|
|
||||||
hooks:
|
hooks:
|
||||||
- docs/readme_sync.py
|
- docs/readme_sync.py
|
||||||
|
- docs/fix_line_endings.py
|
||||||
|
|
||||||
extra:
|
extra:
|
||||||
version:
|
version:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
@@ -14,7 +15,9 @@ import (
|
|||||||
type BackendSettings struct {
|
type BackendSettings struct {
|
||||||
Command string `yaml:"command"`
|
Command string `yaml:"command"`
|
||||||
Args []string `yaml:"args"`
|
Args []string `yaml:"args"`
|
||||||
|
Environment map[string]string `yaml:"environment,omitempty"`
|
||||||
Docker *DockerSettings `yaml:"docker,omitempty"`
|
Docker *DockerSettings `yaml:"docker,omitempty"`
|
||||||
|
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DockerSettings contains Docker-specific configuration
|
// DockerSettings contains Docker-specific configuration
|
||||||
@@ -38,6 +41,8 @@ type AppConfig struct {
|
|||||||
Backends BackendConfig `yaml:"backends"`
|
Backends BackendConfig `yaml:"backends"`
|
||||||
Instances InstancesConfig `yaml:"instances"`
|
Instances InstancesConfig `yaml:"instances"`
|
||||||
Auth AuthConfig `yaml:"auth"`
|
Auth AuthConfig `yaml:"auth"`
|
||||||
|
LocalNode string `yaml:"local_node,omitempty"`
|
||||||
|
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
|
||||||
Version string `yaml:"-"`
|
Version string `yaml:"-"`
|
||||||
CommitHash string `yaml:"-"`
|
CommitHash string `yaml:"-"`
|
||||||
BuildTime string `yaml:"-"`
|
BuildTime string `yaml:"-"`
|
||||||
@@ -54,8 +59,14 @@ type ServerConfig struct {
|
|||||||
// Allowed origins for CORS (e.g., "http://localhost:3000")
|
// Allowed origins for CORS (e.g., "http://localhost:3000")
|
||||||
AllowedOrigins []string `yaml:"allowed_origins"`
|
AllowedOrigins []string `yaml:"allowed_origins"`
|
||||||
|
|
||||||
|
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
|
||||||
|
AllowedHeaders []string `yaml:"allowed_headers"`
|
||||||
|
|
||||||
// Enable Swagger UI for API documentation
|
// Enable Swagger UI for API documentation
|
||||||
EnableSwagger bool `yaml:"enable_swagger"`
|
EnableSwagger bool `yaml:"enable_swagger"`
|
||||||
|
|
||||||
|
// Response headers to send with responses
|
||||||
|
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// InstancesConfig contains instance management configuration
|
// InstancesConfig contains instance management configuration
|
||||||
@@ -119,6 +130,11 @@ type AuthConfig struct {
|
|||||||
ManagementKeys []string `yaml:"management_keys"`
|
ManagementKeys []string `yaml:"management_keys"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeConfig struct {
|
||||||
|
Address string `yaml:"address"`
|
||||||
|
APIKey string `yaml:"api_key,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// LoadConfig loads configuration with the following precedence:
|
// LoadConfig loads configuration with the following precedence:
|
||||||
// 1. Hardcoded defaults
|
// 1. Hardcoded defaults
|
||||||
// 2. Config file
|
// 2. Config file
|
||||||
@@ -130,12 +146,18 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
Host: "0.0.0.0",
|
Host: "0.0.0.0",
|
||||||
Port: 8080,
|
Port: 8080,
|
||||||
AllowedOrigins: []string{"*"}, // Default to allow all origins
|
AllowedOrigins: []string{"*"}, // Default to allow all origins
|
||||||
|
AllowedHeaders: []string{"*"}, // Default to allow all headers
|
||||||
EnableSwagger: false,
|
EnableSwagger: false,
|
||||||
},
|
},
|
||||||
|
LocalNode: "main",
|
||||||
|
Nodes: map[string]NodeConfig{
|
||||||
|
"main": {}, // Local node with empty config
|
||||||
|
},
|
||||||
Backends: BackendConfig{
|
Backends: BackendConfig{
|
||||||
LlamaCpp: BackendSettings{
|
LlamaCpp: BackendSettings{
|
||||||
Command: "llama-server",
|
Command: "llama-server",
|
||||||
Args: []string{},
|
Args: []string{},
|
||||||
|
Environment: map[string]string{},
|
||||||
Docker: &DockerSettings{
|
Docker: &DockerSettings{
|
||||||
Enabled: false,
|
Enabled: false,
|
||||||
Image: "ghcr.io/ggml-org/llama.cpp:server",
|
Image: "ghcr.io/ggml-org/llama.cpp:server",
|
||||||
@@ -167,8 +189,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
Instances: InstancesConfig{
|
Instances: InstancesConfig{
|
||||||
PortRange: [2]int{8000, 9000},
|
PortRange: [2]int{8000, 9000},
|
||||||
DataDir: getDefaultDataDirectory(),
|
DataDir: getDefaultDataDirectory(),
|
||||||
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
|
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
|
||||||
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
|
// should be relative path to DataDir if not explicitly set.
|
||||||
|
InstancesDir: "",
|
||||||
|
LogsDir: "",
|
||||||
AutoCreateDirs: true,
|
AutoCreateDirs: true,
|
||||||
MaxInstances: -1, // -1 means unlimited
|
MaxInstances: -1, // -1 means unlimited
|
||||||
MaxRunningInstances: -1, // -1 means unlimited
|
MaxRunningInstances: -1, // -1 means unlimited
|
||||||
@@ -196,6 +220,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
// 3. Override with environment variables
|
// 3. Override with environment variables
|
||||||
loadEnvVars(&cfg)
|
loadEnvVars(&cfg)
|
||||||
|
|
||||||
|
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
|
||||||
|
if cfg.Instances.InstancesDir == "" {
|
||||||
|
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
|
||||||
|
}
|
||||||
|
if cfg.Instances.LogsDir == "" {
|
||||||
|
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
|
||||||
|
}
|
||||||
|
|
||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -216,6 +248,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
|
|||||||
if err := yaml.Unmarshal(data, cfg); err != nil {
|
if err := yaml.Unmarshal(data, cfg); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
log.Printf("Read config at %s", path)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -288,6 +321,12 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
|
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
|
||||||
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
|
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
|
||||||
}
|
}
|
||||||
|
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
|
||||||
|
if cfg.Backends.LlamaCpp.Environment == nil {
|
||||||
|
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
|
||||||
|
}
|
||||||
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
|
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
|
||||||
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
|
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
|
||||||
if cfg.Backends.LlamaCpp.Docker == nil {
|
if cfg.Backends.LlamaCpp.Docker == nil {
|
||||||
@@ -315,18 +354,28 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
|
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
|
||||||
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
|
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
|
||||||
}
|
}
|
||||||
// Parse env vars in format "KEY1=value1,KEY2=value2"
|
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
|
||||||
for _, envPair := range strings.Split(llamaDockerEnv, ",") {
|
|
||||||
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
|
||||||
cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
|
|
||||||
}
|
}
|
||||||
|
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||||
|
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
|
||||||
|
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
|
||||||
}
|
}
|
||||||
|
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
|
||||||
}
|
}
|
||||||
|
|
||||||
// vLLM backend
|
// vLLM backend
|
||||||
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
|
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
|
||||||
cfg.Backends.VLLM.Command = vllmCmd
|
cfg.Backends.VLLM.Command = vllmCmd
|
||||||
}
|
}
|
||||||
|
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
|
||||||
|
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
|
||||||
|
}
|
||||||
|
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
|
||||||
|
if cfg.Backends.VLLM.Environment == nil {
|
||||||
|
cfg.Backends.VLLM.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
|
||||||
|
}
|
||||||
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
|
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
|
||||||
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
|
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
|
||||||
if cfg.Backends.VLLM.Docker == nil {
|
if cfg.Backends.VLLM.Docker == nil {
|
||||||
@@ -354,12 +403,13 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if cfg.Backends.VLLM.Docker.Environment == nil {
|
if cfg.Backends.VLLM.Docker.Environment == nil {
|
||||||
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
|
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
|
||||||
}
|
}
|
||||||
// Parse env vars in format "KEY1=value1,KEY2=value2"
|
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
|
||||||
for _, envPair := range strings.Split(vllmDockerEnv, ",") {
|
|
||||||
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
|
||||||
cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
|
|
||||||
}
|
}
|
||||||
|
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||||
|
if cfg.Backends.VLLM.ResponseHeaders == nil {
|
||||||
|
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
|
||||||
}
|
}
|
||||||
|
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MLX backend
|
// MLX backend
|
||||||
@@ -369,6 +419,18 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
|
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
|
||||||
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
|
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
|
||||||
}
|
}
|
||||||
|
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
|
||||||
|
if cfg.Backends.MLX.Environment == nil {
|
||||||
|
cfg.Backends.MLX.Environment = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
|
||||||
|
}
|
||||||
|
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
|
||||||
|
if cfg.Backends.MLX.ResponseHeaders == nil {
|
||||||
|
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
|
||||||
|
}
|
||||||
|
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
|
||||||
|
}
|
||||||
|
|
||||||
// Instance defaults
|
// Instance defaults
|
||||||
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
|
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
|
||||||
@@ -418,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
|
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
|
||||||
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
|
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Local node config
|
||||||
|
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
|
||||||
|
cfg.LocalNode = localNode
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
|
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
|
||||||
@@ -443,6 +510,32 @@ func ParsePortRange(s string) [2]int {
|
|||||||
return [2]int{0, 0} // Invalid format
|
return [2]int{0, 0} // Invalid format
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
|
||||||
|
// and populates the provided environment map
|
||||||
|
func parseEnvVars(envString string, envMap map[string]string) {
|
||||||
|
if envString == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, envPair := range strings.Split(envString, ",") {
|
||||||
|
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
||||||
|
envMap[parts[0]] = parts[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
|
||||||
|
// and populates the provided environment map
|
||||||
|
func parseHeaders(envString string, envMap map[string]string) {
|
||||||
|
if envString == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, envPair := range strings.Split(envString, ";") {
|
||||||
|
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
|
||||||
|
envMap[parts[0]] = parts[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// getDefaultDataDirectory returns platform-specific default data directory
|
// getDefaultDataDirectory returns platform-specific default data directory
|
||||||
func getDefaultDataDirectory() string {
|
func getDefaultDataDirectory() string {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
@@ -475,6 +568,10 @@ func getDefaultDataDirectory() string {
|
|||||||
// getDefaultConfigLocations returns platform-specific config file locations
|
// getDefaultConfigLocations returns platform-specific config file locations
|
||||||
func getDefaultConfigLocations() []string {
|
func getDefaultConfigLocations() []string {
|
||||||
var locations []string
|
var locations []string
|
||||||
|
// Use ./llamactl.yaml and ./config.yaml as the default config file
|
||||||
|
locations = append(locations, "llamactl.yaml")
|
||||||
|
locations = append(locations, "config.yaml")
|
||||||
|
|
||||||
homeDir, _ := os.UserHomeDir()
|
homeDir, _ := os.UserHomeDir()
|
||||||
|
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
|
|||||||
@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
|
|||||||
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
|
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadConfig_LocalNode(t *testing.T) {
|
||||||
|
t.Run("default local node", func(t *testing.T) {
|
||||||
|
cfg, err := config.LoadConfig("nonexistent-file.yaml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "main" {
|
||||||
|
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("local node from file", func(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
configFile := filepath.Join(tempDir, "test-config.yaml")
|
||||||
|
|
||||||
|
configContent := `
|
||||||
|
local_node: "worker1"
|
||||||
|
nodes:
|
||||||
|
worker1:
|
||||||
|
address: ""
|
||||||
|
worker2:
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
api_key: "test-key"
|
||||||
|
`
|
||||||
|
|
||||||
|
err := os.WriteFile(configFile, []byte(configContent), 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to write test config file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig(configFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "worker1" {
|
||||||
|
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify nodes map (includes default "main" + worker1 + worker2)
|
||||||
|
if len(cfg.Nodes) != 3 {
|
||||||
|
t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify local node exists and is empty
|
||||||
|
localNode, exists := cfg.Nodes["worker1"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected local node 'worker1' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if localNode.Address != "" {
|
||||||
|
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
|
||||||
|
}
|
||||||
|
if localNode.APIKey != "" {
|
||||||
|
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify remote node
|
||||||
|
remoteNode, exists := cfg.Nodes["worker2"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected remote node 'worker2' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if remoteNode.Address != "http://192.168.1.10:8080" {
|
||||||
|
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify default main node still exists
|
||||||
|
_, exists = cfg.Nodes["main"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected default 'main' node to still exist in nodes map")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("custom local node name in config", func(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
configFile := filepath.Join(tempDir, "test-config.yaml")
|
||||||
|
|
||||||
|
configContent := `
|
||||||
|
local_node: "primary"
|
||||||
|
nodes:
|
||||||
|
primary:
|
||||||
|
address: ""
|
||||||
|
worker1:
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
`
|
||||||
|
|
||||||
|
err := os.WriteFile(configFile, []byte(configContent), 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to write test config file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig(configFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "primary" {
|
||||||
|
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify nodes map includes default "main" + primary + worker1
|
||||||
|
if len(cfg.Nodes) != 3 {
|
||||||
|
t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
|
||||||
|
}
|
||||||
|
|
||||||
|
localNode, exists := cfg.Nodes["primary"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected local node 'primary' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if localNode.Address != "" {
|
||||||
|
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("local node from environment variable", func(t *testing.T) {
|
||||||
|
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
|
||||||
|
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig("nonexistent-file.yaml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "custom-node" {
|
||||||
|
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ type Process struct {
|
|||||||
options *CreateInstanceOptions `json:"-"`
|
options *CreateInstanceOptions `json:"-"`
|
||||||
globalInstanceSettings *config.InstancesConfig
|
globalInstanceSettings *config.InstancesConfig
|
||||||
globalBackendSettings *config.BackendConfig
|
globalBackendSettings *config.BackendConfig
|
||||||
|
localNodeName string `json:"-"` // Name of the local node for remote detection
|
||||||
|
|
||||||
// Status
|
// Status
|
||||||
Status InstanceStatus `json:"status"`
|
Status InstanceStatus `json:"status"`
|
||||||
@@ -66,7 +67,7 @@ type Process struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewInstance creates a new instance with the given name, log path, and options
|
// NewInstance creates a new instance with the given name, log path, and options
|
||||||
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
|
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, localNodeName string, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
|
||||||
// Validate and copy options
|
// Validate and copy options
|
||||||
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
|
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
|
||||||
|
|
||||||
@@ -78,6 +79,7 @@ func NewInstance(name string, globalBackendSettings *config.BackendConfig, globa
|
|||||||
options: options,
|
options: options,
|
||||||
globalInstanceSettings: globalInstanceSettings,
|
globalInstanceSettings: globalInstanceSettings,
|
||||||
globalBackendSettings: globalBackendSettings,
|
globalBackendSettings: globalBackendSettings,
|
||||||
|
localNodeName: localNodeName,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
timeProvider: realTimeProvider{},
|
timeProvider: realTimeProvider{},
|
||||||
Created: time.Now().Unix(),
|
Created: time.Now().Unix(),
|
||||||
@@ -145,6 +147,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preserve the original nodes to prevent changing instance location
|
||||||
|
if i.options != nil && i.options.Nodes != nil {
|
||||||
|
options.Nodes = i.options.Nodes
|
||||||
|
}
|
||||||
|
|
||||||
// Validate and copy options
|
// Validate and copy options
|
||||||
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
|
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
|
||||||
|
|
||||||
@@ -171,6 +178,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
|||||||
return nil, fmt.Errorf("instance %s has no options set", i.Name)
|
return nil, fmt.Errorf("instance %s has no options set", i.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
|
||||||
|
if len(i.options.Nodes) > 0 && i.options.Nodes[0] != i.localNodeName {
|
||||||
|
return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
|
||||||
|
}
|
||||||
|
|
||||||
var host string
|
var host string
|
||||||
var port int
|
var port int
|
||||||
switch i.options.BackendType {
|
switch i.options.BackendType {
|
||||||
@@ -198,6 +210,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
|||||||
|
|
||||||
proxy := httputil.NewSingleHostReverseProxy(targetURL)
|
proxy := httputil.NewSingleHostReverseProxy(targetURL)
|
||||||
|
|
||||||
|
var responseHeaders map[string]string
|
||||||
|
switch i.options.BackendType {
|
||||||
|
case backends.BackendTypeLlamaCpp:
|
||||||
|
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
|
||||||
|
case backends.BackendTypeVllm:
|
||||||
|
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
|
||||||
|
case backends.BackendTypeMlxLm:
|
||||||
|
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
|
||||||
|
}
|
||||||
proxy.ModifyResponse = func(resp *http.Response) error {
|
proxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// Remove CORS headers from llama-server response to avoid conflicts
|
// Remove CORS headers from llama-server response to avoid conflicts
|
||||||
// llamactl will add its own CORS headers
|
// llamactl will add its own CORS headers
|
||||||
@@ -207,6 +228,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
|||||||
resp.Header.Del("Access-Control-Allow-Credentials")
|
resp.Header.Del("Access-Control-Allow-Credentials")
|
||||||
resp.Header.Del("Access-Control-Max-Age")
|
resp.Header.Del("Access-Control-Max-Age")
|
||||||
resp.Header.Del("Access-Control-Expose-Headers")
|
resp.Header.Del("Access-Control-Expose-Headers")
|
||||||
|
|
||||||
|
for key, value := range responseHeaders {
|
||||||
|
resp.Header.Set(key, value)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -272,5 +297,35 @@ func (i *Process) UnmarshalJSON(data []byte) error {
|
|||||||
i.options = aux.Options
|
i.options = aux.Options
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize fields that are not serialized
|
||||||
|
if i.timeProvider == nil {
|
||||||
|
i.timeProvider = realTimeProvider{}
|
||||||
|
}
|
||||||
|
if i.logger == nil && i.globalInstanceSettings != nil {
|
||||||
|
i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *Process) IsRemote() bool {
|
||||||
|
i.mu.RLock()
|
||||||
|
defer i.mu.RUnlock()
|
||||||
|
|
||||||
|
if i.options == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no nodes specified, it's a local instance
|
||||||
|
if len(i.options.Nodes) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the first node is the local node, treat it as a local instance
|
||||||
|
if i.options.Nodes[0] == i.localNodeName {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, it's a remote instance
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ func TestNewInstance(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
|
|
||||||
if inst.Name != "test-instance" {
|
if inst.Name != "test-instance" {
|
||||||
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
|
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
|
||||||
@@ -115,7 +115,7 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
opts := instance.GetOptions()
|
opts := instance.GetOptions()
|
||||||
|
|
||||||
// Check that explicit values override defaults
|
// Check that explicit values override defaults
|
||||||
@@ -164,7 +164,7 @@ func TestSetOptions(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, "main", mockOnStatusChange)
|
||||||
|
|
||||||
// Update options
|
// Update options
|
||||||
newOptions := &instance.CreateInstanceOptions{
|
newOptions := &instance.CreateInstanceOptions{
|
||||||
@@ -191,6 +191,58 @@ func TestSetOptions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSetOptions_PreservesNodes(t *testing.T) {
|
||||||
|
backendConfig := &config.BackendConfig{
|
||||||
|
LlamaCpp: config.BackendSettings{
|
||||||
|
Command: "llama-server",
|
||||||
|
Args: []string{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
globalSettings := &config.InstancesConfig{
|
||||||
|
LogsDir: "/tmp/test",
|
||||||
|
DefaultAutoRestart: true,
|
||||||
|
DefaultMaxRestarts: 3,
|
||||||
|
DefaultRestartDelay: 5,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create instance with initial nodes
|
||||||
|
initialOptions := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
Nodes: []string{"worker1"},
|
||||||
|
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
Port: 8080,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, "main", mockOnStatusChange)
|
||||||
|
|
||||||
|
// Try to update with different nodes
|
||||||
|
updatedOptions := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
Nodes: []string{"worker2"}, // Attempt to change node
|
||||||
|
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/new-model.gguf",
|
||||||
|
Port: 8081,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst.SetOptions(updatedOptions)
|
||||||
|
opts := inst.GetOptions()
|
||||||
|
|
||||||
|
// Nodes should remain unchanged
|
||||||
|
if len(opts.Nodes) != 1 || opts.Nodes[0] != "worker1" {
|
||||||
|
t.Errorf("Expected nodes to remain ['worker1'], got %v", opts.Nodes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other options should be updated
|
||||||
|
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
|
||||||
|
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetProxy(t *testing.T) {
|
func TestGetProxy(t *testing.T) {
|
||||||
backendConfig := &config.BackendConfig{
|
backendConfig := &config.BackendConfig{
|
||||||
LlamaCpp: config.BackendSettings{
|
LlamaCpp: config.BackendSettings{
|
||||||
@@ -222,7 +274,7 @@ func TestGetProxy(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
|
|
||||||
// Get proxy for the first time
|
// Get proxy for the first time
|
||||||
proxy1, err := inst.GetProxy()
|
proxy1, err := inst.GetProxy()
|
||||||
@@ -277,7 +329,7 @@ func TestMarshalJSON(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
|
|
||||||
data, err := json.Marshal(instance)
|
data, err := json.Marshal(instance)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -446,7 +498,7 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
|
instance := instance.NewInstance("test", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
opts := instance.GetOptions()
|
opts := instance.GetOptions()
|
||||||
|
|
||||||
if opts.MaxRestarts == nil {
|
if opts.MaxRestarts == nil {
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"runtime"
|
"runtime"
|
||||||
"syscall"
|
"syscall"
|
||||||
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
|
|||||||
// Initialize last request time to current time when starting
|
// Initialize last request time to current time when starting
|
||||||
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
|
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
|
||||||
|
|
||||||
|
// Create context before building command (needed for CommandContext)
|
||||||
|
i.ctx, i.cancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Create log files
|
// Create log files
|
||||||
if err := i.logger.Create(); err != nil {
|
if err := i.logger.Create(); err != nil {
|
||||||
return fmt.Errorf("failed to create log files: %w", err)
|
return fmt.Errorf("failed to create log files: %w", err)
|
||||||
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
|
|||||||
if cmdErr != nil {
|
if cmdErr != nil {
|
||||||
return fmt.Errorf("failed to build command: %w", cmdErr)
|
return fmt.Errorf("failed to build command: %w", cmdErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
i.ctx, i.cancel = context.WithCancel(context.Background())
|
|
||||||
i.cmd = cmd
|
i.cmd = cmd
|
||||||
|
|
||||||
if runtime.GOOS != "windows" {
|
if runtime.GOOS != "windows" {
|
||||||
@@ -372,13 +374,27 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the environment variables
|
||||||
|
env := i.options.BuildEnvironment(backendConfig)
|
||||||
|
|
||||||
// Get the command to execute
|
// Get the command to execute
|
||||||
cmd := i.options.GetCommand(backendConfig)
|
command := i.options.GetCommand(backendConfig)
|
||||||
|
|
||||||
// Build command arguments
|
// Build command arguments
|
||||||
args := i.options.BuildCommandArgs(backendConfig)
|
args := i.options.BuildCommandArgs(backendConfig)
|
||||||
|
|
||||||
return exec.Command(cmd, args...), nil
|
// Create the exec.Cmd
|
||||||
|
cmd := exec.CommandContext(i.ctx, command, args...)
|
||||||
|
|
||||||
|
// Start with host environment variables
|
||||||
|
cmd.Env = os.Environ()
|
||||||
|
|
||||||
|
// Add/override with backend-specific environment variables
|
||||||
|
for k, v := range env {
|
||||||
|
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
|
||||||
|
}
|
||||||
|
|
||||||
|
return cmd, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getBackendConfig resolves the backend configuration for the current instance
|
// getBackendConfig resolves the backend configuration for the current instance
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"llamactl/pkg/backends/vllm"
|
"llamactl/pkg/backends/vllm"
|
||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"log"
|
"log"
|
||||||
|
"maps"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CreateInstanceOptions struct {
|
type CreateInstanceOptions struct {
|
||||||
@@ -20,10 +21,14 @@ type CreateInstanceOptions struct {
|
|||||||
OnDemandStart *bool `json:"on_demand_start,omitempty"`
|
OnDemandStart *bool `json:"on_demand_start,omitempty"`
|
||||||
// Idle timeout
|
// Idle timeout
|
||||||
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
|
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
|
||||||
|
//Environment variables
|
||||||
|
Environment map[string]string `json:"environment,omitempty"`
|
||||||
|
|
||||||
BackendType backends.BackendType `json:"backend_type"`
|
BackendType backends.BackendType `json:"backend_type"`
|
||||||
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
||||||
|
|
||||||
|
Nodes []string `json:"nodes,omitempty"`
|
||||||
|
|
||||||
// Backend-specific options
|
// Backend-specific options
|
||||||
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
|
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
|
||||||
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
|
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
|
||||||
@@ -240,3 +245,23 @@ func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSe
|
|||||||
|
|
||||||
return args
|
return args
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
|
||||||
|
env := map[string]string{}
|
||||||
|
|
||||||
|
if backendConfig.Environment != nil {
|
||||||
|
maps.Copy(env, backendConfig.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
|
||||||
|
if backendConfig.Docker.Environment != nil {
|
||||||
|
maps.Copy(env, backendConfig.Docker.Environment)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Environment != nil {
|
||||||
|
maps.Copy(env, c.Environment)
|
||||||
|
}
|
||||||
|
|
||||||
|
return env
|
||||||
|
}
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ func TestUpdateLastRequestTime(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
|
|
||||||
// Test that UpdateLastRequestTime doesn't panic
|
// Test that UpdateLastRequestTime doesn't panic
|
||||||
inst.UpdateLastRequestTime()
|
inst.UpdateLastRequestTime()
|
||||||
@@ -88,7 +88,7 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
|
|
||||||
// Instance is not running, should not timeout regardless of configuration
|
// Instance is not running, should not timeout regardless of configuration
|
||||||
if inst.ShouldTimeout() {
|
if inst.ShouldTimeout() {
|
||||||
@@ -132,7 +132,7 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
// Simulate running state
|
// Simulate running state
|
||||||
inst.SetStatus(instance.Running)
|
inst.SetStatus(instance.Running)
|
||||||
|
|
||||||
@@ -169,7 +169,7 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
inst.SetStatus(instance.Running)
|
inst.SetStatus(instance.Running)
|
||||||
|
|
||||||
// Update last request time to now
|
// Update last request time to now
|
||||||
@@ -207,7 +207,7 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
inst.SetStatus(instance.Running)
|
inst.SetStatus(instance.Running)
|
||||||
|
|
||||||
// Use MockTimeProvider to simulate old last request time
|
// Use MockTimeProvider to simulate old last request time
|
||||||
@@ -263,7 +263,7 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
|
|||||||
// Mock onStatusChange function
|
// Mock onStatusChange function
|
||||||
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
|
||||||
|
|
||||||
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
|
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
|
||||||
opts := inst.GetOptions()
|
opts := inst.GetOptions()
|
||||||
|
|
||||||
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
|
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
"llamactl/pkg/instance"
|
||||||
"log"
|
"log"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -25,10 +26,22 @@ type InstanceManager interface {
|
|||||||
StopInstance(name string) (*instance.Process, error)
|
StopInstance(name string) (*instance.Process, error)
|
||||||
EvictLRUInstance() error
|
EvictLRUInstance() error
|
||||||
RestartInstance(name string) (*instance.Process, error)
|
RestartInstance(name string) (*instance.Process, error)
|
||||||
GetInstanceLogs(name string) (string, error)
|
GetInstanceLogs(name string, numLines int) (string, error)
|
||||||
Shutdown()
|
Shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type RemoteManager interface {
|
||||||
|
ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
|
||||||
|
CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
|
||||||
|
GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
|
||||||
|
DeleteRemoteInstance(node *config.NodeConfig, name string) error
|
||||||
|
StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
type instanceManager struct {
|
type instanceManager struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
instances map[string]*instance.Process
|
instances map[string]*instance.Process
|
||||||
@@ -36,29 +49,51 @@ type instanceManager struct {
|
|||||||
ports map[int]bool
|
ports map[int]bool
|
||||||
instancesConfig config.InstancesConfig
|
instancesConfig config.InstancesConfig
|
||||||
backendsConfig config.BackendConfig
|
backendsConfig config.BackendConfig
|
||||||
|
localNodeName string // Name of the local node
|
||||||
|
|
||||||
// Timeout checker
|
// Timeout checker
|
||||||
timeoutChecker *time.Ticker
|
timeoutChecker *time.Ticker
|
||||||
shutdownChan chan struct{}
|
shutdownChan chan struct{}
|
||||||
shutdownDone chan struct{}
|
shutdownDone chan struct{}
|
||||||
isShutdown bool
|
isShutdown bool
|
||||||
|
|
||||||
|
// Remote instance management
|
||||||
|
httpClient *http.Client
|
||||||
|
instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config
|
||||||
|
nodeConfigMap map[string]*config.NodeConfig // Maps node name to node config for quick lookup
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewInstanceManager creates a new instance of InstanceManager.
|
// NewInstanceManager creates a new instance of InstanceManager.
|
||||||
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
|
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig, localNodeName string) InstanceManager {
|
||||||
if instancesConfig.TimeoutCheckInterval <= 0 {
|
if instancesConfig.TimeoutCheckInterval <= 0 {
|
||||||
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build node config map for quick lookup
|
||||||
|
nodeConfigMap := make(map[string]*config.NodeConfig)
|
||||||
|
for name := range nodesConfig {
|
||||||
|
nodeCopy := nodesConfig[name]
|
||||||
|
nodeConfigMap[name] = &nodeCopy
|
||||||
|
}
|
||||||
|
|
||||||
im := &instanceManager{
|
im := &instanceManager{
|
||||||
instances: make(map[string]*instance.Process),
|
instances: make(map[string]*instance.Process),
|
||||||
runningInstances: make(map[string]struct{}),
|
runningInstances: make(map[string]struct{}),
|
||||||
ports: make(map[int]bool),
|
ports: make(map[int]bool),
|
||||||
instancesConfig: instancesConfig,
|
instancesConfig: instancesConfig,
|
||||||
backendsConfig: backendsConfig,
|
backendsConfig: backendsConfig,
|
||||||
|
localNodeName: localNodeName,
|
||||||
|
|
||||||
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
||||||
shutdownChan: make(chan struct{}),
|
shutdownChan: make(chan struct{}),
|
||||||
shutdownDone: make(chan struct{}),
|
shutdownDone: make(chan struct{}),
|
||||||
|
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
|
|
||||||
|
instanceNodeMap: make(map[string]*config.NodeConfig),
|
||||||
|
nodeConfigMap: nodeConfigMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load existing instances from disk
|
// Load existing instances from disk
|
||||||
@@ -238,18 +273,37 @@ func (im *instanceManager) loadInstance(name, path string) error {
|
|||||||
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
|
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
|
options := persistedInstance.GetOptions()
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
// An instance is remote if Nodes is specified AND the first node is not the local node
|
||||||
|
isRemote := options != nil && len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
|
||||||
|
|
||||||
|
var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
|
||||||
|
if !isRemote {
|
||||||
|
// Only set status callback for local instances
|
||||||
|
statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
|
||||||
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
|
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create new inst using NewInstance (handles validation, defaults, setup)
|
// Create new inst using NewInstance (handles validation, defaults, setup)
|
||||||
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
|
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)
|
||||||
|
|
||||||
// Restore persisted fields that NewInstance doesn't set
|
// Restore persisted fields that NewInstance doesn't set
|
||||||
inst.Created = persistedInstance.Created
|
inst.Created = persistedInstance.Created
|
||||||
inst.SetStatus(persistedInstance.Status)
|
inst.SetStatus(persistedInstance.Status)
|
||||||
|
|
||||||
// Check for port conflicts and add to maps
|
// Handle remote instance mapping
|
||||||
|
if isRemote {
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
nodeConfig, exists := im.nodeConfigMap[nodeName]
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
|
||||||
|
}
|
||||||
|
im.instanceNodeMap[name] = nodeConfig
|
||||||
|
} else {
|
||||||
|
// Check for port conflicts only for local instances
|
||||||
if inst.GetPort() > 0 {
|
if inst.GetPort() > 0 {
|
||||||
port := inst.GetPort()
|
port := inst.GetPort()
|
||||||
if im.ports[port] {
|
if im.ports[port] {
|
||||||
@@ -257,34 +311,58 @@ func (im *instanceManager) loadInstance(name, path string) error {
|
|||||||
}
|
}
|
||||||
im.ports[port] = true
|
im.ports[port] = true
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
im.instances[name] = inst
|
im.instances[name] = inst
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
|
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
|
||||||
|
// For instances with auto-restart disabled, it sets their status to Stopped
|
||||||
func (im *instanceManager) autoStartInstances() {
|
func (im *instanceManager) autoStartInstances() {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
var instancesToStart []*instance.Process
|
var instancesToStart []*instance.Process
|
||||||
|
var instancesToStop []*instance.Process
|
||||||
for _, inst := range im.instances {
|
for _, inst := range im.instances {
|
||||||
if inst.IsRunning() && // Was running when persisted
|
if inst.IsRunning() && // Was running when persisted
|
||||||
inst.GetOptions() != nil &&
|
inst.GetOptions() != nil &&
|
||||||
inst.GetOptions().AutoRestart != nil &&
|
inst.GetOptions().AutoRestart != nil {
|
||||||
*inst.GetOptions().AutoRestart {
|
if *inst.GetOptions().AutoRestart {
|
||||||
instancesToStart = append(instancesToStart, inst)
|
instancesToStart = append(instancesToStart, inst)
|
||||||
|
} else {
|
||||||
|
// Instance was running but auto-restart is disabled, mark as stopped
|
||||||
|
instancesToStop = append(instancesToStop, inst)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
|
// Stop instances that have auto-restart disabled
|
||||||
|
for _, inst := range instancesToStop {
|
||||||
|
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
|
||||||
|
inst.SetStatus(instance.Stopped)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start instances that have auto-restart enabled
|
||||||
for _, inst := range instancesToStart {
|
for _, inst := range instancesToStart {
|
||||||
log.Printf("Auto-starting instance %s", inst.Name)
|
log.Printf("Auto-starting instance %s", inst.Name)
|
||||||
// Reset running state before starting (since Start() expects stopped instance)
|
// Reset running state before starting (since Start() expects stopped instance)
|
||||||
inst.SetStatus(instance.Stopped)
|
inst.SetStatus(instance.Stopped)
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
// Remote instance - use StartRemoteInstance
|
||||||
|
if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
|
||||||
|
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Local instance - call Start() directly
|
||||||
if err := inst.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
|
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
|
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
@@ -296,3 +374,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
|
|||||||
delete(im.runningInstances, name)
|
delete(im.runningInstances, name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getNodeForInstance returns the node configuration for a remote instance
|
||||||
|
// Returns nil if the instance is not remote or the node is not found
|
||||||
|
func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
|
||||||
|
if !inst.IsRemote() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have a cached mapping
|
||||||
|
if nodeConfig, exists := im.instanceNodeMap[inst.Name]; exists {
|
||||||
|
return nodeConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
|
|||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
mgr := manager.NewInstanceManager(backendConfig, cfg)
|
mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
if mgr == nil {
|
if mgr == nil {
|
||||||
t.Fatal("NewInstanceManager returned nil")
|
t.Fatal("NewInstanceManager returned nil")
|
||||||
}
|
}
|
||||||
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test instance persistence on creation
|
// Test instance persistence on creation
|
||||||
manager1 := manager.NewInstanceManager(backendConfig, cfg)
|
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
options := &instance.CreateInstanceOptions{
|
options := &instance.CreateInstanceOptions{
|
||||||
BackendType: backends.BackendTypeLlamaCpp,
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||||
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test loading instances from disk
|
// Test loading instances from disk
|
||||||
manager2 := manager.NewInstanceManager(backendConfig, cfg)
|
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
instances, err := manager2.ListInstances()
|
instances, err := manager2.ListInstances()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("ListInstances failed: %v", err)
|
t.Fatalf("ListInstances failed: %v", err)
|
||||||
@@ -207,5 +207,68 @@ func createTestManager() manager.InstanceManager {
|
|||||||
DefaultRestartDelay: 5,
|
DefaultRestartDelay: 5,
|
||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
return manager.NewInstanceManager(backendConfig, cfg)
|
return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
|
backendConfig := config.BackendConfig{
|
||||||
|
LlamaCpp: config.BackendSettings{
|
||||||
|
Command: "llama-server",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := config.InstancesConfig{
|
||||||
|
PortRange: [2]int{8000, 9000},
|
||||||
|
InstancesDir: tempDir,
|
||||||
|
MaxInstances: 10,
|
||||||
|
TimeoutCheckInterval: 5,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create first manager and instance with auto-restart disabled
|
||||||
|
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
|
|
||||||
|
autoRestart := false
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
AutoRestart: &autoRestart,
|
||||||
|
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||||
|
Model: "/path/to/model.gguf",
|
||||||
|
Port: 8080,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := manager1.CreateInstance("test-instance", options)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CreateInstance failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simulate instance being in running state when persisted
|
||||||
|
// (this would happen if the instance was running when llamactl was stopped)
|
||||||
|
inst.SetStatus(instance.Running)
|
||||||
|
|
||||||
|
// Shutdown first manager
|
||||||
|
manager1.Shutdown()
|
||||||
|
|
||||||
|
// Create second manager (simulating restart of llamactl)
|
||||||
|
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
|
|
||||||
|
// Get the loaded instance
|
||||||
|
loadedInst, err := manager2.GetInstance("test-instance")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GetInstance failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The instance should be marked as Stopped, not Running
|
||||||
|
// because auto-restart is disabled
|
||||||
|
if loadedInst.IsRunning() {
|
||||||
|
t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
|
||||||
|
}
|
||||||
|
|
||||||
|
if loadedInst.GetStatus() != instance.Stopped {
|
||||||
|
t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
|
||||||
|
}
|
||||||
|
|
||||||
|
manager2.Shutdown()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package manager
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"llamactl/pkg/backends"
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
"llamactl/pkg/instance"
|
||||||
"llamactl/pkg/validation"
|
"llamactl/pkg/validation"
|
||||||
"os"
|
"os"
|
||||||
@@ -11,16 +12,65 @@ import (
|
|||||||
|
|
||||||
type MaxRunningInstancesError error
|
type MaxRunningInstancesError error
|
||||||
|
|
||||||
|
// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
|
||||||
|
// while preserving the Nodes field to maintain remote instance tracking
|
||||||
|
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
|
||||||
|
if localInst == nil || remoteInst == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the remote instance options
|
||||||
|
remoteOptions := remoteInst.GetOptions()
|
||||||
|
if remoteOptions == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preserve the Nodes field from the local instance
|
||||||
|
localOptions := localInst.GetOptions()
|
||||||
|
var preservedNodes []string
|
||||||
|
if localOptions != nil && len(localOptions.Nodes) > 0 {
|
||||||
|
preservedNodes = make([]string, len(localOptions.Nodes))
|
||||||
|
copy(preservedNodes, localOptions.Nodes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a copy of remote options and restore the Nodes field
|
||||||
|
updatedOptions := *remoteOptions
|
||||||
|
updatedOptions.Nodes = preservedNodes
|
||||||
|
|
||||||
|
// Update the local instance with all remote data
|
||||||
|
localInst.SetOptions(&updatedOptions)
|
||||||
|
localInst.Status = remoteInst.Status
|
||||||
|
localInst.Created = remoteInst.Created
|
||||||
|
}
|
||||||
|
|
||||||
// ListInstances returns a list of all instances managed by the instance manager.
|
// ListInstances returns a list of all instances managed by the instance manager.
|
||||||
|
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
|
||||||
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
|
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
defer im.mu.RUnlock()
|
localInstances := make([]*instance.Process, 0, len(im.instances))
|
||||||
|
|
||||||
instances := make([]*instance.Process, 0, len(im.instances))
|
|
||||||
for _, inst := range im.instances {
|
for _, inst := range im.instances {
|
||||||
instances = append(instances, inst)
|
localInstances = append(localInstances, inst)
|
||||||
}
|
}
|
||||||
return instances, nil
|
im.mu.RUnlock()
|
||||||
|
|
||||||
|
// Update remote instances with live state
|
||||||
|
for _, inst := range localInstances {
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.GetRemoteInstance(node, inst.Name)
|
||||||
|
if err != nil {
|
||||||
|
// Log error but continue with stale data
|
||||||
|
// Don't fail the entire list operation due to one remote failure
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return localInstances, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateInstance creates a new instance with the given options and returns it.
|
// CreateInstance creates a new instance with the given options and returns it.
|
||||||
@@ -43,16 +93,57 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
// Check max instances limit after acquiring the lock
|
// Check if instance with this name already exists (must be globally unique)
|
||||||
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
|
||||||
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if instance with this name already exists
|
|
||||||
if im.instances[name] != nil {
|
if im.instances[name] != nil {
|
||||||
return nil, fmt.Errorf("instance with name %s already exists", name)
|
return nil, fmt.Errorf("instance with name %s already exists", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
// An instance is remote if Nodes is specified AND the first node is not the local node
|
||||||
|
isRemote := len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
|
||||||
|
var nodeConfig *config.NodeConfig
|
||||||
|
|
||||||
|
if isRemote {
|
||||||
|
// Validate that the node exists
|
||||||
|
nodeName := options.Nodes[0] // Use first node for now
|
||||||
|
var exists bool
|
||||||
|
nodeConfig, exists = im.nodeConfigMap[nodeName]
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("node %s not found", nodeName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the remote instance on the remote node
|
||||||
|
remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a local stub that preserves the Nodes field for tracking
|
||||||
|
// We keep the original options (with Nodes) so IsRemote() works correctly
|
||||||
|
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, nil)
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
|
||||||
|
// Add to local tracking maps (but don't count towards limits)
|
||||||
|
im.instances[name] = inst
|
||||||
|
im.instanceNodeMap[name] = nodeConfig
|
||||||
|
|
||||||
|
// Persist the remote instance locally for tracking across restarts
|
||||||
|
if err := im.persistInstance(inst); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local instance creation
|
||||||
|
// Check max instances limit for local instances only
|
||||||
|
localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
|
||||||
|
if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
||||||
|
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
||||||
|
}
|
||||||
|
|
||||||
// Assign and validate port for backend-specific options
|
// Assign and validate port for backend-specific options
|
||||||
if err := im.assignAndValidatePort(options); err != nil {
|
if err := im.assignAndValidatePort(options); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -62,7 +153,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
im.onStatusChange(name, oldStatus, newStatus)
|
im.onStatusChange(name, oldStatus, newStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
|
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)
|
||||||
im.instances[inst.Name] = inst
|
im.instances[inst.Name] = inst
|
||||||
|
|
||||||
if err := im.persistInstance(inst); err != nil {
|
if err := im.persistInstance(inst); err != nil {
|
||||||
@@ -73,28 +164,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetInstance retrieves an instance by its name.
|
// GetInstance retrieves an instance by its name.
|
||||||
|
// For remote instances, this fetches the live state from the remote node and updates the local stub.
|
||||||
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
defer im.mu.RUnlock()
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
instance, exists := im.instances[name]
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
return instance, nil
|
|
||||||
|
// Check if instance is remote and fetch live state
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.GetRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
// Return the local stub (preserving Nodes field)
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateInstance updates the options of an existing instance and returns it.
|
// UpdateInstance updates the options of an existing instance and returns it.
|
||||||
// If the instance is running, it will be restarted to apply the new options.
|
// If the instance is running, it will be restarted to apply the new options.
|
||||||
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.UpdateRemoteInstance(node, name, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
// Persist the updated remote instance locally
|
||||||
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
if err := im.persistInstance(inst); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
if options == nil {
|
if options == nil {
|
||||||
return nil, fmt.Errorf("instance options cannot be nil")
|
return nil, fmt.Errorf("instance options cannot be nil")
|
||||||
}
|
}
|
||||||
@@ -105,55 +236,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if instance is running before updating options
|
// Check if instance is running before updating options
|
||||||
wasRunning := instance.IsRunning()
|
wasRunning := inst.IsRunning()
|
||||||
|
|
||||||
// If the instance is running, stop it first
|
// If the instance is running, stop it first
|
||||||
if wasRunning {
|
if wasRunning {
|
||||||
if err := instance.Stop(); err != nil {
|
if err := inst.Stop(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
|
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now update the options while the instance is stopped
|
// Now update the options while the instance is stopped
|
||||||
instance.SetOptions(options)
|
inst.SetOptions(options)
|
||||||
|
|
||||||
// If it was running before, start it again with the new options
|
// If it was running before, start it again with the new options
|
||||||
if wasRunning {
|
if wasRunning {
|
||||||
if err := instance.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
|
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
if err := im.persistInstance(instance); err != nil {
|
if err := im.persistInstance(inst); err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteInstance removes stopped instance by its name.
|
// DeleteInstance removes stopped instance by its name.
|
||||||
func (im *instanceManager) DeleteInstance(name string) error {
|
func (im *instanceManager) DeleteInstance(name string) error {
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
instance, exists := im.instances[name]
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return fmt.Errorf("instance with name %s not found", name)
|
return fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
if instance.IsRunning() {
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
err := im.DeleteRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up local tracking
|
||||||
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
delete(im.instances, name)
|
||||||
|
delete(im.instanceNodeMap, name)
|
||||||
|
|
||||||
|
// Delete the instance's config file if persistence is enabled
|
||||||
|
// Re-validate instance name for security (defense in depth)
|
||||||
|
validatedName, err := validation.ValidateInstanceName(name)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid instance name for file deletion: %w", err)
|
||||||
|
}
|
||||||
|
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
|
||||||
|
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if inst.IsRunning() {
|
||||||
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
|
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
delete(im.ports, instance.GetPort())
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
|
delete(im.ports, inst.GetPort())
|
||||||
delete(im.instances, name)
|
delete(im.instances, name)
|
||||||
|
|
||||||
// Delete the instance's config file if persistence is enabled
|
// Delete the instance's config file if persistence is enabled
|
||||||
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
|
// Re-validate instance name for security (defense in depth)
|
||||||
|
validatedName, err := validation.ValidateInstanceName(inst.Name)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid instance name for file deletion: %w", err)
|
||||||
|
}
|
||||||
|
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
|
||||||
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
||||||
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
|
return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -163,33 +329,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
|
|||||||
// If the instance is already running, it returns an error.
|
// If the instance is already running, it returns an error.
|
||||||
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
|
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
if instance.IsRunning() {
|
|
||||||
return instance, fmt.Errorf("instance with name %s is already running", name)
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.StartRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if inst.IsRunning() {
|
||||||
|
return inst, fmt.Errorf("instance with name %s is already running", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check max running instances limit for local instances only
|
||||||
|
im.mu.RLock()
|
||||||
|
localRunningCount := 0
|
||||||
|
for instName := range im.runningInstances {
|
||||||
|
if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
|
||||||
|
localRunningCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if maxRunningExceeded {
|
if maxRunningExceeded {
|
||||||
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
|
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := instance.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
err := im.persistInstance(instance)
|
err := im.persistInstance(inst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
||||||
@@ -206,51 +398,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
|||||||
// StopInstance stops a running instance and returns it.
|
// StopInstance stops a running instance and returns it.
|
||||||
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
if !instance.IsRunning() {
|
|
||||||
return instance, fmt.Errorf("instance with name %s is already stopped", name)
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.StopRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := instance.Stop(); err != nil {
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
return inst, fmt.Errorf("instance with name %s is already stopped", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := inst.Stop(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
err := im.persistInstance(instance)
|
err := im.persistInstance(inst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RestartInstance stops and then starts an instance, returning the updated instance.
|
// RestartInstance stops and then starts an instance, returning the updated instance.
|
||||||
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
|
||||||
instance, err := im.StopInstance(name)
|
im.mu.RLock()
|
||||||
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.RestartRemoteInstance(node, name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return im.StartInstance(instance.Name)
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := im.StopInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return im.StartInstance(inst.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetInstanceLogs retrieves the logs for a specific instance by its name.
|
// GetInstanceLogs retrieves the logs for a specific instance by its name.
|
||||||
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
|
func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
_, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return "", fmt.Errorf("instance with name %s not found", name)
|
return "", fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Implement actual log retrieval logic
|
// Check if instance is remote and delegate to remote operation
|
||||||
return fmt.Sprintf("Logs for instance %s", name), nil
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
return im.GetRemoteInstanceLogs(node, name, numLines)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get logs from the local instance
|
||||||
|
return inst.GetLogs(numLines)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getPortFromOptions extracts the port from backend-specific options
|
// getPortFromOptions extracts the port from backend-specific options
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
|
|||||||
MaxInstances: 1, // Very low limit for testing
|
MaxInstances: 1, // Very low limit for testing
|
||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
|
limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
|
|
||||||
_, err = limitedManager.CreateInstance("instance1", options)
|
_, err = limitedManager.CreateInstance("instance1", options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
222
pkg/manager/remote_ops.go
Normal file
222
pkg/manager/remote_ops.go
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
package manager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"llamactl/pkg/config"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// makeRemoteRequest is a helper function to make HTTP requests to a remote node
|
||||||
|
func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
|
||||||
|
var reqBody io.Reader
|
||||||
|
if body != nil {
|
||||||
|
jsonData, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to marshal request body: %w", err)
|
||||||
|
}
|
||||||
|
reqBody = bytes.NewBuffer(jsonData)
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
|
||||||
|
req, err := http.NewRequest(method, url, reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if body != nil {
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
}
|
||||||
|
|
||||||
|
if nodeConfig.APIKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := im.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to execute request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseRemoteResponse is a helper function to parse API responses
|
||||||
|
func parseRemoteResponse(resp *http.Response, result any) error {
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
if result != nil {
|
||||||
|
if err := json.Unmarshal(body, result); err != nil {
|
||||||
|
return fmt.Errorf("failed to unmarshal response: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListRemoteInstances lists all instances on the remote node
|
||||||
|
func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var instances []*instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &instances); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return instances, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateRemoteInstance creates a new instance on the remote node
|
||||||
|
func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRemoteInstance retrieves an instance by name from the remote node
|
||||||
|
func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateRemoteInstance updates an existing instance on the remote node
|
||||||
|
func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteRemoteInstance deletes an instance from the remote node
|
||||||
|
func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseRemoteResponse(resp, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartRemoteInstance starts an instance on the remote node
|
||||||
|
func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/start", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopRemoteInstance stops an instance on the remote node
|
||||||
|
func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/stop", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RestartRemoteInstance restarts an instance on the remote node
|
||||||
|
func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/restart", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRemoteInstanceLogs retrieves logs for an instance from the remote node
|
||||||
|
func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to read response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs endpoint might return plain text or JSON
|
||||||
|
// Try to parse as JSON first (in case it's wrapped in a response object)
|
||||||
|
var logResponse struct {
|
||||||
|
Logs string `json:"logs"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" {
|
||||||
|
return logResponse.Logs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, return as plain text
|
||||||
|
return string(body), nil
|
||||||
|
}
|
||||||
@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
|
|||||||
|
|
||||||
// Identify instances that should timeout
|
// Identify instances that should timeout
|
||||||
for _, inst := range im.instances {
|
for _, inst := range im.instances {
|
||||||
|
// Skip remote instances - they are managed by their respective nodes
|
||||||
|
if inst.IsRemote() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if inst.ShouldTimeout() {
|
if inst.ShouldTimeout() {
|
||||||
timeoutInstances = append(timeoutInstances, inst.Name)
|
timeoutInstances = append(timeoutInstances, inst.Name)
|
||||||
}
|
}
|
||||||
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip remote instances - they are managed by their respective nodes
|
||||||
|
if inst.IsRemote() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
|
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
|
||||||
continue // Skip instances without idle timeout
|
continue // Skip instances without idle timeout
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
|
|||||||
MaxInstances: 5,
|
MaxInstances: 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
manager := manager.NewInstanceManager(backendConfig, cfg)
|
manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
|
||||||
if manager == nil {
|
if manager == nil {
|
||||||
t.Fatal("Manager should be initialized with timeout checker")
|
t.Fatal("Manager should be initialized with timeout checker")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,795 +1,29 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"llamactl/pkg/backends"
|
|
||||||
"llamactl/pkg/backends/llamacpp"
|
|
||||||
"llamactl/pkg/backends/mlx"
|
|
||||||
"llamactl/pkg/backends/vllm"
|
|
||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
|
||||||
"llamactl/pkg/manager"
|
"llamactl/pkg/manager"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os/exec"
|
"net/http/httputil"
|
||||||
"strconv"
|
"sync"
|
||||||
"strings"
|
"time"
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Handler struct {
|
type Handler struct {
|
||||||
InstanceManager manager.InstanceManager
|
InstanceManager manager.InstanceManager
|
||||||
cfg config.AppConfig
|
cfg config.AppConfig
|
||||||
|
httpClient *http.Client
|
||||||
|
remoteProxies map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
|
||||||
|
remoteProxiesMu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
|
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
|
||||||
return &Handler{
|
return &Handler{
|
||||||
InstanceManager: im,
|
InstanceManager: im,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
}
|
httpClient: &http.Client{
|
||||||
}
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
// VersionHandler godoc
|
remoteProxies: make(map[string]*httputil.ReverseProxy),
|
||||||
// @Summary Get llamactl version
|
|
||||||
// @Description Returns the version of the llamactl command
|
|
||||||
// @Tags version
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Version information"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /version [get]
|
|
||||||
func (h *Handler) VersionHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerHelpHandler godoc
|
|
||||||
// @Summary Get help for llama server
|
|
||||||
// @Description Returns the help text for the llama server command
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Help text"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/help [get]
|
|
||||||
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
helpCmd := exec.Command("llama-server", "--help")
|
|
||||||
output, err := helpCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerVersionHandler godoc
|
|
||||||
// @Summary Get version of llama server
|
|
||||||
// @Description Returns the version of the llama server command
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Version information"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/version [get]
|
|
||||||
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
versionCmd := exec.Command("llama-server", "--version")
|
|
||||||
output, err := versionCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerListDevicesHandler godoc
|
|
||||||
// @Summary List available devices for llama server
|
|
||||||
// @Description Returns a list of available devices for the llama server
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "List of devices"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/devices [get]
|
|
||||||
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
listCmd := exec.Command("llama-server", "--list-devices")
|
|
||||||
output, err := listCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ListInstances godoc
|
|
||||||
// @Summary List all instances
|
|
||||||
// @Description Returns a list of all instances managed by the server
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 {array} instance.Process "List of instances"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances [get]
|
|
||||||
func (h *Handler) ListInstances() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
instances, err := h.InstanceManager.ListInstances()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(instances); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateInstance godoc
|
|
||||||
// @Summary Create and start a new instance
|
|
||||||
// @Description Creates a new instance with the provided configuration options
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
|
||||||
// @Success 201 {object} instance.Process "Created instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid request body"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [post]
|
|
||||||
func (h *Handler) CreateInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var options instance.CreateInstanceOptions
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.CreateInstance(name, &options)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusCreated)
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetInstance godoc
|
|
||||||
// @Summary Get details of a specific instance
|
|
||||||
// @Description Returns the details of a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [get]
|
|
||||||
func (h *Handler) GetInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateInstance godoc
|
|
||||||
// @Summary Update an instance's configuration
|
|
||||||
// @Description Updates the configuration of a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
|
||||||
// @Success 200 {object} instance.Process "Updated instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [put]
|
|
||||||
func (h *Handler) UpdateInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var options instance.CreateInstanceOptions
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.UpdateInstance(name, &options)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StartInstance godoc
|
|
||||||
// @Summary Start a stopped instance
|
|
||||||
// @Description Starts a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Started instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/start [post]
|
|
||||||
func (h *Handler) StartInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.StartInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
// Check if error is due to maximum running instances limit
|
|
||||||
if _, ok := err.(manager.MaxRunningInstancesError); ok {
|
|
||||||
http.Error(w, err.Error(), http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StopInstance godoc
|
|
||||||
// @Summary Stop a running instance
|
|
||||||
// @Description Stops a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Stopped instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/stop [post]
|
|
||||||
func (h *Handler) StopInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.StopInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RestartInstance godoc
|
|
||||||
// @Summary Restart a running instance
|
|
||||||
// @Description Restarts a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Restarted instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/restart [post]
|
|
||||||
func (h *Handler) RestartInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.RestartInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteInstance godoc
|
|
||||||
// @Summary Delete an instance
|
|
||||||
// @Description Stops and removes a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 204 "No Content"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [delete]
|
|
||||||
func (h *Handler) DeleteInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := h.InstanceManager.DeleteInstance(name); err != nil {
|
|
||||||
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.WriteHeader(http.StatusNoContent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetInstanceLogs godoc
|
|
||||||
// @Summary Get logs from a specific instance
|
|
||||||
// @Description Returns the logs from a specific instance by name with optional line limit
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Instance logs"
|
|
||||||
// @Failure 400 {string} string "Invalid name format or lines parameter"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/logs [get]
|
|
||||||
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
lines := r.URL.Query().Get("lines")
|
|
||||||
if lines == "" {
|
|
||||||
lines = "-1"
|
|
||||||
}
|
|
||||||
|
|
||||||
num_lines, err := strconv.Atoi(lines)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
logs, err := inst.GetLogs(num_lines)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write([]byte(logs))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ProxyToInstance godoc
|
|
||||||
// @Summary Proxy requests to a specific instance
|
|
||||||
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 "Request successfully proxied to instance"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Failure 503 {string} string "Instance is not running"
|
|
||||||
// @Router /instances/{name}/proxy [get]
|
|
||||||
// @Router /instances/{name}/proxy [post]
|
|
||||||
func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the cached proxy for this instance
|
|
||||||
proxy, err := inst.GetProxy()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
|
|
||||||
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
|
|
||||||
proxyPath := r.URL.Path[len(prefix):]
|
|
||||||
|
|
||||||
// Ensure the proxy path starts with "/"
|
|
||||||
if !strings.HasPrefix(proxyPath, "/") {
|
|
||||||
proxyPath = "/" + proxyPath
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the last request time for the instance
|
|
||||||
inst.UpdateLastRequestTime()
|
|
||||||
|
|
||||||
// Modify the request to remove the proxy prefix
|
|
||||||
originalPath := r.URL.Path
|
|
||||||
r.URL.Path = proxyPath
|
|
||||||
|
|
||||||
// Set forwarded headers
|
|
||||||
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
|
|
||||||
r.Header.Set("X-Forwarded-Proto", "http")
|
|
||||||
|
|
||||||
// Restore original path for logging purposes
|
|
||||||
defer func() {
|
|
||||||
r.URL.Path = originalPath
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Forward the request using the cached proxy
|
|
||||||
proxy.ServeHTTP(w, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAIListInstances godoc
|
|
||||||
// @Summary List instances in OpenAI-compatible format
|
|
||||||
// @Description Returns a list of instances in a format compatible with OpenAI API
|
|
||||||
// @Tags openai
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /v1/models [get]
|
|
||||||
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
instances, err := h.InstanceManager.ListInstances()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
openaiInstances := make([]OpenAIInstance, len(instances))
|
|
||||||
for i, inst := range instances {
|
|
||||||
openaiInstances[i] = OpenAIInstance{
|
|
||||||
ID: inst.Name,
|
|
||||||
Object: "model",
|
|
||||||
Created: inst.Created,
|
|
||||||
OwnedBy: "llamactl",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
openaiResponse := OpenAIListInstancesResponse{
|
|
||||||
Object: "list",
|
|
||||||
Data: openaiInstances,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAIProxy godoc
|
|
||||||
// @Summary OpenAI-compatible proxy endpoint
|
|
||||||
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
|
|
||||||
// @Tags openai
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 "OpenAI response"
|
|
||||||
// @Failure 400 {string} string "Invalid request body or instance name"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /v1/ [post]
|
|
||||||
func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
// Read the entire body first
|
|
||||||
bodyBytes, err := io.ReadAll(r.Body)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to read request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r.Body.Close()
|
|
||||||
|
|
||||||
// Parse the body to extract instance name
|
|
||||||
var requestBody map[string]any
|
|
||||||
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
modelName, ok := requestBody["model"].(string)
|
|
||||||
if !ok || modelName == "" {
|
|
||||||
http.Error(w, "Instance name is required", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Route to the appropriate inst based on instance name
|
|
||||||
inst, err := h.InstanceManager.GetInstance(modelName)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
|
||||||
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
|
|
||||||
if !allowOnDemand {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
|
||||||
if h.cfg.Instances.EnableLRUEviction {
|
|
||||||
err := h.InstanceManager.EvictLRUInstance()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If on-demand start is enabled, start the instance
|
|
||||||
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy, err := inst.GetProxy()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update last request time for the instance
|
|
||||||
inst.UpdateLastRequestTime()
|
|
||||||
|
|
||||||
// Recreate the request body from the bytes we read
|
|
||||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
|
||||||
r.ContentLength = int64(len(bodyBytes))
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseCommandRequest represents the request body for command parsing
|
|
||||||
type ParseCommandRequest struct {
|
|
||||||
Command string `json:"command"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseLlamaCommand godoc
|
|
||||||
// @Summary Parse llama-server command
|
|
||||||
// @Description Parses a llama-server command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Failure 500 {object} map[string]string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/parse-command [post]
|
|
||||||
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backends.BackendTypeLlamaCpp,
|
|
||||||
LlamaServerOptions: llamaOptions,
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseMlxCommand godoc
|
|
||||||
// @Summary Parse mlx_lm.server command
|
|
||||||
// @Description Parses MLX-LM server command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Router /backends/mlx/parse-command [post]
|
|
||||||
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Currently only support mlx_lm backend type
|
|
||||||
backendType := backends.BackendTypeMlxLm
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backendType,
|
|
||||||
MlxServerOptions: mlxOptions,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseVllmCommand godoc
|
|
||||||
// @Summary Parse vllm serve command
|
|
||||||
// @Description Parses a vLLM serve command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Router /backends/vllm/parse-command [post]
|
|
||||||
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
backendType := backends.BackendTypeVllm
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backendType,
|
|
||||||
VllmServerOptions: vllmOptions,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
320
pkg/server/handlers_backends.go
Normal file
320
pkg/server/handlers_backends.go
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/backends/llamacpp"
|
||||||
|
"llamactl/pkg/backends/mlx"
|
||||||
|
"llamactl/pkg/backends/vllm"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseCommandRequest represents the request body for command parsing
|
||||||
|
type ParseCommandRequest struct {
|
||||||
|
Command string `json:"command"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
|
// Get the instance name from the URL parameter
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Route to the appropriate inst based on instance name
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil {
|
||||||
|
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.BackendType != backends.BackendTypeLlamaCpp {
|
||||||
|
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
|
||||||
|
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
|
||||||
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||||
|
if h.cfg.Instances.EnableLRUEviction {
|
||||||
|
err := h.InstanceManager.EvictLRUInstance()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If on-demand start is enabled, start the instance
|
||||||
|
if _, err := h.InstanceManager.StartInstance(name); err != nil {
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the instance to become healthy before proceeding
|
||||||
|
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
||||||
|
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy, err := inst.GetProxy()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip the "/llama-cpp/<name>" prefix from the request URL
|
||||||
|
prefix := fmt.Sprintf("/llama-cpp/%s", name)
|
||||||
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||||
|
|
||||||
|
// Update the last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseLlamaCommand godoc
|
||||||
|
// @Summary Parse llama-server command
|
||||||
|
// @Description Parses a llama-server command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Failure 500 {object} map[string]string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/parse-command [post]
|
||||||
|
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
LlamaServerOptions: llamaOptions,
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseMlxCommand godoc
|
||||||
|
// @Summary Parse mlx_lm.server command
|
||||||
|
// @Description Parses MLX-LM server command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Router /backends/mlx/parse-command [post]
|
||||||
|
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently only support mlx_lm backend type
|
||||||
|
backendType := backends.BackendTypeMlxLm
|
||||||
|
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backendType,
|
||||||
|
MlxServerOptions: mlxOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseVllmCommand godoc
|
||||||
|
// @Summary Parse vllm serve command
|
||||||
|
// @Description Parses a vLLM serve command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Router /backends/vllm/parse-command [post]
|
||||||
|
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
backendType := backends.BackendTypeVllm
|
||||||
|
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backendType,
|
||||||
|
VllmServerOptions: vllmOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerHelpHandler godoc
|
||||||
|
// @Summary Get help for llama server
|
||||||
|
// @Description Returns the help text for the llama server command
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Help text"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/help [get]
|
||||||
|
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
helpCmd := exec.Command("llama-server", "--help")
|
||||||
|
output, err := helpCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerVersionHandler godoc
|
||||||
|
// @Summary Get version of llama server
|
||||||
|
// @Description Returns the version of the llama server command
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Version information"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/version [get]
|
||||||
|
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
versionCmd := exec.Command("llama-server", "--version")
|
||||||
|
output, err := versionCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerListDevicesHandler godoc
|
||||||
|
// @Summary List available devices for llama server
|
||||||
|
// @Description Returns a list of available devices for the llama server
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "List of devices"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/devices [get]
|
||||||
|
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
listCmd := exec.Command("llama-server", "--list-devices")
|
||||||
|
output, err := listCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
445
pkg/server/handlers_instances.go
Normal file
445
pkg/server/handlers_instances.go
Normal file
@@ -0,0 +1,445 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"llamactl/pkg/manager"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httputil"
|
||||||
|
"net/url"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ListInstances godoc
|
||||||
|
// @Summary List all instances
|
||||||
|
// @Description Returns a list of all instances managed by the server
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {array} instance.Process "List of instances"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances [get]
|
||||||
|
func (h *Handler) ListInstances() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
instances, err := h.InstanceManager.ListInstances()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(instances); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateInstance godoc
|
||||||
|
// @Summary Create and start a new instance
|
||||||
|
// @Description Creates a new instance with the provided configuration options
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
||||||
|
// @Success 201 {object} instance.Process "Created instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid request body"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [post]
|
||||||
|
func (h *Handler) CreateInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var options instance.CreateInstanceOptions
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.CreateInstance(name, &options)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInstance godoc
|
||||||
|
// @Summary Get details of a specific instance
|
||||||
|
// @Description Returns the details of a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [get]
|
||||||
|
func (h *Handler) GetInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateInstance godoc
|
||||||
|
// @Summary Update an instance's configuration
|
||||||
|
// @Description Updates the configuration of a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
||||||
|
// @Success 200 {object} instance.Process "Updated instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [put]
|
||||||
|
func (h *Handler) UpdateInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var options instance.CreateInstanceOptions
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.UpdateInstance(name, &options)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartInstance godoc
|
||||||
|
// @Summary Start a stopped instance
|
||||||
|
// @Description Starts a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Started instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/start [post]
|
||||||
|
func (h *Handler) StartInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.StartInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
// Check if error is due to maximum running instances limit
|
||||||
|
if _, ok := err.(manager.MaxRunningInstancesError); ok {
|
||||||
|
http.Error(w, err.Error(), http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopInstance godoc
|
||||||
|
// @Summary Stop a running instance
|
||||||
|
// @Description Stops a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Stopped instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/stop [post]
|
||||||
|
func (h *Handler) StopInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.StopInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RestartInstance godoc
|
||||||
|
// @Summary Restart a running instance
|
||||||
|
// @Description Restarts a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Restarted instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/restart [post]
|
||||||
|
func (h *Handler) RestartInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.RestartInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteInstance godoc
|
||||||
|
// @Summary Delete an instance
|
||||||
|
// @Description Stops and removes a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 204 "No Content"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [delete]
|
||||||
|
func (h *Handler) DeleteInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.InstanceManager.DeleteInstance(name); err != nil {
|
||||||
|
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInstanceLogs godoc
|
||||||
|
// @Summary Get logs from a specific instance
|
||||||
|
// @Description Returns the logs from a specific instance by name with optional line limit
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Instance logs"
|
||||||
|
// @Failure 400 {string} string "Invalid name format or lines parameter"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/logs [get]
|
||||||
|
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
lines := r.URL.Query().Get("lines")
|
||||||
|
numLines := -1 // Default to all lines
|
||||||
|
if lines != "" {
|
||||||
|
parsedLines, err := strconv.Atoi(lines)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
numLines = parsedLines
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the instance manager which handles both local and remote instances
|
||||||
|
logs, err := h.InstanceManager.GetInstanceLogs(name, numLines)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write([]byte(logs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ProxyToInstance godoc
|
||||||
|
// @Summary Proxy requests to a specific instance
|
||||||
|
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 "Request successfully proxied to instance"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Failure 503 {string} string "Instance is not running"
|
||||||
|
// @Router /instances/{name}/proxy [get]
|
||||||
|
// @Router /instances/{name}/proxy [post]
|
||||||
|
func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
if inst.IsRemote() {
|
||||||
|
h.RemoteInstanceProxy(w, r, name, inst)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the cached proxy for this instance
|
||||||
|
proxy, err := inst.GetProxy()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
|
||||||
|
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
|
||||||
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||||
|
|
||||||
|
// Update the last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
// Set forwarded headers
|
||||||
|
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
|
||||||
|
r.Header.Set("X-Forwarded-Proto", "http")
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemoteInstanceProxy proxies requests to a remote instance
|
||||||
|
func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
|
||||||
|
// Get the node name from instance options
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil || len(options.Nodes) == 0 {
|
||||||
|
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
|
||||||
|
// Check if we have a cached proxy for this node
|
||||||
|
h.remoteProxiesMu.RLock()
|
||||||
|
proxy, exists := h.remoteProxies[nodeName]
|
||||||
|
h.remoteProxiesMu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
// Find node configuration
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[nodeName]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create reverse proxy to remote node
|
||||||
|
targetURL, err := url.Parse(nodeConfig.Address)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy = httputil.NewSingleHostReverseProxy(targetURL)
|
||||||
|
|
||||||
|
// Modify request before forwarding
|
||||||
|
originalDirector := proxy.Director
|
||||||
|
apiKey := nodeConfig.APIKey // Capture for closure
|
||||||
|
proxy.Director = func(req *http.Request) {
|
||||||
|
originalDirector(req)
|
||||||
|
// Add API key if configured
|
||||||
|
if apiKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the proxy by node name
|
||||||
|
h.remoteProxiesMu.Lock()
|
||||||
|
h.remoteProxies[nodeName] = proxy
|
||||||
|
h.remoteProxiesMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
79
pkg/server/handlers_nodes.go
Normal file
79
pkg/server/handlers_nodes.go
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NodeResponse represents a sanitized node configuration for API responses.
// Only the node's address is exposed; other fields from the underlying node
// config (e.g. the API key used in the proxy handlers) are deliberately
// omitted so credentials never leak through the nodes API.
type NodeResponse struct {
	// Address is the network address of the node as configured on the server.
	Address string `json:"address"`
}
|
||||||
|
|
||||||
|
// ListNodes godoc
|
||||||
|
// @Summary List all configured nodes
|
||||||
|
// @Description Returns a map of all nodes configured in the server (node name -> node config)
|
||||||
|
// @Tags nodes
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /nodes [get]
|
||||||
|
func (h *Handler) ListNodes() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
// Convert to sanitized response format (map of name -> NodeResponse)
|
||||||
|
nodeResponses := make(map[string]NodeResponse, len(h.cfg.Nodes))
|
||||||
|
for name, node := range h.cfg.Nodes {
|
||||||
|
nodeResponses[name] = NodeResponse{
|
||||||
|
Address: node.Address,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(nodeResponses); err != nil {
|
||||||
|
http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNode godoc
|
||||||
|
// @Summary Get details of a specific node
|
||||||
|
// @Description Returns the details of a specific node by name
|
||||||
|
// @Tags nodes
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Node Name"
|
||||||
|
// @Success 200 {object} NodeResponse "Node details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 404 {string} string "Node not found"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /nodes/{name} [get]
|
||||||
|
func (h *Handler) GetNode() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[name]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, "Node not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to sanitized response format
|
||||||
|
nodeResponse := NodeResponse{
|
||||||
|
Address: nodeConfig.Address,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(nodeResponse); err != nil {
|
||||||
|
http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
206
pkg/server/handlers_openai.go
Normal file
206
pkg/server/handlers_openai.go
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httputil"
|
||||||
|
"net/url"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenAIListInstances godoc
|
||||||
|
// @Summary List instances in OpenAI-compatible format
|
||||||
|
// @Description Returns a list of instances in a format compatible with OpenAI API
|
||||||
|
// @Tags openai
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /v1/models [get]
|
||||||
|
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
instances, err := h.InstanceManager.ListInstances()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
openaiInstances := make([]OpenAIInstance, len(instances))
|
||||||
|
for i, inst := range instances {
|
||||||
|
openaiInstances[i] = OpenAIInstance{
|
||||||
|
ID: inst.Name,
|
||||||
|
Object: "model",
|
||||||
|
Created: inst.Created,
|
||||||
|
OwnedBy: "llamactl",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
openaiResponse := OpenAIListInstancesResponse{
|
||||||
|
Object: "list",
|
||||||
|
Data: openaiInstances,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Read the entire body first: it must be parsed to find the target
		// instance, then replayed to the backend, so it is buffered in full.
		bodyBytes, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "Failed to read request body", http.StatusBadRequest)
			return
		}
		r.Body.Close()

		// Parse the body to extract the instance name from the "model" field.
		var requestBody map[string]any
		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		// The OpenAI "model" field doubles as the llamactl instance name.
		modelName, ok := requestBody["model"].(string)
		if !ok || modelName == "" {
			http.Error(w, "Instance name is required", http.StatusBadRequest)
			return
		}

		// Route to the appropriate inst based on instance name
		inst, err := h.InstanceManager.GetInstance(modelName)
		if err != nil {
			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
			return
		}

		// Check if this is a remote instance: restore the consumed body and
		// hand off to the remote node proxy.
		if inst.IsRemote() {
			// Restore the body for the remote proxy
			r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
			h.RemoteOpenAIProxy(w, r, modelName, inst)
			return
		}

		if !inst.IsRunning() {
			// Only instances that opt in via OnDemandStart may be started here.
			options := inst.GetOptions()
			allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
			if !allowOnDemand {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}

			// At capacity: either evict the least-recently-used instance
			// (if enabled) or refuse with 409.
			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if h.cfg.Instances.EnableLRUEviction {
					err := h.InstanceManager.EvictLRUInstance()
					if err != nil {
						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
						return
					}
				} else {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
			}

			// If on-demand start is enabled, start the instance
			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
				return
			}

			// Wait for the instance to become healthy before proceeding;
			// the timeout comes from the server's OnDemandStartTimeout config.
			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
				return
			}
		}

		proxy, err := inst.GetProxy()
		if err != nil {
			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Update last request time for the instance
		inst.UpdateLastRequestTime()

		// Recreate the request body from the bytes we read and fix
		// ContentLength so the proxied request is well-formed.
		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
		r.ContentLength = int64(len(bodyBytes))

		proxy.ServeHTTP(w, r)
	}
}
|
||||||
|
|
||||||
|
// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance
|
||||||
|
func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
|
||||||
|
// Get the node name from instance options
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil || len(options.Nodes) == 0 {
|
||||||
|
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
|
||||||
|
// Check if we have a cached proxy for this node
|
||||||
|
h.remoteProxiesMu.RLock()
|
||||||
|
proxy, exists := h.remoteProxies[nodeName]
|
||||||
|
h.remoteProxiesMu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
// Find node configuration
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[nodeName]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create reverse proxy to remote node
|
||||||
|
targetURL, err := url.Parse(nodeConfig.Address)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy = httputil.NewSingleHostReverseProxy(targetURL)
|
||||||
|
|
||||||
|
// Modify request before forwarding
|
||||||
|
originalDirector := proxy.Director
|
||||||
|
apiKey := nodeConfig.APIKey // Capture for closure
|
||||||
|
proxy.Director = func(req *http.Request) {
|
||||||
|
originalDirector(req)
|
||||||
|
// Add API key if configured
|
||||||
|
if apiKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the proxy
|
||||||
|
h.remoteProxiesMu.Lock()
|
||||||
|
h.remoteProxies[nodeName] = proxy
|
||||||
|
h.remoteProxiesMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
22
pkg/server/handlers_system.go
Normal file
22
pkg/server/handlers_system.go
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// VersionHandler godoc
|
||||||
|
// @Summary Get llamactl version
|
||||||
|
// @Description Returns the version of the llamactl command
|
||||||
|
// @Tags version
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Version information"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /version [get]
|
||||||
|
func (h *Handler) VersionHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
|||||||
r.Use(cors.Handler(cors.Options{
|
r.Use(cors.Handler(cors.Options{
|
||||||
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
|
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
|
||||||
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
|
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
|
||||||
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
|
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
|
||||||
ExposedHeaders: []string{"Link"},
|
ExposedHeaders: []string{"Link"},
|
||||||
AllowCredentials: false,
|
AllowCredentials: false,
|
||||||
MaxAge: 300,
|
MaxAge: 300,
|
||||||
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Node management endpoints
|
||||||
|
r.Route("/nodes", func(r chi.Router) {
|
||||||
|
r.Get("/", handler.ListNodes()) // List all nodes
|
||||||
|
|
||||||
|
r.Route("/{name}", func(r chi.Router) {
|
||||||
|
r.Get("/", handler.GetNode())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
// Instance management endpoints
|
// Instance management endpoints
|
||||||
r.Route("/instances", func(r chi.Router) {
|
r.Route("/instances", func(r chi.Router) {
|
||||||
r.Get("/", handler.ListInstances()) // List all instances
|
r.Get("/", handler.ListInstances()) // List all instances
|
||||||
@@ -103,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
|||||||
|
|
||||||
})
|
})
|
||||||
|
|
||||||
|
r.Route("/llama-cpp/{name}", func(r chi.Router) {
|
||||||
|
|
||||||
|
// Public Routes
|
||||||
|
// Allow llama-cpp server to serve its own WebUI if it is running.
|
||||||
|
// Don't auto start the server since it can be accessed without an API key
|
||||||
|
r.Get("/", handler.LlamaCppProxy(false))
|
||||||
|
|
||||||
|
// Private Routes
|
||||||
|
r.Group(func(r chi.Router) {
|
||||||
|
|
||||||
|
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
|
||||||
|
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
|
||||||
|
}
|
||||||
|
|
||||||
|
// This handler auto start the server if it's not running
|
||||||
|
llamaCppHandler := handler.LlamaCppProxy(true)
|
||||||
|
|
||||||
|
// llama.cpp server specific proxy endpoints
|
||||||
|
r.Get("/props", llamaCppHandler)
|
||||||
|
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
|
||||||
|
r.Get("/slots", llamaCppHandler)
|
||||||
|
r.Post("/apply-template", llamaCppHandler)
|
||||||
|
r.Post("/completion", llamaCppHandler)
|
||||||
|
r.Post("/detokenize", llamaCppHandler)
|
||||||
|
r.Post("/embeddings", llamaCppHandler)
|
||||||
|
r.Post("/infill", llamaCppHandler)
|
||||||
|
r.Post("/metrics", llamaCppHandler)
|
||||||
|
r.Post("/props", llamaCppHandler)
|
||||||
|
r.Post("/reranking", llamaCppHandler)
|
||||||
|
r.Post("/tokenize", llamaCppHandler)
|
||||||
|
|
||||||
|
// OpenAI-compatible proxy endpoint
|
||||||
|
// Handles all POST requests to /v1/*, including:
|
||||||
|
// - /v1/completions
|
||||||
|
// - /v1/chat/completions
|
||||||
|
// - /v1/embeddings
|
||||||
|
// - /v1/rerank
|
||||||
|
// - /v1/reranking
|
||||||
|
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
|
||||||
|
// on "model" field being optional, and handler.OpenAIProxy requires it.
|
||||||
|
r.Post("/v1/*", llamaCppHandler)
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
// Serve WebUI files
|
// Serve WebUI files
|
||||||
if err := webui.SetupWebUI(r); err != nil {
|
if err := webui.SetupWebUI(r); err != nil {
|
||||||
fmt.Printf("Failed to set up WebUI: %v\n", err)
|
fmt.Printf("Failed to set up WebUI: %v\n", err)
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
|
|||||||
// Clean up undefined values to avoid sending empty fields
|
// Clean up undefined values to avoid sending empty fields
|
||||||
const cleanOptions: CreateInstanceOptions = {};
|
const cleanOptions: CreateInstanceOptions = {};
|
||||||
Object.entries(formData).forEach(([key, value]) => {
|
Object.entries(formData).forEach(([key, value]) => {
|
||||||
if (key === 'backend_options' && value && typeof value === 'object') {
|
if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
|
||||||
// Handle backend_options specially - clean nested object
|
// Handle backend_options specially - clean nested object
|
||||||
const cleanBackendOptions: any = {};
|
const cleanBackendOptions: any = {};
|
||||||
Object.entries(value).forEach(([backendKey, backendValue]) => {
|
Object.entries(value).forEach(([backendKey, backendValue]) => {
|
||||||
@@ -123,8 +123,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
|
|||||||
if (Object.keys(cleanBackendOptions).length > 0) {
|
if (Object.keys(cleanBackendOptions).length > 0) {
|
||||||
(cleanOptions as any)[key] = cleanBackendOptions;
|
(cleanOptions as any)[key] = cleanBackendOptions;
|
||||||
}
|
}
|
||||||
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
|
} else if (value !== undefined && value !== null) {
|
||||||
// Handle arrays - don't include empty arrays
|
// Skip empty strings
|
||||||
|
if (typeof value === 'string' && value.trim() === "") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Skip empty arrays
|
||||||
if (Array.isArray(value) && value.length === 0) {
|
if (Array.isArray(value) && value.length === 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,144 +0,0 @@
|
|||||||
import React from 'react'
|
|
||||||
import { Input } from '@/components/ui/input'
|
|
||||||
import { Label } from '@/components/ui/label'
|
|
||||||
import { Checkbox } from '@/components/ui/checkbox'
|
|
||||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
|
||||||
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
|
|
||||||
|
|
||||||
interface ZodFormFieldProps {
|
|
||||||
fieldKey: keyof CreateInstanceOptions
|
|
||||||
value: string | number | boolean | string[] | undefined
|
|
||||||
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
|
|
||||||
}
|
|
||||||
|
|
||||||
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
|
|
||||||
// Get configuration for basic fields, or use field name for advanced fields
|
|
||||||
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
|
|
||||||
|
|
||||||
// Get type from Zod schema
|
|
||||||
const fieldType = getFieldType(fieldKey)
|
|
||||||
|
|
||||||
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
|
|
||||||
onChange(fieldKey, newValue)
|
|
||||||
}
|
|
||||||
|
|
||||||
const renderField = () => {
|
|
||||||
// Special handling for backend_type field - render as dropdown
|
|
||||||
if (fieldKey === 'backend_type') {
|
|
||||||
return (
|
|
||||||
<div className="grid gap-2">
|
|
||||||
<Label htmlFor={fieldKey}>
|
|
||||||
{config.label}
|
|
||||||
</Label>
|
|
||||||
<select
|
|
||||||
id={fieldKey}
|
|
||||||
value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
|
|
||||||
onChange={(e) => handleChange(e.target.value || undefined)}
|
|
||||||
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
|
|
||||||
>
|
|
||||||
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
|
|
||||||
<option value={BackendType.MLX_LM}>MLX LM</option>
|
|
||||||
<option value={BackendType.VLLM}>vLLM</option>
|
|
||||||
</select>
|
|
||||||
{config.description && (
|
|
||||||
<p className="text-sm text-muted-foreground">{config.description}</p>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (fieldType) {
|
|
||||||
case 'boolean':
|
|
||||||
return (
|
|
||||||
<div className="flex items-center space-x-2">
|
|
||||||
<Checkbox
|
|
||||||
id={fieldKey}
|
|
||||||
checked={typeof value === 'boolean' ? value : false}
|
|
||||||
onCheckedChange={(checked) => handleChange(checked)}
|
|
||||||
/>
|
|
||||||
<Label htmlFor={fieldKey} className="text-sm font-normal">
|
|
||||||
{config.label}
|
|
||||||
{config.description && (
|
|
||||||
<span className="text-muted-foreground ml-1">- {config.description}</span>
|
|
||||||
)}
|
|
||||||
</Label>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
|
|
||||||
case 'number':
|
|
||||||
return (
|
|
||||||
<div className="grid gap-2">
|
|
||||||
<Label htmlFor={fieldKey}>
|
|
||||||
{config.label}
|
|
||||||
</Label>
|
|
||||||
<Input
|
|
||||||
id={fieldKey}
|
|
||||||
type="number"
|
|
||||||
step="any" // This allows decimal numbers
|
|
||||||
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
|
|
||||||
onChange={(e) => {
|
|
||||||
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
|
|
||||||
// Only update if the parsed value is valid or the input is empty
|
|
||||||
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
|
|
||||||
handleChange(numValue)
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
placeholder={config.placeholder}
|
|
||||||
/>
|
|
||||||
{config.description && (
|
|
||||||
<p className="text-sm text-muted-foreground">{config.description}</p>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
|
|
||||||
case 'array':
|
|
||||||
return (
|
|
||||||
<div className="grid gap-2">
|
|
||||||
<Label htmlFor={fieldKey}>
|
|
||||||
{config.label}
|
|
||||||
</Label>
|
|
||||||
<Input
|
|
||||||
id={fieldKey}
|
|
||||||
type="text"
|
|
||||||
value={Array.isArray(value) ? value.join(', ') : ''}
|
|
||||||
onChange={(e) => {
|
|
||||||
const arrayValue = e.target.value
|
|
||||||
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
|
|
||||||
: undefined
|
|
||||||
handleChange(arrayValue)
|
|
||||||
}}
|
|
||||||
placeholder="item1, item2, item3"
|
|
||||||
/>
|
|
||||||
{config.description && (
|
|
||||||
<p className="text-sm text-muted-foreground">{config.description}</p>
|
|
||||||
)}
|
|
||||||
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
|
|
||||||
case 'text':
|
|
||||||
default:
|
|
||||||
return (
|
|
||||||
<div className="grid gap-2">
|
|
||||||
<Label htmlFor={fieldKey}>
|
|
||||||
{config.label}
|
|
||||||
</Label>
|
|
||||||
<Input
|
|
||||||
id={fieldKey}
|
|
||||||
type="text"
|
|
||||||
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
|
|
||||||
onChange={(e) => handleChange(e.target.value || undefined)}
|
|
||||||
placeholder={config.placeholder}
|
|
||||||
/>
|
|
||||||
{config.description && (
|
|
||||||
<p className="text-sm text-muted-foreground">{config.description}</p>
|
|
||||||
)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return <div className="space-y-2">{renderField()}</div>
|
|
||||||
}
|
|
||||||
|
|
||||||
export default ZodFormField
|
|
||||||
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
144
webui/src/components/form/EnvironmentVariablesInput.tsx
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import React, { useState } from 'react'
|
||||||
|
import { Input } from '@/components/ui/input'
|
||||||
|
import { Label } from '@/components/ui/label'
|
||||||
|
import { Button } from '@/components/ui/button'
|
||||||
|
import { X, Plus } from 'lucide-react'
|
||||||
|
|
||||||
|
interface EnvironmentVariablesInputProps {
|
||||||
|
id: string
|
||||||
|
label: string
|
||||||
|
value: Record<string, string> | undefined
|
||||||
|
onChange: (value: Record<string, string> | undefined) => void
|
||||||
|
description?: string
|
||||||
|
disabled?: boolean
|
||||||
|
className?: string
|
||||||
|
}
|
||||||
|
|
||||||
|
interface EnvVar {
|
||||||
|
key: string
|
||||||
|
value: string
|
||||||
|
}
|
||||||
|
|
||||||
|
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
|
||||||
|
id,
|
||||||
|
label,
|
||||||
|
value,
|
||||||
|
onChange,
|
||||||
|
description,
|
||||||
|
disabled = false,
|
||||||
|
className
|
||||||
|
}) => {
|
||||||
|
// Convert the value object to an array of key-value pairs for editing
|
||||||
|
const envVarsFromValue = value
|
||||||
|
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
|
||||||
|
: []
|
||||||
|
|
||||||
|
const [envVars, setEnvVars] = useState<EnvVar[]>(
|
||||||
|
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
|
||||||
|
)
|
||||||
|
|
||||||
|
// Update parent component when env vars change
|
||||||
|
const updateParent = (newEnvVars: EnvVar[]) => {
|
||||||
|
// Filter out empty entries
|
||||||
|
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
|
||||||
|
|
||||||
|
if (validVars.length === 0) {
|
||||||
|
onChange(undefined)
|
||||||
|
} else {
|
||||||
|
const envObject = validVars.reduce((acc, env) => {
|
||||||
|
acc[env.key.trim()] = env.value.trim()
|
||||||
|
return acc
|
||||||
|
}, {} as Record<string, string>)
|
||||||
|
onChange(envObject)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleKeyChange = (index: number, newKey: string) => {
|
||||||
|
const newEnvVars = [...envVars]
|
||||||
|
newEnvVars[index].key = newKey
|
||||||
|
setEnvVars(newEnvVars)
|
||||||
|
updateParent(newEnvVars)
|
||||||
|
}
|
||||||
|
|
||||||
|
const handleValueChange = (index: number, newValue: string) => {
|
||||||
|
const newEnvVars = [...envVars]
|
||||||
|
newEnvVars[index].value = newValue
|
||||||
|
setEnvVars(newEnvVars)
|
||||||
|
updateParent(newEnvVars)
|
||||||
|
}
|
||||||
|
|
||||||
|
const addEnvVar = () => {
|
||||||
|
const newEnvVars = [...envVars, { key: '', value: '' }]
|
||||||
|
setEnvVars(newEnvVars)
|
||||||
|
}
|
||||||
|
|
||||||
|
const removeEnvVar = (index: number) => {
|
||||||
|
if (envVars.length === 1) {
|
||||||
|
// Reset to empty if it's the last one
|
||||||
|
const newEnvVars = [{ key: '', value: '' }]
|
||||||
|
setEnvVars(newEnvVars)
|
||||||
|
updateParent(newEnvVars)
|
||||||
|
} else {
|
||||||
|
const newEnvVars = envVars.filter((_, i) => i !== index)
|
||||||
|
setEnvVars(newEnvVars)
|
||||||
|
updateParent(newEnvVars)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className={`grid gap-2 ${className || ''}`}>
|
||||||
|
<Label htmlFor={id}>
|
||||||
|
{label}
|
||||||
|
</Label>
|
||||||
|
<div className="space-y-2">
|
||||||
|
{envVars.map((envVar, index) => (
|
||||||
|
<div key={index} className="flex gap-2 items-center">
|
||||||
|
<Input
|
||||||
|
placeholder="Variable name"
|
||||||
|
value={envVar.key}
|
||||||
|
onChange={(e) => handleKeyChange(index, e.target.value)}
|
||||||
|
disabled={disabled}
|
||||||
|
className="flex-1"
|
||||||
|
/>
|
||||||
|
<Input
|
||||||
|
placeholder="Variable value"
|
||||||
|
value={envVar.value}
|
||||||
|
onChange={(e) => handleValueChange(index, e.target.value)}
|
||||||
|
disabled={disabled}
|
||||||
|
className="flex-1"
|
||||||
|
/>
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
variant="outline"
|
||||||
|
size="sm"
|
||||||
|
onClick={() => removeEnvVar(index)}
|
||||||
|
disabled={disabled}
|
||||||
|
className="shrink-0"
|
||||||
|
>
|
||||||
|
<X className="h-4 w-4" />
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
<Button
|
||||||
|
type="button"
|
||||||
|
variant="outline"
|
||||||
|
size="sm"
|
||||||
|
onClick={addEnvVar}
|
||||||
|
disabled={disabled}
|
||||||
|
className="w-fit"
|
||||||
|
>
|
||||||
|
<Plus className="h-4 w-4 mr-2" />
|
||||||
|
Add Variable
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
{description && (
|
||||||
|
<p className="text-sm text-muted-foreground">{description}</p>
|
||||||
|
)}
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
Environment variables that will be passed to the backend process
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
export default EnvironmentVariablesInput
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
import React from 'react'
|
|
||||||
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
|
|
||||||
import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
|
|
||||||
import { getFieldType } from '@/schemas/instanceOptions'
|
|
||||||
import TextInput from '@/components/form/TextInput'
|
|
||||||
import NumberInput from '@/components/form/NumberInput'
|
|
||||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
|
||||||
import SelectInput from '@/components/form/SelectInput'
|
|
||||||
|
|
||||||
interface BasicInstanceFieldsProps {
|
|
||||||
formData: CreateInstanceOptions
|
|
||||||
onChange: (key: keyof CreateInstanceOptions, value: any) => void
|
|
||||||
}
|
|
||||||
|
|
||||||
const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
|
|
||||||
formData,
|
|
||||||
onChange
|
|
||||||
}) => {
|
|
||||||
const basicFields = getBasicFields()
|
|
||||||
|
|
||||||
const renderField = (fieldKey: keyof CreateInstanceOptions) => {
|
|
||||||
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
|
|
||||||
const fieldType = getFieldType(fieldKey)
|
|
||||||
|
|
||||||
// Special handling for backend_type field
|
|
||||||
if (fieldKey === 'backend_type') {
|
|
||||||
return (
|
|
||||||
<SelectInput
|
|
||||||
key={fieldKey}
|
|
||||||
id={fieldKey}
|
|
||||||
label={config.label}
|
|
||||||
value={formData[fieldKey] || BackendType.LLAMA_CPP}
|
|
||||||
onChange={(value) => onChange(fieldKey, value)}
|
|
||||||
options={[
|
|
||||||
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
|
|
||||||
{ value: BackendType.MLX_LM, label: 'MLX LM' },
|
|
||||||
{ value: BackendType.VLLM, label: 'vLLM' }
|
|
||||||
]}
|
|
||||||
description={config.description}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Render based on field type
|
|
||||||
switch (fieldType) {
|
|
||||||
case 'boolean':
|
|
||||||
return (
|
|
||||||
<CheckboxInput
|
|
||||||
key={fieldKey}
|
|
||||||
id={fieldKey}
|
|
||||||
label={config.label}
|
|
||||||
value={formData[fieldKey] as boolean | undefined}
|
|
||||||
onChange={(value) => onChange(fieldKey, value)}
|
|
||||||
description={config.description}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
|
|
||||||
case 'number':
|
|
||||||
return (
|
|
||||||
<NumberInput
|
|
||||||
key={fieldKey}
|
|
||||||
id={fieldKey}
|
|
||||||
label={config.label}
|
|
||||||
value={formData[fieldKey] as number | undefined}
|
|
||||||
onChange={(value) => onChange(fieldKey, value)}
|
|
||||||
placeholder={config.placeholder}
|
|
||||||
description={config.description}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
|
|
||||||
default:
|
|
||||||
return (
|
|
||||||
<TextInput
|
|
||||||
key={fieldKey}
|
|
||||||
id={fieldKey}
|
|
||||||
label={config.label}
|
|
||||||
value={formData[fieldKey] as string | number | undefined}
|
|
||||||
onChange={(value) => onChange(fieldKey, value)}
|
|
||||||
placeholder={config.placeholder}
|
|
||||||
description={config.description}
|
|
||||||
/>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter out auto restart fields and backend_options (handled separately)
|
|
||||||
const fieldsToRender = basicFields.filter(
|
|
||||||
fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
|
|
||||||
)
|
|
||||||
|
|
||||||
return (
|
|
||||||
<div className="space-y-4">
|
|
||||||
<h3 className="text-lg font-medium">Basic Configuration</h3>
|
|
||||||
{fieldsToRender.map(renderField)}
|
|
||||||
</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
export default BasicInstanceFields
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import React from 'react'
|
import React, { useState, useEffect } from 'react'
|
||||||
import type { CreateInstanceOptions } from '@/types/instance'
|
import type { CreateInstanceOptions } from '@/types/instance'
|
||||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||||
import { Label } from '@/components/ui/label'
|
import { Label } from '@/components/ui/label'
|
||||||
@@ -6,6 +6,9 @@ import { Input } from '@/components/ui/input'
|
|||||||
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
|
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
|
||||||
import NumberInput from '@/components/form/NumberInput'
|
import NumberInput from '@/components/form/NumberInput'
|
||||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
import CheckboxInput from '@/components/form/CheckboxInput'
|
||||||
|
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
|
||||||
|
import SelectInput from '@/components/form/SelectInput'
|
||||||
|
import { nodesApi, type NodesMap } from '@/lib/api'
|
||||||
|
|
||||||
interface InstanceSettingsCardProps {
|
interface InstanceSettingsCardProps {
|
||||||
instanceName: string
|
instanceName: string
|
||||||
@@ -24,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
onNameChange,
|
onNameChange,
|
||||||
onChange
|
onChange
|
||||||
}) => {
|
}) => {
|
||||||
|
const [nodes, setNodes] = useState<NodesMap>({})
|
||||||
|
const [loadingNodes, setLoadingNodes] = useState(true)
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const fetchNodes = async () => {
|
||||||
|
try {
|
||||||
|
const fetchedNodes = await nodesApi.list()
|
||||||
|
setNodes(fetchedNodes)
|
||||||
|
|
||||||
|
// Auto-select first node if none selected
|
||||||
|
const nodeNames = Object.keys(fetchedNodes)
|
||||||
|
if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
|
||||||
|
onChange('nodes', [nodeNames[0]])
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to fetch nodes:', error)
|
||||||
|
} finally {
|
||||||
|
setLoadingNodes(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fetchNodes()
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
const nodeOptions = Object.keys(nodes).map(nodeName => ({
|
||||||
|
value: nodeName,
|
||||||
|
label: nodeName
|
||||||
|
}))
|
||||||
|
|
||||||
|
const handleNodeChange = (value: string | undefined) => {
|
||||||
|
if (value) {
|
||||||
|
onChange('nodes', [value])
|
||||||
|
} else {
|
||||||
|
onChange('nodes', undefined)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Card>
|
<Card>
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
@@ -49,6 +92,19 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Node Selection */}
|
||||||
|
{!loadingNodes && Object.keys(nodes).length > 0 && (
|
||||||
|
<SelectInput
|
||||||
|
id="node"
|
||||||
|
label="Node"
|
||||||
|
value={selectedNode}
|
||||||
|
onChange={handleNodeChange}
|
||||||
|
options={nodeOptions}
|
||||||
|
description={isEditing ? "Node cannot be changed after instance creation" : "Select the node where the instance will run"}
|
||||||
|
disabled={isEditing}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Auto Restart Configuration */}
|
{/* Auto Restart Configuration */}
|
||||||
<AutoRestartConfiguration
|
<AutoRestartConfiguration
|
||||||
formData={formData}
|
formData={formData}
|
||||||
@@ -75,6 +131,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
onChange={(value) => onChange('on_demand_start', value)}
|
onChange={(value) => onChange('on_demand_start', value)}
|
||||||
description="Start instance only when needed"
|
description="Start instance only when needed"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<EnvironmentVariablesInput
|
||||||
|
id="environment"
|
||||||
|
label="Environment Variables"
|
||||||
|
value={formData.environment}
|
||||||
|
onChange={(value) => onChange('environment', value)}
|
||||||
|
description="Custom environment variables for the instance"
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</Card>
|
</Card>
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
|
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
|
||||||
|
|
||||||
interface AuthContextState {
|
interface AuthContextState {
|
||||||
isAuthenticated: boolean
|
isAuthenticated: boolean
|
||||||
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
|
|||||||
// Validate API key by making a test request
|
// Validate API key by making a test request
|
||||||
const validateApiKey = async (key: string): Promise<boolean> => {
|
const validateApiKey = async (key: string): Promise<boolean> => {
|
||||||
try {
|
try {
|
||||||
const response = await fetch('/api/v1/instances', {
|
const response = await fetch(document.baseURI + 'api/v1/instances', {
|
||||||
headers: {
|
headers: {
|
||||||
'Authorization': `Bearer ${key}`,
|
'Authorization': `Bearer ${key}`,
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { describe, it, expect, vi, beforeEach } from 'vitest'
|
|
||||||
import { instancesApi } from '@/lib/api'
|
import { instancesApi } from '@/lib/api'
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
// Mock fetch globally
|
// Mock fetch globally
|
||||||
const mockFetch = vi.fn()
|
const mockFetch = vi.fn()
|
||||||
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
it('converts HTTP errors to meaningful messages', async () => {
|
it('converts HTTP errors to meaningful messages', async () => {
|
||||||
mockFetch.mockResolvedValue({
|
const mockResponse = {
|
||||||
ok: false,
|
ok: false,
|
||||||
status: 409,
|
status: 409,
|
||||||
text: () => Promise.resolve('Instance already exists')
|
text: () => Promise.resolve('Instance already exists'),
|
||||||
})
|
clone: function() { return this }
|
||||||
|
}
|
||||||
|
mockFetch.mockResolvedValue(mockResponse)
|
||||||
|
|
||||||
await expect(instancesApi.create('existing', {}))
|
await expect(instancesApi.create('existing', {}))
|
||||||
.rejects
|
.rejects
|
||||||
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
it('handles empty error responses gracefully', async () => {
|
it('handles empty error responses gracefully', async () => {
|
||||||
mockFetch.mockResolvedValue({
|
const mockResponse = {
|
||||||
ok: false,
|
ok: false,
|
||||||
status: 500,
|
status: 500,
|
||||||
text: () => Promise.resolve('')
|
text: () => Promise.resolve(''),
|
||||||
})
|
clone: function() { return this }
|
||||||
|
}
|
||||||
|
mockFetch.mockResolvedValue(mockResponse)
|
||||||
|
|
||||||
await expect(instancesApi.list())
|
await expect(instancesApi.list())
|
||||||
.rejects
|
.rejects
|
||||||
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
|
|||||||
await instancesApi.getLogs('test-instance', 100)
|
await instancesApi.getLogs('test-instance', 100)
|
||||||
|
|
||||||
expect(mockFetch).toHaveBeenCalledWith(
|
expect(mockFetch).toHaveBeenCalledWith(
|
||||||
'/api/v1/instances/test-instance/logs?lines=100',
|
expect.stringMatching(
|
||||||
|
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
|
||||||
|
),
|
||||||
expect.any(Object)
|
expect.any(Object)
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
import type { CreateInstanceOptions, Instance } from "@/types/instance";
|
import type { CreateInstanceOptions, Instance } from "@/types/instance";
|
||||||
import { handleApiError } from "./errorUtils";
|
import { handleApiError } from "./errorUtils";
|
||||||
|
|
||||||
const API_BASE = "/api/v1";
|
// Adding baseURI as a prefix to support being served behind a subpath
|
||||||
|
// e.g. when llmamctl's `/` is served behind a reverse proxy at `/proxy/...`
|
||||||
|
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
|
||||||
|
export const API_BASE = document.baseURI + "api/v1";
|
||||||
|
|
||||||
// Base API call function with error handling
|
// Base API call function with error handling
|
||||||
async function apiCall<T>(
|
async function apiCall<T>(
|
||||||
@@ -46,12 +49,9 @@ async function apiCall<T>(
|
|||||||
} else {
|
} else {
|
||||||
// Handle empty responses for JSON endpoints
|
// Handle empty responses for JSON endpoints
|
||||||
const contentLength = response.headers.get('content-length');
|
const contentLength = response.headers.get('content-length');
|
||||||
if (contentLength === '0' || contentLength === null) {
|
if (contentLength === '0') {
|
||||||
const text = await response.text();
|
|
||||||
if (text.trim() === '') {
|
|
||||||
return {} as T; // Return empty object for empty JSON responses
|
return {} as T; // Return empty object for empty JSON responses
|
||||||
}
|
}
|
||||||
}
|
|
||||||
const data = await response.json() as T;
|
const data = await response.json() as T;
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
@@ -103,6 +103,22 @@ export const backendsApi = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Node API types
|
||||||
|
export interface NodeResponse {
|
||||||
|
address: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NodesMap = Record<string, NodeResponse>;
|
||||||
|
|
||||||
|
// Node API functions
|
||||||
|
export const nodesApi = {
|
||||||
|
// GET /nodes - returns map of node name to NodeResponse
|
||||||
|
list: () => apiCall<NodesMap>("/nodes"),
|
||||||
|
|
||||||
|
// GET /nodes/{name}
|
||||||
|
get: (name: string) => apiCall<NodeResponse>(`/nodes/${name}`),
|
||||||
|
};
|
||||||
|
|
||||||
// Instance API functions
|
// Instance API functions
|
||||||
export const instancesApi = {
|
export const instancesApi = {
|
||||||
// GET /instances
|
// GET /instances
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
const errorMessage = await parseErrorResponse(response)
|
// Clone the response before reading to avoid consuming the body stream
|
||||||
|
const errorMessage = await parseErrorResponse(response.clone())
|
||||||
throw new Error(errorMessage)
|
throw new Error(errorMessage)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,12 +1,10 @@
|
|||||||
import {
|
import {
|
||||||
type CreateInstanceOptions,
|
|
||||||
type LlamaCppBackendOptions,
|
type LlamaCppBackendOptions,
|
||||||
type MlxBackendOptions,
|
type MlxBackendOptions,
|
||||||
type VllmBackendOptions,
|
type VllmBackendOptions,
|
||||||
LlamaCppBackendOptionsSchema,
|
LlamaCppBackendOptionsSchema,
|
||||||
MlxBackendOptionsSchema,
|
MlxBackendOptionsSchema,
|
||||||
VllmBackendOptionsSchema,
|
VllmBackendOptionsSchema,
|
||||||
getAllFieldKeys,
|
|
||||||
getAllLlamaCppFieldKeys,
|
getAllLlamaCppFieldKeys,
|
||||||
getAllMlxFieldKeys,
|
getAllMlxFieldKeys,
|
||||||
getAllVllmFieldKeys,
|
getAllVllmFieldKeys,
|
||||||
@@ -15,41 +13,6 @@ import {
|
|||||||
getVllmFieldType
|
getVllmFieldType
|
||||||
} from '@/schemas/instanceOptions'
|
} from '@/schemas/instanceOptions'
|
||||||
|
|
||||||
// Instance-level basic fields (not backend-specific)
|
|
||||||
export const basicFieldsConfig: Record<string, {
|
|
||||||
label: string
|
|
||||||
description?: string
|
|
||||||
placeholder?: string
|
|
||||||
}> = {
|
|
||||||
auto_restart: {
|
|
||||||
label: 'Auto Restart',
|
|
||||||
description: 'Automatically restart the instance on failure'
|
|
||||||
},
|
|
||||||
max_restarts: {
|
|
||||||
label: 'Max Restarts',
|
|
||||||
placeholder: '3',
|
|
||||||
description: 'Maximum number of restart attempts (0 = unlimited)'
|
|
||||||
},
|
|
||||||
restart_delay: {
|
|
||||||
label: 'Restart Delay (seconds)',
|
|
||||||
placeholder: '5',
|
|
||||||
description: 'Delay in seconds before attempting restart'
|
|
||||||
},
|
|
||||||
idle_timeout: {
|
|
||||||
label: 'Idle Timeout (minutes)',
|
|
||||||
placeholder: '60',
|
|
||||||
description: 'Time in minutes before instance is considered idle and stopped'
|
|
||||||
},
|
|
||||||
on_demand_start: {
|
|
||||||
label: 'On-Demand Start',
|
|
||||||
description: 'Start instance upon receiving OpenAI-compatible API request'
|
|
||||||
},
|
|
||||||
backend_type: {
|
|
||||||
label: 'Backend Type',
|
|
||||||
description: 'Type of backend to use for this instance'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaCpp backend-specific basic fields
|
// LlamaCpp backend-specific basic fields
|
||||||
const basicLlamaCppFieldsConfig: Record<string, {
|
const basicLlamaCppFieldsConfig: Record<string, {
|
||||||
label: string
|
label: string
|
||||||
@@ -152,18 +115,6 @@ const backendFieldGetters = {
|
|||||||
llama_cpp: getAllLlamaCppFieldKeys,
|
llama_cpp: getAllLlamaCppFieldKeys,
|
||||||
} as const
|
} as const
|
||||||
|
|
||||||
function isBasicField(key: keyof CreateInstanceOptions): boolean {
|
|
||||||
return key in basicFieldsConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
|
|
||||||
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
|
|
||||||
return getAllFieldKeys().filter(key => !isBasicField(key))
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getBasicBackendFields(backendType?: string): string[] {
|
export function getBasicBackendFields(backendType?: string): string[] {
|
||||||
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
|
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
|
||||||
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
|
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
|
||||||
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
|
|||||||
return 'text'
|
return 'text'
|
||||||
}
|
}
|
||||||
|
|
||||||
// Re-export the Zod-based functions
|
|
||||||
export { getFieldType } from '@/schemas/instanceOptions'
|
|
||||||
@@ -33,9 +33,15 @@ export const CreateInstanceOptionsSchema = z.object({
|
|||||||
idle_timeout: z.number().optional(),
|
idle_timeout: z.number().optional(),
|
||||||
on_demand_start: z.boolean().optional(),
|
on_demand_start: z.boolean().optional(),
|
||||||
|
|
||||||
|
// Environment variables
|
||||||
|
environment: z.record(z.string(), z.string()).optional(),
|
||||||
|
|
||||||
// Backend configuration
|
// Backend configuration
|
||||||
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
||||||
backend_options: BackendOptionsSchema.optional(),
|
backend_options: BackendOptionsSchema.optional(),
|
||||||
|
|
||||||
|
// Node configuration
|
||||||
|
nodes: z.array(z.string()).optional(),
|
||||||
})
|
})
|
||||||
|
|
||||||
// Re-export types and schemas from backend files
|
// Re-export types and schemas from backend files
|
||||||
@@ -75,5 +81,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
|
|||||||
if (innerSchema instanceof z.ZodNumber) return 'number'
|
if (innerSchema instanceof z.ZodNumber) return 'number'
|
||||||
if (innerSchema instanceof z.ZodArray) return 'array'
|
if (innerSchema instanceof z.ZodArray) return 'array'
|
||||||
if (innerSchema instanceof z.ZodObject) return 'object'
|
if (innerSchema instanceof z.ZodObject) return 'object'
|
||||||
|
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
|
||||||
return 'text' // ZodString and others default to text
|
return 'text' // ZodString and others default to text
|
||||||
}
|
}
|
||||||
@@ -21,4 +21,6 @@ export default defineConfig({
|
|||||||
setupFiles: ['./src/test/setup.ts'],
|
setupFiles: ['./src/test/setup.ts'],
|
||||||
css: true,
|
css: true,
|
||||||
},
|
},
|
||||||
|
// ensures relative asset paths to support being served behind a subpath
|
||||||
|
base: "./"
|
||||||
})
|
})
|
||||||
Reference in New Issue
Block a user