llamactl — mirror of https://github.com/lordmathis/llamactl.git
Compare commits (51 commits)
Commits: a6e3cb4a9b, 9181c3d7bc, 1939b45312, 8265a94bf7, 4bc9362f7a, ddb54763f6, 496ab3aa5d, 287a5e0817, 7b4adfa0cd, 651c8b9b2c, 7194e1fdd1, 492c3ff270, 00a3cba717, eb1d4ab55f, a9e3801eae, 1aaab96cec, 78eda77e44, d70bb634cd, 41eaebc927, c45fa13206, 5e3a28398d, c734bcae4a, e4e7a82294, ccffbca6b2, 902be409d5, eb9599f26a, ebf8dfdeab, f15c0840c4, e702bcb694, 4895fbff15, 282fe67355, 96a36e1119, 759fc58326, afef3d0180, a87652937f, 7bde12db47, e2b64620b5, 3ba62af01a, 0150429e82, 2ecf096024, 5aed01b68f, 3f9caff33b, 169254c61a, 8154b8d0ab, a26d853ad5, 6203b64045, 8d9c808be1, 161cd213c5, d6e84f0527, 0846350d41, dacaca8594
**.github/workflows/release.yaml** (vendored) — 2 changes

```diff
@@ -29,6 +29,8 @@ jobs:
           npm ci
 
       - name: Build Web UI
+        env:
+          VITE_APP_VERSION: ${{ github.ref_name }}
         run: |
           cd webui
           npm run build
```
**CONTRIBUTING.md** — new file, 138 lines (`@@ -0,0 +1,138 @@`)

````markdown
# Contributing to Llamactl

Thank you for considering contributing to Llamactl! This document outlines the development setup and contribution process.

## Development Setup

### Prerequisites

- Go 1.24 or later
- Node.js 22 or later
- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp))

### Getting Started

1. **Clone the repository**
   ```bash
   git clone https://github.com/lordmathis/llamactl.git
   cd llamactl
   ```

2. **Install dependencies**
   ```bash
   # Go dependencies
   go mod download

   # Frontend dependencies
   cd webui && npm ci && cd ..
   ```

3. **Run for development**
   ```bash
   # Start backend server
   go run ./cmd/server
   ```
   Server will be available at `http://localhost:8080`

   ```bash
   # In a separate terminal, start frontend dev server
   cd webui && npm run dev
   ```
   Development UI will be available at `http://localhost:5173`

4. **Common development commands**
   ```bash
   # Backend
   go test ./... -v              # Run tests
   go test -race ./... -v        # Run with race detector
   go fmt ./... && go vet ./...  # Format and vet code

   # Frontend (run from webui/ directory)
   npm run test:run              # Run tests once
   npm run test                  # Run tests in watch mode
   npm run type-check            # TypeScript type checking
   npm run lint:fix              # Lint and fix issues
   ```

## Before Submitting a Pull Request

### Required Checks

All the following must pass:

1. **Backend**
   ```bash
   go test ./... -v
   go test -race ./... -v
   go fmt ./... && go vet ./...
   go build -o llamactl ./cmd/server
   ```

2. **Frontend**
   ```bash
   cd webui
   npm run test:run
   npm run type-check
   npm run build
   ```

### API Documentation

If changes affect API endpoints, update Swagger documentation:

```bash
# Install swag if needed
go install github.com/swaggo/swag/cmd/swag@latest

# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
```

## Pull Request Guidelines

### Pull Request Titles

Use this format for pull request titles:

- `feat:` for new features
- `fix:` for bug fixes
- `docs:` for documentation changes
- `test:` for test additions or modifications
- `refactor:` for code refactoring

### Submission Process

1. Create a feature branch from `main`
2. Make changes following the coding standards
3. Run all required checks listed above
4. Update documentation if necessary
5. Submit pull request with:
   - Clear description of changes
   - Reference to any related issues
   - Screenshots for UI changes

## Code Style and Testing

### Testing Strategy

- Backend tests use Go's built-in testing framework
- Frontend tests use Vitest and React Testing Library
- Run tests frequently during development
- Add tests for new features and bug fixes

### Go

- Follow standard Go formatting (`go fmt`)
- Use meaningful variable and function names
- Add comments for exported functions and types
- Handle errors appropriately

### TypeScript/React

- Use TypeScript strictly (avoid `any` when possible)
- Follow React hooks best practices
- Use meaningful component and variable names
- Prefer functional components over class components

## Getting Help

- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
- Review the [README.md](README.md) for usage documentation
- Look at existing code for patterns and conventions

Thank you for contributing to Llamactl!
````
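The `swag init` step above parses annotation comments on the HTTP handlers. As a reference for contributors, here is a minimal, self-contained sketch of the swaggo comment format; the handler itself is hypothetical, and the real routes and response types live in `pkg/server/handlers.go` and the generated `apidocs/` package:

```go
package server

import "net/http"

// ListInstances godoc
// @Summary      List all instances
// @Description  Returns a list of all llama-server instances
// @Tags         instances
// @Security     ApiKeyAuth
// @Produce      json
// @Success      200  {array}   instance.Process  "List of instances"
// @Failure      500  {string}  string            "Internal Server Error"
// @Router       /instances [get]
func ListInstances(w http.ResponseWriter, r *http.Request) {
	// A real handler would serialize the instance list here; this stub
	// only illustrates the annotation block that `swag init` parses
	// into the generated apidocs/ package.
	w.WriteHeader(http.StatusOK)
}
```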
**README.md** — 340 changes

````diff
@@ -2,90 +2,140 @@
 
  
 
-A control server for managing multiple Llama Server instances with a web-based dashboard.
+**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
 
-## Features
+## Why llamactl?
 
-- **Multi-instance Management**: Create, start, stop, restart, and delete multiple llama-server instances
-- **Web Dashboard**: Modern React-based UI for managing instances
-- **Auto-restart**: Configurable automatic restart on instance failure
-- **Instance Monitoring**: Real-time health checks and status monitoring
-- **Log Management**: View, search, and download instance logs
-- **Data Persistence**: Persistent storage of instance state.
-- **REST API**: Full API for programmatic control
-- **OpenAI Compatible**: Route requests to instances by instance name
-- **Configuration Management**: Comprehensive llama-server parameter support
-- **System Information**: View llama-server version, devices, and help
-- **API Key Authentication**: Secure access with separate management and inference keys
+🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
+🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
+🔐 **API Key Authentication**: Separate keys for management vs inference access
+📊 **Instance Monitoring**: Health checks, auto-restart, log management
+⏳ **Idle Timeout Management**: Automatically stop idle instances after a configurable period
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
+💾 **State Persistence**: Ensure instances remain intact across server restarts
 
-## Prerequisites
+
 
-This project requires `llama-server` from llama.cpp to be installed and available in your PATH.
+**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
+
+**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
+
+**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
 
-**Install llama.cpp:**
-Follow the installation instructions at https://github.com/ggml-org/llama.cpp
+## Quick Start
+
+```bash
+# 1. Install llama-server (one-time setup)
+# See: https://github.com/ggml-org/llama.cpp#quick-start
+
+# 2. Download and run llamactl
+LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
+curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
+sudo mv llamactl /usr/local/bin/
+
+# 3. Start the server
+llamactl
+# Access dashboard at http://localhost:8080
+```
+
+## Usage
+
+### Create and manage instances via web dashboard:
+1. Open http://localhost:8080
+2. Click "Create Instance"
+3. Set model path and GPU layers
+4. Start or stop the instance
+
+### Or use the REST API:
+```bash
+# Create instance
+curl -X POST localhost:8080/api/v1/instances/my-7b-model \
+  -H "Authorization: Bearer your-key" \
+  -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
+
+# Use with OpenAI SDK
+curl -X POST localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-key" \
+  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
+```
 
 ## Installation
 
-### Download Prebuilt Binaries
+### Option 1: Download Binary (Recommended)
 
-The easiest way to install llamactl is to download a prebuilt binary from the [releases page](https://github.com/lordmathis/llamactl/releases).
-
-**Linux/macOS:**
 ```bash
-# Download the latest release for your platform
-curl -L https://github.com/lordmathis/llamactl/releases/latest/download/llamactl-$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep tag_name | cut -d '"' -f 4)-linux-amd64.tar.gz | tar -xz
-
-# Move to PATH
+# Linux/macOS - Get latest version and download
+LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
+curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 
-# Run the server
-llamactl
+# Or download manually from the releases page:
+# https://github.com/lordmathis/llamactl/releases/latest
+
+# Windows - Download from releases page
 ```
 
-**Manual Download:**
-1. Go to the [releases page](https://github.com/lordmathis/llamactl/releases)
-2. Download the appropriate archive for your platform
-3. Extract the archive and move the binary to a directory in your PATH
-
-### Build from Source
-
-If you prefer to build from source or need the latest development version:
-
-#### Build Requirements
-
-- Go 1.24 or later
-- Node.js 22 or later (for building the web UI)
-
-#### Building with Web UI
+### Option 2: Build from Source
+Requires Go 1.24+ and Node.js 22+
 
 ```bash
-# Clone the repository
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
-
-# Install Node.js dependencies
-cd webui
-npm ci
-
-# Build the web UI
-npm run build
-
-# Return to project root and build
-cd ..
+cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
+```
 
-# Run the server
-./llamactl
+## Prerequisites
+
+You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
+
+```bash
+# Quick install methods:
+# Homebrew (macOS)
+brew install llama.cpp
+
+# Or build from source - see llama.cpp docs
 ```
 
 ## Configuration
 
-llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
+llamactl works out of the box with sensible defaults.
 
-1. Hardcoded defaults
-2. Configuration file
-3. Environment variables
+```yaml
+server:
+  host: "0.0.0.0"         # Server host to bind to
+  port: 8080              # Server port to bind to
+  allowed_origins: ["*"]  # Allowed CORS origins (default: all)
+  enable_swagger: false   # Enable Swagger UI for API docs
+
+instances:
+  port_range: [8000, 9000]                        # Port range for instances
+  data_dir: ~/.local/share/llamactl               # Data directory (platform-specific, see below)
+  configs_dir: ~/.local/share/llamactl/instances  # Instance configs directory
+  logs_dir: ~/.local/share/llamactl/logs          # Logs directory
+  auto_create_dirs: true          # Auto-create data/config/logs dirs if missing
+  max_instances: -1               # Max instances (-1 = unlimited)
+  llama_executable: llama-server  # Path to llama-server executable
+  default_auto_restart: true      # Auto-restart new instances by default
+  default_max_restarts: 3         # Max restarts for new instances
+  default_restart_delay: 5        # Restart delay (seconds) for new instances
+  default_on_demand_start: true   # Default on-demand start setting
+  on_demand_start_timeout: 120    # Default on-demand start timeout in seconds
+  timeout_check_interval: 5       # Idle instance timeout check in minutes
+
+auth:
+  require_inference_auth: true    # Require auth for inference endpoints
+  inference_keys: []              # Keys for inference endpoints
+  require_management_auth: true   # Require auth for management endpoints
+  management_keys: []             # Keys for management endpoints
+```
+
+<details><summary><strong>Full Configuration Guide</strong></summary>
+
+llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
+
+```
+Defaults < Configuration file < Environment variables
+```
 
 ### Configuration Files
@@ -128,16 +178,19 @@ server:
 
 ```yaml
 instances:
   port_range: [8000, 9000]  # Port range for instances (default: [8000, 9000])
   data_dir: "~/.local/share/llamactl"  # Directory for all llamactl data (default varies by OS)
   configs_dir: "~/.local/share/llamactl/instances"  # Directory for instance configs (default: data_dir/instances)
   logs_dir: "~/.local/share/llamactl/logs"  # Directory for instance logs (default: data_dir/logs)
   auto_create_dirs: true  # Automatically create data/config/logs directories (default: true)
   max_instances: -1  # Maximum instances (-1 = unlimited)
   llama_executable: "llama-server"  # Path to llama-server executable
   default_auto_restart: true  # Default auto-restart setting
   default_max_restarts: 3  # Default maximum restart attempts
   default_restart_delay: 5  # Default restart delay in seconds
+  default_on_demand_start: true  # Default on-demand start setting
+  on_demand_start_timeout: 120  # Default on-demand start timeout in seconds
+  timeout_check_interval: 5  # Default instance timeout check interval in minutes
 ```
 
 **Environment Variables:**
@@ -151,6 +204,10 @@ instances:
 - `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
+- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
+- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
 
 #### Authentication Configuration
@@ -168,147 +225,8 @@ auth:
 - `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
 - `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
 
-### Example Configuration
+</details>
 
-```yaml
-server:
-  host: "0.0.0.0"
-  port: 8080
-
-instances:
-  port_range: [8001, 8100]
-  data_dir: "/var/lib/llamactl"
-  configs_dir: "/var/lib/llamactl/instances"
-  logs_dir: "/var/log/llamactl"
-  auto_create_dirs: true
-  max_instances: 10
-  llama_executable: "/usr/local/bin/llama-server"
-  default_auto_restart: true
-  default_max_restarts: 5
-  default_restart_delay: 10
-
-auth:
-  require_inference_auth: true
-  inference_keys: ["sk-inference-abc123"]
-  require_management_auth: true
-  management_keys: ["sk-management-xyz456"]
-```
-
-## Usage
-
-### Starting the Server
-
-```bash
-# Start with default configuration
-./llamactl
-
-# Start with custom config file
-LLAMACTL_CONFIG_PATH=/path/to/config.yaml ./llamactl
-
-# Start with environment variables
-LLAMACTL_PORT=9090 LLAMACTL_LOG_DIR=/custom/logs ./llamactl
-```
-
-### Authentication
-
-llamactl supports API Key authentication for both management and inference (OpenAI-compatible) endpoints. There are separate keys for management and inference APIs:
-
-- **Management keys** grant full access to instance management
-- **Inference keys** grant access to OpenAI-compatible endpoints
-- Management keys also work for inference endpoints (higher privilege)
-
-**How to Use:**
-Pass your API key in requests using one of:
-- `Authorization: Bearer <key>` header
-- `X-API-Key: <key>` header
-- `api_key=<key>` query parameter
-
-**Auto-generated keys**: If no keys are set and authentication is required, a key will be generated and printed to the terminal at startup. For production, set your own keys in config or environment variables.
-
-### Web Dashboard
-
-Open your browser and navigate to `http://localhost:8080` to access the web dashboard.
-
-### API Usage
-
-The REST API is available at `http://localhost:8080/api/v1`. See the Swagger documentation at `http://localhost:8080/swagger/` for complete API reference.
-
-#### Create an Instance
-
-```bash
-curl -X POST http://localhost:8080/api/v1/instances/my-instance \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-management-your-key" \
-  -d '{
-    "model": "/path/to/model.gguf",
-    "gpu_layers": 32,
-    "auto_restart": true
-  }'
-```
-
-#### List Instances
-
-```bash
-curl -H "Authorization: Bearer sk-management-your-key" \
-  http://localhost:8080/api/v1/instances
-```
-
-#### Start/Stop Instance
-
-```bash
-# Start
-curl -X POST \
-  -H "Authorization: Bearer sk-management-your-key" \
-  http://localhost:8080/api/v1/instances/my-instance/start
-
-# Stop
-curl -X POST \
-  -H "Authorization: Bearer sk-management-your-key" \
-  http://localhost:8080/api/v1/instances/my-instance/stop
-```
-
-### OpenAI Compatible Endpoints
-
-Route requests to instances by including the instance name as the model parameter:
-
-```bash
-curl -X POST http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer sk-inference-your-key" \
-  -d '{
-    "model": "my-instance",
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-## Development
-
-### Running Tests
-
-```bash
-# Go tests
-go test ./...
-
-# Web UI tests
-cd webui
-npm test
-```
-
-### Development Server
-
-```bash
-# Start Go server in development mode
-go run ./cmd/server
-
-# Start web UI development server (in another terminal)
-cd webui
-npm run dev
-```
-
-## API Documentation
-
-Interactive API documentation is available at `http://localhost:8080/swagger/` when the server is running.
-
 ## License
 
-This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+MIT License - see [LICENSE](LICENSE) file.
````
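For completeness, here is the same OpenAI-compatible call from the new Usage section, sketched in Go with only the standard library; the instance name `my-7b-model` and the API key are placeholders:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// The "model" field selects the llamactl instance by name;
	// llamactl proxies the request to that instance's llama-server.
	body := []byte(`{
		"model": "my-7b-model",
		"messages": [{"role": "user", "content": "Hello!"}]
	}`)

	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-inference-key") // placeholder key

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```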
**apidocs/docs.go** (generated by swaggo/swag; package renamed `docs` → `apidocs`, schema definitions renamed to match the new package layout)

```diff
@@ -1,5 +1,5 @@
-// Package docs Code generated by swaggo/swag. DO NOT EDIT
-package docs
+// Package apidocs Code generated by swaggo/swag. DO NOT EDIT
+package apidocs
 
 import "github.com/swaggo/swag"
 
@@ -37,7 +37,7 @@ const docTemplate = `{
                         "schema": {
                             "type": "array",
                             "items": {
-                                "$ref": "#/definitions/llamactl.Instance"
+                                "$ref": "#/definitions/instance.Process"
                             }
                         }
                     },
@@ -75,7 +75,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -120,7 +120,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                         }
                     }
                 ],
@@ -128,7 +128,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Updated instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -173,7 +173,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                         }
                     }
                 ],
@@ -181,7 +181,7 @@ const docTemplate = `{
                     "201": {
                         "description": "Created instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -401,7 +401,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Restarted instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -444,7 +444,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Started instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -487,7 +487,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Stopped instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -639,7 +639,35 @@ const docTemplate = `{
                     "200": {
                         "description": "List of OpenAI-compatible instances",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                            "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/version": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns the version of the llamactl command",
+                "tags": [
+                    "version"
+                ],
+                "summary": "Get llamactl version",
+                "responses": {
+                    "200": {
+                        "description": "Version information",
+                        "schema": {
+                            "type": "string"
                         }
                     },
                     "500": {
@@ -653,7 +681,7 @@ const docTemplate = `{
         }
     },
     "definitions": {
-        "llamactl.CreateInstanceOptions": {
+        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -751,7 +779,6 @@ const docTemplate = `{
                    "type": "string"
                },
                "draft_max": {
-                    "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -955,7 +982,7 @@ const docTemplate = `{
                    "type": "boolean"
                },
                "no_context_shift": {
-                    "description": "Server/Example-specific params",
+                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -1027,10 +1054,10 @@ const docTemplate = `{
                "presence_penalty": {
                    "type": "number"
                },
-                "priority": {
+                "prio": {
                    "type": "integer"
                },
-                "priority_batch": {
+                "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -1101,7 +1128,7 @@ const docTemplate = `{
                "ssl_key_file": {
                    "type": "string"
                },
-                "temperature": {
+                "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1167,7 +1194,7 @@ const docTemplate = `{
                }
            }
        },
-        "llamactl.Instance": {
+        "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1183,7 +1210,7 @@ const docTemplate = `{
                }
            }
        },
-        "llamactl.OpenAIInstance": {
+        "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1200,13 +1227,13 @@ const docTemplate = `{
                }
            }
        },
-        "llamactl.OpenAIListInstancesResponse": {
+        "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/llamactl.OpenAIInstance"
+                        "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {
```
**apidocs/swagger.json** (generated; same renames and the new `/version` endpoint as in docs.go)

```diff
@@ -30,7 +30,7 @@
                         "schema": {
                             "type": "array",
                             "items": {
-                                "$ref": "#/definitions/llamactl.Instance"
+                                "$ref": "#/definitions/instance.Process"
                             }
                         }
                     },
@@ -68,7 +68,7 @@
                     "200": {
                         "description": "Instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -113,7 +113,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                         }
                     }
                 ],
@@ -121,7 +121,7 @@
                     "200": {
                         "description": "Updated instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -166,7 +166,7 @@
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                         }
                     }
                 ],
@@ -174,7 +174,7 @@
                     "201": {
                         "description": "Created instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -394,7 +394,7 @@
                     "200": {
                         "description": "Restarted instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -437,7 +437,7 @@
                     "200": {
                         "description": "Started instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -480,7 +480,7 @@
                     "200": {
                         "description": "Stopped instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -632,7 +632,35 @@
                     "200": {
                         "description": "List of OpenAI-compatible instances",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                            "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/version": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns the version of the llamactl command",
+                "tags": [
+                    "version"
+                ],
+                "summary": "Get llamactl version",
+                "responses": {
+                    "200": {
+                        "description": "Version information",
+                        "schema": {
+                            "type": "string"
                         }
                     },
                     "500": {
@@ -646,7 +674,7 @@
         }
     },
     "definitions": {
-        "llamactl.CreateInstanceOptions": {
+        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -744,7 +772,6 @@
                    "type": "string"
                },
                "draft_max": {
-                    "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -948,7 +975,7 @@
                    "type": "boolean"
                },
                "no_context_shift": {
-                    "description": "Server/Example-specific params",
+                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -1020,10 +1047,10 @@
                "presence_penalty": {
                    "type": "number"
                },
-                "priority": {
+                "prio": {
                    "type": "integer"
                },
-                "priority_batch": {
+                "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -1094,7 +1121,7 @@
                "ssl_key_file": {
                    "type": "string"
                },
-                "temperature": {
+                "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1160,7 +1187,7 @@
                }
            }
        },
-        "llamactl.Instance": {
+        "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1176,7 +1203,7 @@
                }
            }
        },
-        "llamactl.OpenAIInstance": {
+        "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1193,13 +1220,13 @@
                }
            }
        },
-        "llamactl.OpenAIListInstancesResponse": {
+        "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/llamactl.OpenAIInstance"
+                        "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {
```
**apidocs/swagger.yaml** (generated; same renames and the new `/version` endpoint)

```diff
@@ -1,6 +1,6 @@
 basePath: /api/v1
 definitions:
-  llamactl.CreateInstanceOptions:
+  instance.CreateInstanceOptions:
     properties:
       alias:
         type: string
@@ -66,7 +66,6 @@ definitions:
       device_draft:
         type: string
       draft_max:
-        description: Speculative decoding params
         type: integer
       draft_min:
         type: integer
@@ -203,7 +202,7 @@ definitions:
       no_cont_batching:
         type: boolean
       no_context_shift:
-        description: Server/Example-specific params
+        description: Example-specific params
         type: boolean
       no_escape:
         type: boolean
@@ -251,9 +250,9 @@ definitions:
         type: integer
       presence_penalty:
         type: number
-      priority:
+      prio:
         type: integer
-      priority_batch:
+      prio_batch:
         type: integer
       props:
         type: boolean
@@ -301,7 +300,7 @@ definitions:
         type: string
       ssl_key_file:
         type: string
-      temperature:
+      temp:
         type: number
       tensor_split:
         type: string
@@ -345,7 +344,7 @@ definitions:
       yarn_orig_ctx:
         type: integer
     type: object
-  llamactl.Instance:
+  instance.Process:
     properties:
       created:
         description: Creation time
@@ -356,7 +355,7 @@ definitions:
         description: Status
         type: boolean
     type: object
-  llamactl.OpenAIInstance:
+  server.OpenAIInstance:
     properties:
       created:
         type: integer
@@ -367,11 +366,11 @@ definitions:
       owned_by:
         type: string
     type: object
-  llamactl.OpenAIListInstancesResponse:
+  server.OpenAIListInstancesResponse:
     properties:
       data:
         items:
-          $ref: '#/definitions/llamactl.OpenAIInstance'
+          $ref: '#/definitions/server.OpenAIInstance'
         type: array
       object:
         type: string
@@ -393,7 +392,7 @@ paths:
           description: List of instances
           schema:
             items:
-              $ref: '#/definitions/llamactl.Instance'
+              $ref: '#/definitions/instance.Process'
             type: array
         "500":
           description: Internal Server Error
@@ -441,7 +440,7 @@ paths:
         "200":
           description: Instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -470,12 +469,12 @@ paths:
         name: options
         required: true
         schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
       responses:
         "201":
           description: Created instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid request body
           schema:
@@ -504,12 +503,12 @@ paths:
         name: options
         required: true
         schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
       responses:
         "200":
           description: Updated instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -627,7 +626,7 @@ paths:
         "200":
           description: Restarted instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -654,7 +653,7 @@ paths:
         "200":
           description: Started instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -681,7 +680,7 @@ paths:
         "200":
           description: Stopped instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -777,7 +776,7 @@ paths:
         "200":
           description: List of OpenAI-compatible instances
           schema:
-            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
+            $ref: '#/definitions/server.OpenAIListInstancesResponse'
         "500":
           description: Internal Server Error
           schema:
@@ -787,4 +786,21 @@ paths:
       summary: List instances in OpenAI-compatible format
       tags:
       - openai
+  /version:
+    get:
+      description: Returns the version of the llamactl command
+      responses:
+        "200":
+          description: Version information
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: Get llamactl version
+      tags:
+      - version
 swagger: "2.0"
```
@@ -11,6 +11,11 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// version is set at build time using -ldflags "-X main.version=1.0.0"
|
||||||
|
var version string = "unknown"
|
||||||
|
var commitHash string = "unknown"
|
||||||
|
var buildTime string = "unknown"
|
||||||
|
|
||||||
// @title llamactl API
|
// @title llamactl API
|
||||||
// @version 1.0
|
// @version 1.0
|
||||||
// @description llamactl is a control server for managing Llama Server instances.
|
// @description llamactl is a control server for managing Llama Server instances.
|
||||||
@@ -19,6 +24,14 @@ import (
|
|||||||
// @basePath /api/v1
|
// @basePath /api/v1
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
|
// --version flag to print the version
|
||||||
|
if len(os.Args) > 1 && os.Args[1] == "--version" {
|
||||||
|
fmt.Printf("llamactl version: %s\n", version)
|
||||||
|
fmt.Printf("Commit hash: %s\n", commitHash)
|
||||||
|
fmt.Printf("Build time: %s\n", buildTime)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
|
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
|
||||||
cfg, err := config.LoadConfig(configPath)
|
cfg, err := config.LoadConfig(configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -26,6 +39,11 @@ func main() {
|
|||||||
fmt.Println("Using default configuration.")
|
fmt.Println("Using default configuration.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set version information
|
||||||
|
cfg.Version = version
|
||||||
|
cfg.CommitHash = commitHash
|
||||||
|
cfg.BuildTime = buildTime
|
||||||
|
|
||||||
// Create the data directory if it doesn't exist
|
// Create the data directory if it doesn't exist
|
||||||
if cfg.Instances.AutoCreateDirs {
|
if cfg.Instances.AutoCreateDirs {
|
||||||
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
|
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
|
||||||
**docs/images/screenshot.png** — new binary file (47 KiB, not shown)
@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
|
|||||||
CPUMask string `json:"cpu_mask,omitempty"`
|
CPUMask string `json:"cpu_mask,omitempty"`
|
||||||
CPURange string `json:"cpu_range,omitempty"`
|
CPURange string `json:"cpu_range,omitempty"`
|
||||||
CPUStrict int `json:"cpu_strict,omitempty"`
|
CPUStrict int `json:"cpu_strict,omitempty"`
|
||||||
Priority int `json:"priority,omitempty"`
|
Prio int `json:"prio,omitempty"`
|
||||||
Poll int `json:"poll,omitempty"`
|
Poll int `json:"poll,omitempty"`
|
||||||
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
|
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
|
||||||
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
|
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
|
||||||
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
|
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
|
||||||
PriorityBatch int `json:"priority_batch,omitempty"`
|
PrioBatch int `json:"prio_batch,omitempty"`
|
||||||
PollBatch int `json:"poll_batch,omitempty"`
|
PollBatch int `json:"poll_batch,omitempty"`
|
||||||
CtxSize int `json:"ctx_size,omitempty"`
|
CtxSize int `json:"ctx_size,omitempty"`
|
||||||
Predict int `json:"predict,omitempty"`
|
Predict int `json:"predict,omitempty"`
|
||||||
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
|
|||||||
Seed int `json:"seed,omitempty"`
|
Seed int `json:"seed,omitempty"`
|
||||||
SamplingSeq string `json:"sampling_seq,omitempty"`
|
SamplingSeq string `json:"sampling_seq,omitempty"`
|
||||||
IgnoreEOS bool `json:"ignore_eos,omitempty"`
|
IgnoreEOS bool `json:"ignore_eos,omitempty"`
|
||||||
Temperature float64 `json:"temperature,omitempty"`
|
Temperature float64 `json:"temp,omitempty"`
|
||||||
TopK int `json:"top_k,omitempty"`
|
TopK int `json:"top_k,omitempty"`
|
||||||
TopP float64 `json:"top_p,omitempty"`
|
TopP float64 `json:"top_p,omitempty"`
|
||||||
MinP float64 `json:"min_p,omitempty"`
|
MinP float64 `json:"min_p,omitempty"`
|
||||||
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
|
|||||||
JSONSchema string `json:"json_schema,omitempty"`
|
JSONSchema string `json:"json_schema,omitempty"`
|
||||||
JSONSchemaFile string `json:"json_schema_file,omitempty"`
|
JSONSchemaFile string `json:"json_schema_file,omitempty"`
|
||||||
|
|
||||||
// Server/Example-specific params
|
// Example-specific params
|
||||||
NoContextShift bool `json:"no_context_shift,omitempty"`
|
NoContextShift bool `json:"no_context_shift,omitempty"`
|
||||||
Special bool `json:"special,omitempty"`
|
Special bool `json:"special,omitempty"`
|
||||||
NoWarmup bool `json:"no_warmup,omitempty"`
|
NoWarmup bool `json:"no_warmup,omitempty"`
|
||||||
@@ -150,17 +150,15 @@ type LlamaServerOptions struct {
|
|||||||
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
|
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
|
||||||
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
|
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
|
||||||
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
|
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
|
||||||
|
DraftMax int `json:"draft_max,omitempty"`
|
||||||
// Speculative decoding params
|
DraftMin int `json:"draft_min,omitempty"`
|
||||||
DraftMax int `json:"draft_max,omitempty"`
|
DraftPMin float64 `json:"draft_p_min,omitempty"`
|
||||||
DraftMin int `json:"draft_min,omitempty"`
|
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
|
||||||
DraftPMin float64 `json:"draft_p_min,omitempty"`
|
DeviceDraft string `json:"device_draft,omitempty"`
|
||||||
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
|
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
|
||||||
DeviceDraft string `json:"device_draft,omitempty"`
|
ModelDraft string `json:"model_draft,omitempty"`
|
||||||
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
|
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
|
||||||
ModelDraft string `json:"model_draft,omitempty"`
|
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
|
||||||
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
|
|
||||||
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
|
|
||||||
|
|
||||||
// Audio/TTS params
|
// Audio/TTS params
|
||||||
ModelVocoder string `json:"model_vocoder,omitempty"`
|
ModelVocoder string `json:"model_vocoder,omitempty"`
|
||||||
@@ -199,62 +197,75 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
|
|||||||
|
|
||||||
// Handle alternative field names
|
// Handle alternative field names
|
||||||
fieldMappings := map[string]string{
|
fieldMappings := map[string]string{
|
||||||
// Official llama-server short forms from the documentation
|
// Common params
|
||||||
"t": "threads", // -t, --threads N
|
"t": "threads", // -t, --threads N
|
||||||
"tb": "threads_batch", // -tb, --threads-batch N
|
"tb": "threads_batch", // -tb, --threads-batch N
|
||||||
"C": "cpu_mask", // -C, --cpu-mask M
|
"C": "cpu_mask", // -C, --cpu-mask M
|
||||||
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
|
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
|
||||||
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
|
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
|
||||||
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
|
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
|
||||||
"c": "ctx_size", // -c, --ctx-size N
|
"c": "ctx_size", // -c, --ctx-size N
|
||||||
"n": "predict", // -n, --predict, --n-predict N
|
"n": "predict", // -n, --predict N
|
||||||
"b": "batch_size", // -b, --batch-size N
|
"n-predict": "predict", // --n-predict N
|
||||||
"ub": "ubatch_size", // -ub, --ubatch-size N
|
"b": "batch_size", // -b, --batch-size N
|
||||||
"fa": "flash_attn", // -fa, --flash-attn
|
"ub": "ubatch_size", // -ub, --ubatch-size N
|
||||||
"e": "escape", // -e, --escape
|
"fa": "flash_attn", // -fa, --flash-attn
|
||||||
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
|
"e": "escape", // -e, --escape
|
||||||
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
|
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
|
||||||
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
|
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
|
||||||
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
|
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
|
||||||
"dt": "defrag_thold", // -dt, --defrag-thold N
|
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
|
||||||
"np": "parallel", // -np, --parallel N
|
"dt": "defrag_thold", // -dt, --defrag-thold N
|
||||||
"dev": "device", // -dev, --device <dev1,dev2,..>
|
"np": "parallel", // -np, --parallel N
|
||||||
"ot": "override_tensor", // --override-tensor, -ot
|
"dev": "device", // -dev, --device <dev1,dev2,..>
|
||||||
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
|
"ot": "override_tensor", // --override-tensor, -ot
|
||||||
"sm": "split_mode", // -sm, --split-mode
|
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
|
||||||
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
|
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
|
||||||
"mg": "main_gpu", // -mg, --main-gpu INDEX
|
"sm": "split_mode", // -sm, --split-mode
|
||||||
"m": "model", // -m, --model FNAME
|
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
|
||||||
"mu": "model_url", // -mu, --model-url MODEL_URL
|
"mg": "main_gpu", // -mg, --main-gpu INDEX
|
||||||
"hf": "hf_repo", // -hf, -hfr, --hf-repo
|
"m": "model", // -m, --model FNAME
|
||||||
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
|
"mu": "model_url", // -mu, --model-url MODEL_URL
|
||||||
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
"hf": "hf_repo", // -hf, -hfr, --hf-repo
|
||||||
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
|
||||||
"hff": "hf_file", // -hff, --hf-file FILE
|
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||||
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||||
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
"hff": "hf_file", // -hff, --hf-file FILE
|
||||||
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
|
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||||
"hft": "hf_token", // -hft, --hf-token TOKEN
|
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||||
"v": "verbose", // -v, --verbose, --log-verbose
|
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
|
||||||
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
|
"hft": "hf_token", // -hft, --hf-token TOKEN
|
||||||
"s": "seed", // -s, --seed SEED
|
"v": "verbose", // -v, --verbose, --log-verbose
|
||||||
"temp": "temperature", // --temp N
|
"log-verbose": "verbose", // --log-verbose
|
||||||
"l": "logit_bias", // -l, --logit-bias
|
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
|
||||||
"j": "json_schema", // -j, --json-schema SCHEMA
|
"log-verbosity": "verbosity", // --log-verbosity N
|
||||||
"jf": "json_schema_file", // -jf, --json-schema-file FILE
|
|
||||||
"sp": "special", // -sp, --special
|
// Sampling params
|
||||||
"cb": "cont_batching", // -cb, --cont-batching
|
"s": "seed", // -s, --seed SEED
|
||||||
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
|
"l": "logit_bias", // -l, --logit-bias
|
||||||
"a": "alias", // -a, --alias STRING
|
"j": "json_schema", // -j, --json-schema SCHEMA
|
||||||
"to": "timeout", // -to, --timeout N
|
"jf": "json_schema_file", // -jf, --json-schema-file FILE
|
||||||
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
|
|
||||||
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
|
// Example-specific params
|
||||||
"devd": "device_draft", // -devd, --device-draft
|
"sp": "special", // -sp, --special
|
||||||
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
|
"cb": "cont_batching", // -cb, --cont-batching
|
||||||
"md": "model_draft", // -md, --model-draft FNAME
|
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
|
||||||
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
|
"a": "alias", // -a, --alias STRING
|
||||||
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
|
"embeddings": "embedding", // --embeddings
|
||||||
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
|
"rerank": "reranking", // --reranking
|
||||||
|
"to": "timeout", // -to, --timeout N
|
||||||
|
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
|
||||||
|
"draft": "draft-max", // -draft, --draft-max N
|
||||||
|
"draft-n": "draft-max", // --draft-n-max N
|
||||||
|
"draft-n-min": "draft_min", // --draft-n-min N
|
||||||
|
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
|
||||||
|
"devd": "device_draft", // -devd, --device-draft
|
||||||
|
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
|
||||||
|
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
|
||||||
|
"md": "model_draft", // -md, --model-draft FNAME
|
||||||
|
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
|
||||||
|
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
|
||||||
|
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process alternative field names
|
// Process alternative field names
|
||||||
|
|||||||
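A small sketch (not part of the diff) of what the alternative names buy: both the canonical JSON key and the CLI-style alternative should land on the same field after `UnmarshalJSON`. The import path comes from the tests later in this changeset; the exact struct field that `gpu_layers` maps to is assumed.

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/backends/llamacpp"
)

func main() {
	// "n-gpu-layers" is listed above as an alternative spelling of "gpu_layers".
	data := []byte(`{"model": "/models/llama.gguf", "n-gpu-layers": 32}`)

	var opts llamacpp.LlamaServerOptions
	if err := json.Unmarshal(data, &opts); err != nil {
		panic(err)
	}
	fmt.Println(opts.Model) // "/models/llama.gguf"; the layer count resolves to the canonical field
}
```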
@@ -109,13 +109,13 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {

 	args := options.BuildCommandArgs()

 	expectedPairs := map[string]string{
 		"--port":       "8080",
 		"--threads":    "4",
 		"--ctx-size":   "2048",
 		"--gpu-layers": "16",
-		"--temperature": "0.7",
+		"--temp":       "0.7",
 		"--top-k":      "40",
 		"--top-p":      "0.9",
 	}

 	for flag, expectedValue := range expectedPairs {
@@ -231,7 +231,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
 		"verbose": true,
 		"ctx_size": 4096,
 		"gpu_layers": 32,
-		"temperature": 0.7
+		"temp": 0.7
 	}`

 	var options llamacpp.LlamaServerOptions
@@ -12,9 +12,12 @@ import (

 // AppConfig represents the configuration for llamactl
 type AppConfig struct {
 	Server    ServerConfig    `yaml:"server"`
 	Instances InstancesConfig `yaml:"instances"`
 	Auth      AuthConfig      `yaml:"auth"`
+	Version    string `yaml:"-"`
+	CommitHash string `yaml:"-"`
+	BuildTime  string `yaml:"-"`
 }

 // ServerConfig contains HTTP server configuration

@@ -63,6 +66,15 @@ type InstancesConfig struct {

 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`
+
+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+
+	// Interval for checking instance timeouts (in minutes)
+	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }

 // AuthConfig contains authentication settings

@@ -95,16 +107,19 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			EnableSwagger: false,
 		},
 		Instances: InstancesConfig{
 			PortRange:           [2]int{8000, 9000},
 			DataDir:             getDefaultDataDirectory(),
 			InstancesDir:        filepath.Join(getDefaultDataDirectory(), "instances"),
 			LogsDir:             filepath.Join(getDefaultDataDirectory(), "logs"),
 			AutoCreateDirs:      true,
 			MaxInstances:        -1, // -1 means unlimited
 			LlamaExecutable:     "llama-server",
 			DefaultAutoRestart:  true,
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
+			DefaultOnDemandStart: true,
+			OnDemandStartTimeout: 120, // 2 minutes
+			TimeoutCheckInterval: 5,   // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth: true,

@@ -214,6 +229,21 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
+	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
+		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
+			cfg.Instances.TimeoutCheckInterval = minutes
+		}
+	}
 	// Auth config
 	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
 		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
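A quick sketch of exercising the new environment overrides. The env var names and config fields are taken from `loadEnvVars` above; the assumption that an empty config path falls back to defaults plus env is not confirmed by the diff.

```go
package main

import (
	"fmt"
	"os"

	"llamactl/pkg/config"
)

func main() {
	os.Setenv("LLAMACTL_DEFAULT_ON_DEMAND_START", "false")
	os.Setenv("LLAMACTL_ON_DEMAND_START_TIMEOUT", "60")
	os.Setenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL", "10")

	cfg, err := config.LoadConfig("") // assumed: defaults + env overrides only
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Instances.DefaultOnDemandStart) // false
	fmt.Println(cfg.Instances.OnDemandStartTimeout) // 60 (seconds)
	fmt.Println(cfg.Instances.TimeoutCheckInterval) // 10 (minutes)
}
```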
@@ -13,16 +13,32 @@ import (
 	"net/url"
 	"os/exec"
 	"sync"
+	"sync/atomic"
 	"time"
 )

+// TimeProvider interface allows for testing with mock time
+type TimeProvider interface {
+	Now() time.Time
+}
+
+// realTimeProvider implements TimeProvider using the actual time
+type realTimeProvider struct{}
+
+func (realTimeProvider) Now() time.Time {
+	return time.Now()
+}
+
 type CreateInstanceOptions struct {
 	// Auto restart
 	AutoRestart *bool `json:"auto_restart,omitempty"`
 	MaxRestarts *int  `json:"max_restarts,omitempty"`
-	// RestartDelay duration in seconds
-	RestartDelay *int `json:"restart_delay_seconds,omitempty"`
+	RestartDelay *int `json:"restart_delay,omitempty"`
+	// On demand start
+	OnDemandStart *bool `json:"on_demand_start,omitempty"`
+	// Idle timeout
+	IdleTimeout *int `json:"idle_timeout,omitempty"`
+	// LlamaServerOptions contains the options for the llama server
 	llamacpp.LlamaServerOptions `json:",inline"`
 }

@@ -32,9 +48,11 @@ type CreateInstanceOptions struct {
 func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	// First, unmarshal into a temporary struct without the embedded type
 	type tempCreateOptions struct {
 		AutoRestart *bool `json:"auto_restart,omitempty"`
 		MaxRestarts *int  `json:"max_restarts,omitempty"`
-		RestartDelay *int `json:"restart_delay_seconds,omitempty"`
+		RestartDelay *int `json:"restart_delay,omitempty"`
+		OnDemandStart *bool `json:"on_demand_start,omitempty"`
+		IdleTimeout *int `json:"idle_timeout,omitempty"`
 	}

 	var temp tempCreateOptions

@@ -46,6 +64,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 	c.AutoRestart = temp.AutoRestart
 	c.MaxRestarts = temp.MaxRestarts
 	c.RestartDelay = temp.RestartDelay
+	c.OnDemandStart = temp.OnDemandStart
+	c.IdleTimeout = temp.IdleTimeout

 	// Now unmarshal the embedded LlamaServerOptions
 	if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {

@@ -83,6 +103,10 @@ type Process struct {
 	// Restart control
 	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
 	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
+
+	// Timeout management
+	lastRequestTime atomic.Int64 // Unix timestamp of last request
+	timeProvider    TimeProvider `json:"-"` // Time provider for testing
 }

 // validateAndCopyOptions validates and creates a deep copy of the provided options

@@ -117,6 +141,20 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
 		}
 		optionsCopy.RestartDelay = &restartDelay
 	}
+
+	if options.OnDemandStart != nil {
+		onDemandStart := *options.OnDemandStart
+		optionsCopy.OnDemandStart = &onDemandStart
+	}
+
+	if options.IdleTimeout != nil {
+		idleTimeout := *options.IdleTimeout
+		if idleTimeout < 0 {
+			log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
+			idleTimeout = 0
+		}
+		optionsCopy.IdleTimeout = &idleTimeout
+	}
 	}

 	return optionsCopy

@@ -142,6 +180,16 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.
 		defaultRestartDelay := globalSettings.DefaultRestartDelay
 		options.RestartDelay = &defaultRestartDelay
 	}
+
+	if options.OnDemandStart == nil {
+		defaultOnDemandStart := globalSettings.DefaultOnDemandStart
+		options.OnDemandStart = &defaultOnDemandStart
+	}
+
+	if options.IdleTimeout == nil {
+		defaultIdleTimeout := 0
+		options.IdleTimeout = &defaultIdleTimeout
+	}
 }

 // NewInstance creates a new instance with the given name, log path, and options

@@ -158,10 +206,8 @@ func NewInstance(name string, globalSettings *config.InstancesConfig, options *C
 		options:        optionsCopy,
 		globalSettings: globalSettings,
 		logger:         logger,
-		Running: false,
-
+		timeProvider:   realTimeProvider{},
 		Created: time.Now().Unix(),
 	}
 }

@@ -189,6 +235,11 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
 		i.proxy = nil
 	}
 }

+// SetTimeProvider sets a custom time provider for testing
+func (i *Process) SetTimeProvider(tp TimeProvider) {
+	i.timeProvider = tp
+}
+
 // GetProxy returns the reverse proxy for this instance, creating it if needed
 func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	i.mu.Lock()
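A sketch of the payload side of this change: the new fields parse from the same flat JSON object, and the restart delay key is now `restart_delay` (it was `restart_delay_seconds`).

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance"
)

func main() {
	data := []byte(`{
		"model": "/path/to/model.gguf",
		"restart_delay": 5,
		"on_demand_start": true,
		"idle_timeout": 30
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(data, &opts); err != nil {
		panic(err)
	}
	fmt.Println(*opts.OnDemandStart, *opts.IdleTimeout) // true 30
}
```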
@@ -91,38 +91,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 	}
 }

-func TestNewInstance_ValidationAndDefaults(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	// Test with invalid negative values
-	invalidMaxRestarts := -5
-	invalidRestartDelay := -10
-
-	options := &instance.CreateInstanceOptions{
-		MaxRestarts:  &invalidMaxRestarts,
-		RestartDelay: &invalidRestartDelay,
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	opts := instance.GetOptions()
-
-	// Check that negative values were corrected to 0
-	if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
-		t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
-	}
-	if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
-		t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
-	}
-}
-
 func TestSetOptions(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",

@@ -164,33 +132,6 @@ func TestSetOptions(t *testing.T) {
 	}
 }

-func TestSetOptions_NilOptions(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	originalOptions := instance.GetOptions()
-
-	// Try to set nil options
-	instance.SetOptions(nil)
-
-	// Options should remain unchanged
-	currentOptions := instance.GetOptions()
-	if currentOptions.Model != originalOptions.Model {
-		t.Error("Options should not change when setting nil options")
-	}
-}
-
 func TestGetProxy(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",

@@ -317,58 +258,6 @@ func TestUnmarshalJSON(t *testing.T) {
 	}
 }

-func TestUnmarshalJSON_PartialOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false,
-		"options": {
-			"model": "/path/to/model.gguf"
-		}
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	opts := inst.GetOptions()
-	if opts.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
-	}
-
-	// Note: Defaults are NOT applied during unmarshaling
-	// They should only be applied by NewInstance or SetOptions
-	if opts.AutoRestart != nil {
-		t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
-	}
-}
-
-func TestUnmarshalJSON_NoOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	if inst.Name != "test-instance" {
-		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
-	}
-	if inst.Running {
-		t.Error("Expected running to be false")
-	}
-
-	opts := inst.GetOptions()
-	if opts != nil {
-		t.Error("Expected options to be nil when not provided in JSON")
-	}
-}
-
 func TestCreateInstanceOptionsValidation(t *testing.T) {
 	tests := []struct {
 		name string

@@ -377,13 +266,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		expectedMax   int
 		expectedDelay int
 	}{
-		{
-			name:          "nil values",
-			maxRestarts:   nil,
-			restartDelay:  nil,
-			expectedMax:   0, // Should remain nil, but we can't easily test nil in this structure
-			expectedDelay: 0,
-		},
 		{
 			name:        "valid positive values",
 			maxRestarts: testutil.IntPtr(10),

@@ -424,20 +306,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 			instance := instance.NewInstance("test", globalSettings, options)
 			opts := instance.GetOptions()

-			if tt.maxRestarts != nil {
-				if opts.MaxRestarts == nil {
-					t.Error("Expected MaxRestarts to be set")
-				} else if *opts.MaxRestarts != tt.expectedMax {
-					t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
-				}
+			if opts.MaxRestarts == nil {
+				t.Error("Expected MaxRestarts to be set")
+			} else if *opts.MaxRestarts != tt.expectedMax {
+				t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
 			}

-			if tt.restartDelay != nil {
-				if opts.RestartDelay == nil {
-					t.Error("Expected RestartDelay to be set")
-				} else if *opts.RestartDelay != tt.expectedDelay {
-					t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
-				}
+			if opts.RestartDelay == nil {
+				t.Error("Expected RestartDelay to be set")
+			} else if *opts.RestartDelay != tt.expectedDelay {
+				t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
 			}
 		})
 	}
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"

@@ -30,6 +31,9 @@ func (i *Process) Start() error {
 		i.restarts = 0
 	}

+	// Initialize last request time to current time when starting
+	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
+
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)

@@ -140,6 +144,74 @@ func (i *Process) Stop() error {
 	return nil
 }

+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.Running {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Build the health check URL directly
+	host := opts.Host
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
+
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
+	checkHealth := func() bool {
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
+		if err != nil {
+			return false
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+
+		return resp.StatusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
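A minimal sketch of how a caller might combine starting an instance with the new health wait, mirroring what the OpenAI proxy does further down in this changeset. It assumes `StartInstance` returns the started `*instance.Process` as its first value (the interface's exact return types are not shown in the diff).

```go
package main

import (
	"log"

	"llamactl/pkg/config"
	"llamactl/pkg/manager"
)

func startAndWait(im manager.InstanceManager, cfg config.AppConfig, name string) error {
	inst, err := im.StartInstance(name)
	if err != nil {
		return err
	}
	// WaitForHealthy does one immediate GET /health, then polls once per second
	// until it sees 200 OK or the timeout (in seconds) elapses.
	return inst.WaitForHealthy(cfg.Instances.OnDemandStartTimeout)
}

func main() {
	cfg, err := config.LoadConfig("")
	if err != nil {
		log.Fatal(err)
	}
	im := manager.NewInstanceManager(cfg.Instances)
	if err := startAndWait(im, cfg, "my-model"); err != nil {
		log.Fatal(err)
	}
}
```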
pkg/instance/timeout.go (new file, 28 lines)
@@ -0,0 +1,28 @@
+package instance
+
+// UpdateLastRequestTime updates the last request access time for the instance via proxy
+func (i *Process) UpdateLastRequestTime() {
+	i.mu.Lock()
+	defer i.mu.Unlock()
+
+	lastRequestTime := i.timeProvider.Now().Unix()
+	i.lastRequestTime.Store(lastRequestTime)
+}
+
+func (i *Process) ShouldTimeout() bool {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+
+	if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
+		return false
+	}
+
+	// Check if the last request time exceeds the idle timeout
+	lastRequest := i.lastRequestTime.Load()
+	idleTimeoutMinutes := *i.options.IdleTimeout
+
+	// Convert timeout from minutes to seconds for comparison
+	idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
+
+	return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
+}
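A worked example of the `ShouldTimeout` arithmetic: `idle_timeout` is configured in minutes, while the last-request stamp is stored as Unix seconds, hence the `* 60` conversion before comparing.

```go
package main

import "fmt"

func main() {
	idleTimeoutMinutes := 5
	idleTimeoutSeconds := int64(idleTimeoutMinutes * 60) // 300

	lastRequest := int64(1_700_000_000) // Unix seconds of the last proxied request
	now := lastRequest + 301            // 301 seconds later

	// Mirrors: (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
	fmt.Println(now-lastRequest > idleTimeoutSeconds) // true -> the checker will stop the instance
}
```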
pkg/instance/timeout_test.go (new file, 195 lines)
@@ -0,0 +1,195 @@
+package instance_test
+
+import (
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/testutil"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// MockTimeProvider implements TimeProvider for testing
+type MockTimeProvider struct {
+	currentTime atomic.Int64 // Unix timestamp
+}
+
+func NewMockTimeProvider(t time.Time) *MockTimeProvider {
+	m := &MockTimeProvider{}
+	m.currentTime.Store(t.Unix())
+	return m
+}
+
+func (m *MockTimeProvider) Now() time.Time {
+	return time.Unix(m.currentTime.Load(), 0)
+}
+
+func (m *MockTimeProvider) SetTime(t time.Time) {
+	m.currentTime.Store(t.Unix())
+}
+
+// Timeout-related tests
+
+func TestUpdateLastRequestTime(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	options := &instance.CreateInstanceOptions{
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options)
+
+	// Test that UpdateLastRequestTime doesn't panic
+	inst.UpdateLastRequestTime()
+}
+
+func TestShouldTimeout_NotRunning(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	idleTimeout := 1 // 1 minute
+	options := &instance.CreateInstanceOptions{
+		IdleTimeout: &idleTimeout,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options)
+
+	// Instance is not running, should not timeout regardless of configuration
+	if inst.ShouldTimeout() {
+		t.Error("Non-running instance should never timeout")
+	}
+}
+
+func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	tests := []struct {
+		name        string
+		idleTimeout *int
+	}{
+		{"nil timeout", nil},
+		{"zero timeout", testutil.IntPtr(0)},
+		{"negative timeout", testutil.IntPtr(-5)},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			options := &instance.CreateInstanceOptions{
+				IdleTimeout: tt.idleTimeout,
+				LlamaServerOptions: llamacpp.LlamaServerOptions{
+					Model: "/path/to/model.gguf",
+				},
+			}
+
+			inst := instance.NewInstance("test-instance", globalSettings, options)
+			// Simulate running state
+			inst.Running = true
+
+			if inst.ShouldTimeout() {
+				t.Errorf("Instance with %s should not timeout", tt.name)
+			}
+		})
+	}
+}
+
+func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	idleTimeout := 5 // 5 minutes
+	options := &instance.CreateInstanceOptions{
+		IdleTimeout: &idleTimeout,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options)
+	inst.Running = true
+
+	// Update last request time to now
+	inst.UpdateLastRequestTime()
+
+	// Should not timeout immediately
+	if inst.ShouldTimeout() {
+		t.Error("Instance should not timeout when last request was recent")
+	}
+}
+
+func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	idleTimeout := 1 // 1 minute
+	options := &instance.CreateInstanceOptions{
+		IdleTimeout: &idleTimeout,
+		LlamaServerOptions: llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+		},
+	}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options)
+	inst.Running = true
+
+	// Use MockTimeProvider to simulate old last request time
+	mockTime := NewMockTimeProvider(time.Now())
+	inst.SetTimeProvider(mockTime)
+
+	// Set last request time to now
+	inst.UpdateLastRequestTime()
+
+	// Advance time by 2 minutes (exceeds 1 minute timeout)
+	mockTime.SetTime(time.Now().Add(2 * time.Minute))
+
+	if !inst.ShouldTimeout() {
+		t.Error("Instance should timeout when last request exceeds idle timeout")
+	}
+}
+
+func TestTimeoutConfiguration_Validation(t *testing.T) {
+	globalSettings := &config.InstancesConfig{
+		LogsDir: "/tmp/test",
+	}
+
+	tests := []struct {
+		name            string
+		inputTimeout    *int
+		expectedTimeout int
+	}{
+		{"default value when nil", nil, 0},
+		{"positive value", testutil.IntPtr(10), 10},
+		{"zero value", testutil.IntPtr(0), 0},
+		{"negative value gets corrected", testutil.IntPtr(-5), 0},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			options := &instance.CreateInstanceOptions{
+				IdleTimeout: tt.inputTimeout,
+				LlamaServerOptions: llamacpp.LlamaServerOptions{
+					Model: "/path/to/model.gguf",
+				},
+			}
+
+			inst := instance.NewInstance("test-instance", globalSettings, options)
+			opts := inst.GetOptions()
+
+			if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
+				t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
+			}
+		})
+	}
+}
@@ -10,6 +10,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"time"
 )

 // InstanceManager defines the interface for managing instances of the llama server.

@@ -31,20 +32,48 @@ type instanceManager struct {
 	instances       map[string]*instance.Process
 	ports           map[int]bool
 	instancesConfig config.InstancesConfig
+
+	// Timeout checker
+	timeoutChecker *time.Ticker
+	shutdownChan   chan struct{}
+	shutdownDone   chan struct{}
+	isShutdown     bool
 }

 // NewInstanceManager creates a new instance of InstanceManager.
 func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+	if instancesConfig.TimeoutCheckInterval <= 0 {
+		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
+	}
 	im := &instanceManager{
 		instances:       make(map[string]*instance.Process),
 		ports:           make(map[int]bool),
 		instancesConfig: instancesConfig,
+
+		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
+		shutdownChan:   make(chan struct{}),
+		shutdownDone:   make(chan struct{}),
 	}

 	// Load existing instances from disk
 	if err := im.loadInstances(); err != nil {
 		log.Printf("Error loading instances: %v", err)
 	}

+	// Start the timeout checker goroutine after initialization is complete
+	go func() {
+		defer close(im.shutdownDone)
+
+		for {
+			select {
+			case <-im.timeoutChecker.C:
+				im.checkAllTimeouts()
+			case <-im.shutdownChan:
+				return // Exit goroutine on shutdown
+			}
+		}
+	}()
+
 	return im
 }

@@ -94,6 +123,27 @@ func (im *instanceManager) Shutdown() {
 	im.mu.Lock()
 	defer im.mu.Unlock()

+	// Check if already shutdown
+	if im.isShutdown {
+		return
+	}
+	im.isShutdown = true
+
+	// Signal the timeout checker to stop
+	close(im.shutdownChan)
+
+	// Release lock temporarily to wait for goroutine
+	im.mu.Unlock()
+	// Wait for the timeout checker goroutine to actually stop
+	<-im.shutdownDone
+	// Reacquire lock
+	im.mu.Lock()
+
+	// Now stop the ticker
+	if im.timeoutChecker != nil {
+		im.timeoutChecker.Stop()
+	}
+
 	var wg sync.WaitGroup
 	wg.Add(len(im.instances))
[File diff suppressed because it is too large]
@@ -27,10 +27,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}

-	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-	}
-
 	name, err := validation.ValidateInstanceName(name)
 	if err != nil {
 		return nil, err

@@ -44,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 	im.mu.Lock()
 	defer im.mu.Unlock()

+	// Check max instances limit after acquiring the lock
+	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+	}
+
 	// Check if instance with this name already exists
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
pkg/manager/timeout.go (new file, 26 lines)
@@ -0,0 +1,26 @@
+package manager
+
+import "log"
+
+func (im *instanceManager) checkAllTimeouts() {
+	im.mu.RLock()
+	var timeoutInstances []string
+
+	// Identify instances that should timeout
+	for _, inst := range im.instances {
+		if inst.ShouldTimeout() {
+			timeoutInstances = append(timeoutInstances, inst.Name)
+		}
+	}
+	im.mu.RUnlock() // Release read lock before calling StopInstance
+
+	// Stop the timed-out instances
+	for _, name := range timeoutInstances {
+		log.Printf("Instance %s has timed out, stopping it", name)
+		if _, err := im.StopInstance(name); err != nil {
+			log.Printf("Error stopping instance %s: %v", name, err)
+		} else {
+			log.Printf("Instance %s stopped successfully", name)
+		}
+	}
+}
@@ -28,7 +28,23 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 	}
 }

-// HelpHandler godoc
+// VersionHandler godoc
+// @Summary Get llamactl version
+// @Description Returns the version of the llamactl command
+// @Tags version
+// @Security ApiKeyAuth
+// @Produces text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /version [get]
+func (h *Handler) VersionHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
+	}
+}
+
+// LlamaServerHelpHandler godoc
 // @Summary Get help for llama server
 // @Description Returns the help text for the llama server command
 // @Tags server

@@ -37,7 +53,7 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 // @Success 200 {string} string "Help text"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/help [get]
-func (h *Handler) HelpHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		helpCmd := exec.Command("llama-server", "--help")
 		output, err := helpCmd.CombinedOutput()

@@ -50,7 +66,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 	}
 }

-// VersionHandler godoc
+// LlamaServerVersionHandler godoc
 // @Summary Get version of llama server
 // @Description Returns the version of the llama server command
 // @Tags server

@@ -59,7 +75,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/version [get]
-func (h *Handler) VersionHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		versionCmd := exec.Command("llama-server", "--version")
 		output, err := versionCmd.CombinedOutput()

@@ -72,7 +88,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 	}
 }

-// ListDevicesHandler godoc
+// LlamaServerListDevicesHandler godoc
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags server

@@ -81,7 +97,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/devices [get]
-func (h *Handler) ListDevicesHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		listCmd := exec.Command("llama-server", "--list-devices")
 		output, err := listCmd.CombinedOutput()

@@ -100,7 +116,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
-// @Success 200 {array} Instance "List of instances"
+// @Success 200 {array} instance.Process "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances [get]
 func (h *Handler) ListInstances() http.HandlerFunc {

@@ -127,8 +143,8 @@ func (h *Handler) ListInstances() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 201 {object} Instance "Created instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 201 {object} instance.Process "Created instance details"
 // @Failure 400 {string} string "Invalid request body"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [post]

@@ -168,7 +184,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Instance details"
+// @Success 200 {object} instance.Process "Instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [get]

@@ -202,8 +218,8 @@ func (h *Handler) GetInstance() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 200 {object} Instance "Updated instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 200 {object} instance.Process "Updated instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [put]

@@ -242,7 +258,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Started instance details"
+// @Success 200 {object} instance.Process "Started instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/start [post]

@@ -275,7 +291,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Stopped instance details"
+// @Success 200 {object} instance.Process "Stopped instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/stop [post]

@@ -308,7 +324,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Restarted instance details"
+// @Success 200 {object} instance.Process "Restarted instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/restart [post]

@@ -456,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 			proxyPath = "/" + proxyPath
 		}

+		// Update the last request time for the instance
+		inst.UpdateLastRequestTime()
+
 		// Modify the request to remove the proxy prefix
 		originalPath := r.URL.Path
 		r.URL.Path = proxyPath

@@ -556,8 +575,23 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}

 		if !inst.Running {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
+			if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
+				// If on-demand start is enabled, start the instance
+				if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+					http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+					return
+				}
+
+				// Wait for the instance to become healthy before proceeding
+				if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
+					http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+					return
+				}
+			} else {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
 		}

 		proxy, err := inst.GetProxy()

@@ -566,6 +600,9 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

+		// Update last request time for the instance
+		inst.UpdateLastRequestTime()
+
 		// Recreate the request body from the bytes we read
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
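A sketch of the client-visible behavior of the on-demand path: a request naming a stopped instance triggers a start and blocks until the instance is healthy (up to `OnDemandStartTimeout`), instead of returning 503 immediately. The endpoint path and port here are illustrative, not confirmed by this diff.

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	body := []byte(`{"model": "my-model", "messages": [{"role": "user", "content": "hi"}]}`)
	// The OpenAI proxy resolves the instance from the "model" field.
	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	// 200 once the on-demand start succeeds; 503 if the instance never becomes healthy.
	fmt.Println(resp.Status)
}
```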
@@ -8,7 +8,7 @@ import (
 	"github.com/go-chi/cors"
 	httpSwagger "github.com/swaggo/http-swagger"

-	_ "llamactl/docs"
+	_ "llamactl/apidocs"
 	"llamactl/webui"
 )

@@ -42,10 +42,12 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
 		}

+		r.Get("/version", handler.VersionHandler()) // Get server version
+
 		r.Route("/server", func(r chi.Router) {
-			r.Get("/help", handler.HelpHandler())
-			r.Get("/version", handler.VersionHandler())
-			r.Get("/devices", handler.ListDevicesHandler())
+			r.Get("/help", handler.LlamaServerHelpHandler())
+			r.Get("/version", handler.LlamaServerVersionHandler())
+			r.Get("/devices", handler.LlamaServerListDevicesHandler())
 		})

 		// Instance management endpoints
@@ -7,6 +7,7 @@ import SystemInfoDialog from "./components/SystemInfoDialog";
 import { type CreateInstanceOptions, type Instance } from "@/types/instance";
 import { useInstances } from "@/contexts/InstancesContext";
 import { useAuth } from "@/contexts/AuthContext";
+import { ThemeProvider } from "@/contexts/ThemeContext";
 
 function App() {
   const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -42,44 +43,50 @@ function App() {
   // Show loading spinner while checking auth
   if (authLoading) {
     return (
-      <div className="min-h-screen bg-gray-50 flex items-center justify-center">
-        <div className="text-center">
-          <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
-          <p className="text-gray-600">Loading...</p>
+      <ThemeProvider>
+        <div className="min-h-screen bg-background flex items-center justify-center">
+          <div className="text-center">
+            <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+            <p className="text-muted-foreground">Loading...</p>
+          </div>
         </div>
-      </div>
+      </ThemeProvider>
     );
   }
 
   // Show login dialog if not authenticated
   if (!isAuthenticated) {
     return (
-      <div className="min-h-screen bg-gray-50">
-        <LoginDialog open={true} />
-      </div>
+      <ThemeProvider>
+        <div className="min-h-screen bg-background">
+          <LoginDialog open={true} />
+        </div>
+      </ThemeProvider>
     );
   }
 
   // Show main app if authenticated
   return (
-    <div className="min-h-screen bg-gray-50">
-      <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
-      <main className="container mx-auto max-w-4xl px-4 py-8">
-        <InstanceList editInstance={handleEditInstance} />
-      </main>
+    <ThemeProvider>
+      <div className="min-h-screen bg-background">
+        <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
+        <main className="container mx-auto max-w-4xl px-4 py-8">
+          <InstanceList editInstance={handleEditInstance} />
+        </main>
 
       <InstanceDialog
         open={isInstanceModalOpen}
         onOpenChange={setIsInstanceModalOpen}
         onSave={handleSaveInstance}
         instance={editingInstance}
       />
 
       <SystemInfoDialog
         open={isSystemInfoModalOpen}
         onOpenChange={setIsSystemInfoModalOpen}
       />
     </div>
+    </ThemeProvider>
   );
 }
 
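Note on the App.tsx change above: each of the three return branches is wrapped in `ThemeProvider`, and hard-coded gray utility classes are swapped for semantic tokens (`bg-background`, `text-muted-foreground`, `border-primary`) that re-resolve when the `dark` class is toggled on the document root. A hypothetical alternative, not what this commit does, would hoist the provider to the render root so it wraps the tree once; a sketch, assuming the entry point is `webui/src/main.tsx` (file not shown in this diff):

```tsx
// Hypothetical refactor: mount ThemeProvider once at the entry point instead
// of inside each return branch of App.
import ReactDOM from "react-dom/client";
import { ThemeProvider } from "@/contexts/ThemeContext";
import App from "./App";

ReactDOM.createRoot(document.getElementById("root")!).render(
  <ThemeProvider>
    <App />
  </ThemeProvider>,
);
```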
@@ -55,6 +55,21 @@ describe('App Component - Critical Business Logic Only', () => {
     vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
     window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
     global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+
+    // Mock window.matchMedia for dark mode functionality
+    Object.defineProperty(window, 'matchMedia', {
+      writable: true,
+      value: vi.fn().mockImplementation((query: string) => ({
+        matches: false,
+        media: query,
+        onchange: null,
+        addListener: vi.fn(),
+        removeListener: vi.fn(),
+        addEventListener: vi.fn(),
+        removeEventListener: vi.fn(),
+        dispatchEvent: vi.fn(),
+      })),
+    })
   })
 
   afterEach(() => {
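This stub is needed because jsdom does not implement `window.matchMedia`, which the new `ThemeProvider` calls on mount to derive the initial theme; without it every `render(<App />)` in the suite would throw. A sketch of the same stub extracted into a reusable helper (hypothetical file path), so any suite that mounts `ThemeProvider` can share it:

```ts
// webui/src/test/matchMedia.ts (hypothetical): reusable matchMedia stub.
import { vi } from "vitest";

export function stubMatchMedia(matches = false): void {
  Object.defineProperty(window, "matchMedia", {
    writable: true,
    value: vi.fn().mockImplementation((query: string) => ({
      matches, // false => ThemeProvider falls back to "light"
      media: query,
      onchange: null,
      addListener: vi.fn(), // legacy listener API, stubbed for completeness
      removeListener: vi.fn(),
      addEventListener: vi.fn(),
      removeEventListener: vi.fn(),
      dispatchEvent: vi.fn(),
    })),
  });
}
```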
@@ -1,6 +1,7 @@
 import { Button } from "@/components/ui/button";
-import { HelpCircle, LogOut } from "lucide-react";
+import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
 import { useAuth } from "@/contexts/AuthContext";
+import { useTheme } from "@/contexts/ThemeContext";
 
 interface HeaderProps {
   onCreateInstance: () => void;
@@ -9,6 +10,7 @@ interface HeaderProps {
 
 function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
   const { logout } = useAuth();
+  const { theme, toggleTheme } = useTheme();
 
   const handleLogout = () => {
     if (confirm("Are you sure you want to logout?")) {
@@ -17,10 +19,10 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
   };
 
   return (
-    <header className="bg-white border-b border-gray-200">
+    <header className="bg-card border-b border-border">
       <div className="container mx-auto max-w-4xl px-4 py-4">
         <div className="flex items-center justify-between">
-          <h1 className="text-2xl font-bold text-gray-900">
+          <h1 className="text-2xl font-bold text-foreground">
             Llamactl Dashboard
           </h1>
 
@@ -29,6 +31,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
             Create Instance
           </Button>
 
+          <Button
+            variant="outline"
+            size="icon"
+            onClick={toggleTheme}
+            data-testid="theme-toggle-button"
+            title={`Switch to ${theme === 'light' ? 'dark' : 'light'} mode`}
+          >
+            {theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
+          </Button>
+
           <Button
             variant="outline"
             size="icon"
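The new header button only calls `toggleTheme`, so its effect can be verified without mounting `Header` itself (which also depends on the auth context). A hedged test sketch, assuming `@testing-library/react`, the matchMedia stub above, and an empty `localStorage` so the initial theme resolves to light:

```tsx
import { fireEvent, render, screen } from "@testing-library/react";
import { ThemeProvider, useTheme } from "@/contexts/ThemeContext";

// Tiny probe component: drives the same toggleTheme the header button uses,
// without pulling in Header's other context dependencies.
function Probe() {
  const { theme, toggleTheme } = useTheme();
  return <button onClick={toggleTheme}>{theme}</button>;
}

test("toggleTheme flips the dark class on <html>", () => {
  render(
    <ThemeProvider>
      <Probe />
    </ThemeProvider>,
  );
  // Starts light (matchMedia stub reports matches: false); one click flips it.
  fireEvent.click(screen.getByRole("button"));
  expect(document.documentElement.classList.contains("dark")).toBe(true);
});
```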
@@ -18,8 +18,8 @@ function InstanceList({ editInstance }: InstanceListProps) {
     return (
       <div className="flex items-center justify-center py-12" aria-label="Loading">
         <div className="text-center">
-          <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
-          <p className="text-gray-600">Loading instances...</p>
+          <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+          <p className="text-muted-foreground">Loading instances...</p>
         </div>
       </div>
     )
@@ -28,7 +28,7 @@ function InstanceList({ editInstance }: InstanceListProps) {
   if (error) {
     return (
       <div className="text-center py-12">
-        <div className="text-red-600 mb-4">
+        <div className="text-destructive mb-4">
           <p className="text-lg font-semibold">Error loading instances</p>
           <p className="text-sm">{error}</p>
         </div>
@@ -39,15 +39,15 @@ function InstanceList({ editInstance }: InstanceListProps) {
   if (instances.length === 0) {
     return (
       <div className="text-center py-12">
-        <p className="text-gray-600 text-lg mb-2">No instances found</p>
-        <p className="text-gray-500 text-sm">Create your first instance to get started</p>
+        <p className="text-foreground text-lg mb-2">No instances found</p>
+        <p className="text-muted-foreground text-sm">Create your first instance to get started</p>
       </div>
     )
   }
 
   return (
     <div className="space-y-4">
-      <h2 className="text-xl font-semibold text-gray-900 mb-6">
+      <h2 className="text-xl font-semibold text-foreground mb-6">
         Instances ({instances.length})
       </h2>
 
@@ -11,6 +11,7 @@ import {
   DialogTitle,
 } from '@/components/ui/dialog'
 import { Badge } from '@/components/ui/badge'
+import { instancesApi } from '@/lib/api'
 import {
   RefreshCw,
   Download,
@@ -46,48 +47,44 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
   const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
 
   // Fetch logs function
-  const fetchLogs = async (lines?: number) => {
-    if (!instanceName) return
-
-    setLoading(true)
-    setError(null)
-
-    try {
-      const params = lines ? `?lines=${lines}` : ''
-      const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
-
-      if (!response.ok) {
-        throw new Error(`Failed to fetch logs: ${response.status}`)
-      }
-
-      const logText = await response.text()
-      setLogs(logText)
-
-      // Auto-scroll to bottom
-      setTimeout(() => {
-        if (logContainerRef.current) {
-          logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
-        }
-      }, 100)
-    } catch (err) {
-      setError(err instanceof Error ? err.message : 'Failed to fetch logs')
-    } finally {
-      setLoading(false)
-    }
-  }
+  const fetchLogs = React.useCallback(
+    async (lines?: number) => {
+      if (!instanceName) return
+
+      setLoading(true)
+      setError(null)
+
+      try {
+        const logText = await instancesApi.getLogs(instanceName, lines)
+        setLogs(logText)
+
+        // Auto-scroll to bottom
+        setTimeout(() => {
+          if (logContainerRef.current) {
+            logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
+          }
+        }, 100)
+      } catch (err) {
+        setError(err instanceof Error ? err.message : 'Failed to fetch logs')
+      } finally {
+        setLoading(false)
+      }
+    },
+    [instanceName]
+  )
 
   // Initial load when dialog opens
   useEffect(() => {
     if (open && instanceName) {
-      fetchLogs(lineCount)
+      void fetchLogs(lineCount)
     }
-  }, [open, instanceName])
+  }, [open, instanceName, fetchLogs, lineCount])
 
   // Auto-refresh effect
   useEffect(() => {
     if (autoRefresh && isRunning && open) {
       refreshIntervalRef.current = setInterval(() => {
-        fetchLogs(lineCount)
+        void fetchLogs(lineCount)
       }, 2000) // Refresh every 2 seconds
     } else {
       if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
         clearInterval(refreshIntervalRef.current)
       }
     }
-  }, [autoRefresh, isRunning, open, lineCount])
+  }, [autoRefresh, isRunning, open, lineCount, fetchLogs])
 
   // Copy logs to clipboard
   const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
 
   // Apply new line count
   const applyLineCount = () => {
-    fetchLogs(lineCount)
+    void fetchLogs(lineCount)
     setShowSettings(false)
   }
 
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
             <Button
               variant="outline"
               size="sm"
-              onClick={() => fetchLogs(lineCount)}
+              onClick={() => void fetchLogs(lineCount)}
               disabled={loading}
             >
               {loading ? (
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
           <div className="flex items-center gap-2 w-full">
             <Button
               variant="outline"
-              onClick={copyLogs}
+              onClick={() => void copyLogs()}
               disabled={!logs}
             >
               {copied ? (
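Three related changes in this file: `fetchLogs` moves behind `React.useCallback` so it has a stable identity and can safely appear in the effect dependency arrays, the raw `fetch` against `/api/v1/instances/.../logs` is replaced by the shared `instancesApi.getLogs` client (which presumably attaches the management API key), and `void` marks intentionally fire-and-forget promises for the linter. The pattern in isolation, as a minimal sketch independent of LogsDialog:

```tsx
import { useCallback, useEffect, useState } from "react";

function LogsTail({ name }: { name: string }) {
  const [logs, setLogs] = useState("");

  // Stable identity: re-created only when `name` changes, so the effect
  // below does not tear down and restart its interval on every render.
  const load = useCallback(async () => {
    const res = await fetch(`/api/v1/instances/${name}/logs`);
    setLogs(await res.text());
  }, [name]);

  useEffect(() => {
    const id = setInterval(() => {
      void load(); // fire-and-forget; `void` satisfies no-floating-promises
    }, 2000);
    return () => clearInterval(id);
  }, [load]);

  return <pre>{logs}</pre>;
}
```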
@@ -19,6 +19,15 @@ import {
 } from 'lucide-react'
 import { serverApi } from '@/lib/api'
 
+// Helper to get version from environment
+const getAppVersion = (): string => {
+  try {
+    return (import.meta.env as Record<string, string>).VITE_APP_VERSION || 'unknown'
+  } catch {
+    return 'unknown'
+  }
+}
+
 interface SystemInfoModalProps {
   open: boolean
   onOpenChange: (open: boolean) => void
@@ -109,9 +118,20 @@ const SystemInfoDialog: React.FC<SystemInfoModalProps> = ({
         </div>
       ) : systemInfo ? (
         <div className="space-y-6">
-          {/* Version Section */}
+          {/* Llamactl Version Section */}
           <div className="space-y-3">
-            <h3 className="font-semibold">Version</h3>
+            <h3 className="font-semibold">Llamactl Version</h3>
 
+            <div className="bg-gray-900 rounded-lg p-4">
+              <pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
+                {getAppVersion()}
+              </pre>
+            </div>
+          </div>
+
+          {/* Llama Server Version Section */}
+          <div className="space-y-3">
+            <h3 className="font-semibold">Llama Server Version</h3>
+
           <div className="bg-gray-900 rounded-lg p-4">
             <div className="mb-2">
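The `getAppVersion` helper reads `VITE_APP_VERSION`, which Vite inlines at build time; the release workflow sets it to the tag name, so only tagged CI builds report a real version and local dev builds fall back to `'unknown'`. Reproducing it locally, with an illustrative placeholder value:

```ts
// Vite statically replaces import.meta.env.VITE_* during `npm run build`, so
// the value must exist at build time, e.g.:
//
//   VITE_APP_VERSION=v0.0.0-dev npm run build
//
// At runtime the helper then resolves to that string, or 'unknown' otherwise.
const version: string = import.meta.env.VITE_APP_VERSION ?? "unknown";
console.log(`llamactl webui ${version}`);
```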
@@ -7,8 +7,8 @@ import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
 
 interface ZodFormFieldProps {
   fieldKey: keyof CreateInstanceOptions
-  value: any
-  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+  value: string | number | boolean | string[] | undefined
+  onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
 }
 
 const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
@@ -18,7 +18,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
   // Get type from Zod schema
   const fieldType = getFieldType(fieldKey)
 
-  const handleChange = (newValue: any) => {
+  const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
     onChange(fieldKey, newValue)
   }
 
@@ -29,7 +29,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
         <div className="flex items-center space-x-2">
           <Checkbox
             id={fieldKey}
-            checked={value || false}
+            checked={typeof value === 'boolean' ? value : false}
             onCheckedChange={(checked) => handleChange(checked)}
           />
           <Label htmlFor={fieldKey} className="text-sm font-normal">
@@ -51,10 +51,14 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
           <Input
             id={fieldKey}
             type="number"
-            value={value || ''}
+            step="any" // This allows decimal numbers
+            value={typeof value === 'string' || typeof value === 'number' ? value : ''}
             onChange={(e) => {
               const numValue = e.target.value ? parseFloat(e.target.value) : undefined
-              handleChange(numValue)
+              // Only update if the parsed value is valid or the input is empty
+              if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
+                handleChange(numValue)
+              }
             }}
             placeholder={config.placeholder}
           />
@@ -101,7 +105,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
           <Input
             id={fieldKey}
             type="text"
-            value={value || ''}
+            value={typeof value === 'string' || typeof value === 'number' ? value : ''}
            onChange={(e) => handleChange(e.target.value || undefined)}
             placeholder={config.placeholder}
           />
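Two fixes here: the `any` props become an explicit union, forcing each control to narrow `value` before rendering it, and the number input gains `step="any"` plus a guard so unparsable keystrokes no longer push `NaN` into state. The guard as a standalone helper, for illustration (hypothetical name, not part of the commit):

```ts
// parseNumericInput: '' clears the option (undefined); input that parseFloat
// cannot handle returns null, meaning "leave state unchanged".
function parseNumericInput(raw: string): number | null | undefined {
  if (raw === "") return undefined;
  const n = parseFloat(raw);
  return Number.isNaN(n) ? null : n;
}

parseNumericInput("");    // undefined -> option removed
parseNumericInput("0.5"); // 0.5 (step="any" permits decimals in the input)
parseNumericInput("abc"); // null -> keystroke ignored
```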
webui/src/contexts/ThemeContext.tsx (new file, 54 lines)
@@ -0,0 +1,54 @@
+import { createContext, useContext, useEffect, useState, type ReactNode } from "react";
+
+type Theme = "light" | "dark";
+
+interface ThemeContextType {
+  theme: Theme;
+  toggleTheme: () => void;
+}
+
+const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
+
+interface ThemeProviderProps {
+  children: ReactNode;
+}
+
+export function ThemeProvider({ children }: ThemeProviderProps) {
+  const [theme, setTheme] = useState<Theme>(() => {
+    const stored = localStorage.getItem("theme");
+    if (stored === "light" || stored === "dark") {
+      return stored;
+    }
+    return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light";
+  });
+
+  useEffect(() => {
+    const root = document.documentElement;
+
+    if (theme === "dark") {
+      root.classList.add("dark");
+    } else {
+      root.classList.remove("dark");
+    }
+
+    localStorage.setItem("theme", theme);
+  }, [theme]);
+
+  const toggleTheme = () => {
+    setTheme(prevTheme => prevTheme === "light" ? "dark" : "light");
+  };
+
+  return (
+    <ThemeContext.Provider value={{ theme, toggleTheme }}>
+      {children}
+    </ThemeContext.Provider>
+  );
+}
+
+export function useTheme() {
+  const context = useContext(ThemeContext);
+  if (context === undefined) {
+    throw new Error("useTheme must be used within a ThemeProvider");
+  }
+  return context;
+}
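The provider resolves the initial theme with a clear precedence: a stored `localStorage` value wins, then the OS `prefers-color-scheme` query; every change is mirrored to both the `dark` class on `<html>` and storage, so the choice survives reloads. Consuming it is one hook call; a minimal sketch:

```tsx
import { useTheme } from "@/contexts/ThemeContext";

function ThemeBadge() {
  // Throws if rendered outside <ThemeProvider>, which surfaces wiring
  // mistakes immediately instead of silently rendering the wrong theme.
  const { theme, toggleTheme } = useTheme();
  return <button onClick={toggleTheme}>Current theme: {theme}</button>;
}
```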
@@ -1,8 +1,7 @@
-import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
-import { getAllFieldKeys } from '@/schemas/instanceOptions'
+import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
 
 // Only define the basic fields we want to show by default
 export const basicFieldsConfig: Record<string, {
   label: string
   description?: string
   placeholder?: string
@@ -22,6 +21,15 @@ export const basicFieldsConfig: Record<string, {
     placeholder: '5',
     description: 'Delay in seconds before attempting restart'
   },
+  idle_timeout: {
+    label: 'Idle Timeout (minutes)',
+    placeholder: '60',
+    description: 'Time in minutes before instance is considered idle and stopped'
+  },
+  on_demand_start: {
+    label: 'On-Demand Start',
+    description: 'Start instance upon receiving OpenAI-compatible API request'
+  },
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
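`basicFieldsConfig` lists only the fields surfaced by default; the two new entries expose the `idle_timeout` and `on_demand_start` options added to the schema below. Assuming the form treats every schema key absent from this config as an advanced field (an assumption, the renderer is not in this diff), the split can be computed directly from the two exports:

```ts
import { basicFieldsConfig } from "@/lib/zodFormUtils";
import { getAllFieldKeys } from "@/schemas/instanceOptions";

// Assumption: keys present in basicFieldsConfig render in the default view,
// everything else is grouped under an "advanced" section.
const allKeys = getAllFieldKeys();
const basic = allKeys.filter((k) => k in basicFieldsConfig);
const advanced = allKeys.filter((k) => !(k in basicFieldsConfig));
console.log(basic.length, advanced.length);
```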
@@ -6,6 +6,8 @@ export const CreateInstanceOptionsSchema = z.object({
   auto_restart: z.boolean().optional(),
   max_restarts: z.number().optional(),
   restart_delay: z.number().optional(),
+  idle_timeout: z.number().optional(),
+  on_demand_start: z.boolean().optional(),
 
   // Common params
   verbose_prompt: z.boolean().optional(),
@@ -14,12 +16,12 @@ export const CreateInstanceOptionsSchema = z.object({
   cpu_mask: z.string().optional(),
   cpu_range: z.string().optional(),
   cpu_strict: z.number().optional(),
-  priority: z.number().optional(),
+  prio: z.number().optional(),
   poll: z.number().optional(),
   cpu_mask_batch: z.string().optional(),
   cpu_range_batch: z.string().optional(),
   cpu_strict_batch: z.number().optional(),
-  priority_batch: z.number().optional(),
+  prio_batch: z.number().optional(),
   poll_batch: z.number().optional(),
   ctx_size: z.number().optional(),
   predict: z.number().optional(),
@@ -82,7 +84,7 @@ export const CreateInstanceOptionsSchema = z.object({
   seed: z.number().optional(),
   sampling_seq: z.string().optional(),
   ignore_eos: z.boolean().optional(),
-  temperature: z.number().optional(),
+  temp: z.number().optional(),
   top_k: z.number().optional(),
   top_p: z.number().optional(),
   min_p: z.number().optional(),
@@ -109,7 +111,7 @@ export const CreateInstanceOptionsSchema = z.object({
   json_schema: z.string().optional(),
   json_schema_file: z.string().optional(),
 
-  // Server/Example-specific params
+  // Example-specific params
   no_context_shift: z.boolean().optional(),
   special: z.boolean().optional(),
   no_warmup: z.boolean().optional(),
@@ -149,8 +151,6 @@ export const CreateInstanceOptionsSchema = z.object({
   no_prefill_assistant: z.boolean().optional(),
   slot_prompt_similarity: z.number().optional(),
   lora_init_without_apply: z.boolean().optional(),
-
-  // Speculative decoding params
   draft_max: z.number().optional(),
   draft_min: z.number().optional(),
   draft_p_min: z.number().optional(),
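The renames (`priority` to `prio`, `priority_batch` to `prio_batch`, `temperature` to `temp`) line the schema keys up with the actual `llama-server` flag spellings. Assuming the backend derives CLI flags mechanically from option keys by swapping underscores for dashes (an assumption about code not shown here), the exact spelling is load-bearing; a sketch of that idea:

```ts
import { z } from "zod";

// Trimmed-down schema fragment mirroring the renamed keys.
const Opts = z.object({
  prio: z.number().optional(),     // --prio
  temp: z.number().optional(),     // --temp
  ctx_size: z.number().optional(), // --ctx-size
});

// Hypothetical flag builder: key -> --key-with-dashes value.
function toFlags(opts: z.infer<typeof Opts>): string[] {
  return Object.entries(opts).flatMap(([k, v]) =>
    v === undefined ? [] : [`--${k.replace(/_/g, "-")}`, String(v)],
  );
}

console.log(toFlags({ temp: 0.8, ctx_size: 4096 }));
// -> ['--temp', '0.8', '--ctx-size', '4096']
```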
webui/src/vite-env.d.ts (new file, vendored, 13 lines)
@@ -0,0 +1,13 @@
+/// <reference types="vite/client" />
+
+declare global {
+  interface ImportMetaEnv {
+    readonly VITE_APP_VERSION?: string
+  }
+
+  interface ImportMeta {
+    readonly env: ImportMetaEnv
+  }
+}
+
+export {}
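With this declaration merged in, `import.meta.env.VITE_APP_VERSION` is typed `string | undefined` project-wide, so the defensive cast inside `getAppVersion` is no longer needed for type safety; a hypothetical simplification:

```ts
const getAppVersion = (): string => import.meta.env.VITE_APP_VERSION ?? "unknown";
```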
@@ -18,8 +18,9 @@
     "baseUrl": ".",
     "paths": {
       "@/*": ["./src/*"]
-    }
+    },
+    "types": ["vite/client"]
   },
-  "include": ["src"],
+  "include": ["src", "src/vite-env.d.ts"],
   "references": [{ "path": "./tsconfig.node.json" }]
 }