169 Commits

Author SHA1 Message Date
eb5abae173 Merge pull request #66 from lordmathis/fix/disable-node-edit
fix: Prevent node change on update
2025-10-16 22:37:59 +02:00
696a2cb18b Prevent node change on update 2025-10-16 22:35:29 +02:00
e7402f0029 Merge pull request #65 from lordmathis/fix/local-node
fix: Detect local instances based on local node in nodes array
2025-10-16 22:28:01 +02:00
5c9a397746 Fix get local proxy 2025-10-16 22:11:29 +02:00
e97ca727d1 Clarify node configuration in docs 2025-10-16 21:50:06 +02:00
9f3c01384b Remove stripNodesFromOptions function 2025-10-16 21:29:27 +02:00
c5097e59be Fix local instance detection 2025-10-16 21:26:04 +02:00
cf20f304b3 Merge pull request #61 from lordmathis/fix/docs-formatting
fix: Add MkDocs hook to fix line endings in markdown files
2025-10-09 23:28:09 +02:00
72eba48b80 Add MkDocs hook to fix line endings in markdown files 2025-10-09 23:23:17 +02:00
c3037f914d Merge pull request #60 from lordmathis/lordmathis-patch-1
Update docs.yaml
2025-10-09 22:31:38 +02:00
81266b4bc4 Update docs.yaml 2025-10-09 22:29:23 +02:00
a31af94e7b Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
2025-10-09 22:23:27 +02:00
9ee0a184b3 Re-validate instance name in DeleteInstance for improved security 2025-10-09 22:18:53 +02:00
5436c28a1f Add instance name validation before deletion for security 2025-10-09 22:10:40 +02:00
73b9dd5bc7 Rename workflows for consistency 2025-10-09 21:53:14 +02:00
f61e8dad5c Add User Docs badge to README 2025-10-09 21:51:38 +02:00
ab2770bdd9 Add documentation for remote node deployment and configuration 2025-10-09 21:50:39 +02:00
e7a6a7003e Skip remote instances in checkAllTimeouts and EvictLRUInstance methods 2025-10-09 21:13:38 +02:00
2b950ee649 Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data 2025-10-09 20:39:21 +02:00
b965b77c18 Prevent remote instances from using local proxy in GetProxy method 2025-10-09 20:24:54 +02:00
8a16a195de Fix getting remote instance logs 2025-10-09 20:22:32 +02:00
9684a8a09b Enhance instance management to preserve local state for remote instances 2025-10-09 19:34:52 +02:00
9d5f01d4ae Auto-select first node in InstanceSettingsCard if none is selected 2025-10-09 19:13:58 +02:00
e281708b20 Enhance auto-start logic to differentiate between remote and local instances 2025-10-09 18:56:23 +02:00
8d9b0c0621 Initialize timeProvider and logger in UnmarshalJSON for Process 2025-10-09 18:56:12 +02:00
6c1a76691d Improve cleanup of options in InstanceDialog to skip empty strings and arrays 2025-10-09 18:49:36 +02:00
5d958ed283 Fix backend_options cleanup to exclude empty arrays in InstanceDialog 2025-10-09 18:38:33 +02:00
56b95d1243 Refactor InstanceSettingsCard and API types to use NodesMap 2025-10-08 19:52:39 +02:00
688b815ca7 Add LocalNode configuration 2025-10-08 19:43:53 +02:00
7f6725da96 Refactor NodeConfig handling to use a map 2025-10-08 19:24:24 +02:00
3418735204 Add stripNodesFromOptions function to prevent routing loops in remote requests 2025-10-07 20:27:31 +02:00
2f1cf5acdc Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests 2025-10-07 19:57:21 +02:00
01380e6641 Update instance manager tests to use empty NodeConfig slice 2025-10-07 19:18:13 +02:00
6298b03636 Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling 2025-10-07 18:57:08 +02:00
aae3f84d49 Implement caching for remote instance proxies and enhance proxy request handling 2025-10-07 18:44:23 +02:00
554796391b Remove test config file 2025-10-07 18:05:30 +02:00
16b28bac05 Merge branch 'main' into feat/multi-host 2025-10-07 18:04:24 +02:00
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) if requested invalid model name. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
b728a7c6b2 Fix fetchNodes call to ensure proper handling of promise 2025-10-03 10:53:29 +02:00
a491f29483 Add node selection functionality to InstanceSettingsCard and define Node API 2025-10-02 23:18:33 +02:00
670f8ff81b Split up handlers 2025-10-02 23:11:20 +02:00
da56456504 Add node management endpoints to handle listing and retrieving node details 2025-10-02 22:51:41 +02:00
c30053e51c Enhance instance loading to support remote instances and handle node configuration 2025-10-01 22:59:45 +02:00
347c58e15f Enhance instance manager to persist remote instances and update tracking on modifications 2025-10-01 22:58:57 +02:00
2ed67eb672 Add remote instance proxying functionality to handler 2025-10-01 22:17:19 +02:00
0188f82306 Implement remote instance creation and deletion in instance manager 2025-10-01 22:05:18 +02:00
e0f176de10 Enhance instance manager to support remote instance management and update related tests 2025-10-01 20:25:06 +02:00
2759be65a5 Add remote instance management functionality and configuration support 2025-09-30 21:09:05 +02:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
78a483ee4a Merge pull request #41 from lordmathis/fix/docs-release
fix: Refactor docs workflow to trigger on version tags
2025-09-23 22:35:05 +02:00
cdcef7c7ae Refactor docs workflow to trigger on version tags 2025-09-23 22:32:02 +02:00
6f5d886089 Merge pull request #40 from lordmathis/feat/system-info
feat: rework system info dialog
2025-09-23 22:11:42 +02:00
e3bf8ac05a Update SystemInfo dialog 2025-09-23 22:05:31 +02:00
edf0575925 Replace SystemInfoDialog with BackendInfoDialog and update related references 2025-09-23 21:44:04 +02:00
71a48aa3b6 Update server API functions to use /backends/llama-cpp path 2025-09-23 21:28:23 +02:00
30e40ecd30 Refactor API endpoints to use /backends/llama-cpp path and update related documentation 2025-09-23 21:27:58 +02:00
322e1c5eb7 Merge pull request #39 from lordmathis/feat/instance-dialog
feat: Redesign create/edit instance dialog
2025-09-23 21:14:34 +02:00
2cbd666d38 Redesign create/edit instance dialog 2025-09-23 21:11:00 +02:00
9ebc05fa3a Merge pull request #38 from lordmathis/feat/instance-card
feat: Redesign instance card
2025-09-23 19:48:20 +02:00
05e4335389 Fix instance management tests 2025-09-23 19:45:45 +02:00
850cf018e3 Refactor BackendBadge component 2025-09-23 19:20:53 +02:00
9c3da55c5d Improve InstanceCard layout 2025-09-23 18:12:58 +02:00
16d311a3d0 Merge pull request #37 from lordmathis/lordmathis-patch-1
fix: Set default docs version
2025-09-23 13:48:53 +02:00
32f58502de Update docs.yml 2025-09-23 13:46:58 +02:00
788f5a2246 Merge pull request #36 from lordmathis/lordmathis-patch-1
fix: Run docs build job on every update
2025-09-23 13:21:53 +02:00
37f464007f Update docs.yml 2025-09-23 13:19:54 +02:00
84d994c625 Merge pull request #35 from lordmathis/chore/docs-update
chore: Update docs
2025-09-22 23:24:12 +02:00
120875351f Fix image paths for MkDocs rendering in readme_sync.py 2025-09-22 23:22:27 +02:00
3a63308d5f Update error descriptions in API documentation for clarity 2025-09-22 22:39:01 +02:00
46622d2107 Update documentation and add README synchronization 2025-09-22 22:37:53 +02:00
ebc82c37aa Merge pull request #34 from lordmathis/feat/vllm-backend
feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
48b3a39dfe Move badges in instance card 2025-09-22 21:54:04 +02:00
c10153f59f Add BackendBadge component and integrate it into InstanceCard 2025-09-22 21:48:33 +02:00
588b025fb1 Handle empty responses for JSON endpoints in apiCall function 2025-09-22 21:39:44 +02:00
6dcf0f806e Fix VLLM command placeholder formatting 2025-09-22 21:30:59 +02:00
184d6df1bc Fix vllm command parsing 2025-09-22 21:25:50 +02:00
313666ea17 Fix missing vllm proxy setup 2025-09-22 20:51:00 +02:00
c3ca5b95f7 Update BuildCommandArgs to use positional argument for model and adjust tests accordingly 2025-09-22 20:32:03 +02:00
2c86fc6470 Update api referrence 2025-09-21 22:16:56 +02:00
785915943b Update api docs 2025-09-21 22:03:07 +02:00
55765d2020 Add vLLM backend support to documentation and update instance management instructions 2025-09-21 21:57:36 +02:00
6ff9aa5470 Remove vLLM backend implementation specification document 2025-09-21 21:38:10 +02:00
501afb7f0d Refactor form components and improve API error handling 2025-09-21 21:33:53 +02:00
b665194307 Add vLLM backend support to webui 2025-09-21 20:58:43 +02:00
7eb59aa7e0 Remove unused JSON unmarshal test and clean up command argument checks 2025-09-19 20:46:25 +02:00
64842e74b0 Refactor command parsing and building 2025-09-19 20:23:25 +02:00
34a949d22e Refactor command argument building and parsing 2025-09-19 19:59:46 +02:00
ec5485bd0e Refactor command argument building across backends 2025-09-19 19:46:54 +02:00
9eecb37aec Refactor MLX and VLLM server options parsing and args building 2025-09-19 19:39:36 +02:00
c7136d5206 Refactor command parsing logic across backends to utilize a unified CommandParserConfig structure 2025-09-19 18:36:23 +02:00
4df02a6519 Initial vLLM backend support 2025-09-19 18:05:12 +02:00
02fdae24ee Merge pull request #33 from lordmathis/feat/doc-versioning
feat: Docs versioning
2025-09-18 21:07:04 +02:00
9a8647775d Setup docs versioning 2025-09-18 21:04:11 +02:00
3081a1986b Merge pull request #32 from lordmathis/feat/mlx-backend
feat: Implement mlx-lm backend
2025-09-18 20:34:04 +02:00
6a580667ed Remove LlamaExecutable checks from default and file loading tests 2025-09-18 20:30:26 +02:00
2a20817078 Remove redundant LlamaExecutable field from instance configuration in tests 2025-09-18 20:29:04 +02:00
5e2d237887 Update project description for clarity and consistency in README 2025-09-18 20:21:30 +02:00
84c3453281 Refactor features section in README for improved clarity and organization 2025-09-18 20:14:03 +02:00
8006dd3841 Fix formatting in README for consistency in feature descriptions 2025-09-18 20:03:19 +02:00
8820dc1146 Enhance documentation for MLX backend support 2025-09-18 20:01:18 +02:00
11296bc5f8 Update README to include MLX backend support and enhance usage instructions 2025-09-18 19:34:40 +02:00
5121f0e302 Remove PythonPath references from MlxServerOptions and related configurations 2025-09-17 21:59:55 +02:00
587be68077 Add MLX backend support with configuration and parsing enhancements 2025-09-16 22:38:39 +02:00
cc5d8acd92 Refactor instance and manager tests to use BackendConfig for LlamaExecutable and MLXLMExecutable 2025-09-16 21:45:50 +02:00
154b754aff Add MLX command parsing and routing support 2025-09-16 21:39:08 +02:00
63fea02d66 Add MLX backend support in CreateInstanceOptions and validation 2025-09-16 21:38:33 +02:00
468688cdbc Pass backend options to instances 2025-09-16 21:37:48 +02:00
988c4aca40 Add MLX backend config options 2025-09-16 21:14:19 +02:00
1f25e9d05b Merge pull request #31 from lordmathis/feat/parse-command
feat: Implement command parsing in Create Instance
2025-09-15 22:18:39 +02:00
1b5934303b Enhance command parsing in ParseLlamaCommand and improve error handling in ParseCommandRequest 2025-09-15 22:12:56 +02:00
ccabd84568 Add margin to textarea in ParseCommandDialog for improved spacing 2025-09-15 21:36:24 +02:00
e7b06341c3 Enhance command parsing in ParseLlamaCommand 2025-09-15 21:29:46 +02:00
323056096c Implement llama-server command parsing and add UI components for command input 2025-09-15 21:04:14 +02:00
cb1669f853 Merge pull request #30 from lordmathis/dependabot/npm_and_yarn/webui/npm_and_yarn-f5c1666f0c
Bump vite from 7.0.5 to 7.1.5 in /webui in the npm_and_yarn group across 1 directory
2025-09-14 10:47:38 +02:00
dependabot[bot]
a5d1f24cbf Bump vite in /webui in the npm_and_yarn group across 1 directory
Bumps the npm_and_yarn group with 1 update in the /webui directory: [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `vite` from 7.0.5 to 7.1.5
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.1.5/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 7.1.5
  dependency-type: direct:development
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-09 21:38:38 +00:00
92f0bd02f2 Merge pull request #29 from lordmathis/lordmathis-patch-1
chore: Switch main dashboard screenshot
2025-09-04 22:54:06 +02:00
0a16f617ad Add files via upload 2025-09-04 22:47:14 +02:00
92 changed files with 8138 additions and 3377 deletions

.dockerignore (new file, 45 lines)

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files
.vscode/
.idea/
# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

.github/workflows/docs.yaml (vendored, new file, 103 lines)

@@ -0,0 +1,103 @@
name: User Docs
on:
  push:
    branches: [ main ]
    tags: [ 'v*' ]
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
permissions:
  contents: write
  pages: write
  id-token: write
concurrency:
  group: "pages"
  cancel-in-progress: false
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Build documentation
        run: |
          mkdocs build --strict
  deploy-dev:
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Configure Git
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Deploy development version
        run: |
          mike deploy --push --update-aliases dev latest
          # Set dev as default if no default exists
          if ! mike list | grep -q "default"; then
            mike set-default --push dev
          fi
  deploy-release:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Configure Git
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Deploy release version
        run: |
          VERSION=${GITHUB_REF#refs/tags/}
          mike deploy --push --update-aliases $VERSION stable
          mike set-default --push stable

(deleted docs workflow, 65 lines)

@@ -1,65 +0,0 @@
name: Build and Deploy Documentation
on:
  push:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
      - '.github/workflows/docs.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
permissions:
  contents: read
  pages: write
  id-token: write
concurrency:
  group: "pages"
  cancel-in-progress: false
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Needed for git-revision-date-localized plugin
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Build documentation
        run: |
          mkdocs build --strict
      - name: Upload documentation artifact
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./site
  deploy:
    if: github.ref == 'refs/heads/main'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

.gitignore (vendored, 6 lines)

@@ -32,4 +32,8 @@ go.work.sum
# .vscode/
node_modules/
dist/
__pycache__/
site/

README.md (172 lines)

@@ -1,31 +1,45 @@
# llamactl
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
## Why llamactl?
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
## Features
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
### 🔗 Remote Instance Deployment
- **Remote Node Support**: Deploy instances on remote hosts
- **Central Management**: Manage remote instances from a single dashboard
- **Seamless Routing**: Automatic request routing to remote instances
![Dashboard Screenshot](docs/images/dashboard.png)
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
## Quick Start
```bash
# 1. Install llama-server (one-time setup)
# See: https://github.com/ggml-org/llama.cpp#quick-start
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
@@ -42,15 +56,27 @@ llamactl
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Set model path and GPU layers
4. Start or stop the instance
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Configure environment variables if needed (optional)
6. Start or stop the instance
### Or use the REST API:
```bash
# Create instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
@@ -74,7 +100,30 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Build from Source
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Features:** CUDA support, automatic latest release installation, no backend dependencies.
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
@@ -85,16 +134,65 @@ go build -o llamactl ./cmd/server
## Prerequisites
### Backend Dependencies
**For llama.cpp backend:**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Quick install methods:
# Homebrew (macOS)
brew install llama.cpp
# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```
**For MLX backend (macOS only):**
You need MLX-LM installed:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
**For vLLM backend:**
You need vLLM installed:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# Or use Docker - no local installation required
```
## Backend Docker Support
llamactl can run backends in Docker containers:
```yaml
backends:
  llama-cpp:
    docker:
      enabled: true
  vllm:
    docker:
      enabled: true
```
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
## Configuration
llamactl works out of the box with sensible defaults.
@@ -104,8 +202,35 @@ server:
  host: "0.0.0.0" # Server host to bind to
  port: 8080 # Server port to bind to
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  allowed_headers: ["*"] # Allowed CORS headers (default: all)
  enable_swagger: false # Enable Swagger UI for API docs
backends:
  llama-cpp:
    command: "llama-server"
    args: []
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {} # Environment variables for the container
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {} # Environment variables for the container
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process
instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -115,7 +240,6 @@ instances:
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances

(generated Swagger docs: Go source)

@@ -19,6 +19,243 @@ const docTemplate = `{
"host": "{{.Host}}", "host": "{{.Host}}",
"basePath": "{{.BasePath}}", "basePath": "{{.BasePath}}",
"paths": { "paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -505,90 +742,6 @@ const docTemplate = `{
}
}
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": { "/v1/": {
"post": { "post": {
"security": [ "security": [
@@ -609,7 +762,7 @@ const docTemplate = `{
"description": "OpenAI response" "description": "OpenAI response"
}, },
"400": { "400": {
"description": "Invalid request body or model name", "description": "Invalid request body or instance name",
"schema": { "schema": {
"type": "string" "type": "string"
} }
@@ -681,522 +834,46 @@ const docTemplate = `{
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1264,6 +941,14 @@ const docTemplate = `{
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}`

(generated Swagger docs: JSON)

@@ -12,6 +12,243 @@
},
"basePath": "/api/v1",
"paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": { "/instances": {
"get": { "get": {
"security": [ "security": [
@@ -498,90 +735,6 @@
}
}
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": { "/v1/": {
"post": { "post": {
"security": [ "security": [
@@ -602,7 +755,7 @@
"description": "OpenAI response" "description": "OpenAI response"
}, },
"400": { "400": {
"description": "Invalid request body or model name", "description": "Invalid request body or instance name",
"schema": { "schema": {
"type": "string" "type": "string"
} }
@@ -674,522 +827,46 @@
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": { "instance.CreateInstanceOptions": {
"type": "object", "type": "object",
"properties": { "properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": { "auto_restart": {
"description": "Auto restart", "description": "Auto restart",
"type": "boolean" "type": "boolean"
}, },
"batch_size": { "backend_options": {
"type": "integer" "type": "object",
"additionalProperties": {}
}, },
"cache_reuse": { "backend_type": {
"type": "integer" "$ref": "#/definitions/backends.BackendType"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": { "max_restarts": {
"type": "integer" "type": "integer"
}, },
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"on_demand_start": { "on_demand_start": {
"description": "On demand start", "description": "On demand start",
"type": "boolean" "type": "boolean"
}, },
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"prio": {
"type": "integer"
},
"prio_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay": { "restart_delay": {
"type": "integer" "description": "seconds",
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temp": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer" "type": "integer"
} }
} }
@@ -1257,6 +934,14 @@
"type": "string" "type": "string"
} }
} }
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}

View File

@@ -1,352 +1,35 @@
basePath: /api/v1
definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart:
description: Auto restart
type: boolean
backend_options:
additionalProperties: {}
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
batch_size:
type: integer
cache_reuse:
type: integer
cache_type_k:
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
idle_timeout:
description: Idle timeout
type: integer
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
type: integer
max_restarts:
type: integer
metrics:
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
on_demand_start:
description: On demand start
type: boolean
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
prio:
type: integer
prio_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay:
description: seconds
type: integer
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temp:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
type: integer
type: object
instance.InstanceStatus:
@@ -391,6 +74,11 @@ definitions:
object:
type: string
type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info:
contact: {}
description: llamactl is a control server for managing Llama Server instances.
@@ -400,6 +88,153 @@ info:
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- backends
/backends/llama-cpp/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- backends
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/llama-cpp/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
get:
description: Returns a list of all instances managed by the server
@@ -710,57 +545,6 @@ paths:
summary: Stop a running instance
tags:
- instances
/server/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- server
/server/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- server
/server/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- server
/v1/:
post:
consumes:
@@ -772,7 +556,7 @@ paths:
"200": "200":
description: OpenAI response description: OpenAI response
"400": "400":
description: Invalid request body or model name description: Invalid request body or instance name
schema: schema:
type: string type: string
"500": "500":

View File

@@ -58,7 +58,7 @@ func main() {
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes, cfg.LocalNode)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)

View File

@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.source Normal file
View File

@@ -0,0 +1,64 @@
# WebUI build stage
FROM node:20-alpine AS webui-builder
WORKDIR /webui
# Copy webui package files
COPY webui/package*.json ./
# Install dependencies
RUN npm ci
# Copy webui source
COPY webui/ ./
# Build webui
RUN npm run build
# Go build stage
FROM golang:1.24-alpine AS builder
# Install build dependencies
RUN apk add --no-cache git ca-certificates
# Set working directory
WORKDIR /build
# Copy go mod files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY apidocs/ ./apidocs/
COPY webui/webui.go ./webui/
# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
# Final stage
FROM alpine:latest
# Install runtime dependencies
RUN apk --no-cache add ca-certificates
# Create data directory
RUN mkdir -p /data
# Set working directory
WORKDIR /data
# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
# Expose the default port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.vllm Normal file
View File

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,56 @@
version: '3.8'
services:
llamactl-llamacpp:
build:
context: ..
dockerfile: docker/Dockerfile.llamacpp
image: llamactl:llamacpp-cuda
container_name: llamactl-llamacpp
ports:
- "8080:8080"
volumes:
- ./data/llamacpp:/data
- ./models:/models # Mount models directory
- ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped
llamactl-vllm:
build:
context: ..
dockerfile: docker/Dockerfile.vllm
image: llamactl:vllm-cuda
container_name: llamactl-vllm
ports:
- "8081:8080" # Use different port to avoid conflicts
volumes:
- ./data/vllm:/data
- ./models:/models # Mount models directory
- ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Enable Docker mode for nested containers (if needed)
- LLAMACTL_VLLM_DOCKER_ENABLED=false
# vLLM specific environment variables
- CUDA_VISIBLE_DEVICES=all
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped

View File

@@ -2,3 +2,4 @@ mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
mike==2.0.0

docs/fix_line_endings.py Normal file
View File

@@ -0,0 +1,60 @@
"""
MkDocs hook to fix line endings for proper rendering.
Automatically adds two spaces at the end of lines that need line breaks.
"""
import re
def on_page_markdown(markdown, page, config, **kwargs):
"""
Fix line endings in markdown content for proper MkDocs rendering.
Adds two spaces at the end of lines that need line breaks.
"""
lines = markdown.split('\n')
processed_lines = []
in_code_block = False
for i, line in enumerate(lines):
stripped = line.strip()
# Track code blocks
if stripped.startswith('```'):
in_code_block = not in_code_block
processed_lines.append(line)
continue
# Skip processing inside code blocks
if in_code_block:
processed_lines.append(line)
continue
# Skip empty lines
if not stripped:
processed_lines.append(line)
continue
# Skip lines that shouldn't have line breaks:
# - Headers (# ## ###)
# - Blockquotes (>)
# - Table rows (|)
# - Lines already ending with two spaces
# - YAML front matter and HTML tags
# - Standalone punctuation lines
if (stripped.startswith('#') or
stripped.startswith('>') or
'|' in stripped or
line.endswith(' ') or
stripped.startswith('---') or
stripped.startswith('<') or
stripped.endswith('>') or
stripped in ('.', '!', '?', ':', ';', '```', '---', ',')):
processed_lines.append(line)
continue
# Add two spaces to lines that end with regular text or most punctuation
if stripped and not in_code_block:
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)
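A quick worked example of the hook's effect (hypothetical input; `page` and `config` are unused by the function body, so `None` suffices when calling it directly, assuming the module is importable as `docs.fix_line_endings`):
```python
from docs.fix_line_endings import on_page_markdown

# Build the input without literal triple backticks in the source
fence = "`" * 3
md = "\n".join(["First line", "Second line", fence, "code stays untouched", fence, "# Heading"])

out = on_page_markdown(md, page=None, config=None)
# Regular text lines gain two trailing spaces; code fences and headings are untouched.
assert out.splitlines()[0] == "First line  "
assert out.splitlines()[3] == "code stays untouched"
```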

View File

@@ -17,8 +17,38 @@ server:
host: "0.0.0.0" # Server host to bind to host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all) allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
@@ -28,7 +58,6 @@ instances:
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
@@ -41,6 +70,10 @@ auth:
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration for multi-node deployment
main: # Default local node (empty config)
```
## Configuration Files
@@ -76,14 +109,89 @@ server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0") host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080) port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"]) allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
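As an illustration, a minimal sketch of wiring that header in via `response_headers` (the values here are an assumption for an NGINX setup, not a required default):
```yaml
backends:
  llama-cpp:
    command: "llama-server"
    response_headers:
      X-Accel-Buffering: "no" # tell NGINX not to buffer streamed responses
```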
**Environment Variables:**
**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
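As a quick illustration, the same backend settings can be supplied via environment variables; the values below are placeholders that follow the formats documented above:
```bash
# Override the llama.cpp backend command and default args
export LLAMACTL_LLAMACPP_COMMAND=/opt/llama.cpp/llama-server
export LLAMACTL_LLAMACPP_ARGS="--no-webui"
# Backend process environment: comma-separated KEY=value pairs
export LLAMACTL_LLAMACPP_ENV="CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8"
# Response headers: semicolon-separated KEY=value pairs
export LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no"
```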
### Instance Configuration
@@ -97,7 +205,6 @@ instances:
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
@@ -113,9 +220,8 @@ instances:
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
@@ -133,18 +239,32 @@ auth:
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.
### Remote Node Configuration
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
```yaml
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration map
main: # Local node (empty address means local)
address: "" # Not used for local node
api_key: "" # Not used for local node
worker1: # Remote worker node
address: "http://192.168.1.10:8080"
api_key: "worker1-api-key" # Management API key for authentication
```
**Node Configuration Fields:**
- `local_node`: Specifies which node in the `nodes` map represents the local node. Must match exactly what other nodes call this node.
- `nodes`: Map of node configurations
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
- `api_key`: Management API key for authenticating with the remote node
**Environment Variables:**
- `LLAMACTL_LOCAL_NODE` - Name of the local node

View File

@@ -4,11 +4,14 @@ This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
**Quick install methods:**
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
@@ -18,6 +21,38 @@ winget install llama.cpp
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)
@@ -36,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
# Windows - Download from releases page
```
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
#### Using Docker Compose
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
#### Using Docker Build and Run
**llamactl with llama.cpp CUDA:**
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
**llamactl with vLLM CUDA:**
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
**llamactl built from source:**
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later
@@ -57,6 +157,13 @@ cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
## Remote Node Installation
For deployments with remote nodes:
- Install llamactl on each node using any of the methods above
- Configure API keys for authentication between nodes
- Ensure node names are consistent across all configurations
## Verification
Verify your installation by checking the version:
@@ -68,3 +175,5 @@ llamactl --version
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.

View File

@@ -29,8 +29,9 @@ You should see the Llamactl web interface.
1. Click the "Add Instance" button 1. Click the "Add Instance" button
2. Fill in the instance configuration: 2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name - **Name**: Give your instance a descriptive name
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance" 3. Click "Create Instance"
@@ -43,21 +44,65 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
You can also manage instances via the REST API:
@@ -66,12 +111,14 @@ You can also manage instances via the REST API:
# List all instances
curl http://localhost:8080/api/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance

Binary file not shown.


View File

@@ -1,23 +1,16 @@
# Llamactl Documentation
Welcome to the Llamactl documentation!
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
**{{HEADLINE}}**
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
{{FEATURES}}
## Quick Links

docs/readme_sync.py Normal file
View File

@@ -0,0 +1,62 @@
"""
MkDocs hook to sync content from README.md to docs/index.md
"""
import re
import os
def on_page_markdown(markdown, page, config, **kwargs):
"""Process markdown content before rendering"""
# Only process the index.md file
if page.file.src_path != 'index.md':
return markdown
# Get the path to README.md (relative to mkdocs.yml)
readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
if not os.path.exists(readme_path):
print(f"Warning: README.md not found at {readme_path}")
return markdown
try:
with open(readme_path, 'r', encoding='utf-8') as f:
readme_content = f.read()
except Exception as e:
print(f"Error reading README.md: {e}")
return markdown
# Extract headline (the text in bold after the title)
headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
# Extract features section - everything between ## Features and the next ## heading
features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
if features_match:
features_content = features_match.group(1).strip()
# Just add line breaks at the end of each line for proper MkDocs rendering
features_with_breaks = add_line_breaks(features_content)
else:
features_with_breaks = "Features content not found in README.md"
# Replace placeholders in the markdown
markdown = markdown.replace('{{HEADLINE}}', headline)
markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
# Fix image paths: convert docs/images/ to images/ for MkDocs
markdown = re.sub(r'docs/images/', 'images/', markdown)
return markdown
def add_line_breaks(content):
"""Add two spaces at the end of each line for proper MkDocs line breaks"""
lines = content.split('\n')
processed_lines = []
for line in lines:
if line.strip(): # Only add spaces to non-empty lines
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)

View File

@@ -116,7 +116,19 @@ Create and start a new instance.
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
- `nodes`: Array with single node name to deploy the instance to (for remote deployments)
See [Managing Instances](managing-instances.md) for complete configuration options.
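As an illustration, a minimal request body combining these fields might look like the following (paths and values are placeholders):
```json
{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf"
},
"auto_restart": true,
"max_restarts": 3,
"idle_timeout": 30
}
```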
**Response:**
```json
@@ -170,7 +182,7 @@ POST /api/v1/instances/{name}/start
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
@@ -191,7 +203,7 @@ POST /api/v1/instances/{name}/stop
```json
{
"name": "llama2-7b",
"status": "stopped",
"created": 1705312200
}
```
@@ -208,7 +220,7 @@ POST /api/v1/instances/{name}/restart
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
@@ -310,15 +322,15 @@ POST /v1/reranking
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing instance name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
@@ -354,7 +366,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \ -H "Authorization: Bearer your-api-key" \
-d '{ -d '{
"model": "/models/llama-2-7b.gguf" "backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}'
# Check instance status
@@ -386,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model http://localhost:8080/api/v1/instances/my-model
``` ```
### Remote Node Instance Example
```bash
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
# Check status of remote instance
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/remote-model
# Use remote instance with OpenAI-compatible API
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "remote-model",
"messages": [
{"role": "user", "content": "Hello from remote node!"}
]
}'
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
@@ -401,6 +453,102 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
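For example, assuming management auth is enabled and `your-api-key` stands in for a real key, the request above could be sent like this:
```bash
curl -X POST http://localhost:8080/api/v1/backends/llama-cpp/parse-command \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"}'
```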
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:

View File

@@ -1,6 +1,6 @@
# Managing Instances
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
@@ -39,40 +39,79 @@ Each instance is displayed as a card showing:
1. Click the **"Create Instance"** button on the dashboard 1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field) 2. Enter a unique **Name** for your instance (only required field)
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
4. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
5. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
6. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
7. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
8. Click **"Create"** to save the instance
### Via API
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
}
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
@@ -81,9 +120,19 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
"hf_repo": "unsloth/gemma-3-27b-it-GGUF", "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf", "hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32 "gpu_layers": 32
}
}'
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/instances/remote-llama \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
```
@@ -166,14 +215,17 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health
@@ -188,3 +240,4 @@ Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```

View File

@@ -125,6 +125,34 @@ This helps determine if the issue is with llamactl or with the underlying llama.
http://localhost:8080/api/v1/instances
```
## Remote Node Issues
### Node Configuration
**Problem:** Remote instances not appearing or cannot be managed
**Solutions:**
1. **Verify node configuration:**
```yaml
local_node: "main" # Must match a key in nodes map
nodes:
main:
address: "" # Empty for local node
worker1:
address: "http://worker1.internal:8080"
api_key: "secure-key" # Must match worker1's management key
```
2. **Check node name consistency:**
- `local_node` on each node must match what other nodes call it
- Node names are case-sensitive
3. **Test remote node connectivity:**
```bash
curl -H "Authorization: Bearer remote-node-key" \
http://remote-node:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs

View File

@@ -61,8 +61,20 @@ nav:
plugins:
- search
- git-revision-date-localized
- mike:
version_selector: true
css_dir: css
javascript_dir: js
canonical_version: null
hooks:
- docs/readme_sync.py
- docs/fix_line_endings.py
extra: extra:
version:
provider: mike
default: stable
social: social:
- icon: fontawesome/brands/github - icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl link: https://github.com/lordmathis/llamactl

View File

@@ -4,4 +4,7 @@ type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)

pkg/backends/builder.go (new file)

@@ -0,0 +1,93 @@
package backends
import (
"fmt"
"llamactl/pkg/config"
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}
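For reference, a minimal sketch of the two builder rules above in use; `demoOptions` is a hypothetical struct (only the `json` tags matter to the builder):
```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// demoOptions is a hypothetical options struct; BuildCommandArgs walks
// exported fields and derives flag names from their json tags.
type demoOptions struct {
	Model     string   `json:"model,omitempty"`
	GPULayers int      `json:"gpu_layers,omitempty"`
	Verbose   bool     `json:"verbose,omitempty"`
	Lora      []string `json:"lora,omitempty"`
}

func main() {
	opts := &demoOptions{Model: "m.gguf", GPULayers: 32, Verbose: true, Lora: []string{"a.bin", "b.bin"}}

	// "lora" marked multi-valued: the flag is repeated per value.
	fmt.Println(backends.BuildCommandArgs(opts, map[string]bool{"lora": true}))
	// Output: [--model m.gguf --gpu-layers 32 --verbose --lora a.bin --lora b.bin]

	// Unmarked slices are joined with commas instead.
	fmt.Println(backends.BuildCommandArgs(opts, nil))
	// Output: [--model m.gguf --gpu-layers 32 --verbose --lora a.bin,b.bin]
}
```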

View File

@@ -2,11 +2,33 @@ package llamacpp
import (
"encoding/json"
"llamactl/pkg/backends"
"reflect"
"strconv"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -315,62 +337,31 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
// Llama uses multiple flags for arrays by default (not comma-separated)
// Use package-level multiValuedFlags variable
return backends.BuildCommandArgs(o, multiValuedFlags)
}
func (o *LlamaServerOptions) BuildDockerArgs() []string {
// For llama, Docker args are the same as normal args
return o.BuildCommandArgs()
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
// Use package-level multiValuedFlags variable
var llamaOptions LlamaServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
return nil, err
}
return &llamaOptions, nil
}
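A sketch of the resulting round trip (field names as in LlamaServerOptions; the multiline input exercises the backslash handling):
```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends/llamacpp"
)

func main() {
	opts, err := llamacpp.ParseLlamaCommand(`llama-server \
	    --model /models/llama-7b.gguf \
	    --gpu-layers 32 --lora a.bin --lora b.bin`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(opts.Model, opts.GPULayers, opts.Lora) // /models/llama-7b.gguf 32 [a.bin b.bin]
	// Rebuilding flags from the parsed struct yields equivalent arguments.
	fmt.Println(opts.BuildCommandArgs())
}
```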

View File

@@ -378,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)

pkg/backends/mlx/mlx.go (new file)

@@ -0,0 +1,56 @@
package mlx
import (
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}

View File

@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

pkg/backends/parser.go (new file)

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
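To illustrate the flag rules above, a sketch with a throwaway target struct (`demo` and the executable name are hypothetical; any struct with matching json tags works):
```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends"
)

// demo is a hypothetical target; ParseCommand fills it via a JSON round trip.
type demo struct {
	Model   string   `json:"model"`
	CtxSize int      `json:"ctx_size"`
	Verbose bool     `json:"verbose"`
	Lora    []string `json:"lora"`
}

func main() {
	var d demo
	// "--ctx-size" maps to "ctx_size"; "--flag=value" and "--flag value" both work;
	// "lora" is declared multi-valued, so repeated flags accumulate;
	// the leading positional token becomes the model.
	err := backends.ParseCommand(
		"my-server m.gguf --ctx-size=4096 --verbose --lora a.bin --lora b.bin",
		[]string{"my-server"}, nil, map[string]bool{"lora": true}, &d)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", d) // {Model:m.gguf CtxSize:4096 Verbose:true Lora:[a.bin b.bin]}
}
```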

pkg/backends/vllm/vllm.go (new file)

@@ -0,0 +1,200 @@
package vllm
import (
"llamactl/pkg/backends"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified (for native execution)
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy without Model field to avoid --model flag
optionsCopy := *o
optionsCopy.Model = ""
// Use package-level multipleFlags variable
flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
func (o *VllmServerOptions) BuildDockerArgs() []string {
var args []string
// Use package-level multipleFlags variable
flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}
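As a small sketch of the positional-model behavior described in the comment above:
```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm"
)

func main() {
	opts := vllm.VllmServerOptions{
		Model:              "microsoft/DialoGPT-medium",
		TensorParallelSize: 2,
	}
	// The model is emitted as the positional argument, not as --model.
	fmt.Println(opts.BuildCommandArgs())
	// Output: [microsoft/DialoGPT-medium --tensor-parallel-size 2]
}
```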

View File

@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

View File

@@ -1,6 +1,7 @@
package config
import (
"log"
"os"
"path/filepath"
"runtime"
@@ -10,14 +11,41 @@ import (
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm"`
MLX BackendSettings `yaml:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
LocalNode string `yaml:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
@@ -31,8 +59,14 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -61,9 +95,6 @@ type InstancesConfig struct {
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
@@ -99,6 +130,11 @@ type AuthConfig struct {
ManagementKeys []string `yaml:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address"`
APIKey string `yaml:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
@@ -110,18 +146,57 @@ func LoadConfig(configPath string) (AppConfig, error) {
Host: "0.0.0.0", Host: "0.0.0.0",
Port: 8080, Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false, EnableSwagger: false,
}, },
LocalNode: "main",
Nodes: map[string]NodeConfig{
"main": {}, // Local node with empty config
},
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
// NOTE: empty strings are placeholders; InstancesDir and LogsDir
// are resolved relative to DataDir when not explicitly set.
InstancesDir: "",
LogsDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
@@ -145,6 +220,14 @@ func LoadConfig(configPath string) (AppConfig, error) {
// 3. Override with environment variables
loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
return cfg, nil
}
@@ -165,6 +248,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
log.Printf("Read config at %s", path)
return nil
}
}
@@ -229,9 +313,126 @@ func loadEnvVars(cfg *AppConfig) {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
@@ -279,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -304,6 +510,32 @@ func ParsePortRange(s string) [2]int {
return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided headers map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
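For illustration, the two separator formats side by side (a hypothetical in-package snippet, since both helpers are unexported):
```go
package config

import "fmt"

// demoParseFormats is a hypothetical helper contrasting the two formats.
func demoParseFormats() {
	env := map[string]string{}
	parseEnvVars("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4", env)
	fmt.Println(env) // map[CUDA_VISIBLE_DEVICES:0 OMP_NUM_THREADS:4]

	headers := map[string]string{}
	// Headers split on ';' so a header value may itself contain commas.
	parseHeaders("Cache-Control=no-cache;X-Tags=a,b,c", headers)
	fmt.Println(headers) // map[Cache-Control:no-cache X-Tags:a,b,c]
}
```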
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
@@ -336,6 +568,10 @@ func getDefaultDataDirectory() string {
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
@@ -365,3 +601,17 @@ func getDefaultConfigLocations() []string {
return locations
}
// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return BackendSettings{}
}
}
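A minimal sketch of resolving backend settings after loading config (the file name and printed defaults are illustrative):
```go
package main

import (
	"fmt"
	"log"

	"llamactl/pkg/config"
)

func main() {
	cfg, err := config.LoadConfig("llamactl.yaml") // falls back to defaults if absent
	if err != nil {
		log.Fatal(err)
	}
	settings := cfg.Backends.GetBackendSettings("llama-cpp")
	fmt.Println(settings.Command, settings.Args) // "llama-server" [] with defaults
	if settings.Docker != nil && settings.Docker.Enabled {
		// A Docker-enabled backend would be launched via backends.BuildDockerCommand.
		fmt.Println("docker image:", settings.Docker.Image)
	}
}
```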

View File

@@ -42,9 +42,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
if !cfg.Instances.DefaultAutoRestart {
t.Error("Expected default auto restart to be true")
}
@@ -101,9 +98,6 @@ instances:
if cfg.Instances.MaxInstances != 5 {
t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
}
@@ -123,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000", "LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOGS_DIR": "/env/logs", "LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20", "LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false", "LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7", "LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15", "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -156,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
}
if cfg.Backends.LlamaCpp.Command != "llama-server" {
t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
}
@@ -355,3 +348,294 @@ server:
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
}
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
bc := &config.BackendConfig{}
// Test empty llama-cpp
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty vLLM
settings = bc.GetBackendSettings("vllm")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}
func TestLoadConfig_LocalNode(t *testing.T) {
t.Run("default local node", func(t *testing.T) {
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "main" {
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
}
})
t.Run("local node from file", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "worker1"
nodes:
worker1:
address: ""
worker2:
address: "http://192.168.1.10:8080"
api_key: "test-key"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "worker1" {
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
}
// Verify nodes map (includes default "main" + worker1 + worker2)
if len(cfg.Nodes) != 3 {
t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
}
// Verify local node exists and is empty
localNode, exists := cfg.Nodes["worker1"]
if !exists {
t.Error("Expected local node 'worker1' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
if localNode.APIKey != "" {
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
}
// Verify remote node
remoteNode, exists := cfg.Nodes["worker2"]
if !exists {
t.Error("Expected remote node 'worker2' to exist in nodes map")
}
if remoteNode.Address != "http://192.168.1.10:8080" {
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
}
// Verify default main node still exists
_, exists = cfg.Nodes["main"]
if !exists {
t.Error("Expected default 'main' node to still exist in nodes map")
}
})
t.Run("custom local node name in config", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "primary"
nodes:
  primary:
    address: ""
  worker1:
    address: "http://192.168.1.10:8080"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "primary" {
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
}
// Verify nodes map includes default "main" + primary + worker1
if len(cfg.Nodes) != 3 {
t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
}
localNode, exists := cfg.Nodes["primary"]
if !exists {
t.Error("Expected local node 'primary' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
})
t.Run("local node from environment variable", func(t *testing.T) {
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "custom-node" {
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
}
})
}
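Taken together, these tests pin down the node-resolution rule: the entry named by local_node is always treated as local (its address stays empty), and any other entry in nodes is a remote reachable at its address. A minimal sketch of that rule, using only the config.NodeConfig shape exercised above (the helper name is hypothetical):

// isRemoteNode sketches the locality rule the tests above exercise.
func isRemoteNode(localNode string, nodes map[string]config.NodeConfig, name string) bool {
    if name == localNode {
        return false // the local node is never proxied
    }
    node, ok := nodes[name]
    return ok && node.Address != "" // remotes carry a non-empty address
}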

View File

@@ -31,9 +31,11 @@ func (realTimeProvider) Now() time.Time {
 // Process represents a running instance of the llama server
 type Process struct {
     Name    string                 `json:"name"`
     options *CreateInstanceOptions `json:"-"`
-    globalSettings *config.InstancesConfig
+    globalInstanceSettings *config.InstancesConfig
+    globalBackendSettings  *config.BackendConfig
+    localNodeName          string `json:"-"` // Name of the local node for remote detection

     // Status
     Status InstanceStatus `json:"status"`
@@ -65,22 +67,24 @@
 }

 // NewInstance creates a new instance with the given name, log path, and options
-func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
+func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, localNodeName string, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
     // Validate and copy options
-    options.ValidateAndApplyDefaults(name, globalSettings)
+    options.ValidateAndApplyDefaults(name, globalInstanceSettings)

     // Create the instance logger
-    logger := NewInstanceLogger(name, globalSettings.LogsDir)
+    logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)

     return &Process{
         Name:    name,
         options: options,
-        globalSettings: globalSettings,
-        logger:         logger,
-        timeProvider:   realTimeProvider{},
-        Created:        time.Now().Unix(),
-        Status:         Stopped,
-        onStatusChange: onStatusChange,
+        globalInstanceSettings: globalInstanceSettings,
+        globalBackendSettings:  globalBackendSettings,
+        localNodeName:          localNodeName,
+        logger:                 logger,
+        timeProvider:           realTimeProvider{},
+        Created:                time.Now().Unix(),
+        Status:                 Stopped,
+        onStatusChange:         onStatusChange,
     }
 }
@@ -96,7 +100,17 @@ func (i *Process) GetPort() int {
     if i.options != nil {
         switch i.options.BackendType {
         case backends.BackendTypeLlamaCpp:
-            return i.options.LlamaServerOptions.Port
+            if i.options.LlamaServerOptions != nil {
+                return i.options.LlamaServerOptions.Port
+            }
+        case backends.BackendTypeMlxLm:
+            if i.options.MlxServerOptions != nil {
+                return i.options.MlxServerOptions.Port
+            }
+        case backends.BackendTypeVllm:
+            if i.options.VllmServerOptions != nil {
+                return i.options.VllmServerOptions.Port
+            }
         }
     }
     return 0
@@ -108,7 +122,17 @@ func (i *Process) GetHost() string {
     if i.options != nil {
         switch i.options.BackendType {
         case backends.BackendTypeLlamaCpp:
-            return i.options.LlamaServerOptions.Host
+            if i.options.LlamaServerOptions != nil {
+                return i.options.LlamaServerOptions.Host
+            }
+        case backends.BackendTypeMlxLm:
+            if i.options.MlxServerOptions != nil {
+                return i.options.MlxServerOptions.Host
+            }
+        case backends.BackendTypeVllm:
+            if i.options.VllmServerOptions != nil {
+                return i.options.VllmServerOptions.Host
+            }
         }
     }
     return ""
@@ -123,8 +147,13 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
         return
     }

+    // Preserve the original nodes to prevent changing instance location
+    if i.options != nil && i.options.Nodes != nil {
+        options.Nodes = i.options.Nodes
+    }
+
     // Validate and copy options
-    options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
+    options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
     i.options = options

     // Clear the proxy so it gets recreated with new options
@@ -149,12 +178,29 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
         return nil, fmt.Errorf("instance %s has no options set", i.Name)
     }

+    // Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
+    if len(i.options.Nodes) > 0 && i.options.Nodes[0] != i.localNodeName {
+        return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
+    }
+
     var host string
     var port int
     switch i.options.BackendType {
     case backends.BackendTypeLlamaCpp:
-        host = i.options.LlamaServerOptions.Host
-        port = i.options.LlamaServerOptions.Port
+        if i.options.LlamaServerOptions != nil {
+            host = i.options.LlamaServerOptions.Host
+            port = i.options.LlamaServerOptions.Port
+        }
+    case backends.BackendTypeMlxLm:
+        if i.options.MlxServerOptions != nil {
+            host = i.options.MlxServerOptions.Host
+            port = i.options.MlxServerOptions.Port
+        }
+    case backends.BackendTypeVllm:
+        if i.options.VllmServerOptions != nil {
+            host = i.options.VllmServerOptions.Host
+            port = i.options.VllmServerOptions.Port
+        }
     }

     targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
@@ -164,6 +210,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
     proxy := httputil.NewSingleHostReverseProxy(targetURL)

+    var responseHeaders map[string]string
+    switch i.options.BackendType {
+    case backends.BackendTypeLlamaCpp:
+        responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
+    case backends.BackendTypeVllm:
+        responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
+    case backends.BackendTypeMlxLm:
+        responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
+    }
+
     proxy.ModifyResponse = func(resp *http.Response) error {
         // Remove CORS headers from llama-server response to avoid conflicts
         // llamactl will add its own CORS headers
@@ -173,6 +228,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
         resp.Header.Del("Access-Control-Allow-Credentials")
         resp.Header.Del("Access-Control-Max-Age")
         resp.Header.Del("Access-Control-Expose-Headers")
+
+        for key, value := range responseHeaders {
+            resp.Header.Set(key, value)
+        }
         return nil
     }
@@ -187,14 +246,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
     i.mu.RLock()
     defer i.mu.RUnlock()

+    // Determine if docker is enabled for this instance's backend
+    var dockerEnabled bool
+    if i.options != nil {
+        switch i.options.BackendType {
+        case backends.BackendTypeLlamaCpp:
+            if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
+                dockerEnabled = true
+            }
+        case backends.BackendTypeVllm:
+            if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
+                dockerEnabled = true
+            }
+        case backends.BackendTypeMlxLm:
+            // MLX does not support docker currently
+        }
+    }
+
     // Use anonymous struct to avoid recursion
     type Alias Process
     return json.Marshal(&struct {
         *Alias
         Options *CreateInstanceOptions `json:"options,omitempty"`
+        DockerEnabled bool `json:"docker_enabled,omitempty"`
     }{
         Alias:   (*Alias)(i),
         Options: i.options,
+        DockerEnabled: dockerEnabled,
     })
 }
@@ -215,9 +293,39 @@ func (i *Process) UnmarshalJSON(data []byte) error {
     // Handle options with validation and defaults
     if aux.Options != nil {
-        aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
+        aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
         i.options = aux.Options
     }

+    // Initialize fields that are not serialized
+    if i.timeProvider == nil {
+        i.timeProvider = realTimeProvider{}
+    }
+    if i.logger == nil && i.globalInstanceSettings != nil {
+        i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
+    }
+
     return nil
 }
+
+func (i *Process) IsRemote() bool {
+    i.mu.RLock()
+    defer i.mu.RUnlock()
+
+    if i.options == nil {
+        return false
+    }
+
+    // If no nodes specified, it's a local instance
+    if len(i.options.Nodes) == 0 {
+        return false
+    }
+
+    // If the first node is the local node, treat it as a local instance
+    if i.options.Nodes[0] == i.localNodeName {
+        return false
+    }
+
+    // Otherwise, it's a remote instance
+    return true
+}

View File

@@ -11,6 +11,21 @@ import (
 )

 func TestNewInstance(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir:            "/tmp/test",
         DefaultAutoRestart: true,
@@ -29,7 +44,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     if inst.Name != "test-instance" {
         t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -60,6 +75,21 @@
 }

 func TestNewInstance_WithRestartOptions(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir:            "/tmp/test",
         DefaultAutoRestart: true,
@@ -85,7 +115,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     opts := instance.GetOptions()
     // Check that explicit values override defaults
@@ -101,6 +131,21 @@
 }

 func TestSetOptions(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir:            "/tmp/test",
         DefaultAutoRestart: true,
@@ -119,7 +164,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, "main", mockOnStatusChange)

     // Update options
     newOptions := &instance.CreateInstanceOptions{
@@ -146,7 +191,74 @@
     }
 }

+func TestSetOptions_PreservesNodes(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+    }
+    globalSettings := &config.InstancesConfig{
+        LogsDir:             "/tmp/test",
+        DefaultAutoRestart:  true,
+        DefaultMaxRestarts:  3,
+        DefaultRestartDelay: 5,
+    }
+
+    // Create instance with initial nodes
+    initialOptions := &instance.CreateInstanceOptions{
+        BackendType: backends.BackendTypeLlamaCpp,
+        Nodes:       []string{"worker1"},
+        LlamaServerOptions: &llamacpp.LlamaServerOptions{
+            Model: "/path/to/model.gguf",
+            Port:  8080,
+        },
+    }
+
+    mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, "main", mockOnStatusChange)
+
+    // Try to update with different nodes
+    updatedOptions := &instance.CreateInstanceOptions{
+        BackendType: backends.BackendTypeLlamaCpp,
+        Nodes:       []string{"worker2"}, // Attempt to change node
+        LlamaServerOptions: &llamacpp.LlamaServerOptions{
+            Model: "/path/to/new-model.gguf",
+            Port:  8081,
+        },
+    }
+
+    inst.SetOptions(updatedOptions)
+    opts := inst.GetOptions()
+
+    // Nodes should remain unchanged
+    if len(opts.Nodes) != 1 || opts.Nodes[0] != "worker1" {
+        t.Errorf("Expected nodes to remain ['worker1'], got %v", opts.Nodes)
+    }
+
+    // Other options should be updated
+    if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
+        t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
+    }
+}
+
 func TestGetProxy(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -162,7 +274,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     // Get proxy for the first time
     proxy1, err := inst.GetProxy()
@@ -184,6 +296,21 @@
 }

 func TestMarshalJSON(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir:            "/tmp/test",
         DefaultAutoRestart: true,
@@ -202,7 +329,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     data, err := json.Marshal(instance)
     if err != nil {
@@ -338,6 +465,21 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
         },
     }

+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+            Args:    []string{},
+        },
+        VLLM: config.BackendSettings{
+            Command: "vllm",
+            Args:    []string{"serve"},
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -356,7 +498,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
+    instance := instance.NewInstance("test", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     opts := instance.GetOptions()
     if opts.MaxRestarts == nil {

View File

@@ -5,10 +5,14 @@ import (
     "fmt"
     "log"
     "net/http"
+    "os"
     "os/exec"
     "runtime"
     "syscall"
     "time"
+
+    "llamactl/pkg/backends"
+    "llamactl/pkg/config"
 )

 // Start starts the llama server instance and returns an error if it fails.
@@ -34,14 +38,20 @@ func (i *Process) Start() error {
     // Initialize last request time to current time when starting
     i.lastRequestTime.Store(i.timeProvider.Now().Unix())

+    // Create context before building command (needed for CommandContext)
+    i.ctx, i.cancel = context.WithCancel(context.Background())
+
     // Create log files
     if err := i.logger.Create(); err != nil {
         return fmt.Errorf("failed to create log files: %w", err)
     }

-    args := i.options.BuildCommandArgs()
-    i.ctx, i.cancel = context.WithCancel(context.Background())
-    i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
+    // Build command using backend-specific methods
+    cmd, cmdErr := i.buildCommand()
+    if cmdErr != nil {
+        return fmt.Errorf("failed to build command: %w", cmdErr)
+    }
+    i.cmd = cmd

     if runtime.GOOS != "windows" {
         setProcAttrs(i.cmd)
@@ -175,9 +185,21 @@ func (i *Process) WaitForHealthy(timeout int) error {
     var host string
     var port int
     switch opts.BackendType {
-    case "llama-cpp":
-        host = opts.LlamaServerOptions.Host
-        port = opts.LlamaServerOptions.Port
+    case backends.BackendTypeLlamaCpp:
+        if opts.LlamaServerOptions != nil {
+            host = opts.LlamaServerOptions.Host
+            port = opts.LlamaServerOptions.Port
+        }
+    case backends.BackendTypeMlxLm:
+        if opts.MlxServerOptions != nil {
+            host = opts.MlxServerOptions.Host
+            port = opts.MlxServerOptions.Port
+        }
+    case backends.BackendTypeVllm:
+        if opts.VllmServerOptions != nil {
+            host = opts.VllmServerOptions.Host
+            port = opts.VllmServerOptions.Port
+        }
     }
     if host == "" {
         host = "localhost"
@@ -343,3 +365,53 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i
     return true, maxRestarts, restartDelay
 }
+
+// buildCommand builds the command to execute using backend-specific logic
+func (i *Process) buildCommand() (*exec.Cmd, error) {
+    // Get backend configuration
+    backendConfig, err := i.getBackendConfig()
+    if err != nil {
+        return nil, err
+    }
+
+    // Build the environment variables
+    env := i.options.BuildEnvironment(backendConfig)
+
+    // Get the command to execute
+    command := i.options.GetCommand(backendConfig)
+
+    // Build command arguments
+    args := i.options.BuildCommandArgs(backendConfig)
+
+    // Create the exec.Cmd
+    cmd := exec.CommandContext(i.ctx, command, args...)
+
+    // Start with host environment variables
+    cmd.Env = os.Environ()
+
+    // Add/override with backend-specific environment variables
+    for k, v := range env {
+        cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
+    }
+
+    return cmd, nil
+}
+
+// getBackendConfig resolves the backend configuration for the current instance
+func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
+    var backendTypeStr string
+
+    switch i.options.BackendType {
+    case backends.BackendTypeLlamaCpp:
+        backendTypeStr = "llama-cpp"
+    case backends.BackendTypeMlxLm:
+        backendTypeStr = "mlx"
+    case backends.BackendTypeVllm:
+        backendTypeStr = "vllm"
+    default:
+        return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
+    }
+
+    settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
+    return &settings, nil
+}
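One detail of buildCommand worth calling out: cmd.Env starts from os.Environ() and backend/instance variables are appended afterwards, and os/exec resolves duplicate keys to the last entry, so configured overrides win over inherited host values. A standalone sketch of that append-order behavior:

package main

import (
    "fmt"
    "os"
)

func main() {
    // exec.Cmd documents that for duplicate keys in Env, the last value is
    // used, so appending overrides after os.Environ() lets instance settings
    // take precedence over whatever the host already exports.
    env := os.Environ()
    env = append(env, "OMP_NUM_THREADS=8") // backend/instance override
    fmt.Println(len(env), "entries; duplicate keys resolve to the last occurrence")
}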

View File

@@ -5,8 +5,11 @@ import (
     "fmt"
     "llamactl/pkg/backends"
     "llamactl/pkg/backends/llamacpp"
+    "llamactl/pkg/backends/mlx"
+    "llamactl/pkg/backends/vllm"
     "llamactl/pkg/config"
     "log"
+    "maps"
 )

 type CreateInstanceOptions struct {
@@ -18,12 +21,18 @@ type CreateInstanceOptions struct {
     OnDemandStart *bool `json:"on_demand_start,omitempty"`
     // Idle timeout
     IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
+    //Environment variables
+    Environment map[string]string `json:"environment,omitempty"`

     BackendType    backends.BackendType `json:"backend_type"`
     BackendOptions map[string]any       `json:"backend_options,omitempty"`

-    // LlamaServerOptions contains the options for the llama server
+    Nodes []string `json:"nodes,omitempty"`
+
+    // Backend-specific options
     LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
+    MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
+    VllmServerOptions  *vllm.VllmServerOptions      `json:"-"`
 }

 // UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
@@ -55,6 +64,30 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
                 return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
             }
         }
+    case backends.BackendTypeMlxLm:
+        if c.BackendOptions != nil {
+            optionsData, err := json.Marshal(c.BackendOptions)
+            if err != nil {
+                return fmt.Errorf("failed to marshal backend options: %w", err)
+            }
+
+            c.MlxServerOptions = &mlx.MlxServerOptions{}
+            if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
+                return fmt.Errorf("failed to unmarshal MLX options: %w", err)
+            }
+        }
+    case backends.BackendTypeVllm:
+        if c.BackendOptions != nil {
+            optionsData, err := json.Marshal(c.BackendOptions)
+            if err != nil {
+                return fmt.Errorf("failed to marshal backend options: %w", err)
+            }
+
+            c.VllmServerOptions = &vllm.VllmServerOptions{}
+            if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
+                return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
+            }
+        }
     default:
         return fmt.Errorf("unknown backend type: %s", c.BackendType)
     }
@@ -72,19 +105,50 @@ func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
         Alias: (*Alias)(c),
     }

-    // Convert LlamaServerOptions back to BackendOptions map for JSON
-    if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
-        data, err := json.Marshal(c.LlamaServerOptions)
-        if err != nil {
-            return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
-        }
-
-        var backendOpts map[string]any
-        if err := json.Unmarshal(data, &backendOpts); err != nil {
-            return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
-        }
-
-        aux.BackendOptions = backendOpts
+    // Convert backend-specific options back to BackendOptions map for JSON
+    switch c.BackendType {
+    case backends.BackendTypeLlamaCpp:
+        if c.LlamaServerOptions != nil {
+            data, err := json.Marshal(c.LlamaServerOptions)
+            if err != nil {
+                return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
+            }
+
+            var backendOpts map[string]any
+            if err := json.Unmarshal(data, &backendOpts); err != nil {
+                return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+            }
+
+            aux.BackendOptions = backendOpts
+        }
+    case backends.BackendTypeMlxLm:
+        if c.MlxServerOptions != nil {
+            data, err := json.Marshal(c.MlxServerOptions)
+            if err != nil {
+                return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
+            }
+
+            var backendOpts map[string]any
+            if err := json.Unmarshal(data, &backendOpts); err != nil {
+                return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+            }
+
+            aux.BackendOptions = backendOpts
+        }
+    case backends.BackendTypeVllm:
+        if c.VllmServerOptions != nil {
+            data, err := json.Marshal(c.VllmServerOptions)
+            if err != nil {
+                return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
+            }
+
+            var backendOpts map[string]any
+            if err := json.Unmarshal(data, &backendOpts); err != nil {
+                return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
+            }
+
+            aux.BackendOptions = backendOpts
+        }
     }

     return json.Marshal(aux)
@@ -129,13 +193,75 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
     }
 }

+func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
+    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
+        return "docker"
+    }
+    return backendConfig.Command
+}
+
 // BuildCommandArgs builds command line arguments for the backend
-func (c *CreateInstanceOptions) BuildCommandArgs() []string {
-    switch c.BackendType {
-    case backends.BackendTypeLlamaCpp:
-        if c.LlamaServerOptions != nil {
-            return c.LlamaServerOptions.BuildCommandArgs()
-        }
-    }
-    return []string{}
+func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
+    var args []string
+
+    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
+        // For Docker, start with Docker args
+        args = append(args, backendConfig.Docker.Args...)
+        args = append(args, backendConfig.Docker.Image)
+
+        switch c.BackendType {
+        case backends.BackendTypeLlamaCpp:
+            if c.LlamaServerOptions != nil {
+                args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
+            }
+        case backends.BackendTypeVllm:
+            if c.VllmServerOptions != nil {
+                args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
+            }
+        }
+    } else {
+        // For native execution, start with backend args
+        args = append(args, backendConfig.Args...)
+
+        switch c.BackendType {
+        case backends.BackendTypeLlamaCpp:
+            if c.LlamaServerOptions != nil {
+                args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
+            }
+        case backends.BackendTypeMlxLm:
+            if c.MlxServerOptions != nil {
+                args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
+            }
+        case backends.BackendTypeVllm:
+            if c.VllmServerOptions != nil {
+                args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
+            }
+        }
+    }
+
+    return args
+}
+
+func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
+    env := map[string]string{}
+
+    if backendConfig.Environment != nil {
+        maps.Copy(env, backendConfig.Environment)
+    }
+
+    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
+        if backendConfig.Docker.Environment != nil {
+            maps.Copy(env, backendConfig.Docker.Environment)
+        }
+    }
+
+    if c.Environment != nil {
+        maps.Copy(env, c.Environment)
+    }
+
+    return env
 }
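BuildCommandArgs therefore layers the final argv: static args from configuration first (Docker args plus image, or native backend args), then the instance's own flags. A standalone sketch of the Docker branch, reusing the Docker values from the environment-override config test earlier; the instance flags are illustrative stand-ins for BuildDockerArgs():

package main

import "fmt"

func main() {
    dockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
    image := "env-llama:latest"
    instanceArgs := []string{"--model", "/path/to/model.gguf"} // stand-in for BuildDockerArgs()

    // Same composition order as the Docker branch above.
    args := append(append(dockerArgs, image), instanceArgs...)
    fmt.Println("docker", args)
    // docker [run --rm --network host --gpus all env-llama:latest --model /path/to/model.gguf]
}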

View File

@@ -33,6 +33,15 @@ func (m *MockTimeProvider) SetTime(t time.Time) {
 // Timeout-related tests

 func TestUpdateLastRequestTime(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -47,13 +56,22 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     // Test that UpdateLastRequestTime doesn't panic
     inst.UpdateLastRequestTime()
 }

 func TestShouldTimeout_NotRunning(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -70,7 +88,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     // Instance is not running, should not timeout regardless of configuration
     if inst.ShouldTimeout() {
@@ -79,6 +97,15 @@
 }

 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -105,7 +132,7 @@
         },
     }

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     // Simulate running state
     inst.SetStatus(instance.Running)
@@ -117,6 +144,15 @@
 }

 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -133,7 +169,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
     inst.SetStatus(instance.Running)

     // Update last request time to now
@@ -146,6 +182,15 @@
 }

 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -162,7 +207,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)
     inst.SetStatus(instance.Running)

     // Use MockTimeProvider to simulate old last request time
@@ -181,6 +226,15 @@
 }

 func TestTimeoutConfiguration_Validation(t *testing.T) {
+    backendConfig := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     globalSettings := &config.InstancesConfig{
         LogsDir: "/tmp/test",
     }
@@ -209,7 +263,7 @@
     // Mock onStatusChange function
     mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

-    inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
+    inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, "main", mockOnStatusChange)

     opts := inst.GetOptions()
     if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {

View File

@@ -6,6 +6,7 @@ import (
     "llamactl/pkg/config"
     "llamactl/pkg/instance"
     "log"
+    "net/http"
     "os"
     "path/filepath"
     "strings"
@@ -25,38 +26,74 @@ type InstanceManager interface {
     StopInstance(name string) (*instance.Process, error)
     EvictLRUInstance() error
     RestartInstance(name string) (*instance.Process, error)
-    GetInstanceLogs(name string) (string, error)
+    GetInstanceLogs(name string, numLines int) (string, error)
     Shutdown()
 }

+type RemoteManager interface {
+    ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
+    CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+    GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+    UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+    DeleteRemoteInstance(node *config.NodeConfig, name string) error
+    StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+    StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+    RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+    GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
+}
+
 type instanceManager struct {
     mu               sync.RWMutex
     instances        map[string]*instance.Process
     runningInstances map[string]struct{}
     ports            map[int]bool
     instancesConfig  config.InstancesConfig
+    backendsConfig   config.BackendConfig
+    localNodeName    string // Name of the local node

     // Timeout checker
     timeoutChecker *time.Ticker
     shutdownChan   chan struct{}
     shutdownDone   chan struct{}
     isShutdown     bool
+
+    // Remote instance management
+    httpClient      *http.Client
+    instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config
+    nodeConfigMap   map[string]*config.NodeConfig // Maps node name to node config for quick lookup
 }

 // NewInstanceManager creates a new instance of InstanceManager.
-func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
+func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig, localNodeName string) InstanceManager {
     if instancesConfig.TimeoutCheckInterval <= 0 {
         instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
     }

+    // Build node config map for quick lookup
+    nodeConfigMap := make(map[string]*config.NodeConfig)
+    for name := range nodesConfig {
+        nodeCopy := nodesConfig[name]
+        nodeConfigMap[name] = &nodeCopy
+    }
+
     im := &instanceManager{
         instances:        make(map[string]*instance.Process),
         runningInstances: make(map[string]struct{}),
         ports:            make(map[int]bool),
         instancesConfig:  instancesConfig,
+        backendsConfig:   backendsConfig,
+        localNodeName:    localNodeName,
         timeoutChecker:   time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
         shutdownChan:     make(chan struct{}),
         shutdownDone:     make(chan struct{}),
+
+        httpClient: &http.Client{
+            Timeout: 30 * time.Second,
+        },
+
+        instanceNodeMap: make(map[string]*config.NodeConfig),
+        nodeConfigMap:   nodeConfigMap,
     }

     // Load existing instances from disk
@@ -236,24 +273,44 @@ func (im *instanceManager) loadInstance(name, path string) error {
         return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
     }

-    statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
-        im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
+    options := persistedInstance.GetOptions()
+
+    // Check if this is a remote instance
+    // An instance is remote if Nodes is specified AND the first node is not the local node
+    isRemote := options != nil && len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
+
+    var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
+    if !isRemote {
+        // Only set status callback for local instances
+        statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
+            im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
+        }
     }

     // Create new inst using NewInstance (handles validation, defaults, setup)
-    inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+    inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)

     // Restore persisted fields that NewInstance doesn't set
     inst.Created = persistedInstance.Created
     inst.SetStatus(persistedInstance.Status)

-    // Check for port conflicts and add to maps
-    if inst.GetPort() > 0 {
-        port := inst.GetPort()
-        if im.ports[port] {
-            return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
+    // Handle remote instance mapping
+    if isRemote {
+        nodeName := options.Nodes[0]
+        nodeConfig, exists := im.nodeConfigMap[nodeName]
+        if !exists {
+            return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
+        }
+        im.instanceNodeMap[name] = nodeConfig
+    } else {
+        // Check for port conflicts only for local instances
+        if inst.GetPort() > 0 {
+            port := inst.GetPort()
+            if im.ports[port] {
+                return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
+            }
+            im.ports[port] = true
         }
-        im.ports[port] = true
     }

     im.instances[name] = inst
@@ -261,25 +318,48 @@ func (im *instanceManager) loadInstance(name, path string) error {
 }

 // autoStartInstances starts instances that were running when persisted and have auto-restart enabled
+// For instances with auto-restart disabled, it sets their status to Stopped
 func (im *instanceManager) autoStartInstances() {
     im.mu.RLock()
     var instancesToStart []*instance.Process
+    var instancesToStop []*instance.Process
     for _, inst := range im.instances {
         if inst.IsRunning() && // Was running when persisted
             inst.GetOptions() != nil &&
-            inst.GetOptions().AutoRestart != nil &&
-            *inst.GetOptions().AutoRestart {
-            instancesToStart = append(instancesToStart, inst)
+            inst.GetOptions().AutoRestart != nil {
+            if *inst.GetOptions().AutoRestart {
+                instancesToStart = append(instancesToStart, inst)
+            } else {
+                // Instance was running but auto-restart is disabled, mark as stopped
+                instancesToStop = append(instancesToStop, inst)
+            }
         }
     }
     im.mu.RUnlock()

+    // Stop instances that have auto-restart disabled
+    for _, inst := range instancesToStop {
+        log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
+        inst.SetStatus(instance.Stopped)
+    }
+
+    // Start instances that have auto-restart enabled
     for _, inst := range instancesToStart {
         log.Printf("Auto-starting instance %s", inst.Name)
         // Reset running state before starting (since Start() expects stopped instance)
         inst.SetStatus(instance.Stopped)
-        if err := inst.Start(); err != nil {
-            log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
+
+        // Check if this is a remote instance
+        if node := im.getNodeForInstance(inst); node != nil {
+            // Remote instance - use StartRemoteInstance
+            if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
+                log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
+            }
+        } else {
+            // Local instance - call Start() directly
+            if err := inst.Start(); err != nil {
+                log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
+            }
         }
     }
 }
@@ -294,3 +374,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
         delete(im.runningInstances, name)
     }
 }
+
+// getNodeForInstance returns the node configuration for a remote instance
+// Returns nil if the instance is not remote or the node is not found
+func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
+    if !inst.IsRemote() {
+        return nil
+    }
+
+    // Check if we have a cached mapping
+    if nodeConfig, exists := im.instanceNodeMap[inst.Name]; exists {
+        return nodeConfig
+    }
+
+    return nil
+}
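Wiring it together: a sketch of constructing the manager with one remote node, following the new NewInstanceManager signature above (the worker address is illustrative). Instances created with Nodes[0] == "worker1" end up in instanceNodeMap and are driven through the RemoteManager methods instead of a local process:

// backendsConfig and instancesConfig are assumed to be populated elsewhere.
nodes := map[string]config.NodeConfig{
    "main":    {},                                    // local node: empty address
    "worker1": {Address: "http://192.168.1.10:8080"}, // hypothetical remote
}
mgr := manager.NewInstanceManager(backendsConfig, instancesConfig, nodes, "main")
_ = mgr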

View File

@@ -15,18 +15,26 @@
 )

 func TestNewInstanceManager(t *testing.T) {
+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     cfg := config.InstancesConfig{
         PortRange:            [2]int{8000, 9000},
         LogsDir:              "/tmp/test",
         MaxInstances:         5,
-        LlamaExecutable:      "llama-server",
         DefaultAutoRestart:   true,
         DefaultMaxRestarts:   3,
         DefaultRestartDelay:  5,
         TimeoutCheckInterval: 5,
     }

-    mgr := manager.NewInstanceManager(cfg)
+    mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
     if mgr == nil {
         t.Fatal("NewInstanceManager returned nil")
     }
@@ -44,6 +52,15 @@
 func TestPersistence(t *testing.T) {
     tempDir := t.TempDir()

+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     cfg := config.InstancesConfig{
         PortRange:    [2]int{8000, 9000},
         InstancesDir: tempDir,
@@ -52,7 +69,7 @@
     }

     // Test instance persistence on creation
-    manager1 := manager.NewInstanceManager(cfg)
+    manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
     options := &instance.CreateInstanceOptions{
         BackendType: backends.BackendTypeLlamaCpp,
         LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -73,7 +90,7 @@
     }

     // Test loading instances from disk
-    manager2 := manager.NewInstanceManager(cfg)
+    manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
     instances, err := manager2.ListInstances()
     if err != nil {
         t.Fatalf("ListInstances failed: %v", err)
@@ -172,15 +189,86 @@
 // Helper function to create a test manager with standard config
 func createTestManager() manager.InstanceManager {
+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     cfg := config.InstancesConfig{
         PortRange:            [2]int{8000, 9000},
         LogsDir:              "/tmp/test",
         MaxInstances:         10,
-        LlamaExecutable:      "llama-server",
         DefaultAutoRestart:   true,
         DefaultMaxRestarts:   3,
         DefaultRestartDelay:  5,
         TimeoutCheckInterval: 5,
     }
-    return manager.NewInstanceManager(cfg)
+    return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
+}
+
+func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
+    tempDir := t.TempDir()
+
+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+    }
+
+    cfg := config.InstancesConfig{
+        PortRange:            [2]int{8000, 9000},
+        InstancesDir:         tempDir,
+        MaxInstances:         10,
+        TimeoutCheckInterval: 5,
+    }
+
+    // Create first manager and instance with auto-restart disabled
+    manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
+
+    autoRestart := false
+    options := &instance.CreateInstanceOptions{
+        BackendType: backends.BackendTypeLlamaCpp,
+        AutoRestart: &autoRestart,
+        LlamaServerOptions: &llamacpp.LlamaServerOptions{
+            Model: "/path/to/model.gguf",
+            Port:  8080,
+        },
+    }
+
+    inst, err := manager1.CreateInstance("test-instance", options)
+    if err != nil {
+        t.Fatalf("CreateInstance failed: %v", err)
+    }
+
+    // Simulate instance being in running state when persisted
+    // (this would happen if the instance was running when llamactl was stopped)
+    inst.SetStatus(instance.Running)
+
+    // Shutdown first manager
+    manager1.Shutdown()
+
+    // Create second manager (simulating restart of llamactl)
+    manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
+
+    // Get the loaded instance
+    loadedInst, err := manager2.GetInstance("test-instance")
+    if err != nil {
+        t.Fatalf("GetInstance failed: %v", err)
+    }
+
+    // The instance should be marked as Stopped, not Running
+    // because auto-restart is disabled
+    if loadedInst.IsRunning() {
+        t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
+    }
+
+    if loadedInst.GetStatus() != instance.Stopped {
+        t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
+    }
+
+    manager2.Shutdown()
 }

View File

@@ -3,6 +3,7 @@ package manager
import ( import (
"fmt" "fmt"
"llamactl/pkg/backends" "llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/validation" "llamactl/pkg/validation"
"os" "os"
@@ -11,16 +12,65 @@ import (
type MaxRunningInstancesError error type MaxRunningInstancesError error
// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
// while preserving the Nodes field to maintain remote instance tracking
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
if localInst == nil || remoteInst == nil {
return
}
// Get the remote instance options
remoteOptions := remoteInst.GetOptions()
if remoteOptions == nil {
return
}
// Preserve the Nodes field from the local instance
localOptions := localInst.GetOptions()
var preservedNodes []string
if localOptions != nil && len(localOptions.Nodes) > 0 {
preservedNodes = make([]string, len(localOptions.Nodes))
copy(preservedNodes, localOptions.Nodes)
}
// Create a copy of remote options and restore the Nodes field
updatedOptions := *remoteOptions
updatedOptions.Nodes = preservedNodes
// Update the local instance with all remote data
localInst.SetOptions(&updatedOptions)
localInst.Status = remoteInst.Status
localInst.Created = remoteInst.Created
}
// ListInstances returns a list of all instances managed by the instance manager. // ListInstances returns a list of all instances managed by the instance manager.
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) { func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
defer im.mu.RUnlock() localInstances := make([]*instance.Process, 0, len(im.instances))
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances { for _, inst := range im.instances {
instances = append(instances, inst) localInstances = append(localInstances, inst)
} }
return instances, nil im.mu.RUnlock()
// Update remote instances with live state
for _, inst := range localInstances {
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.GetRemoteInstance(node, inst.Name)
if err != nil {
// Log error but continue with stale data
// Don't fail the entire list operation due to one remote failure
continue
}
// Update the local stub with all remote data (preserving Nodes)
im.mu.Lock()
im.updateLocalInstanceFromRemote(inst, remoteInst)
im.mu.Unlock()
}
}
return localInstances, nil
} }
 // CreateInstance creates a new instance with the given options and returns it.
@@ -43,16 +93,57 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
     im.mu.Lock()
     defer im.mu.Unlock()
-    // Check max instances limit after acquiring the lock
-    if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-        return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-    }
-    // Check if instance with this name already exists
+    // Check if instance with this name already exists (must be globally unique)
     if im.instances[name] != nil {
         return nil, fmt.Errorf("instance with name %s already exists", name)
     }
+    // Check if this is a remote instance
+    // An instance is remote if Nodes is specified AND the first node is not the local node
+    isRemote := len(options.Nodes) > 0 && options.Nodes[0] != im.localNodeName
+    var nodeConfig *config.NodeConfig
+    if isRemote {
+        // Validate that the node exists
+        nodeName := options.Nodes[0] // Use first node for now
+        var exists bool
+        nodeConfig, exists = im.nodeConfigMap[nodeName]
+        if !exists {
+            return nil, fmt.Errorf("node %s not found", nodeName)
+        }
+        // Create the remote instance on the remote node
+        remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
+        if err != nil {
+            return nil, err
+        }
+        // Create a local stub that preserves the Nodes field for tracking
+        // We keep the original options (with Nodes) so IsRemote() works correctly
+        inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, nil)
+        // Update the local stub with all remote data (preserving Nodes)
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        // Add to local tracking maps (but don't count towards limits)
+        im.instances[name] = inst
+        im.instanceNodeMap[name] = nodeConfig
+        // Persist the remote instance locally for tracking across restarts
+        if err := im.persistInstance(inst); err != nil {
+            return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
+        }
+        return inst, nil
+    }
+    // Local instance creation
+    // Check max instances limit for local instances only
+    localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
+    if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+        return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+    }
     // Assign and validate port for backend-specific options
     if err := im.assignAndValidatePort(options); err != nil {
         return nil, err
@@ -62,7 +153,7 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
         im.onStatusChange(name, oldStatus, newStatus)
     }
-    inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
+    inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, im.localNodeName, statusCallback)
     im.instances[inst.Name] = inst
     if err := im.persistInstance(inst); err != nil {
@@ -73,28 +164,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 }
 // GetInstance retrieves an instance by its name.
+// For remote instances, this fetches the live state from the remote node and updates the local stub.
 func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
     im.mu.RLock()
-    defer im.mu.RUnlock()
-    instance, exists := im.instances[name]
+    inst, exists := im.instances[name]
+    im.mu.RUnlock()
     if !exists {
         return nil, fmt.Errorf("instance with name %s not found", name)
     }
-    return instance, nil
+    // Check if instance is remote and fetch live state
+    if node := im.getNodeForInstance(inst); node != nil {
+        remoteInst, err := im.GetRemoteInstance(node, name)
+        if err != nil {
+            return nil, err
+        }
+        // Update the local stub with all remote data (preserving Nodes)
+        im.mu.Lock()
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        im.mu.Unlock()
+        // Return the local stub (preserving Nodes field)
+        return inst, nil
+    }
+    return inst, nil
 }
 // UpdateInstance updates the options of an existing instance and returns it.
 // If the instance is running, it will be restarted to apply the new options.
 func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
     im.mu.RLock()
-    instance, exists := im.instances[name]
+    inst, exists := im.instances[name]
     im.mu.RUnlock()
     if !exists {
         return nil, fmt.Errorf("instance with name %s not found", name)
     }
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        remoteInst, err := im.UpdateRemoteInstance(node, name, options)
+        if err != nil {
+            return nil, err
+        }
+        // Update the local stub with all remote data (preserving Nodes)
+        im.mu.Lock()
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        im.mu.Unlock()
+        // Persist the updated remote instance locally
+        im.mu.Lock()
+        defer im.mu.Unlock()
+        if err := im.persistInstance(inst); err != nil {
+            return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
+        }
+        return inst, nil
+    }
     if options == nil {
         return nil, fmt.Errorf("instance options cannot be nil")
     }
@@ -105,55 +236,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
     }
     // Check if instance is running before updating options
-    wasRunning := instance.IsRunning()
+    wasRunning := inst.IsRunning()
     // If the instance is running, stop it first
     if wasRunning {
-        if err := instance.Stop(); err != nil {
+        if err := inst.Stop(); err != nil {
             return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
         }
     }
     // Now update the options while the instance is stopped
-    instance.SetOptions(options)
+    inst.SetOptions(options)
     // If it was running before, start it again with the new options
     if wasRunning {
-        if err := instance.Start(); err != nil {
+        if err := inst.Start(); err != nil {
             return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
         }
     }
     im.mu.Lock()
     defer im.mu.Unlock()
-    if err := im.persistInstance(instance); err != nil {
+    if err := im.persistInstance(inst); err != nil {
         return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
     }
-    return instance, nil
+    return inst, nil
 }
 // DeleteInstance removes stopped instance by its name.
 func (im *instanceManager) DeleteInstance(name string) error {
     im.mu.Lock()
-    defer im.mu.Unlock()
-    instance, exists := im.instances[name]
+    inst, exists := im.instances[name]
+    im.mu.Unlock()
     if !exists {
         return fmt.Errorf("instance with name %s not found", name)
     }
-    if instance.IsRunning() {
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        err := im.DeleteRemoteInstance(node, name)
+        if err != nil {
+            return err
+        }
+        // Clean up local tracking
+        im.mu.Lock()
+        defer im.mu.Unlock()
+        delete(im.instances, name)
+        delete(im.instanceNodeMap, name)
+        // Delete the instance's config file if persistence is enabled
+        // Re-validate instance name for security (defense in depth)
+        validatedName, err := validation.ValidateInstanceName(name)
+        if err != nil {
+            return fmt.Errorf("invalid instance name for file deletion: %w", err)
+        }
+        instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
+        if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
+            return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
+        }
+        return nil
+    }
+    if inst.IsRunning() {
         return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
     }
-    delete(im.ports, instance.GetPort())
+    im.mu.Lock()
+    defer im.mu.Unlock()
+    delete(im.ports, inst.GetPort())
     delete(im.instances, name)
     // Delete the instance's config file if persistence is enabled
-    instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
+    // Re-validate instance name for security (defense in depth)
+    validatedName, err := validation.ValidateInstanceName(inst.Name)
+    if err != nil {
+        return fmt.Errorf("invalid instance name for file deletion: %w", err)
+    }
+    instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
     if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
-        return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
+        return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
     }
     return nil
@@ -163,33 +329,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
 // If the instance is already running, it returns an error.
 func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
     im.mu.RLock()
-    instance, exists := im.instances[name]
-    maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+    inst, exists := im.instances[name]
     im.mu.RUnlock()
     if !exists {
         return nil, fmt.Errorf("instance with name %s not found", name)
     }
-    if instance.IsRunning() {
-        return instance, fmt.Errorf("instance with name %s is already running", name)
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        remoteInst, err := im.StartRemoteInstance(node, name)
+        if err != nil {
+            return nil, err
+        }
+        // Update the local stub with all remote data (preserving Nodes)
+        im.mu.Lock()
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        im.mu.Unlock()
+        return inst, nil
     }
+    if inst.IsRunning() {
+        return inst, fmt.Errorf("instance with name %s is already running", name)
+    }
+    // Check max running instances limit for local instances only
+    im.mu.RLock()
+    localRunningCount := 0
+    for instName := range im.runningInstances {
+        if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
+            localRunningCount++
+        }
+    }
+    maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+    im.mu.RUnlock()
     if maxRunningExceeded {
         return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
     }
-    if err := instance.Start(); err != nil {
+    if err := inst.Start(); err != nil {
         return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
     }
     im.mu.Lock()
     defer im.mu.Unlock()
-    err := im.persistInstance(instance)
+    err := im.persistInstance(inst)
     if err != nil {
         return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
     }
-    return instance, nil
+    return inst, nil
 }
 func (im *instanceManager) IsMaxRunningInstancesReached() bool {
@@ -206,51 +398,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
 // StopInstance stops a running instance and returns it.
 func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
     im.mu.RLock()
-    instance, exists := im.instances[name]
+    inst, exists := im.instances[name]
     im.mu.RUnlock()
     if !exists {
         return nil, fmt.Errorf("instance with name %s not found", name)
     }
-    if !instance.IsRunning() {
-        return instance, fmt.Errorf("instance with name %s is already stopped", name)
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        remoteInst, err := im.StopRemoteInstance(node, name)
+        if err != nil {
+            return nil, err
+        }
+        // Update the local stub with all remote data (preserving Nodes)
+        im.mu.Lock()
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        im.mu.Unlock()
+        return inst, nil
     }
-    if err := instance.Stop(); err != nil {
+    if !inst.IsRunning() {
+        return inst, fmt.Errorf("instance with name %s is already stopped", name)
+    }
+    if err := inst.Stop(); err != nil {
         return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
     }
     im.mu.Lock()
     defer im.mu.Unlock()
-    err := im.persistInstance(instance)
+    err := im.persistInstance(inst)
     if err != nil {
         return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
     }
-    return instance, nil
+    return inst, nil
 }
 // RestartInstance stops and then starts an instance, returning the updated instance.
 func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
-    instance, err := im.StopInstance(name)
+    im.mu.RLock()
+    inst, exists := im.instances[name]
+    im.mu.RUnlock()
+    if !exists {
+        return nil, fmt.Errorf("instance with name %s not found", name)
+    }
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        remoteInst, err := im.RestartRemoteInstance(node, name)
+        if err != nil {
+            return nil, err
+        }
+        // Update the local stub with all remote data (preserving Nodes)
+        im.mu.Lock()
+        im.updateLocalInstanceFromRemote(inst, remoteInst)
+        im.mu.Unlock()
+        return inst, nil
+    }
+    inst, err := im.StopInstance(name)
     if err != nil {
         return nil, err
     }
-    return im.StartInstance(instance.Name)
+    return im.StartInstance(inst.Name)
 }
 // GetInstanceLogs retrieves the logs for a specific instance by its name.
-func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
+func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
     im.mu.RLock()
-    _, exists := im.instances[name]
+    inst, exists := im.instances[name]
     im.mu.RUnlock()
     if !exists {
         return "", fmt.Errorf("instance with name %s not found", name)
     }
-    // TODO: Implement actual log retrieval logic
-    return fmt.Sprintf("Logs for instance %s", name), nil
+    // Check if instance is remote and delegate to remote operation
+    if node := im.getNodeForInstance(inst); node != nil {
+        return im.GetRemoteInstanceLogs(node, name, numLines)
+    }
+    // Get logs from the local instance
+    return inst.GetLogs(numLines)
 }
 // getPortFromOptions extracts the port from backend-specific options
@@ -260,6 +496,14 @@ func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOp
         if options.LlamaServerOptions != nil {
             return options.LlamaServerOptions.Port
         }
+    case backends.BackendTypeMlxLm:
+        if options.MlxServerOptions != nil {
+            return options.MlxServerOptions.Port
+        }
+    case backends.BackendTypeVllm:
+        if options.VllmServerOptions != nil {
+            return options.VllmServerOptions.Port
+        }
     }
     return 0
 }
@@ -271,6 +515,14 @@ func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOpti
         if options.LlamaServerOptions != nil {
             options.LlamaServerOptions.Port = port
         }
+    case backends.BackendTypeMlxLm:
+        if options.MlxServerOptions != nil {
+            options.MlxServerOptions.Port = port
+        }
+    case backends.BackendTypeVllm:
+        if options.VllmServerOptions != nil {
+            options.VllmServerOptions.Port = port
+        }
     }
 }
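Taken together, these manager changes hinge on one dispatch rule: an instance counts as remote when its options request at least one node and the first requested node is not the node this manager runs on; every lifecycle method then delegates to that node and syncs the local stub. A minimal standalone sketch of that rule, with illustrative names rather than the project's actual types:

// Sketch only: the remote-vs-local dispatch rule used throughout the
// manager, reduced to a standalone function with illustrative names.
package main

import "fmt"

// isRemote mirrors the check in CreateInstance: an instance is remote when
// at least one node is requested and the first requested node is not the
// node this manager runs on.
func isRemote(nodes []string, localNode string) bool {
    return len(nodes) > 0 && nodes[0] != localNode
}

func main() {
    fmt.Println(isRemote(nil, "main"))                 // false: no nodes -> local
    fmt.Println(isRemote([]string{"main"}, "main"))    // false: first node is local
    fmt.Println(isRemote([]string{"worker1"}, "main")) // true: delegate to worker1
}

Keeping the rule this small is what lets create, start, stop, update, delete, and logs all share the same delegate-then-update-stub flow.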


@@ -62,12 +62,20 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
     }
     // Test max instances limit
+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+        },
+        MLX: config.BackendSettings{
+            Command: "mlx_lm.server",
+        },
+    }
     cfg := config.InstancesConfig{
         PortRange:            [2]int{8000, 9000},
         MaxInstances:         1, // Very low limit for testing
         TimeoutCheckInterval: 5,
     }
-    limitedManager := manager.NewInstanceManager(cfg)
+    limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
     _, err = limitedManager.CreateInstance("instance1", options)
     if err != nil {

pkg/manager/remote_ops.go Normal file

@@ -0,0 +1,222 @@
package manager
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"net/http"
)
// makeRemoteRequest is a helper function to make HTTP requests to a remote node
func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
var reqBody io.Reader
if body != nil {
jsonData, err := json.Marshal(body)
if err != nil {
return nil, fmt.Errorf("failed to marshal request body: %w", err)
}
reqBody = bytes.NewBuffer(jsonData)
}
url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
req, err := http.NewRequest(method, url, reqBody)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
if nodeConfig.APIKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
}
resp, err := im.httpClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to execute request: %w", err)
}
return resp, nil
}
// parseRemoteResponse is a helper function to parse API responses
func parseRemoteResponse(resp *http.Response, result any) error {
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
if result != nil {
if err := json.Unmarshal(body, result); err != nil {
return fmt.Errorf("failed to unmarshal response: %w", err)
}
}
return nil
}
// ListRemoteInstances lists all instances on the remote node
func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil)
if err != nil {
return nil, err
}
var instances []*instance.Process
if err := parseRemoteResponse(resp, &instances); err != nil {
return nil, err
}
return instances, nil
}
// CreateRemoteInstance creates a new instance on the remote node
func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// GetRemoteInstance retrieves an instance by name from the remote node
func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// UpdateRemoteInstance updates an existing instance on the remote node
func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// DeleteRemoteInstance deletes an instance from the remote node
func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
path := fmt.Sprintf("/api/v1/instances/%s/", name)
resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
if err != nil {
return err
}
return parseRemoteResponse(resp, nil)
}
// StartRemoteInstance starts an instance on the remote node
func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/start", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// StopRemoteInstance stops an instance on the remote node
func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/stop", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// RestartRemoteInstance restarts an instance on the remote node
func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
path := fmt.Sprintf("/api/v1/instances/%s/restart", name)
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Process
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// GetRemoteInstanceLogs retrieves logs for an instance from the remote node
func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// Logs endpoint might return plain text or JSON
// Try to parse as JSON first (in case it's wrapped in a response object)
var logResponse struct {
Logs string `json:"logs"`
}
if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" {
return logResponse.Logs, nil
}
// Otherwise, return as plain text
return string(body), nil
}
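On the wire, each helper above is a plain JSON-over-HTTP call against the remote node's llamactl API, authenticated with the node's configured API key as a Bearer token. A rough sketch of the equivalent raw request for the list call; the node address and key are placeholders:

// Sketch only: the request shape produced by makeRemoteRequest for a list
// call. "http://worker1:8080" and the API key are placeholders.
package main

import (
    "fmt"
    "io"
    "net/http"
)

func main() {
    // GET the instance list from a worker node, as ListRemoteInstances does.
    req, err := http.NewRequest("GET", "http://worker1:8080/api/v1/instances/", nil)
    if err != nil {
        panic(err)
    }
    // The manager authenticates against the node with its configured API key.
    req.Header.Set("Authorization", "Bearer my-node-api-key")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body))
}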


@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
     // Identify instances that should timeout
     for _, inst := range im.instances {
+        // Skip remote instances - they are managed by their respective nodes
+        if inst.IsRemote() {
+            continue
+        }
         if inst.ShouldTimeout() {
             timeoutInstances = append(timeoutInstances, inst.Name)
         }
@@ -34,12 +39,17 @@ func (im *instanceManager) EvictLRUInstance() error {
     im.mu.RLock()
     var lruInstance *instance.Process
-    for name, _ := range im.runningInstances {
+    for name := range im.runningInstances {
         inst := im.instances[name]
         if inst == nil {
             continue
         }
+        // Skip remote instances - they are managed by their respective nodes
+        if inst.IsRemote() {
+            continue
+        }
         if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
             continue // Skip instances without idle timeout
         }
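Both loops apply the same eligibility test before acting on an instance. A sketch of that test as a pure predicate, with plain parameters standing in for the Process accessors used above:

// Sketch only: the skip rules above as a pure predicate, with assumed
// parameters instead of the Process accessors.
package main

import "fmt"

func evictionCandidate(remote bool, idleTimeout *int) bool {
    if remote {
        return false // remote instances are managed by their own node
    }
    if idleTimeout != nil && *idleTimeout <= 0 {
        return false // explicitly configured to never idle out
    }
    return true
}

func main() {
    t := 300
    zero := 0
    fmt.Println(evictionCandidate(true, &t))     // false
    fmt.Println(evictionCandidate(false, &t))    // true
    fmt.Println(evictionCandidate(false, &zero)) // false
}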


@@ -13,13 +13,17 @@ import (
 func TestTimeoutFunctionality(t *testing.T) {
     // Test timeout checker initialization
+    backendConfig := config.BackendConfig{
+        LlamaCpp: config.BackendSettings{Command: "llama-server"},
+        MLX:      config.BackendSettings{Command: "mlx_lm.server"},
+    }
     cfg := config.InstancesConfig{
         PortRange:            [2]int{8000, 9000},
         TimeoutCheckInterval: 10,
         MaxInstances:         5,
     }
-    manager := manager.NewInstanceManager(cfg)
+    manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}, "main")
     if manager == nil {
         t.Fatal("Manager should be initialized with timeout checker")
     }


@@ -1,631 +1,29 @@
 package server
 import (
-    "bytes"
-    "encoding/json"
-    "fmt"
-    "io"
     "llamactl/pkg/config"
-    "llamactl/pkg/instance"
     "llamactl/pkg/manager"
     "net/http"
-    "os/exec"
-    "strconv"
-    "strings"
-    "github.com/go-chi/chi/v5"
+    "net/http/httputil"
+    "sync"
+    "time"
 )
 type Handler struct {
     InstanceManager manager.InstanceManager
     cfg             config.AppConfig
+    httpClient      *http.Client
+    remoteProxies   map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
+    remoteProxiesMu sync.RWMutex
 }
 func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
     return &Handler{
         InstanceManager: im,
         cfg:             cfg,
+        httpClient: &http.Client{
+            Timeout: 30 * time.Second,
+        },
+        remoteProxies: make(map[string]*httputil.ReverseProxy),
     }
 }
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
if lines == "" {
lines = "-1"
}
num_lines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := inst.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
proxyPath := r.URL.Path[len(prefix):]
// Ensure the proxy path starts with "/"
if !strings.HasPrefix(proxyPath, "/") {
proxyPath = "/" + proxyPath
}
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Restore original path for logging purposes
defer func() {
r.URL.Path = originalPath
}()
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or model name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest)
return
}
r.Body.Close()
// Parse the body to extract model name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Model name is required", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on model name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
    }
}
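The httpClient and remoteProxies fields added to Handler at the top of this file imply a read-through proxy cache: the hit path takes remoteProxiesMu as a read lock, and only a miss upgrades to the write lock to build an entry. A standalone sketch of that pattern, assuming nothing beyond the field types shown above:

// Sketch only: a read-through cache matching the remoteProxies /
// remoteProxiesMu fields. The double-check after upgrading to the write
// lock avoids building the same proxy twice under contention.
package main

import (
    "net/http/httputil"
    "net/url"
    "sync"
)

type proxyCache struct {
    mu      sync.RWMutex
    proxies map[string]*httputil.ReverseProxy
}

func (c *proxyCache) get(name string, target *url.URL) *httputil.ReverseProxy {
    c.mu.RLock()
    p, ok := c.proxies[name]
    c.mu.RUnlock()
    if ok {
        return p
    }
    c.mu.Lock()
    defer c.mu.Unlock()
    // Re-check: another goroutine may have built the proxy while we
    // waited for the write lock.
    if p, ok := c.proxies[name]; ok {
        return p
    }
    p = httputil.NewSingleHostReverseProxy(target)
    c.proxies[name] = p
    return p
}

func main() {
    c := &proxyCache{proxies: make(map[string]*httputil.ReverseProxy)}
    target, _ := url.Parse("http://worker1:8080")
    _ = c.get("my-model", target)
}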


@@ -0,0 +1,320 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/instance"
"net/http"
"os/exec"
"strings"
"github.com/go-chi/chi/v5"
)
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Get the instance name from the URL parameter
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
options := inst.GetOptions()
if options == nil {
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
return
}
if options.BackendType != backends.BackendTypeLlamaCpp {
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
return
}
if !inst.IsRunning() {
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(name); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/llama-cpp/<name>" prefix from the request URL
prefix := fmt.Sprintf("/llama-cpp/%s", name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
proxy.ServeHTTP(w, r)
}
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
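For reference, each parse-command endpoint above takes a ParseCommandRequest body and answers with CreateInstanceOptions as JSON. A hypothetical client call, assuming these routes are mounted under the same /api/v1 prefix as the instance routes:

// Hypothetical client for the parse-command endpoint; the server address,
// model path, and /api/v1 prefix are assumptions for illustration.
package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
)

func main() {
    payload, _ := json.Marshal(map[string]string{
        "command": "llama-server --model /models/example.gguf --port 8081",
    })
    resp, err := http.Post(
        "http://localhost:8080/api/v1/backends/llama-cpp/parse-command",
        "application/json",
        bytes.NewReader(payload),
    )
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(body)) // CreateInstanceOptions as JSON on success
}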


@@ -0,0 +1,445 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"net/http/httputil"
"net/url"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
)
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
numLines := -1 // Default to all lines
if lines != "" {
parsedLines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
numLines = parsedLines
}
// Use the instance manager which handles both local and remote instances
logs, err := h.InstanceManager.GetInstanceLogs(name, numLines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Check if this is a remote instance
if inst.IsRemote() {
h.RemoteInstanceProxy(w, r, name, inst)
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
// RemoteInstanceProxy proxies requests to a remote instance
func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
// Get the node name from instance options
options := inst.GetOptions()
if options == nil || len(options.Nodes) == 0 {
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
return
}
nodeName := options.Nodes[0]
// Check if we have a cached proxy for this node
h.remoteProxiesMu.RLock()
proxy, exists := h.remoteProxies[nodeName]
h.remoteProxiesMu.RUnlock()
if !exists {
// Find node configuration
nodeConfig, exists := h.cfg.Nodes[nodeName]
if !exists {
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
return
}
// Create reverse proxy to remote node
targetURL, err := url.Parse(nodeConfig.Address)
if err != nil {
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
return
}
proxy = httputil.NewSingleHostReverseProxy(targetURL)
// Modify request before forwarding
originalDirector := proxy.Director
apiKey := nodeConfig.APIKey // Capture for closure
proxy.Director = func(req *http.Request) {
originalDirector(req)
// Add API key if configured
if apiKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
}
}
// Cache the proxy by node name
h.remoteProxiesMu.Lock()
h.remoteProxies[nodeName] = proxy
h.remoteProxiesMu.Unlock()
}
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
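
For reference, the instance lifecycle above can be driven with plain HTTP. A hedged sketch of create-then-start against a local server; the JSON field names mirror CreateInstanceOptions as used by the web UI, while the backend_type value, model path, and base URL are assumptions, and management-key auth is omitted for brevity:

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	base := "http://localhost:8080/api/v1" // assumed listen address
	payload := []byte(`{"backend_type":"llama_cpp","backend_options":{"model":"/models/demo.gguf"}}`)

	// POST /instances/{name} creates the instance.
	resp, err := http.Post(base+"/instances/demo", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("create:", resp.Status)

	// POST /instances/{name}/start starts it; a 409 here corresponds to the
	// MaxRunningInstancesError conflict handled in StartInstance above.
	resp, err = http.Post(base+"/instances/demo/start", "application/json", nil)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("start:", resp.Status)
}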

@@ -0,0 +1,79 @@
package server
import (
"encoding/json"
"net/http"
"github.com/go-chi/chi/v5"
)
// NodeResponse represents a sanitized node configuration for API responses
type NodeResponse struct {
Address string `json:"address"`
}
// ListNodes godoc
// @Summary List all configured nodes
// @Description Returns a map of all nodes configured in the server (node name -> node config)
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes [get]
func (h *Handler) ListNodes() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Convert to sanitized response format (map of name -> NodeResponse)
nodeResponses := make(map[string]NodeResponse, len(h.cfg.Nodes))
for name, node := range h.cfg.Nodes {
nodeResponses[name] = NodeResponse{
Address: node.Address,
}
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(nodeResponses); err != nil {
http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetNode godoc
// @Summary Get details of a specific node
// @Description Returns the details of a specific node by name
// @Tags nodes
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Node Name"
// @Success 200 {object} NodeResponse "Node details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 404 {string} string "Node not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /nodes/{name} [get]
func (h *Handler) GetNode() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
return
}
nodeConfig, exists := h.cfg.Nodes[name]
if !exists {
http.Error(w, "Node not found", http.StatusNotFound)
return
}
// Convert to sanitized response format
nodeResponse := NodeResponse{
Address: nodeConfig.Address,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(nodeResponse); err != nil {
http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
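
The sanitized NodeResponse shape means a client only ever sees node addresses; API keys never leave the server. A small consumer sketch, assuming a local listener and omitting auth for brevity:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:8080/api/v1/nodes") // assumed base URL
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Mirrors the map[string]NodeResponse shape returned by ListNodes.
	var nodes map[string]struct {
		Address string `json:"address"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&nodes); err != nil {
		panic(err)
	}
	for name, n := range nodes {
		fmt.Printf("%s -> %s\n", name, n.Address)
	}
}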

@@ -0,0 +1,206 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/instance"
"net/http"
"net/http/httputil"
"net/url"
)
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest)
return
}
r.Body.Close()
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate instance based on the model name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
return
}
// Check if this is a remote instance
if inst.IsRemote() {
// Restore the body for the remote proxy
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
h.RemoteOpenAIProxy(w, r, modelName, inst)
return
}
if !inst.IsRunning() {
options := inst.GetOptions()
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
}
}
// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance
func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
// Get the node name from instance options
options := inst.GetOptions()
if options == nil || len(options.Nodes) == 0 {
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
return
}
nodeName := options.Nodes[0]
// Check if we have a cached proxy for this node
h.remoteProxiesMu.RLock()
proxy, exists := h.remoteProxies[nodeName]
h.remoteProxiesMu.RUnlock()
if !exists {
// Find node configuration
nodeConfig, exists := h.cfg.Nodes[nodeName]
if !exists {
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
return
}
// Create reverse proxy to remote node
targetURL, err := url.Parse(nodeConfig.Address)
if err != nil {
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
return
}
proxy = httputil.NewSingleHostReverseProxy(targetURL)
// Modify request before forwarding
originalDirector := proxy.Director
apiKey := nodeConfig.APIKey // Capture for closure
proxy.Director = func(req *http.Request) {
originalDirector(req)
// Add API key if configured
if apiKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
}
}
// Cache the proxy
h.remoteProxiesMu.Lock()
h.remoteProxies[nodeName] = proxy
h.remoteProxiesMu.Unlock()
}
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
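
Since the OpenAI proxy routes on the request's "model" field, pointing any OpenAI-style client at llamactl and setting model to the instance name is enough to reach it (and, with on-demand start enabled, to have it started). A sketch with an assumed host and a hypothetical inference key:

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// "demo" is the llamactl instance name; the model field doubles as the route key.
	payload := []byte(`{"model":"demo","messages":[{"role":"user","content":"Hi"}]}`)
	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/v1/chat/completions", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer my-inference-key") // hypothetical key

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}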

@@ -0,0 +1,22 @@
package server
import (
"fmt"
"net/http"
)
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}

@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	r.Use(cors.Handler(cors.Options{
 		AllowedOrigins:   handler.cfg.Server.AllowedOrigins,
 		AllowedMethods:   []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
-		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
+		AllowedHeaders:   handler.cfg.Server.AllowedHeaders,
 		ExposedHeaders:   []string{"Link"},
 		AllowCredentials: false,
 		MaxAge:           300,
@@ -44,10 +44,29 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Get("/version", handler.VersionHandler()) // Get server version r.Get("/version", handler.VersionHandler()) // Get server version
r.Route("/server", func(r chi.Router) { // Backend-specific endpoints
r.Get("/help", handler.LlamaServerHelpHandler()) r.Route("/backends", func(r chi.Router) {
r.Get("/version", handler.LlamaServerVersionHandler()) r.Route("/llama-cpp", func(r chi.Router) {
r.Get("/devices", handler.LlamaServerListDevicesHandler()) r.Get("/help", handler.LlamaServerHelpHandler())
r.Get("/version", handler.LlamaServerVersionHandler())
r.Get("/devices", handler.LlamaServerListDevicesHandler())
r.Post("/parse-command", handler.ParseLlamaCommand())
})
r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand())
})
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
})
// Node management endpoints
r.Route("/nodes", func(r chi.Router) {
r.Get("/", handler.ListNodes()) // List all nodes
r.Route("/{name}", func(r chi.Router) {
r.Get("/", handler.GetNode())
})
}) })
// Instance management endpoints // Instance management endpoints
@@ -93,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		})

+		r.Route("/llama-cpp/{name}", func(r chi.Router) {
+			// Public Routes
+			// Allow llama-cpp server to serve its own WebUI if it is running.
+			// Don't auto-start the server since it can be accessed without an API key
+			r.Get("/", handler.LlamaCppProxy(false))
+
+			// Private Routes
+			r.Group(func(r chi.Router) {
+				if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+					r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
+				}
+
+				// This handler auto-starts the server if it's not running
+				llamaCppHandler := handler.LlamaCppProxy(true)
+
+				// llama.cpp server specific proxy endpoints
+				r.Get("/props", llamaCppHandler)
+				// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
+				r.Get("/slots", llamaCppHandler)
+				r.Post("/apply-template", llamaCppHandler)
+				r.Post("/completion", llamaCppHandler)
+				r.Post("/detokenize", llamaCppHandler)
+				r.Post("/embeddings", llamaCppHandler)
+				r.Post("/infill", llamaCppHandler)
+				r.Post("/metrics", llamaCppHandler)
+				r.Post("/props", llamaCppHandler)
+				r.Post("/reranking", llamaCppHandler)
+				r.Post("/tokenize", llamaCppHandler)
+
+				// OpenAI-compatible proxy endpoint
+				// Handles all POST requests to /v1/*, including:
+				// - /v1/completions
+				// - /v1/chat/completions
+				// - /v1/embeddings
+				// - /v1/rerank
+				// - /v1/reranking
+				// llamaCppHandler is used here because some users of llama.cpp endpoints depend
+				// on the "model" field being optional, and handler.OpenAIProxy requires it.
+				r.Post("/v1/*", llamaCppHandler)
+			})
+		})
+
 	// Serve WebUI files
 	if err := webui.SetupWebUI(r); err != nil {
 		fmt.Printf("Failed to set up WebUI: %v\n", err)

@@ -44,6 +44,10 @@ func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 	switch options.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		return validateLlamaCppOptions(options)
+	case backends.BackendTypeMlxLm:
+		return validateMlxOptions(options)
+	case backends.BackendTypeVllm:
+		return validateVllmOptions(options)
 	default:
 		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
 	}
@@ -68,6 +72,43 @@ func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
 	return nil
 }

+// validateMlxOptions validates MLX backend specific options
+func validateMlxOptions(options *instance.CreateInstanceOptions) error {
+	if options.MlxServerOptions == nil {
+		return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
+	}
+	if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
+		return err
+	}
+	// Basic network validation for port
+	if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
+	}
+	return nil
+}
+
+// validateVllmOptions validates vLLM backend specific options
+func validateVllmOptions(options *instance.CreateInstanceOptions) error {
+	if options.VllmServerOptions == nil {
+		return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
+	}
+	// Use reflection to check all string fields for injection patterns
+	if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
+		return err
+	}
+	// Basic network validation for port
+	if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
+	}
+	return nil
+}
+
 // validateStructStrings recursively validates all string fields in a struct
 func validateStructStrings(v any, fieldPath string) error {
 	val := reflect.ValueOf(v)
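
validateStructStrings is only shown up to its signature here. As a rough illustration of the pattern (not llamactl's actual checks), a standalone sketch that walks a struct with reflection and rejects string fields containing shell metacharacters:

package main

import (
	"fmt"
	"reflect"
	"strings"
)

// validateStrings recursively inspects exported struct fields and flags
// string values that contain characters often used for command injection.
func validateStrings(v any, path string) error {
	val := reflect.ValueOf(v)
	if val.Kind() == reflect.Pointer {
		val = val.Elem()
	}
	if val.Kind() != reflect.Struct {
		return nil
	}
	for i := 0; i < val.NumField(); i++ {
		f := val.Field(i)
		name := path + val.Type().Field(i).Name
		switch f.Kind() {
		case reflect.String:
			if strings.ContainsAny(f.String(), ";|&`$") {
				return fmt.Errorf("suspicious characters in %s", name)
			}
		case reflect.Struct:
			if err := validateStrings(f.Interface(), name+"."); err != nil {
				return err
			}
		}
	}
	return nil
}

func main() {
	type Opts struct{ Model, ExtraArgs string }
	fmt.Println(validateStrings(&Opts{Model: "/models/a.gguf"}, "")) // <nil>
	fmt.Println(validateStrings(&Opts{ExtraArgs: "x; rm -rf /"}, "")) // error
}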

webui/package-lock.json (generated)

@@ -19,6 +19,7 @@
"lucide-react": "^0.525.0", "lucide-react": "^0.525.0",
"react": "^19.1.0", "react": "^19.1.0",
"react-dom": "^19.1.0", "react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1", "tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11", "tailwindcss": "^4.1.11",
"zod": "^4.0.5" "zod": "^4.0.5"
@@ -42,7 +43,7 @@
"tw-animate-css": "^1.3.5", "tw-animate-css": "^1.3.5",
"typescript": "^5.8.3", "typescript": "^5.8.3",
"typescript-eslint": "^8.38.0", "typescript-eslint": "^8.38.0",
"vite": "^7.0.5", "vite": "^7.1.5",
"vitest": "^3.2.4" "vitest": "^3.2.4"
} }
}, },
@@ -2109,6 +2110,60 @@
"node": ">=14.0.0" "node": ">=14.0.0"
} }
}, },
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/wasi-threads": "1.0.2",
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
"version": "1.0.2",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
"version": "0.2.11",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.4.3",
"@emnapi/runtime": "^1.4.3",
"@tybys/wasm-util": "^0.9.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
"version": "0.9.0",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
"version": "2.8.0",
"inBundle": true,
"license": "0BSD",
"optional": true
},
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": { "node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
"version": "4.1.11", "version": "4.1.11",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz", "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
@@ -4190,10 +4245,13 @@
} }
}, },
"node_modules/fdir": { "node_modules/fdir": {
"version": "6.4.6", "version": "6.5.0",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
"license": "MIT", "license": "MIT",
"engines": {
"node": ">=12.0.0"
},
"peerDependencies": { "peerDependencies": {
"picomatch": "^3 || ^4" "picomatch": "^3 || ^4"
}, },
@@ -6693,6 +6751,16 @@
"node": ">=18" "node": ">=18"
} }
}, },
"node_modules/sonner": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
"integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
"license": "MIT",
"peerDependencies": {
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
"react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc"
}
},
"node_modules/source-map-js": { "node_modules/source-map-js": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -6973,13 +7041,13 @@
"license": "MIT" "license": "MIT"
}, },
"node_modules/tinyglobby": { "node_modules/tinyglobby": {
"version": "0.2.14", "version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz", "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"fdir": "^6.4.4", "fdir": "^6.5.0",
"picomatch": "^4.0.2" "picomatch": "^4.0.3"
}, },
"engines": { "engines": {
"node": ">=12.0.0" "node": ">=12.0.0"
@@ -7356,17 +7424,17 @@
} }
}, },
"node_modules/vite": { "node_modules/vite": {
"version": "7.0.5", "version": "7.1.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.5.tgz", "resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
"integrity": "sha512-1mncVwJxy2C9ThLwz0+2GKZyEXuC3MyWtAAlNftlZZXZDP3AJt5FmwcMit/IGGaNZ8ZOB2BNO/HFUB+CpN0NQw==", "integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"esbuild": "^0.25.0", "esbuild": "^0.25.0",
"fdir": "^6.4.6", "fdir": "^6.5.0",
"picomatch": "^4.0.2", "picomatch": "^4.0.3",
"postcss": "^8.5.6", "postcss": "^8.5.6",
"rollup": "^4.40.0", "rollup": "^4.43.0",
"tinyglobby": "^0.2.14" "tinyglobby": "^0.2.15"
}, },
"bin": { "bin": {
"vite": "bin/vite.js" "vite": "bin/vite.js"

@@ -28,6 +28,7 @@
"lucide-react": "^0.525.0", "lucide-react": "^0.525.0",
"react": "^19.1.0", "react": "^19.1.0",
"react-dom": "^19.1.0", "react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1", "tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11", "tailwindcss": "^4.1.11",
"zod": "^4.0.5" "zod": "^4.0.5"
@@ -51,7 +52,7 @@
"tw-animate-css": "^1.3.5", "tw-animate-css": "^1.3.5",
"typescript": "^5.8.3", "typescript": "^5.8.3",
"typescript-eslint": "^8.38.0", "typescript-eslint": "^8.38.0",
"vite": "^7.0.5", "vite": "^7.1.5",
"vitest": "^3.2.4" "vitest": "^3.2.4"
} }
} }

@@ -8,6 +8,7 @@ import { type CreateInstanceOptions, type Instance } from "@/types/instance";
 import { useInstances } from "@/contexts/InstancesContext";
 import { useAuth } from "@/contexts/AuthContext";
 import { ThemeProvider } from "@/contexts/ThemeContext";
+import { Toaster } from "sonner";

 function App() {
   const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -85,6 +86,8 @@ function App() {
           open={isSystemInfoModalOpen}
           onOpenChange={setIsSystemInfoModalOpen}
         />
+
+        <Toaster />
       </div>
     </ThemeProvider>
   );

@@ -160,7 +160,7 @@ describe('App Component - Critical Business Logic Only', () => {
       expect(screen.getAllByTitle('Start instance').length).toBeGreaterThan(0)
       expect(screen.getAllByTitle('Stop instance').length).toBeGreaterThan(0)
       expect(screen.getAllByTitle('Edit instance').length).toBe(2)
-      expect(screen.getAllByTitle('Delete instance').length).toBeGreaterThan(0)
+      expect(screen.getAllByTitle('More actions').length).toBe(2)
     })

     it('delete confirmation calls correct API', async () => {
@@ -174,8 +174,17 @@ describe('App Component - Critical Business Logic Only', () => {
         expect(screen.getByText('test-instance-1')).toBeInTheDocument()
       })

-      const deleteButtons = screen.getAllByTitle('Delete instance')
-      await user.click(deleteButtons[0])
+      // First click the "More actions" button to reveal the delete button
+      const moreActionsButtons = screen.getAllByTitle('More actions')
+      await user.click(moreActionsButtons[0])
+
+      // Wait for the delete button to appear and click it
+      await waitFor(() => {
+        expect(screen.getByTitle('Delete instance')).toBeInTheDocument()
+      })
+      const deleteButton = screen.getByTitle('Delete instance')
+      await user.click(deleteButton)

       // Verify confirmation and API call
       expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance-1"?')

@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server, Package } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
docker?: boolean;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
if (!backend) {
return null;
}
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getColorClasses = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "bg-blue-100 text-blue-800 border-blue-200 dark:bg-blue-900 dark:text-blue-200 dark:border-blue-800";
case BackendType.MLX_LM:
return "bg-green-100 text-green-800 border-green-200 dark:bg-green-900 dark:text-green-200 dark:border-green-800";
case BackendType.VLLM:
return "bg-purple-100 text-purple-800 border-purple-200 dark:bg-purple-900 dark:text-purple-200 dark:border-purple-800";
default:
return "bg-gray-100 text-gray-800 border-gray-200 dark:bg-gray-900 dark:text-gray-200 dark:border-gray-800";
}
};
return (
<div className="flex items-center gap-1">
<Badge
variant="outline"
className={`flex items-center gap-1.5 ${getColorClasses()}`}
>
<Server className="h-3 w-3" />
<span className="text-xs">{getText()}</span>
</Badge>
{docker && (
<Badge
variant="outline"
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
title="Docker enabled"
>
<Package className="h-3 w-3" />
<span className="text-[10px] uppercase tracking-wide">Docker</span>
</Badge>
)}
</div>
);
};
export default BackendBadge;

@@ -2,11 +2,10 @@ import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
-import type { BackendOptions } from '@/schemas/instanceOptions'
 import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'

 interface BackendFormFieldProps {
-  fieldKey: keyof BackendOptions
+  fieldKey: string
   value: string | number | boolean | string[] | undefined
   onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
 }
@@ -46,7 +45,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}
@@ -73,7 +71,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}
@@ -100,7 +97,6 @@ const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, on
       <div className="grid gap-2">
         <Label htmlFor={fieldKey}>
           {config.label}
-          {config.required && <span className="text-red-500 ml-1">*</span>}
         </Label>
         <Input
           id={fieldKey}

@@ -2,9 +2,10 @@
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
 import type { Instance } from "@/types/instance";
-import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
+import { Edit, FileText, Play, Square, Trash2, MoreHorizontal } from "lucide-react";
 import LogsDialog from "@/components/LogDialog";
 import HealthBadge from "@/components/HealthBadge";
+import BackendBadge from "@/components/BackendBadge";
 import { useState } from "react";
 import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -24,6 +25,7 @@ function InstanceCard({
   editInstance,
 }: InstanceCardProps) {
   const [isLogsOpen, setIsLogsOpen] = useState(false);
+  const [showAllActions, setShowAllActions] = useState(false);
   const health = useInstanceHealth(instance.name, instance.status);

   const handleStart = () => {
@@ -54,36 +56,44 @@ function InstanceCard({
   return (
     <>
-      <Card>
-        <CardHeader className="pb-3">
-          <div className="flex items-center justify-between">
-            <CardTitle className="text-lg">{instance.name}</CardTitle>
-            {running && <HealthBadge health={health} />}
+      <Card className="hover:shadow-md transition-shadow">
+        <CardHeader className="pb-4">
+          {/* Header with instance name and status badges */}
+          <div className="space-y-3">
+            <CardTitle className="text-lg font-semibold leading-tight break-words">
+              {instance.name}
+            </CardTitle>
+
+            {/* Badges row */}
+            <div className="flex items-center gap-2 flex-wrap">
+              <BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
+              {running && <HealthBadge health={health} />}
+            </div>
           </div>
         </CardHeader>

-        <CardContent>
-          <div className="flex gap-1">
+        <CardContent className="pt-0">
+          {/* Primary actions - always visible */}
+          <div className="flex items-center gap-2 mb-3">
             <Button
               size="sm"
-              variant="outline"
-              onClick={handleStart}
-              disabled={running}
-              title="Start instance"
-              data-testid="start-instance-button"
+              variant={running ? "outline" : "default"}
+              onClick={running ? handleStop : handleStart}
+              className="flex-1"
+              title={running ? "Stop instance" : "Start instance"}
+              data-testid={running ? "stop-instance-button" : "start-instance-button"}
             >
-              <Play className="h-4 w-4" />
-            </Button>
-
-            <Button
-              size="sm"
-              variant="outline"
-              onClick={handleStop}
-              disabled={!running}
-              title="Stop instance"
-              data-testid="stop-instance-button"
-            >
-              <Square className="h-4 w-4" />
+              {running ? (
+                <>
+                  <Square className="h-4 w-4 mr-1" />
+                  Stop
+                </>
+              ) : (
+                <>
+                  <Play className="h-4 w-4 mr-1" />
+                  Start
+                </>
+              )}
             </Button>

             <Button
@@ -99,24 +109,40 @@ function InstanceCard({
             <Button
               size="sm"
               variant="outline"
-              onClick={handleLogs}
-              title="View logs"
-              data-testid="view-logs-button"
+              onClick={() => setShowAllActions(!showAllActions)}
+              title="More actions"
             >
-              <FileText className="h-4 w-4" />
-            </Button>
-
-            <Button
-              size="sm"
-              variant="destructive"
-              onClick={handleDelete}
-              disabled={running}
-              title="Delete instance"
-              data-testid="delete-instance-button"
-            >
-              <Trash2 className="h-4 w-4" />
+              <MoreHorizontal className="h-4 w-4" />
             </Button>
           </div>

+          {/* Secondary actions - collapsible */}
+          {showAllActions && (
+            <div className="flex items-center gap-2 pt-2 border-t border-border">
+              <Button
+                size="sm"
+                variant="outline"
+                onClick={handleLogs}
+                title="View logs"
+                data-testid="view-logs-button"
+                className="flex-1"
+              >
+                <FileText className="h-4 w-4 mr-1" />
+                Logs
+              </Button>
+
+              <Button
+                size="sm"
+                variant="destructive"
+                onClick={handleDelete}
+                disabled={running}
+                title="Delete instance"
+                data-testid="delete-instance-button"
+              >
+                <Trash2 className="h-4 w-4" />
+              </Button>
+            </div>
+          )}
         </CardContent>
       </Card>

@@ -1,7 +1,5 @@
 import React, { useState, useEffect } from "react";
 import { Button } from "@/components/ui/button";
-import { Input } from "@/components/ui/input";
-import { Label } from "@/components/ui/label";
 import {
   Dialog,
   DialogContent,
@@ -11,10 +9,9 @@ import {
   DialogTitle,
 } from "@/components/ui/dialog";
 import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
-import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
-import { ChevronDown, ChevronRight } from "lucide-react";
-import ZodFormField from "@/components/ZodFormField";
-import BackendFormField from "@/components/BackendFormField";
+import ParseCommandDialog from "@/components/ParseCommandDialog";
+import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
+import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";

 interface InstanceDialogProps {
   open: boolean;
@@ -33,14 +30,9 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
   const [instanceName, setInstanceName] = useState("");
   const [formData, setFormData] = useState<CreateInstanceOptions>({});
-  const [showAdvanced, setShowAdvanced] = useState(false);
   const [nameError, setNameError] = useState("");
+  const [showParseDialog, setShowParseDialog] = useState(false);

-  // Get field lists dynamically from the type
-  const basicFields = getBasicFields();
-  const advancedFields = getAdvancedFields();
-  const basicBackendFields = getBasicBackendFields();
-  const advancedBackendFields = getAdvancedBackendFields();

   // Reset form when dialog opens/closes or when instance changes
   useEffect(() => {
@@ -58,16 +50,26 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
         backend_options: {},
       });
     }
-      setShowAdvanced(false); // Always start with basic view
       setNameError(""); // Reset any name errors
     }
   }, [open, instance]);

   const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
-    setFormData((prev) => ({
-      ...prev,
-      [key]: value,
-    }));
+    setFormData((prev) => {
+      // If backend_type is changing, clear backend_options
+      if (key === 'backend_type' && prev.backend_type !== value) {
+        return {
+          ...prev,
+          [key]: value,
+          backend_options: {}, // Clear backend options when backend type changes
+        };
+      }
+      return {
+        ...prev,
+        [key]: value,
+      };
+    });
   };

   const handleBackendFieldChange = (key: string, value: any) => {
@@ -76,7 +78,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
       backend_options: {
         ...prev.backend_options,
         [key]: value,
-      },
+      } as any,
     }));
   };
@@ -104,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
     // Clean up undefined values to avoid sending empty fields
     const cleanOptions: CreateInstanceOptions = {};
     Object.entries(formData).forEach(([key, value]) => {
-      if (key === 'backend_options' && value && typeof value === 'object') {
+      if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
         // Handle backend_options specially - clean nested object
         const cleanBackendOptions: any = {};
         Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -116,13 +118,17 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
             cleanBackendOptions[backendKey] = backendValue;
           }
         });
+
         // Only include backend_options if it has content
         if (Object.keys(cleanBackendOptions).length > 0) {
           (cleanOptions as any)[key] = cleanBackendOptions;
         }
-      } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
-        // Handle arrays - don't include empty arrays
+      } else if (value !== undefined && value !== null) {
+        // Skip empty strings
+        if (typeof value === 'string' && value.trim() === "") {
+          return;
+        }
+        // Skip empty arrays
         if (Array.isArray(value) && value.length === 0) {
           return;
         }
@@ -138,12 +144,15 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
     onOpenChange(false);
   };

-  const toggleAdvanced = () => {
-    setShowAdvanced(!showAdvanced);
+  const handleCommandParsed = (parsedOptions: CreateInstanceOptions) => {
+    setFormData(prev => ({
+      ...prev,
+      ...parsedOptions,
+    }));
+    setShowParseDialog(false);
   };

-  // Check if auto_restart is enabled
-  const isAutoRestartEnabled = formData.auto_restart === true;

   // Save button label logic
   let saveButtonLabel = "Create Instance";
@@ -170,168 +179,25 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
         </DialogHeader>

         <div className="flex-1 overflow-y-auto">
-          <div className="grid gap-6 py-4">
-            {/* Instance Name - Special handling since it's not in CreateInstanceOptions */}
-            <div className="grid gap-2">
-              <Label htmlFor="name">
-                Instance Name <span className="text-red-500">*</span>
-              </Label>
-              <Input
-                id="name"
-                value={instanceName}
-                onChange={(e) => handleNameChange(e.target.value)}
-                placeholder="my-instance"
-                disabled={isEditing} // Don't allow name changes when editing
-                className={nameError ? "border-red-500" : ""}
-              />
-              {nameError && <p className="text-sm text-red-500">{nameError}</p>}
-              <p className="text-sm text-muted-foreground">
-                Unique identifier for the instance
-              </p>
-            </div>
-
-            {/* Auto Restart Configuration Section */}
-            <div className="space-y-4">
-              <h3 className="text-lg font-medium">
-                Auto Restart Configuration
-              </h3>
-
-              {/* Auto Restart Toggle */}
-              <ZodFormField
-                fieldKey="auto_restart"
-                value={formData.auto_restart}
-                onChange={handleFieldChange}
-              />
-
-              {/* Show restart options only when auto restart is enabled */}
-              {isAutoRestartEnabled && (
-                <div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
-                  <ZodFormField
-                    fieldKey="max_restarts"
-                    value={formData.max_restarts}
-                    onChange={handleFieldChange}
-                  />
-                  <ZodFormField
-                    fieldKey="restart_delay"
-                    value={formData.restart_delay}
-                    onChange={handleFieldChange}
-                  />
-                </div>
-              )}
-            </div>
-
-            {/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
-            <div className="space-y-4">
-              <h3 className="text-lg font-medium">Basic Configuration</h3>
-              {basicFields
-                .filter(
-                  (fieldKey) =>
-                    fieldKey !== "auto_restart" &&
-                    fieldKey !== "max_restarts" &&
-                    fieldKey !== "restart_delay" &&
-                    fieldKey !== "backend_options" // backend_options is handled separately
-                )
-                .map((fieldKey) => (
-                  <ZodFormField
-                    key={fieldKey}
-                    fieldKey={fieldKey}
-                    value={formData[fieldKey]}
-                    onChange={handleFieldChange}
-                  />
-                ))}
-            </div>
-
-            {/* Backend Configuration Section */}
-            <div className="space-y-4">
-              <h3 className="text-lg font-medium">Backend Configuration</h3>
-
-              {/* Basic backend fields */}
-              {basicBackendFields.map((fieldKey) => (
-                <BackendFormField
-                  key={fieldKey}
-                  fieldKey={fieldKey}
-                  value={formData.backend_options?.[fieldKey]}
-                  onChange={handleBackendFieldChange}
-                />
-              ))}
-            </div>
-
-            {/* Advanced Fields Toggle */}
-            <div className="border-t pt-4">
-              <Button
-                variant="ghost"
-                onClick={toggleAdvanced}
-                className="flex items-center gap-2 p-0 h-auto font-medium"
-              >
-                {showAdvanced ? (
-                  <ChevronDown className="h-4 w-4" />
-                ) : (
-                  <ChevronRight className="h-4 w-4" />
-                )}
-                Advanced Configuration
-                <span className="text-muted-foreground text-sm font-normal">
-                  (
-                  {
-                    advancedFields.filter(
-                      (f) =>
-                        !["max_restarts", "restart_delay", "backend_options"].includes(f as string)
-                    ).length + advancedBackendFields.length
-                  }{" "}
-                  options)
-                </span>
-              </Button>
-            </div>
-
-            {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
-            {showAdvanced && (
-              <div className="space-y-4 pl-6 border-l-2 border-muted">
-                {/* Advanced instance fields */}
-                {advancedFields
-                  .filter(
-                    (fieldKey) =>
-                      !["max_restarts", "restart_delay", "backend_options"].includes(
-                        fieldKey as string
-                      )
-                  ).length > 0 && (
-                  <div className="space-y-4">
-                    <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
-                    {advancedFields
-                      .filter(
-                        (fieldKey) =>
-                          !["max_restarts", "restart_delay", "backend_options"].includes(
-                            fieldKey as string
-                          )
-                      )
-                      .sort()
-                      .map((fieldKey) => (
-                        <ZodFormField
-                          key={fieldKey}
-                          fieldKey={fieldKey}
-                          value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
-                          onChange={handleFieldChange}
-                        />
-                      ))}
-                  </div>
-                )}
-
-                {/* Advanced backend fields */}
-                {advancedBackendFields.length > 0 && (
-                  <div className="space-y-4">
-                    <h4 className="text-md font-medium">Advanced Backend Configuration</h4>
-                    {advancedBackendFields
-                      .sort()
-                      .map((fieldKey) => (
-                        <BackendFormField
-                          key={fieldKey}
-                          fieldKey={fieldKey}
-                          value={formData.backend_options?.[fieldKey]}
-                          onChange={handleBackendFieldChange}
-                        />
-                      ))}
-                  </div>
-                )}
-              </div>
-            )}
+          <div className="space-y-6 py-4">
+            {/* Instance Settings Card */}
+            <InstanceSettingsCard
+              instanceName={instanceName}
+              nameError={nameError}
+              isEditing={isEditing}
+              formData={formData}
+              onNameChange={handleNameChange}
+              onChange={handleFieldChange}
+            />
+
+            {/* Backend Configuration Card */}
+            <BackendConfigurationCard
+              formData={formData}
+              onBackendFieldChange={handleBackendFieldChange}
+              onChange={handleFieldChange}
+              onParseCommand={() => setShowParseDialog(true)}
+            />
           </div>
         </div>
@@ -352,6 +218,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
           </Button>
         </DialogFooter>
       </DialogContent>
+
+      <ParseCommandDialog
+        open={showParseDialog}
+        onOpenChange={setShowParseDialog}
+        onParsed={handleCommandParsed}
+        backendType={formData.backend_type || BackendType.LLAMA_CPP}
+      />
     </Dialog>
   );
 };

@@ -0,0 +1,151 @@
import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";
interface ParseCommandDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onParsed: (options: CreateInstanceOptions) => void;
backendType: BackendTypeValue;
}
const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
open,
onOpenChange,
onParsed,
backendType,
}) => {
const [command, setCommand] = useState('');
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const handleParse = async () => {
if (!command.trim()) {
setError("Command cannot be empty");
return;
}
setLoading(true);
setError(null);
try {
let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options);
onOpenChange(false);
setCommand('');
setError(null);
toast.success('Command parsed successfully');
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage);
toast.error('Failed to parse command', {
description: errorMessage
});
} finally {
setLoading(false);
}
};
const handleOpenChange = (open: boolean) => {
if (!open) {
setCommand('');
setError(null);
}
onOpenChange(open);
};
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return (
<Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]">
<DialogHeader>
<DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription>
Select your backend type and paste the command to automatically populate the form fields
</DialogDescription>
</DialogHeader>
<div className="space-y-4">
<div>
<Label className="text-sm font-medium">Backend Type:
<span className="font-normal text-muted-foreground">
{backendType === BackendType.LLAMA_CPP && 'Llama Server'}
{backendType === BackendType.MLX_LM && 'MLX LM'}
{backendType === BackendType.VLLM && 'vLLM'}
</span>
</Label>
</div>
<div>
<Label htmlFor="command">Command</Label>
<textarea
id="command"
value={command}
onChange={(e) => setCommand(e.target.value)}
placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/>
</div>
{error && (
<div className="text-destructive text-sm bg-destructive/10 p-3 rounded-md">
{error}
</div>
)}
</div>
<DialogFooter>
<Button variant="outline" onClick={() => handleOpenChange(false)}>
Cancel
</Button>
<Button
onClick={() => {
handleParse().catch(console.error);
}}
disabled={!command.trim() || loading}
>
{loading ? 'Parsing...' : 'Parse Command'}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};
export default ParseCommandDialog;
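For context, a minimal sketch of how a parent form might host this dialog; the import path and the ParseCommandExample wrapper are assumptions, only the props come from the component above.

import React, { useState } from "react";
import ParseCommandDialog from "@/components/ParseCommandDialog"; // path assumed
import { BackendType, type CreateInstanceOptions } from "@/types/instance";

// Opens the dialog and merges parsed options into local form state.
const ParseCommandExample: React.FC = () => {
  const [showParseDialog, setShowParseDialog] = useState(false);
  const [formData, setFormData] = useState<CreateInstanceOptions>({});

  return (
    <>
      <button onClick={() => setShowParseDialog(true)}>Parse Command</button>
      <ParseCommandDialog
        open={showParseDialog}
        onOpenChange={setShowParseDialog}
        onParsed={(options) => setFormData((prev) => ({ ...prev, ...options }))}
        backendType={formData.backend_type || BackendType.LLAMA_CPP}
      />
    </>
  );
};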

View File

@@ -8,16 +8,19 @@ import {
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import SelectInput from '@/components/form/SelectInput'
import {
RefreshCw,
AlertCircle,
Loader2,
ChevronDown,
ChevronRight,
Monitor,
HelpCircle,
Info
} from 'lucide-react'
import { serverApi } from '@/lib/api'
import { BackendType, type BackendTypeValue } from '@/types/instance'
// Helper to get version from environment
const getAppVersion = (): string => {
@@ -28,166 +31,234 @@ const getAppVersion = (): string => {
}
}
interface SystemInfoDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
}
interface BackendInfo {
version: string
devices: string
help: string
}
const BACKEND_OPTIONS = [
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' },
]
const SystemInfoDialog: React.FC<SystemInfoDialogProps> = ({
open,
onOpenChange
}) => {
const [selectedBackend, setSelectedBackend] = useState<BackendTypeValue>(BackendType.LLAMA_CPP)
const [backendInfo, setBackendInfo] = useState<BackendInfo | null>(null)
const [loading, setLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const [showHelp, setShowHelp] = useState(false)
// Fetch backend info
const fetchBackendInfo = async (backend: BackendTypeValue) => {
if (backend !== BackendType.LLAMA_CPP) {
setBackendInfo(null)
setError(null)
return
}
setLoading(true)
setError(null)
try {
const [version, devices, help] = await Promise.all([
serverApi.getVersion(),
serverApi.getDevices(),
serverApi.getHelp()
])
setBackendInfo({ version, devices, help })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch backend info')
} finally {
setLoading(false)
}
}
// Load data when dialog opens or backend changes
useEffect(() => {
if (open) {
void fetchBackendInfo(selectedBackend)
}
}, [open, selectedBackend])
const handleBackendChange = (value: string) => {
setSelectedBackend(value as BackendTypeValue)
setShowHelp(false) // Reset help section when switching backends
}
const renderBackendSpecificContent = () => {
if (selectedBackend !== BackendType.LLAMA_CPP) {
return (
<div className="flex items-center justify-center py-8">
<div className="text-center space-y-3">
<Info className="h-8 w-8 text-gray-400 mx-auto" />
<div>
<h3 className="font-semibold text-gray-700">Backend Info Not Available</h3>
<p className="text-sm text-gray-500 mt-1">
Information for {BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} backend is not yet implemented.
</p>
</div>
</div>
</div>
)
}
if (loading && !backendInfo) {
return (
<div className="flex items-center justify-center py-8">
<Loader2 className="h-6 w-6 animate-spin text-gray-400" />
<span className="ml-2 text-gray-400">Loading backend information...</span>
</div>
)
}
if (error) {
return (
<div className="flex items-center gap-2 p-4 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive" />
<span className="text-sm text-destructive">{error}</span>
</div>
)
}
if (!backendInfo) {
return null
}
return (
<div className="space-y-6">
{/* Backend Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">
{BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} Version
</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --version</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.version}
</pre>
</div>
</div>
{/* Devices Section */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<h3 className="font-semibold">Available Devices</h3>
</div>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --list-devices</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.devices}
</pre>
</div>
</div>
{/* Help Section */}
<div className="space-y-3">
<Button
variant="ghost"
onClick={() => setShowHelp(!showHelp)}
className="flex items-center gap-2 p-0 h-auto font-semibold"
>
{showHelp ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
<HelpCircle className="h-4 w-4" />
Command Line Options
</Button>
{showHelp && (
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --help</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono max-h-64 overflow-y-auto">
{backendInfo.help}
</pre>
</div>
)}
</div>
</div>
)
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-4xl max-w-[calc(100%-2rem)] max-h-[80vh] flex flex-col">
<DialogHeader>
<DialogTitle className="flex items-center gap-2">
<Monitor className="h-5 w-5" />
System Information
</DialogTitle>
<DialogDescription>
View system and backend-specific environment and capabilities
</DialogDescription>
</DialogHeader>
<div className="flex-1 overflow-y-auto">
<div className="space-y-6">
{/* Llamactl Version Section - Always shown */}
<div className="space-y-3">
<h3 className="font-semibold">Llamactl Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{getAppVersion()}
</pre>
</div>
</div>
{/* Backend Selection Section */}
<div className="space-y-3">
<h3 className="font-semibold">Backend Information</h3>
<div className="flex items-center gap-3">
<div className="flex-1">
<SelectInput
id="backend-select"
label=""
value={selectedBackend}
onChange={(value) => handleBackendChange(value || BackendType.LLAMA_CPP)}
options={BACKEND_OPTIONS}
className="text-sm"
/>
</div>
{selectedBackend === BackendType.LLAMA_CPP && (
<Button
variant="outline"
size="sm"
onClick={() => void fetchBackendInfo(selectedBackend)}
disabled={loading}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
)}
</div>
</div>
{/* Backend-specific content */}
{renderBackendSpecificContent()}
</div>
</div>
<DialogFooter>

View File

@@ -1,148 +0,0 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
value: string | number | boolean | string[] | undefined
onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey, newValue)
}
const renderField = () => {
// Special handling for backend_type field - render as dropdown
if (fieldKey === 'backend_type') {
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<select
id={fieldKey}
value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
onChange={(e) => handleChange(e.target.value || undefined)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
{/* Add more backend types here as they become available */}
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default ZodFormField

View File

@@ -102,7 +102,7 @@ afterEach(() => {
it('opens logs dialog when logs button clicked', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -113,9 +113,13 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const logsButton = screen.getByTitle('View logs')
await user.click(logsButton)
// Should open logs dialog (we can verify this by checking if dialog title appears)
expect(screen.getByText(`Logs: ${stoppedInstance.name}`)).toBeInTheDocument()
})
@@ -125,7 +129,7 @@ afterEach(() => {
it('shows confirmation dialog and calls deleteInstance when confirmed', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(true)
render(
<InstanceCard
instance={stoppedInstance}
@@ -136,19 +140,23 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance"?')
expect(mockDeleteInstance).toHaveBeenCalledWith('test-instance')
confirmSpy.mockRestore()
})
it('does not call deleteInstance when confirmation cancelled', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(false)
render(
<InstanceCard
instance={stoppedInstance}
@@ -159,18 +167,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalled()
expect(mockDeleteInstance).not.toHaveBeenCalled()
confirmSpy.mockRestore()
})
})
describe('Button State Based on Instance Status', () => {
it('disables start button and enables stop button for running instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -181,12 +195,19 @@ afterEach(() => {
/>
)
expect(screen.queryByTitle('Start instance')).not.toBeInTheDocument()
expect(screen.getByTitle('Stop instance')).not.toBeDisabled()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).toBeDisabled() // Can't delete running instance
})
it('enables start button and disables stop button for stopped instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -198,11 +219,18 @@ afterEach(() => {
)
expect(screen.getByTitle('Start instance')).not.toBeDisabled()
expect(screen.queryByTitle('Stop instance')).not.toBeInTheDocument()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).not.toBeDisabled() // Can delete stopped instance
})
it('edit and logs buttons are always enabled', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -214,6 +242,11 @@ afterEach(() => {
)
expect(screen.getByTitle('Edit instance')).not.toBeDisabled()
// Expand more actions to access logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('View logs')).not.toBeDisabled()
})
})
@@ -268,7 +301,7 @@ afterEach(() => {
describe('Integration with LogsModal', () => {
it('passes correct props to LogsModal', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -279,20 +312,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
// Open logs dialog
await user.click(screen.getByTitle('View logs'))
// Verify dialog opened with correct instance data
expect(screen.getByText('Logs: running-instance')).toBeInTheDocument()
// Close dialog to test close functionality
const closeButtons = screen.getAllByText('Close')
const dialogCloseButton = closeButtons.find(button =>
button.closest('[data-slot="dialog-content"]')
)
expect(dialogCloseButton).toBeTruthy()
await user.click(dialogCloseButton!)
// Modal should close
expect(screen.queryByText('Logs: running-instance')).not.toBeInTheDocument()
})

View File

@@ -280,29 +280,6 @@ afterEach(() => {
})
})
describe('Advanced Fields Toggle', () => {
it('shows advanced fields when toggle clicked', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
onOpenChange={mockOnOpenChange}
onSave={mockOnSave}
/>
)
// Advanced fields should be hidden initially
expect(screen.queryByText(/Advanced Configuration/)).toBeInTheDocument()
// Click to expand
await user.click(screen.getByText(/Advanced Configuration/))
// Should show more configuration options
// Note: Specific fields depend on zodFormUtils configuration
// We're testing the toggle behavior, not specific fields
})
})
describe('Form Data Handling', () => {
it('cleans up undefined values before submission', async () => {

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput
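A short usage sketch (import path assumed): typing "a, b, , c" produces ['a', 'b', 'c'], and clearing the field reports undefined so the option can be omitted entirely.

import React, { useState } from 'react'
import ArrayInput from '@/components/form/ArrayInput' // path assumed

const LoraExample: React.FC = () => {
  const [lora, setLora] = useState<string[] | undefined>(undefined)
  return (
    <ArrayInput
      id="lora"
      label="LoRA Adapters"
      value={lora}
      onChange={setLora}
      description="Paths to LoRA adapter files"
    />
  )
}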

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
// Convert the value object to an array of key-value pairs for editing
const envVarsFromValue = value
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
: []
const [envVars, setEnvVars] = useState<EnvVar[]>(
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
)
// Update parent component when env vars change
const updateParent = (newEnvVars: EnvVar[]) => {
// Filter out empty entries
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
if (validVars.length === 0) {
onChange(undefined)
} else {
const envObject = validVars.reduce((acc, env) => {
acc[env.key.trim()] = env.value.trim()
return acc
}, {} as Record<string, string>)
onChange(envObject)
}
}
const handleKeyChange = (index: number, newKey: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].key = newKey
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const handleValueChange = (index: number, newValue: string) => {
const newEnvVars = [...envVars]
newEnvVars[index].value = newValue
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const addEnvVar = () => {
const newEnvVars = [...envVars, { key: '', value: '' }]
setEnvVars(newEnvVars)
}
const removeEnvVar = (index: number) => {
if (envVars.length === 1) {
// Reset to empty if it's the last one
const newEnvVars = [{ key: '', value: '' }]
setEnvVars(newEnvVars)
updateParent(newEnvVars)
} else {
const newEnvVars = envVars.filter((_, i) => i !== index)
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
}
return (
<div className={`grid gap-2 ${className || ''}`}>
<Label htmlFor={id}>
{label}
</Label>
<div className="space-y-2">
{envVars.map((envVar, index) => (
<div key={index} className="flex gap-2 items-center">
<Input
placeholder="Variable name"
value={envVar.key}
onChange={(e) => handleKeyChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Input
placeholder="Variable value"
value={envVar.value}
onChange={(e) => handleValueChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => removeEnvVar(index)}
disabled={disabled}
className="shrink-0"
>
<X className="h-4 w-4" />
</Button>
</div>
))}
<Button
type="button"
variant="outline"
size="sm"
onClick={addEnvVar}
disabled={disabled}
className="w-fit"
>
<Plus className="h-4 w-4 mr-2" />
Add Variable
</Button>
</div>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">
Environment variables that will be passed to the backend process
</p>
</div>
)
}
export default EnvironmentVariablesInput
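A usage sketch (import path assumed). Note the design choice above: rows live in local state so half-typed pairs are not lost while editing, but only rows with both a key and a value reach the parent, and an all-empty list is reported as undefined.

import React, { useState } from 'react'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput' // path assumed

const EnvExample: React.FC = () => {
  const [env, setEnv] = useState<Record<string, string> | undefined>({ CUDA_VISIBLE_DEVICES: '0' })
  return (
    <EnvironmentVariablesInput
      id="environment"
      label="Environment Variables"
      value={env}
      onChange={setEnv}
    />
  )
}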

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
const isAutoRestartEnabled = formData.auto_restart === true
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,117 @@
import React, { useState } from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Terminal, ChevronDown, ChevronRight } from 'lucide-react'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
import SelectInput from '@/components/form/SelectInput'
interface BackendConfigurationCardProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: unknown) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
onParseCommand: () => void
}
const BackendConfigurationCard: React.FC<BackendConfigurationCardProps> = ({
formData,
onBackendFieldChange,
onChange,
onParseCommand
}) => {
const [showAdvanced, setShowAdvanced] = useState(false)
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<Card>
<CardHeader>
<CardTitle>Backend Configuration</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Backend Type Selection */}
<SelectInput
id="backend_type"
label="Backend Type"
value={formData.backend_type || BackendType.LLAMA_CPP}
onChange={(value) => onChange('backend_type', value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description="Select the backend server type"
/>
{/* Parse Command Section */}
<div className="flex flex-col gap-2">
<Button
variant="outline"
onClick={onParseCommand}
className="flex items-center gap-2 w-fit"
>
<Terminal className="h-4 w-4" />
Parse Command
</Button>
<p className="text-sm text-muted-foreground">
Import settings from your backend command
</p>
</div>
{/* Basic Backend Options */}
{basicBackendFields.length > 0 && (
<div className="space-y-4">
<h3 className="text-md font-medium">Basic Backend Options</h3>
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
{/* Advanced Backend Options */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<Button
variant="ghost"
onClick={() => setShowAdvanced(!showAdvanced)}
className="flex items-center gap-2 p-0 h-auto font-medium"
>
{showAdvanced ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
Advanced Backend Options
<span className="text-muted-foreground text-sm font-normal">
({advancedBackendFields.length} options)
</span>
</Button>
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)}
</CardContent>
</Card>
)
}
export default BackendConfigurationCard

View File

@@ -0,0 +1,148 @@
import React, { useState, useEffect } from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
interface InstanceSettingsCardProps {
instanceName: string
nameError: string
isEditing: boolean
formData: CreateInstanceOptions
onNameChange: (name: string) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
}
const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
instanceName,
nameError,
isEditing,
formData,
onNameChange,
onChange
}) => {
const [nodes, setNodes] = useState<NodesMap>({})
const [loadingNodes, setLoadingNodes] = useState(true)
useEffect(() => {
const fetchNodes = async () => {
try {
const fetchedNodes = await nodesApi.list()
setNodes(fetchedNodes)
// Auto-select first node if none selected
const nodeNames = Object.keys(fetchedNodes)
if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
onChange('nodes', [nodeNames[0]])
}
} catch (error) {
console.error('Failed to fetch nodes:', error)
} finally {
setLoadingNodes(false)
}
}
void fetchNodes()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const nodeOptions = Object.keys(nodes).map(nodeName => ({
value: nodeName,
label: nodeName
}))
const handleNodeChange = (value: string | undefined) => {
if (value) {
onChange('nodes', [value])
} else {
onChange('nodes', undefined)
}
}
const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
return (
<Card>
<CardHeader>
<CardTitle>Instance Settings</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Instance Name */}
<div className="grid gap-2">
<Label htmlFor="name">
Instance Name <span className="text-red-500">*</span>
</Label>
<Input
id="name"
value={instanceName}
onChange={(e) => onNameChange(e.target.value)}
placeholder="my-instance"
disabled={isEditing}
className={nameError ? "border-red-500" : ""}
/>
{nameError && <p className="text-sm text-red-500">{nameError}</p>}
<p className="text-sm text-muted-foreground">
Unique identifier for the instance
</p>
</div>
{/* Node Selection */}
{!loadingNodes && Object.keys(nodes).length > 0 && (
<SelectInput
id="node"
label="Node"
value={selectedNode}
onChange={handleNodeChange}
options={nodeOptions}
description={isEditing ? "Node cannot be changed after instance creation" : "Select the node where the instance will run"}
disabled={isEditing}
/>
)}
{/* Auto Restart Configuration */}
<AutoRestartConfiguration
formData={formData}
onChange={onChange}
/>
{/* Basic Instance Options */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Instance Options</h3>
<NumberInput
id="idle_timeout"
label="Idle Timeout (minutes)"
value={formData.idle_timeout}
onChange={(value) => onChange('idle_timeout', value)}
placeholder="30"
description="Minutes before stopping an idle instance"
/>
<CheckboxInput
id="on_demand_start"
label="On Demand Start"
value={formData.on_demand_start}
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div>
</CardContent>
</Card>
)
}
export default InstanceSettingsCard

View File

@@ -1,4 +1,4 @@
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'
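Why the baseURI form works: assuming the served index.html resolves to a base URL ending in a slash (via a <base href> tag or a directory URL), document.baseURI carries the subpath prefix, so the same build works at the root and behind a reverse proxy. A minimal sketch:

// With <base href="/proxy/">, document.baseURI is "https://host/proxy/",
// so this fetch targets https://host/proxy/api/v1/instances rather than
// the root-relative /api/v1/instances.
async function probeAuth(key: string): Promise<boolean> {
  const response = await fetch(document.baseURI + 'api/v1/instances', {
    headers: { 'Authorization': `Bearer ${key}` }
  })
  return response.ok
}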

View File

@@ -1,5 +1,5 @@
import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally
const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
})
it('converts HTTP errors to meaningful messages', async () => {
const mockResponse = {
ok: false,
status: 409,
text: () => Promise.resolve('Instance already exists'),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.create('existing', {}))
.rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
})
it('handles empty error responses gracefully', async () => {
const mockResponse = {
ok: false,
status: 500,
text: () => Promise.resolve(''),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.list())
.rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith(
expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object)
)
})

View File

@@ -1,6 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
@@ -30,25 +34,8 @@ async function apiCall<T>(
headers,
});
// Handle errors using centralized error handler
await handleApiError(response);
// Handle empty responses (like DELETE)
if (response.status === 204) {
@@ -60,6 +47,11 @@ async function apiCall<T>(
const text = await response.text();
return text as T;
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0') {
return {} as T; // Return empty object for empty JSON responses
}
const data = await response.json() as T;
return data;
}
@@ -71,16 +63,60 @@ async function apiCall<T>(
}
}
// Server API functions (moved to llama-cpp backend)
export const serverApi = {
// GET /backends/llama-cpp/help
getHelp: () => apiCall<string>("/backends/llama-cpp/help", {}, "text"),
// GET /backends/llama-cpp/version
getVersion: () => apiCall<string>("/backends/llama-cpp/version", {}, "text"),
// GET /backends/llama-cpp/devices
getDevices: () => apiCall<string>("/backends/llama-cpp/devices", {}, "text"),
};
// Backend API functions
export const backendsApi = {
llamaCpp: {
// POST /backends/llama-cpp/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/llama-cpp/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
mlx: {
// POST /backends/mlx/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
};
// Node API types
export interface NodeResponse {
address: string;
}
export type NodesMap = Record<string, NodeResponse>;
// Node API functions
export const nodesApi = {
// GET /nodes - returns map of node name to NodeResponse
list: () => apiCall<NodesMap>("/nodes"),
// GET /nodes/{name}
get: (name: string) => apiCall<NodeResponse>(`/nodes/${name}`),
};
// Instance API functions
@@ -136,5 +172,5 @@ export const instancesApi = {
},
// GET /instances/{name}/proxy/health
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${name}/proxy/health`),
};
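A hedged sketch of how the new endpoints compose from the UI side; the instance name, command string, and createFromCommand helper are illustrative, while the API functions are the ones defined above.

import { backendsApi, instancesApi, nodesApi } from '@/lib/api'

async function createFromCommand(): Promise<void> {
  // Turn a raw llama-server command into structured CreateInstanceOptions
  const options = await backendsApi.llamaCpp.parseCommand(
    'llama-server --model /models/model.gguf --gpu-layers 32'
  )
  // Pick the first available node and create the instance there
  const nodes = await nodesApi.list()
  const firstNode = Object.keys(nodes)[0]
  await instancesApi.create('my-instance', {
    ...options,
    nodes: firstNode ? [firstNode] : undefined,
  })
}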

View File

@@ -0,0 +1,33 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
// Clone the response before reading to avoid consuming the body stream
const errorMessage = await parseErrorResponse(response.clone())
throw new Error(errorMessage)
}
}
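A usage sketch: any fetch-based helper can delegate status checking to handleApiError; the fetchJson wrapper below is hypothetical, not part of this commit.

import { handleApiError } from './errorUtils'

export async function fetchJson<T>(url: string): Promise<T> {
  const response = await fetch(url)
  // Throws 'Authentication required' on 401, or 'HTTP <status>: <body>' on any
  // other non-OK response; on success the body is left untouched for the caller.
  await handleApiError(response)
  return response.json() as Promise<T>
}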

View File

@@ -1,47 +1,23 @@
import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions' import {
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions'
// Instance-level basic fields (not backend-specific) // LlamaCpp backend-specific basic fields
export const basicFieldsConfig: Record<string, { const basicLlamaCppFieldsConfig: Record<string, {
label: string label: string
description?: string description?: string
placeholder?: string placeholder?: string
required?: boolean
}> = {
auto_restart: {
label: 'Auto Restart',
description: 'Automatically restart the instance on failure'
},
max_restarts: {
label: 'Max Restarts',
placeholder: '3',
description: 'Maximum number of restart attempts (0 = unlimited)'
},
restart_delay: {
label: 'Restart Delay (seconds)',
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
idle_timeout: {
label: 'Idle Timeout (minutes)',
placeholder: '60',
description: 'Time in minutes before instance is considered idle and stopped'
},
on_demand_start: {
label: 'On-Demand Start',
description: 'Start instance upon receiving OpenAI-compatible API request'
},
backend_type: {
label: 'Backend Type',
description: 'Type of backend to use for this instance'
}
}
// Backend-specific basic fields (these go in backend_options)
export const basicBackendFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = { }> = {
model: { model: {
label: 'Model Path', label: 'Model Path',
@@ -65,29 +41,135 @@ export const basicBackendFieldsConfig: Record<string, {
} }
} }
export function isBasicField(key: keyof CreateInstanceOptions): boolean { // MLX backend-specific basic fields
return key in basicFieldsConfig const basicMlxFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
+    placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
+    description: 'The path to the MLX model weights, tokenizer, and config'
+  },
+  temp: {
+    label: 'Temperature',
+    placeholder: '0.0',
+    description: 'Default sampling temperature (default: 0.0)'
+  },
+  top_p: {
+    label: 'Top-P',
+    placeholder: '1.0',
+    description: 'Default nucleus sampling top-p (default: 1.0)'
+  },
+  top_k: {
+    label: 'Top-K',
+    placeholder: '0',
+    description: 'Default top-k sampling (default: 0, disables top-k)'
+  },
+  min_p: {
+    label: 'Min-P',
+    placeholder: '0.0',
+    description: 'Default min-p sampling (default: 0.0, disables min-p)'
+  },
+  max_tokens: {
+    label: 'Max Tokens',
+    placeholder: '512',
+    description: 'Default maximum number of tokens to generate (default: 512)'
+  }
+}
-}
-export function isBasicBackendField(key: keyof BackendOptions): boolean {
-  return key in basicBackendFieldsConfig
-}
-export function getBasicFields(): (keyof CreateInstanceOptions)[] {
-  return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
-}
-export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
-  return getAllFieldKeys().filter(key => !isBasicField(key))
-}
-export function getBasicBackendFields(): (keyof BackendOptions)[] {
-  return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
-}
-export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
-  return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
-}
+// vLLM backend-specific basic fields
+const basicVllmFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+}> = {
+  model: {
+    label: 'Model',
+    placeholder: 'microsoft/DialoGPT-medium',
+    description: 'The name or path of the Hugging Face model to use'
+  },
+  tensor_parallel_size: {
+    label: 'Tensor Parallel Size',
+    placeholder: '1',
+    description: 'Number of GPUs to use for distributed serving'
+  },
+  gpu_memory_utilization: {
+    label: 'GPU Memory Utilization',
+    placeholder: '0.9',
+    description: 'The fraction of GPU memory to be used for the model executor'
+  }
+}
+// Backend field configuration lookup
+const backendFieldConfigs = {
+  mlx_lm: basicMlxFieldsConfig,
+  vllm: basicVllmFieldsConfig,
+  llama_cpp: basicLlamaCppFieldsConfig,
+} as const
+const backendFieldGetters = {
+  mlx_lm: getAllMlxFieldKeys,
+  vllm: getAllVllmFieldKeys,
+  llama_cpp: getAllLlamaCppFieldKeys,
+} as const
+export function getBasicBackendFields(backendType?: string): string[] {
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
+  const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+  return Object.keys(config)
+}
+export function getAdvancedBackendFields(backendType?: string): string[] {
+  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
+  const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
+  const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
+  return fieldGetter().filter(key => !(key in basicConfig))
+}
+// Combined backend fields config for use in BackendFormField
+export const basicBackendFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+}> = {
+  ...basicLlamaCppFieldsConfig,
+  ...basicMlxFieldsConfig,
+  ...basicVllmFieldsConfig
+}
+// Get field type for any backend option (union type)
+export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
+  // Try to get type from LlamaCpp schema first
+  try {
+    if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
+      return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+  // Try MLX schema
+  try {
+    if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
+      return getMlxFieldType(key as keyof MlxBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+  // Try vLLM schema
+  try {
+    if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
+      return getVllmFieldType(key as keyof VllmBackendOptions)
+    }
+  } catch {
+    // Schema might not be available
+  }
+  // Default fallback
+  return 'text'
+}
// Re-export the Zod-based functions
export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
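A usage sketch, not part of the diff: the backend-aware helpers above can drive a dynamic settings form, with unknown or missing backend types falling back to llama_cpp. The describeForm name and the import path are illustrative assumptions.

// Hypothetical consumer of the helpers above; '@/lib/zodFormUtils' is an assumed path.
import { getBasicBackendFields, getAdvancedBackendFields, getBackendFieldType } from '@/lib/zodFormUtils'

function describeForm(backendType?: string): void {
  // Basic fields are rendered up front; advanced fields go behind a toggle.
  for (const key of getBasicBackendFields(backendType)) {
    console.log(`basic ${key}: ${getBackendFieldType(key)}`)
  }
  for (const key of getAdvancedBackendFields(backendType)) {
    console.log(`advanced ${key}: ${getBackendFieldType(key)}`)
  }
}

describeForm('vllm') // model, tensor_parallel_size, gpu_memory_utilization as basic fields
describeForm()       // undefined backend type falls back to llama_cpp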

View File

@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'
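With this barrel in place, consumers can pull any backend schema or helper from a single path, assuming the file sits at src/schemas/backends/index.ts and the '@' alias maps to src:

import { MlxBackendOptionsSchema, getAllVllmFieldKeys } from '@/schemas/backends'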

View File

@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}
// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
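A quick illustration of the mapping above, using field names taken from this schema (a sketch; the import path is assumed):

import { getLlamaCppFieldType, LlamaCppBackendOptionsSchema } from '@/schemas/backends/llamacpp'

getLlamaCppFieldType('mlock')      // 'boolean' — z.boolean()
getLlamaCppFieldType('gpu_layers') // 'number'  — z.number()
getLlamaCppFieldType('lora')       // 'array'   — z.array(z.string())
getLlamaCppFieldType('hf_repo')    // 'text'    — z.string() falls through to the default

// Zod objects strip unknown keys by default, so stray options vanish on parse:
LlamaCppBackendOptionsSchema.parse({ gpu_layers: 32, not_a_real_flag: true }) // -> { gpu_layers: 32 }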

View File

@@ -0,0 +1,51 @@
import { z } from 'zod'
// Define the MLX backend options schema
export const MlxBackendOptionsSchema = z.object({
// Basic connection options
model: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
// Model and adapter options
adapter_path: z.string().optional(),
draft_model: z.string().optional(),
num_draft_tokens: z.number().optional(),
trust_remote_code: z.boolean().optional(),
// Logging and templates
log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
chat_template: z.string().optional(),
use_default_chat_template: z.boolean().optional(),
chat_template_args: z.string().optional(), // JSON string
// Sampling defaults
temp: z.number().optional(), // Note: MLX uses "temp" not "temperature"
top_p: z.number().optional(),
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
// Helper to get all MLX backend option field keys
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
return Object.keys(MlxBackendOptionsSchema.shape) as (keyof MlxBackendOptions)[]
}
// Get field type for MLX backend options
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = MlxBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
if (innerSchema instanceof z.ZodEnum) return 'text' // Enum treated as text/select
return 'text' // ZodString and others default to text
}
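A hedged validation sketch for the MLX schema (import path assumed); note the schema deliberately mirrors mlx_lm's "temp" naming:

import { MlxBackendOptionsSchema } from '@/schemas/backends/mlx'

// safeParse returns a result object instead of throwing on bad input
const result = MlxBackendOptionsSchema.safeParse({
  model: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
  temp: 0.7,         // "temp", not "temperature"
  log_level: 'INFO', // must be one of the five enum values
})
if (!result.success) console.error(result.error.issues)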

View File

@@ -0,0 +1,150 @@
import { z } from 'zod'
// Define the vLLM backend options schema
export const VllmBackendOptionsSchema = z.object({
// Basic connection options (auto-assigned by llamactl)
host: z.string().optional(),
port: z.number().optional(),
// Model and engine configuration
model: z.string().optional(),
tokenizer: z.string().optional(),
skip_tokenizer_init: z.boolean().optional(),
revision: z.string().optional(),
code_revision: z.string().optional(),
tokenizer_revision: z.string().optional(),
tokenizer_mode: z.string().optional(),
trust_remote_code: z.boolean().optional(),
download_dir: z.string().optional(),
load_format: z.string().optional(),
config_format: z.string().optional(),
dtype: z.string().optional(),
kv_cache_dtype: z.string().optional(),
quantization_param_path: z.string().optional(),
seed: z.number().optional(),
max_model_len: z.number().optional(),
guided_decoding_backend: z.string().optional(),
distributed_executor_backend: z.string().optional(),
worker_use_ray: z.boolean().optional(),
ray_workers_use_nsight: z.boolean().optional(),
// Performance and serving configuration
block_size: z.number().optional(),
enable_prefix_caching: z.boolean().optional(),
disable_sliding_window: z.boolean().optional(),
use_v2_block_manager: z.boolean().optional(),
num_lookahead_slots: z.number().optional(),
swap_space: z.number().optional(),
cpu_offload_gb: z.number().optional(),
gpu_memory_utilization: z.number().optional(),
num_gpu_blocks_override: z.number().optional(),
max_num_batched_tokens: z.number().optional(),
max_num_seqs: z.number().optional(),
max_logprobs: z.number().optional(),
disable_log_stats: z.boolean().optional(),
quantization: z.string().optional(),
rope_scaling: z.string().optional(),
rope_theta: z.number().optional(),
enforce_eager: z.boolean().optional(),
max_context_len_to_capture: z.number().optional(),
max_seq_len_to_capture: z.number().optional(),
disable_custom_all_reduce: z.boolean().optional(),
tokenizer_pool_size: z.number().optional(),
tokenizer_pool_type: z.string().optional(),
tokenizer_pool_extra_config: z.string().optional(),
enable_lora_bias: z.boolean().optional(),
lora_extra_vocab_size: z.number().optional(),
lora_rank: z.number().optional(),
prompt_lookback_distance: z.number().optional(),
preemption_mode: z.string().optional(),
// Distributed and parallel processing
tensor_parallel_size: z.number().optional(),
pipeline_parallel_size: z.number().optional(),
max_parallel_loading_workers: z.number().optional(),
disable_async_output_proc: z.boolean().optional(),
worker_class: z.string().optional(),
enabled_lora_modules: z.string().optional(),
max_lora_rank: z.number().optional(),
fully_sharded_loras: z.boolean().optional(),
lora_modules: z.string().optional(),
prompt_adapters: z.string().optional(),
max_prompt_adapter_token: z.number().optional(),
device: z.string().optional(),
scheduler_delay: z.number().optional(),
enable_chunked_prefill: z.boolean().optional(),
speculative_model: z.string().optional(),
speculative_model_quantization: z.string().optional(),
speculative_revision: z.string().optional(),
speculative_max_model_len: z.number().optional(),
speculative_disable_by_batch_size: z.number().optional(),
ngpt_speculative_length: z.number().optional(),
speculative_disable_mqa: z.boolean().optional(),
model_loader_extra_config: z.string().optional(),
ignore_patterns: z.string().optional(),
preloaded_lora_modules: z.string().optional(),
// OpenAI server specific options
uds: z.string().optional(),
uvicorn_log_level: z.string().optional(),
response_role: z.string().optional(),
ssl_keyfile: z.string().optional(),
ssl_certfile: z.string().optional(),
ssl_ca_certs: z.string().optional(),
ssl_cert_reqs: z.number().optional(),
root_path: z.string().optional(),
middleware: z.array(z.string()).optional(),
return_tokens_as_token_ids: z.boolean().optional(),
disable_frontend_multiprocessing: z.boolean().optional(),
enable_auto_tool_choice: z.boolean().optional(),
tool_call_parser: z.string().optional(),
tool_server: z.string().optional(),
chat_template: z.string().optional(),
chat_template_content_format: z.string().optional(),
allow_credentials: z.boolean().optional(),
allowed_origins: z.array(z.string()).optional(),
allowed_methods: z.array(z.string()).optional(),
allowed_headers: z.array(z.string()).optional(),
api_key: z.array(z.string()).optional(),
enable_log_outputs: z.boolean().optional(),
enable_token_usage: z.boolean().optional(),
enable_async_engine_debug: z.boolean().optional(),
engine_use_ray: z.boolean().optional(),
disable_log_requests: z.boolean().optional(),
max_log_len: z.number().optional(),
// Additional engine configuration
task: z.string().optional(),
multi_modal_config: z.string().optional(),
limit_mm_per_prompt: z.string().optional(),
enable_sleep_mode: z.boolean().optional(),
enable_chunking_request: z.boolean().optional(),
compilation_config: z.string().optional(),
disable_sliding_window_mask: z.boolean().optional(),
enable_trtllm_engine_latency: z.boolean().optional(),
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
})
// Infer the TypeScript type from the schema
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
// Helper to get all vLLM backend option field keys
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
return Object.keys(VllmBackendOptionsSchema.shape) as (keyof VllmBackendOptions)[]
}
// Get field type for vLLM backend options
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
const fieldSchema = VllmBackendOptionsSchema.shape[key]
if (!fieldSchema) return 'text'
// Handle ZodOptional wrapper
const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
if (innerSchema instanceof z.ZodBoolean) return 'boolean'
if (innerSchema instanceof z.ZodNumber) return 'number'
if (innerSchema instanceof z.ZodArray) return 'array'
return 'text' // ZodString and others default to text
}
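The same helper pattern applies to the vLLM fields (sketch; import path assumed):

import { VllmBackendOptionsSchema, getVllmFieldType } from '@/schemas/backends/vllm'

getVllmFieldType('tensor_parallel_size') // 'number'
getVllmFieldType('enforce_eager')        // 'boolean'
getVllmFieldType('allowed_origins')      // 'array'

VllmBackendOptionsSchema.parse({ model: 'microsoft/DialoGPT-medium', gpu_memory_utilization: 0.9 })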

View File

@@ -1,174 +1,28 @@
 import { BackendType } from '@/types/instance'
 import { z } from 'zod'
-// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
-export const BackendOptionsSchema = z.object({
-  // Common params
-  verbose_prompt: z.boolean().optional(),
-  threads: z.number().optional(),
-  threads_batch: z.number().optional(),
-  cpu_mask: z.string().optional(),
-  cpu_range: z.string().optional(),
-  cpu_strict: z.number().optional(),
-  prio: z.number().optional(),
-  poll: z.number().optional(),
-  cpu_mask_batch: z.string().optional(),
-  cpu_range_batch: z.string().optional(),
-  cpu_strict_batch: z.number().optional(),
-  prio_batch: z.number().optional(),
-  poll_batch: z.number().optional(),
-  ctx_size: z.number().optional(),
-  predict: z.number().optional(),
-  batch_size: z.number().optional(),
-  ubatch_size: z.number().optional(),
-  keep: z.number().optional(),
-  flash_attn: z.boolean().optional(),
-  no_perf: z.boolean().optional(),
-  escape: z.boolean().optional(),
-  no_escape: z.boolean().optional(),
-  rope_scaling: z.string().optional(),
-  rope_scale: z.number().optional(),
-  rope_freq_base: z.number().optional(),
-  rope_freq_scale: z.number().optional(),
-  yarn_orig_ctx: z.number().optional(),
-  yarn_ext_factor: z.number().optional(),
-  yarn_attn_factor: z.number().optional(),
-  yarn_beta_slow: z.number().optional(),
-  yarn_beta_fast: z.number().optional(),
-  dump_kv_cache: z.boolean().optional(),
-  no_kv_offload: z.boolean().optional(),
-  cache_type_k: z.string().optional(),
-  cache_type_v: z.string().optional(),
-  defrag_thold: z.number().optional(),
-  parallel: z.number().optional(),
-  mlock: z.boolean().optional(),
-  no_mmap: z.boolean().optional(),
-  numa: z.string().optional(),
-  device: z.string().optional(),
-  override_tensor: z.array(z.string()).optional(),
-  gpu_layers: z.number().optional(),
-  split_mode: z.string().optional(),
-  tensor_split: z.string().optional(),
-  main_gpu: z.number().optional(),
-  check_tensors: z.boolean().optional(),
-  override_kv: z.array(z.string()).optional(),
-  lora: z.array(z.string()).optional(),
-  lora_scaled: z.array(z.string()).optional(),
-  control_vector: z.array(z.string()).optional(),
-  control_vector_scaled: z.array(z.string()).optional(),
-  control_vector_layer_range: z.string().optional(),
-  model: z.string().optional(),
-  model_url: z.string().optional(),
-  hf_repo: z.string().optional(),
-  hf_repo_draft: z.string().optional(),
-  hf_file: z.string().optional(),
-  hf_repo_v: z.string().optional(),
-  hf_file_v: z.string().optional(),
-  hf_token: z.string().optional(),
-  log_disable: z.boolean().optional(),
-  log_file: z.string().optional(),
-  log_colors: z.boolean().optional(),
-  verbose: z.boolean().optional(),
-  verbosity: z.number().optional(),
-  log_prefix: z.boolean().optional(),
-  log_timestamps: z.boolean().optional(),
-  // Sampling params
-  samplers: z.string().optional(),
-  seed: z.number().optional(),
-  sampling_seq: z.string().optional(),
-  ignore_eos: z.boolean().optional(),
-  temp: z.number().optional(),
-  top_k: z.number().optional(),
-  top_p: z.number().optional(),
-  min_p: z.number().optional(),
-  xtc_probability: z.number().optional(),
-  xtc_threshold: z.number().optional(),
-  typical: z.number().optional(),
-  repeat_last_n: z.number().optional(),
-  repeat_penalty: z.number().optional(),
-  presence_penalty: z.number().optional(),
-  frequency_penalty: z.number().optional(),
-  dry_multiplier: z.number().optional(),
-  dry_base: z.number().optional(),
-  dry_allowed_length: z.number().optional(),
-  dry_penalty_last_n: z.number().optional(),
-  dry_sequence_breaker: z.array(z.string()).optional(),
-  dynatemp_range: z.number().optional(),
-  dynatemp_exp: z.number().optional(),
-  mirostat: z.number().optional(),
-  mirostat_lr: z.number().optional(),
-  mirostat_ent: z.number().optional(),
-  logit_bias: z.array(z.string()).optional(),
-  grammar: z.string().optional(),
-  grammar_file: z.string().optional(),
-  json_schema: z.string().optional(),
-  json_schema_file: z.string().optional(),
-  // Example-specific params
-  no_context_shift: z.boolean().optional(),
-  special: z.boolean().optional(),
-  no_warmup: z.boolean().optional(),
-  spm_infill: z.boolean().optional(),
-  pooling: z.string().optional(),
-  cont_batching: z.boolean().optional(),
-  no_cont_batching: z.boolean().optional(),
-  mmproj: z.string().optional(),
-  mmproj_url: z.string().optional(),
-  no_mmproj: z.boolean().optional(),
-  no_mmproj_offload: z.boolean().optional(),
-  alias: z.string().optional(),
-  host: z.string().optional(),
-  port: z.number().optional(),
-  path: z.string().optional(),
-  no_webui: z.boolean().optional(),
-  embedding: z.boolean().optional(),
-  reranking: z.boolean().optional(),
-  api_key: z.string().optional(),
-  api_key_file: z.string().optional(),
-  ssl_key_file: z.string().optional(),
-  ssl_cert_file: z.string().optional(),
-  chat_template_kwargs: z.string().optional(),
-  timeout: z.number().optional(),
-  threads_http: z.number().optional(),
-  cache_reuse: z.number().optional(),
-  metrics: z.boolean().optional(),
-  slots: z.boolean().optional(),
-  props: z.boolean().optional(),
-  no_slots: z.boolean().optional(),
-  slot_save_path: z.string().optional(),
-  jinja: z.boolean().optional(),
-  reasoning_format: z.string().optional(),
-  reasoning_budget: z.number().optional(),
-  chat_template: z.string().optional(),
-  chat_template_file: z.string().optional(),
-  no_prefill_assistant: z.boolean().optional(),
-  slot_prompt_similarity: z.number().optional(),
-  lora_init_without_apply: z.boolean().optional(),
-  draft_max: z.number().optional(),
-  draft_min: z.number().optional(),
-  draft_p_min: z.number().optional(),
-  ctx_size_draft: z.number().optional(),
-  device_draft: z.string().optional(),
-  gpu_layers_draft: z.number().optional(),
-  model_draft: z.string().optional(),
-  cache_type_k_draft: z.string().optional(),
-  cache_type_v_draft: z.string().optional(),
-  // Audio/TTS params
-  model_vocoder: z.string().optional(),
-  tts_use_guide_tokens: z.boolean().optional(),
-  // Default model params
-  embd_bge_small_en_default: z.boolean().optional(),
-  embd_e5_small_en_default: z.boolean().optional(),
-  embd_gte_small_default: z.boolean().optional(),
-  fim_qwen_1_5b_default: z.boolean().optional(),
-  fim_qwen_3b_default: z.boolean().optional(),
-  fim_qwen_7b_default: z.boolean().optional(),
-  fim_qwen_7b_spec: z.boolean().optional(),
-  fim_qwen_14b_spec: z.boolean().optional(),
-})
+// Import backend schemas from separate files
+import {
+  LlamaCppBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getLlamaCppFieldType,
+  MlxBackendOptionsSchema,
+  type MlxBackendOptions,
+  getAllMlxFieldKeys,
+  getMlxFieldType,
+  VllmBackendOptionsSchema,
+  type VllmBackendOptions,
+  getAllVllmFieldKeys,
+  getVllmFieldType
+} from './backends'
+// Backend options union
+export const BackendOptionsSchema = z.union([
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
+])
 // Define the main create instance options schema
 export const CreateInstanceOptionsSchema = z.object({
@@ -179,11 +33,33 @@ export const CreateInstanceOptionsSchema = z.object({
   idle_timeout: z.number().optional(),
   on_demand_start: z.boolean().optional(),
+  // Environment variables
+  environment: z.record(z.string(), z.string()).optional(),
   // Backend configuration
-  backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
+  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
+  // Node configuration
+  nodes: z.array(z.string()).optional(),
 })
+// Re-export types and schemas from backend files
+export {
+  LlamaCppBackendOptionsSchema,
+  MlxBackendOptionsSchema,
+  VllmBackendOptionsSchema,
+  type LlamaCppBackendOptions,
+  type MlxBackendOptions,
+  type VllmBackendOptions,
+  getAllLlamaCppFieldKeys,
+  getAllMlxFieldKeys,
+  getAllVllmFieldKeys,
+  getLlamaCppFieldType,
+  getMlxFieldType,
+  getVllmFieldType
+}
 // Infer the TypeScript types from the schemas
 export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
@@ -193,36 +69,18 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }
-// Helper to get all backend option field keys
-export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
-  return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
-}
 // Get field type from Zod schema
 export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   const fieldSchema = CreateInstanceOptionsSchema.shape[key]
   if (!fieldSchema) return 'text'
   // Handle ZodOptional wrapper
   const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
   if (innerSchema instanceof z.ZodBoolean) return 'boolean'
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
   if (innerSchema instanceof z.ZodObject) return 'object'
+  if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
-  return 'text' // ZodString and others default to text
-}
-// Get field type for backend options
-export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
-  const fieldSchema = BackendOptionsSchema.shape[key]
-  if (!fieldSchema) return 'text'
-  // Handle ZodOptional wrapper
-  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
-  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
-  if (innerSchema instanceof z.ZodNumber) return 'number'
-  if (innerSchema instanceof z.ZodArray) return 'array'
   return 'text' // ZodString and others default to text
 }
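One behavioral note on the union (an observation about Zod v3 first-match semantics, not something stated in the diff): every member schema is all-optional and strips unknown keys, so z.union resolves to the first member that parses — effectively always LlamaCppBackendOptionsSchema. A sketch:

import { BackendOptionsSchema } from '@/schemas/instanceOptions'

// {} satisfies every member, so the llama.cpp schema wins and
// the vLLM-only key below is silently stripped rather than rejected.
const opts = BackendOptionsSchema.parse({ gpu_memory_utilization: 0.9 }) // -> {}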

View File

@@ -3,7 +3,10 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
 export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
 export const BackendType = {
-  LLAMA_CPP: 'llama_cpp'
+  LLAMA_CPP: 'llama_cpp',
+  MLX_LM: 'mlx_lm',
+  VLLM: 'vllm',
+  // MLX_VLM: 'mlx_vlm', // Future expansion
 } as const
 export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
@@ -20,4 +23,5 @@ export interface Instance {
   name: string;
   status: InstanceStatus;
   options?: CreateInstanceOptions;
+  docker_enabled?: boolean; // indicates backend is running via Docker
 }
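A small type-guard sketch built on the widened const map (the isBackendType helper is hypothetical, not part of this change):

import { BackendType, type BackendTypeValue } from '@/types/instance'

function isBackendType(value: string): value is BackendTypeValue {
  return (Object.values(BackendType) as string[]).includes(value)
}

isBackendType('mlx_lm')  // true
isBackendType('mlx_vlm') // false — still commented out above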

View File

@@ -21,4 +21,6 @@ export default defineConfig({
     setupFiles: ['./src/test/setup.ts'],
     css: true,
   },
+  // ensures relative asset paths to support being served behind a subpath
+  base: "./"
 })
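For context (general Vite behavior, not specific to this repo): base: "./" makes the built index.html reference assets by relative URL, so the same bundle works at the server root or behind a subpath. A minimal standalone config:

// vite.config.ts sketch: with base './', emitted HTML points at
// ./assets/index-<hash>.js instead of /assets/index-<hash>.js.
import { defineConfig } from 'vite'

export default defineConfig({
  base: './',
})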