347 Commits

Author SHA1 Message Date
ec84a7d331 Merge pull request #112 from lordmathis/fix/auth-middleware
fix: Remove duplicate auth middleware init
2025-12-17 19:09:12 +01:00
b45219a01e Reuse handler auth middleware 2025-12-17 19:06:04 +01:00
463bb561e1 Merge pull request #111 from lordmathis/fix/cgo-enabled-build
fix: Add multiplatform CGO_ENABLED=1 build
2025-12-17 14:38:27 +01:00
ebdb9143c0 Remove separate windows build step 2025-12-17 14:32:10 +01:00
4269d04381 Update release.yaml 2025-12-17 14:25:50 +01:00
c734329a62 Merge pull request #109 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-62cd70be13
chore: bump the npm-production group in /webui with 2 updates
2025-12-15 23:50:27 +01:00
15fcf7c377 Merge pull request #110 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-8fdc2c429d
chore: bump the npm-development group in /webui with 3 updates
2025-12-15 23:48:42 +01:00
dependabot[bot]
795f530956 chore: bump the npm-development group in /webui with 3 updates
Bumps the npm-development group in /webui with 3 updates: [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node), [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) and [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `@types/node` from 24.10.1 to 25.0.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `typescript-eslint` from 8.49.0 to 8.50.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.50.0/packages/typescript-eslint)

Updates `vite` from 7.2.2 to 7.3.0
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v7.3.0/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.3.0/packages/vite)

---
updated-dependencies:
- dependency-name: "@types/node"
  dependency-version: 25.0.2
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.50.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vite
  dependency-version: 7.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:41 +00:00
dependabot[bot]
4507358310 chore: bump the npm-production group in /webui with 2 updates
Bumps the npm-production group in /webui with 2 updates: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) and [zod](https://github.com/colinhacks/zod).


Updates `lucide-react` from 0.560.0 to 0.561.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.561.0/packages/lucide-react)

Updates `zod` from 4.1.12 to 4.2.0
- [Release notes](https://github.com/colinhacks/zod/releases)
- [Commits](https://github.com/colinhacks/zod/compare/v4.1.12...v4.2.0)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.561.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: zod
  dependency-version: 4.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:01 +00:00
f3c02b4939 Merge pull request #108 from lordmathis/refactor/config
refactor: Split large config file
2025-12-13 14:27:25 +01:00
0a85409deb Split large config file 2025-12-13 13:50:59 +01:00
22fd295250 Merge pull request #107 from lordmathis/feat/logrotate
feat: Add log rotation for instance logs
2025-12-13 13:30:20 +01:00
c0cecdd377 Clean up logger 2025-12-13 13:18:30 +01:00
4d57b37a5d Remove verbose _mb suffix 2025-12-13 13:06:22 +01:00
c13b71d07f Document new log rotation config options 2025-12-13 13:02:22 +01:00
406a711682 Move LogRotationConfig to logger package 2025-12-13 12:48:50 +01:00
0b3d654945 Simplify logging config 2025-12-13 12:48:50 +01:00
e2a49402d6 Implement instance log rotation 2025-12-13 12:48:50 +01:00
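
The rotation commits above name the config but not the mechanism. A minimal sketch of size-based rotation for per-instance log files follows; every name here (package, struct, fields) is assumed for illustration and is not necessarily llamactl's actual API:

```go
package logger

import (
	"fmt"
	"os"
	"path/filepath"
	"time"
)

// LogRotationConfig mirrors the kind of options the commits above describe
// (the "_mb suffix" commit suggests sizes are configured in megabytes).
type LogRotationConfig struct {
	MaxSize    int64 // rotate once the log exceeds this many bytes
	MaxBackups int   // how many rotated files to keep
}

// rotateIfNeeded renames the active log to a timestamped backup and prunes
// the oldest backups beyond MaxBackups.
func rotateIfNeeded(path string, cfg LogRotationConfig) error {
	info, err := os.Stat(path)
	if err != nil || info.Size() < cfg.MaxSize {
		return err // nothing to rotate (or the stat failed)
	}
	backup := fmt.Sprintf("%s.%s", path, time.Now().Format("20060102-150405"))
	if err := os.Rename(path, backup); err != nil {
		return err
	}
	// Glob results are sorted and these timestamps sort lexically,
	// so the first entries are the oldest backups.
	backups, _ := filepath.Glob(path + ".*")
	for len(backups) > cfg.MaxBackups {
		os.Remove(backups[0])
		backups = backups[1:]
	}
	return nil
}
```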
48836c9c12 Merge pull request #105 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-3fe24f4500
chore: bump lucide-react from 0.555.0 to 0.556.0 in /webui in the npm-production group
2025-12-12 10:43:12 +01:00
4200b8eed9 Merge pull request #104 from lordmathis/dependabot/go_modules/go-dependencies-f180a085e8
chore: bump golang.org/x/crypto from 0.45.0 to 0.46.0 in the go-dependencies group
2025-12-11 18:51:40 +01:00
dependabot[bot]
9a7ae87df8 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.555.0 to 0.556.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.556.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.556.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-11 17:17:18 +00:00
e54c495528 Merge pull request #106 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-808d3127cd
chore: bump the npm-development group in /webui with 2 updates
2025-12-11 18:12:15 +01:00
dependabot[bot]
83006968ca chore: bump the npm-development group in /webui with 2 updates
Bumps the npm-development group in /webui with 2 updates: [jsdom](https://github.com/jsdom/jsdom) and [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `jsdom` from 27.2.0 to 27.3.0
- [Release notes](https://github.com/jsdom/jsdom/releases)
- [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md)
- [Commits](https://github.com/jsdom/jsdom/compare/27.2.0...27.3.0)

Updates `typescript-eslint` from 8.48.0 to 8.49.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.49.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: jsdom
  dependency-version: 27.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.49.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:31:36 +00:00
dependabot[bot]
c8d9c6907c chore: bump golang.org/x/crypto in the go-dependencies group
Bumps the go-dependencies group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0
- [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.46.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: go-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:30:38 +00:00
c776785f30 Merge pull request #103 from lordmathis/docs/api-keys
docs: Improve API key management documentation
2025-12-08 19:23:39 +01:00
1cfbd42eda Update swagger docs 2025-12-08 19:16:02 +01:00
8fee27054d Update docs for API key management 2025-12-08 19:15:42 +01:00
fd33837026 Merge pull request #101 from lordmathis/feat/api-key-mgmt
feat: Add inference api key management
2025-12-08 18:49:49 +01:00
3c4ebf7403 Add simple Python LLM test client 2025-12-08 18:44:28 +01:00
b7a0f7e3d8 Unhide migrated directory 2025-12-08 18:08:22 +01:00
d5b68a900f Add .migrated directory for migrated json files 2025-12-08 18:06:15 +01:00
00cd8c8877 Update shadcn components 2025-12-07 18:50:52 +01:00
4b1b12a7a8 Fix lint errors 2025-12-07 18:28:01 +01:00
0ce9016488 Fix some lint issues 2025-12-07 17:40:09 +01:00
1acbcafe1c Add DialogDescription to SettingsDialog 2025-12-07 17:26:38 +01:00
00a502a268 Implement LocalStorageMock for testing 2025-12-07 17:16:40 +01:00
54fe0f7421 Fix eslint issues 2025-12-07 16:16:13 +01:00
cd1bd64889 Refactor CreateApiKeyDialog to use instance IDs 2025-12-06 22:20:39 +01:00
0fee7abc7c Simplify create key request format 2025-12-06 22:20:05 +01:00
02193bd309 Add instance ID to JSON output 2025-12-06 21:28:17 +01:00
0217f7cc4e Fix instance creation to retrieve and set the auto-generated ID 2025-12-06 20:58:17 +01:00
fa311c46ac Improve server shutdown process 2025-12-06 19:52:40 +01:00
99927160c2 Remove 'can_infer' field 2025-12-06 18:07:01 +01:00
c37c1b8161 Remove 'enabled' field from API key model and related database operations 2025-12-06 17:59:11 +01:00
80d5d44a0b Add inference api key frontend integration 2025-12-04 23:26:32 +01:00
2d0acc60f2 Fix double dash in generated keys 2025-12-04 23:25:51 +01:00
a1b6f0c1b0 Remove JSON file archiving from migration process 2025-12-04 23:02:06 +01:00
991ce3c678 Remove unnecessary canviewlogs permission 2025-12-04 22:18:29 +01:00
d9c666a245 Update deprecation warnings 2025-12-04 21:23:22 +01:00
85cf712b03 Update api docs 2025-12-03 21:25:03 +01:00
5ccf493e04 Add permission checks to proxies 2025-12-03 21:14:44 +01:00
9eee42c673 Initial api key store implementation 2025-12-03 20:31:38 +01:00
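
As a hedged sketch of what inference key generation in this feature can look like: the "double dash" fix above hints that a key is a prefix joined to a random payload by a single separator. The prefix and encoding below are assumptions, not llamactl's confirmed format:

```go
package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// generateAPIKey returns a random inference key. The "sk-llamactl-" prefix
// is hypothetical; a single separator between prefix and payload avoids the
// double-dash issue one of the commits above fixes.
func generateAPIKey() (string, error) {
	raw := make([]byte, 32)
	if _, err := rand.Read(raw); err != nil {
		return "", err
	}
	return "sk-llamactl-" + hex.EncodeToString(raw), nil
}

func main() {
	key, err := generateAPIKey()
	if err != nil {
		panic(err)
	}
	fmt.Println(key)
}
```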
470f90076f Merge pull request #99 from lordmathis/feat/db-persistence
feat: Migrate instance persistence to sqlite3
2025-12-02 20:46:55 +01:00
3fd597638b Fix migration 2025-12-02 20:38:52 +01:00
645aa63186 Fix vscode launch params 2025-12-02 20:38:52 +01:00
7c05fd278c Update configs and logs paths in configuration 2025-12-02 20:38:52 +01:00
00114caa00 Add db config and move data dir config 2025-12-02 20:38:52 +01:00
7272aa26ec Refactor database interface and migration functions 2025-12-02 20:38:52 +01:00
fec989fee2 Implement SQLite database persistence for instance management 2025-12-02 20:38:52 +01:00
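
A minimal sketch of what this SQLite persistence layer can look like, assuming the mattn/go-sqlite3 driver (which requires CGO and would be consistent with the CGO_ENABLED=1 release fix near the top of this log). The schema is illustrative only:

```go
package main

import (
	"database/sql"
	"log"

	_ "github.com/mattn/go-sqlite3" // CGO-based driver
)

func main() {
	db, err := sql.Open("sqlite3", "data/llamactl.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Illustrative schema: one row per instance, options stored as JSON.
	// An AUTOINCREMENT id matches the "retrieve and set the auto-generated
	// ID" fix in the API key commits above.
	_, err = db.Exec(`CREATE TABLE IF NOT EXISTS instances (
		id      INTEGER PRIMARY KEY AUTOINCREMENT,
		name    TEXT UNIQUE NOT NULL,
		options TEXT NOT NULL
	)`)
	if err != nil {
		log.Fatal(err)
	}
}
```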
0c11365d7e Merge pull request #97 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-e5d9180762
chore: bump lucide-react from 0.554.0 to 0.555.0 in /webui in the npm-production group
2025-12-02 06:55:22 +01:00
dependabot[bot]
bb88fb2bb2 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.554.0 to 0.555.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.555.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.555.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-02 01:24:30 +00:00
6d049be13e Merge pull request #96 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-e747de9977
chore: bump typescript-eslint from 8.47.0 to 8.48.0 in /webui in the npm-development group
2025-11-25 11:13:54 +01:00
dependabot[bot]
bb0d4863d8 chore: bump typescript-eslint in /webui in the npm-development group
Bumps the npm-development group in /webui with 1 update: [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `typescript-eslint` from 8.47.0 to 8.48.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.48.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: typescript-eslint
  dependency-version: 8.48.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 22:26:17 +00:00
22a747c318 Merge pull request #95 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-143683ff68
chore: bump typescript-eslint from 8.46.4 to 8.47.0 in /webui in the npm-development group
2025-11-17 23:05:49 +01:00
ceef48a125 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-development-143683ff68 2025-11-17 23:03:42 +01:00
db1347a709 Merge pull request #94 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-1d33cd6545
chore: bump lucide-react from 0.553.0 to 0.554.0 in /webui in the npm-production group
2025-11-17 23:02:23 +01:00
dependabot[bot]
e4027722d7 chore: bump typescript-eslint in /webui in the npm-development group
Bumps the npm-development group in /webui with 1 update: [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `typescript-eslint` from 8.46.4 to 8.47.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.47.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: typescript-eslint
  dependency-version: 8.47.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-17 21:43:26 +00:00
dependabot[bot]
8218c042c8 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.553.0 to 0.554.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.554.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.554.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-17 21:42:58 +00:00
efed0f543b Merge pull request #93 from lordmathis/dependabot/npm_and_yarn/webui/js-yaml-4.1.1
chore: bump js-yaml from 4.1.0 to 4.1.1 in /webui
2025-11-16 18:42:04 +01:00
dependabot[bot]
aa0508eb9b chore: bump js-yaml from 4.1.0 to 4.1.1 in /webui
Bumps [js-yaml](https://github.com/nodeca/js-yaml) from 4.1.0 to 4.1.1.
- [Changelog](https://github.com/nodeca/js-yaml/blob/master/CHANGELOG.md)
- [Commits](https://github.com/nodeca/js-yaml/compare/4.1.0...4.1.1)

---
updated-dependencies:
- dependency-name: js-yaml
  dependency-version: 4.1.1
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-16 13:53:06 +00:00
514b1b0e76 Merge pull request #92 from lordmathis/feat/command-override
feat: Add per instance command override
2025-11-15 01:04:01 +01:00
6565be3676 Refactor ConfigContext hooks 2025-11-15 01:02:15 +01:00
ad772a05ce Refactor import statement in App.test.tsx 2025-11-15 00:45:45 +01:00
b594ade8f9 Add mocks for ConfigContext in tests to use default configuration values 2025-11-15 00:45:02 +01:00
2ceeddbce5 Improve instance creation documentation with clearer settings and options 2025-11-15 00:18:55 +01:00
6ed99fccf9 Update swagger api docs 2025-11-14 23:43:14 +01:00
4f8f4b96cd Fix docker_enabled inconsistency 2025-11-14 23:41:16 +01:00
c04c952293 Pass default config values to instance dialog 2025-11-14 23:07:30 +01:00
7544fbb1ce Refactor JSON marshaling in Options to improve thread safety 2025-11-14 21:50:58 +01:00
4f4feacaa8 Remove manual config fields assignment 2025-11-14 20:38:36 +01:00
2c28971e15 Add JSON tags to configuration structs 2025-11-14 20:37:33 +01:00
e77ed0cdef Deep copy config for sanitization 2025-11-14 20:25:40 +01:00
09605d07ab Implement ConfigContext for instance defaults 2025-11-14 19:24:18 +01:00
623e258a2a Add API endpoint to retrieve sanitized server configuration 2025-11-14 18:57:03 +01:00
91d8a9008f Add command override to webui 2025-11-14 18:44:39 +01:00
511889e56d Implement per instance command override on backend 2025-11-14 18:38:31 +01:00
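
The "Deep copy config for sanitization" commit pairs with the sanitized-config endpoint two lines below it. A sketch of one common way to do this, deep-copying via a JSON round-trip before blanking secrets; the struct and field names are assumed:

```go
package config

import "encoding/json"

type AuthConfig struct {
	ManagementKeys []string `json:"management_keys,omitempty"`
	InferenceKeys  []string `json:"inference_keys,omitempty"`
}

type AppConfig struct {
	Auth AuthConfig `json:"auth"`
}

// sanitizedConfig returns an independent copy of cfg with secrets removed,
// so a handler can serve it without exposing or mutating live state. The
// JSON round-trip copies all exported, tagged fields (cf. the "Add JSON
// tags to configuration structs" commit above).
func sanitizedConfig(cfg AppConfig) (AppConfig, error) {
	raw, err := json.Marshal(cfg)
	if err != nil {
		return AppConfig{}, err
	}
	var out AppConfig
	if err := json.Unmarshal(raw, &out); err != nil {
		return AppConfig{}, err
	}
	out.Auth.ManagementKeys = nil // never leak keys to the webui
	out.Auth.InferenceKeys = nil
	return out, nil
}
```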
19eb552dc7 Merge pull request #89 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-bfaa8512fe
chore: bump the npm-development group across 1 directory with 14 updates
2025-11-13 21:40:29 +01:00
4bddea2831 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-development-bfaa8512fe 2025-11-13 21:38:39 +01:00
b878dfe8da Merge pull request #91 from lordmathis/fix/node-types
fix: Add missing @types/node to types in tsconfig.json
2025-11-13 21:38:25 +01:00
d600212cd0 Add @types/node to types in tsconfig.json 2025-11-13 21:36:21 +01:00
dependabot[bot]
5837521821 chore: bump the npm-development group across 1 directory with 14 updates
Bumps the npm-development group with 14 updates in the /webui directory:

| Package | From | To |
| --- | --- | --- |
| [@eslint/js](https://github.com/eslint/eslint/tree/HEAD/packages/js) | `9.32.0` | `9.39.1` |
| [@types/eslint__js](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/eslint__js) | `8.42.3` | `9.14.0` |
| [@testing-library/jest-dom](https://github.com/testing-library/jest-dom) | `6.6.3` | `6.9.1` |
| [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `24.0.15` | `24.10.1` |
| [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/tree/HEAD/packages/plugin-react) | `4.7.0` | `5.1.1` |
| [@vitest/ui](https://github.com/vitest-dev/vitest/tree/HEAD/packages/ui) | `3.2.4` | `4.0.8` |
| [eslint](https://github.com/eslint/eslint) | `9.32.0` | `9.39.1` |
| [eslint-plugin-react-hooks](https://github.com/facebook/react/tree/HEAD/packages/eslint-plugin-react-hooks) | `5.2.0` | `7.0.1` |
| [jsdom](https://github.com/jsdom/jsdom) | `26.1.0` | `27.2.0` |
| [tw-animate-css](https://github.com/Wombosvideo/tw-animate-css) | `1.3.5` | `1.4.0` |
| [typescript](https://github.com/microsoft/TypeScript) | `5.8.3` | `5.9.3` |
| [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) | `8.38.0` | `8.46.4` |
| [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) | `7.1.11` | `7.2.2` |
| [vitest](https://github.com/vitest-dev/vitest/tree/HEAD/packages/vitest) | `3.2.4` | `4.0.8` |



Updates `@eslint/js` from 9.32.0 to 9.39.1
- [Release notes](https://github.com/eslint/eslint/releases)
- [Commits](https://github.com/eslint/eslint/commits/v9.39.1/packages/js)

Updates `@types/eslint__js` from 8.42.3 to 9.14.0
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/eslint__js)

Updates `@testing-library/jest-dom` from 6.6.3 to 6.9.1
- [Release notes](https://github.com/testing-library/jest-dom/releases)
- [Changelog](https://github.com/testing-library/jest-dom/blob/main/CHANGELOG.md)
- [Commits](https://github.com/testing-library/jest-dom/compare/v6.6.3...v6.9.1)

Updates `@types/node` from 24.0.15 to 24.10.1
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `@vitejs/plugin-react` from 4.7.0 to 5.1.1
- [Release notes](https://github.com/vitejs/vite-plugin-react/releases)
- [Changelog](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite-plugin-react/commits/plugin-react@5.1.1/packages/plugin-react)

Updates `@vitest/ui` from 3.2.4 to 4.0.8
- [Release notes](https://github.com/vitest-dev/vitest/releases)
- [Commits](https://github.com/vitest-dev/vitest/commits/v4.0.8/packages/ui)

Updates `eslint` from 9.32.0 to 9.39.1
- [Release notes](https://github.com/eslint/eslint/releases)
- [Commits](https://github.com/eslint/eslint/compare/v9.32.0...v9.39.1)

Updates `eslint-plugin-react-hooks` from 5.2.0 to 7.0.1
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/packages/eslint-plugin-react-hooks/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/HEAD/packages/eslint-plugin-react-hooks)

Updates `jsdom` from 26.1.0 to 27.2.0
- [Release notes](https://github.com/jsdom/jsdom/releases)
- [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md)
- [Commits](https://github.com/jsdom/jsdom/compare/26.1.0...27.2.0)

Updates `tw-animate-css` from 1.3.5 to 1.4.0
- [Release notes](https://github.com/Wombosvideo/tw-animate-css/releases)
- [Commits](https://github.com/Wombosvideo/tw-animate-css/compare/v1.3.5...v1.4.0)

Updates `typescript` from 5.8.3 to 5.9.3
- [Release notes](https://github.com/microsoft/TypeScript/releases)
- [Changelog](https://github.com/microsoft/TypeScript/blob/main/azure-pipelines.release-publish.yml)
- [Commits](https://github.com/microsoft/TypeScript/compare/v5.8.3...v5.9.3)

Updates `typescript-eslint` from 8.38.0 to 8.46.4
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.46.4/packages/typescript-eslint)

Updates `vite` from 7.1.11 to 7.2.2
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.2.2/packages/vite)

Updates `vitest` from 3.2.4 to 4.0.8
- [Release notes](https://github.com/vitest-dev/vitest/releases)
- [Commits](https://github.com/vitest-dev/vitest/commits/v4.0.8/packages/vitest)

---
updated-dependencies:
- dependency-name: "@eslint/js"
  dependency-version: 9.39.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@types/eslint__js"
  dependency-version: 9.14.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@testing-library/jest-dom"
  dependency-version: 6.9.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@types/eslint__js"
  dependency-version: 9.14.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@types/node"
  dependency-version: 24.10.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@vitejs/plugin-react"
  dependency-version: 5.1.1
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@vitest/ui"
  dependency-version: 4.0.8
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: eslint
  dependency-version: 9.39.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: eslint-plugin-react-hooks
  dependency-version: 7.0.1
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: jsdom
  dependency-version: 27.2.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: tw-animate-css
  dependency-version: 1.4.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript
  dependency-version: 5.9.3
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.46.4
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vite
  dependency-version: 7.2.2
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vitest
  dependency-version: 4.0.8
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-13 20:19:01 +00:00
7e71ada904 Merge pull request #88 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-ebddbb6ace
chore: bump the npm-production group in /webui with 5 updates
2025-11-13 21:16:36 +01:00
5335634879 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-production-ebddbb6ace 2025-11-13 21:11:33 +01:00
15d1e17454 Merge pull request #90 from lordmathis/feat/custom-args
feat: Add support for custom args
2025-11-13 21:10:45 +01:00
72b70918fa Add useEffect to sync internal state with value prop in KeyValueInput 2025-11-13 21:04:20 +01:00
11bfe75a3c Add support for extra args for command parser 2025-11-13 20:41:08 +01:00
ae5358ff65 Change FlashAttn field type to string in LlamaServerOptions 2025-11-12 23:49:34 +01:00
bff8e7d914 Refactor command line argument keys to use snake_case format in LlamaServerOptions 2025-11-12 23:46:15 +01:00
5ad076919e Add extra args test to backend 2025-11-12 23:40:07 +01:00
c022746cd8 Fix import path for EnvVarsInput component 2025-11-12 23:13:09 +01:00
8d92f9b371 Add ExtraArgs support for Llama, Mlx, and Vllm server options 2025-11-12 23:11:22 +01:00
15180a227b Add support for extra arguments in frontend 2025-11-12 22:50:15 +01:00
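
A sketch of the ExtraArgs idea from these commits: structured options render to flags first, then user-supplied extra args are appended verbatim, so any backend flag can be passed even when llamactl has no dedicated field for it. The function shape is assumed:

```go
package main

import "fmt"

// buildCommandArgs renders known options as --key value flags and appends
// free-form extra args last, so they can extend the structured basics.
func buildCommandArgs(opts map[string]string, extraArgs []string) []string {
	args := make([]string, 0, 2*len(opts)+len(extraArgs))
	for key, value := range opts {
		args = append(args, "--"+key, value)
	}
	return append(args, extraArgs...)
}

func main() {
	fmt.Println(buildCommandArgs(
		map[string]string{"model": "llama-7b.gguf"},
		[]string{"--no-mmap", "--verbose"},
	))
}
```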
dependabot[bot]
0708327a16 chore: bump the npm-production group in /webui with 5 updates
Bumps the npm-production group in /webui with 5 updates:

| Package | From | To |
| --- | --- | --- |
| [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) | `0.525.0` | `0.553.0` |
| [react](https://github.com/facebook/react/tree/HEAD/packages/react) | `19.1.0` | `19.2.0` |
| [react-dom](https://github.com/facebook/react/tree/HEAD/packages/react-dom) | `19.1.0` | `19.2.0` |
| [tailwind-merge](https://github.com/dcastil/tailwind-merge) | `3.3.1` | `3.4.0` |
| [zod](https://github.com/colinhacks/zod) | `4.0.5` | `4.1.12` |


Updates `lucide-react` from 0.525.0 to 0.553.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.553.0/packages/lucide-react)

Updates `react` from 19.1.0 to 19.2.0
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/v19.2.0/packages/react)

Updates `react-dom` from 19.1.0 to 19.2.0
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/v19.2.0/packages/react-dom)

Updates `tailwind-merge` from 3.3.1 to 3.4.0
- [Release notes](https://github.com/dcastil/tailwind-merge/releases)
- [Commits](https://github.com/dcastil/tailwind-merge/compare/v3.3.1...v3.4.0)

Updates `zod` from 4.0.5 to 4.1.12
- [Release notes](https://github.com/colinhacks/zod/releases)
- [Commits](https://github.com/colinhacks/zod/compare/v4.0.5...v4.1.12)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.553.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: react
  dependency-version: 19.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: react-dom
  dependency-version: 19.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: tailwind-merge
  dependency-version: 3.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: zod
  dependency-version: 4.1.12
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-12 20:58:32 +00:00
a2740055c2 Merge pull request #87 from lordmathis/chore/update-dependabot
chore: Simplify dependabot configuration
2025-11-12 21:57:07 +01:00
0ddffaa2e6 Simplify dependabot configuration 2025-11-12 21:53:01 +01:00
9a160a5312 Merge pull request #80 from lordmathis/chore/dependabot
chore: Add dependabot configuration for Go and npm dependencies
2025-11-12 21:34:51 +01:00
8861057f11 Add dependabot configuration for Go and npm dependencies 2025-11-12 21:31:47 +01:00
34edb8a2e5 Merge pull request #78 from lordmathis/feat/inflight-requests
feat: Wait for inflight requests to finish before shutting down an instance
2025-10-30 18:08:55 +01:00
560850f86d Add shutdown state checks in HTTP handlers 2025-10-30 18:00:59 +01:00
c340439306 Add support for 'shutting_down' state in HealthBadge and health service 2025-10-29 00:09:18 +01:00
77c0e22fd0 Use instance's ServeHTTP in handlers 2025-10-29 00:01:29 +01:00
d65c5ab717 Wait for inflight requests before stopping 2025-10-29 00:00:56 +01:00
2b94244c8a Replace GetProxy with ServeHttp in instance 2025-10-29 00:00:02 +01:00
2e5644db53 Implement inflight request tracking 2025-10-28 23:59:02 +01:00
7ee22fee51 Implement shutting down status 2025-10-28 23:53:11 +01:00
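
The inflight-request commits above describe a common graceful-shutdown pattern. A hedged rendering follows; the type and field names are assumptions, not llamactl's actual structs:

```go
package instance

import (
	"net/http"
	"sync"
)

// Instance tracks in-flight proxied requests so Stop can drain them.
type Instance struct {
	mu       sync.Mutex
	shutting bool
	inflight sync.WaitGroup
	proxy    http.Handler
}

// ServeHTTP rejects new requests once shutdown starts; otherwise it holds
// a WaitGroup slot for the duration of the proxied request.
func (i *Instance) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	i.mu.Lock()
	if i.shutting {
		i.mu.Unlock()
		http.Error(w, "instance is shutting down", http.StatusServiceUnavailable)
		return
	}
	i.inflight.Add(1)
	i.mu.Unlock()
	defer i.inflight.Done()
	i.proxy.ServeHTTP(w, r)
}

// Stop flips the instance into the "shutting_down" state and blocks until
// in-flight requests finish, before the underlying process is terminated.
func (i *Instance) Stop() {
	i.mu.Lock()
	i.shutting = true
	i.mu.Unlock()
	i.inflight.Wait()
	// ... terminate the backend process here ...
}
```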
e5baedb776 Merge pull request #76 from lordmathis/feat/import-export
feat: Add support for instance import and export on the frontend
2025-10-27 20:46:48 +01:00
e6205b930e Document import and export features 2025-10-27 20:44:28 +01:00
f9eb424690 Fix concurrent map write issue in MarshalJSON by initializing BackendOptions 2025-10-27 20:36:42 +01:00
5b84b64623 Fix some typescript issues 2025-10-27 20:36:31 +01:00
7813a5f2be Move import instance configuration to InstanceDialog component 2025-10-27 20:17:18 +01:00
a00c9b82a6 Add import functionality for instance configuration from JSON file 2025-10-27 20:11:22 +01:00
cbfa6bd48f Fix export functionality to exclude computed field from JSON output 2025-10-27 19:59:43 +01:00
bee0f72c10 Add export functionality to InstanceCard component 2025-10-27 19:55:07 +01:00
a5d8f541f0 Merge pull request #75 from lordmathis/fix/delete-instance
fix: Prevent restarting instance from getting deleted
2025-10-27 19:27:58 +01:00
dfcc16083c Update test configuration to use 'sh -c "sleep 999999"' command 2025-10-27 19:25:13 +01:00
6ec2919049 Fix instance start simulation in TestUpdateInstance 2025-10-27 19:14:54 +01:00
d6a6f377fc Fix logger race condition 2025-10-27 19:06:06 +01:00
cd9a71d9fc Update test configuration to use 'yes' command instead of 'sleep' 2025-10-27 18:54:20 +01:00
2c4cc5a69a Fix manager tests 2025-10-27 18:47:17 +01:00
b1fc1d2dc8 Add InstancesDir to test configuration for instance management 2025-10-27 18:38:23 +01:00
08c47a16a0 Fix operations tests 2025-10-27 18:35:16 +01:00
219db7abce Move port range validation to config 2025-10-27 18:23:49 +01:00
14131a6274 Remove redundant code 2025-10-27 18:18:25 +01:00
e65f4f1641 Remove unsupported error wrapping from log.Printf 2025-10-27 18:01:58 +01:00
5ef0654cdd Use %w for error wrapping in log messages across multiple files 2025-10-27 17:54:39 +01:00
1814772fa2 Fix instance deletion check to account for restarting status 2025-10-27 17:42:27 +01:00
f1666565d8 Merge pull request #74 from lordmathis/refactor/health-check
refactor: Improve frontend health check
2025-10-26 19:54:38 +01:00
13ef13449c Fix ts type check 2025-10-26 19:52:44 +01:00
777e07752b Fix health service tests 2025-10-26 19:48:07 +01:00
75e7b628ca Remove 'loading' and 'error' states 2025-10-26 19:12:35 +01:00
2a1bebeb24 Improve health checks for instances 2025-10-26 19:05:03 +01:00
f94d05dad2 Add Restarting state 2025-10-26 18:55:05 +01:00
14f4a80c89 Merge pull request #73 from lordmathis/refactor/docs
refactor: Update docs structure and improve content clarity
2025-10-26 17:30:50 +01:00
d768845805 Add Authorization header to curl examples. 2025-10-26 17:28:01 +01:00
4f94f63de3 Minor docs improvements 2025-10-26 17:19:53 +01:00
249ff2a7aa Capitalize godoc tags 2025-10-26 16:49:27 +01:00
6c522a2199 Add core concepts to quick-start 2025-10-26 16:44:32 +01:00
3ff87f24bd Update swagger docs 2025-10-26 16:36:24 +01:00
eac4f834c0 Update API endpoints in managing instances and quick start documentation 2025-10-26 16:35:58 +01:00
59c954811d Update API routes in godoc 2025-10-26 16:35:42 +01:00
dd40b153d8 Minor docs improvements 2025-10-26 16:10:37 +01:00
c0cd03c75d Refactor troubleshooting documentation for instance management issues 2025-10-26 15:59:17 +01:00
6a840069e1 Fix llama.cpp link in troubleshooting docs 2025-10-26 15:35:51 +01:00
7509722dfa Clarify port and api key assignments 2025-10-26 15:32:40 +01:00
a5e9e01ff4 Update screenshots 2025-10-26 15:25:53 +01:00
7063e83cd2 Improve OpenAPI docs styling 2025-10-26 15:00:30 +01:00
781921fc5a Refactor documentation headings 2025-10-26 14:50:42 +01:00
85e21596d9 Auto generate mkdocs api reference from swagger 2025-10-26 14:43:27 +01:00
975c740272 Add API key security definitions to Swagger documentation 2025-10-26 14:42:55 +01:00
e387280405 Update docs path in contributing 2025-10-26 14:41:59 +01:00
58c8899fd9 Update import path for API documentation 2025-10-26 14:08:48 +01:00
f98b09ea78 Move apidocs to docs folder 2025-10-26 14:04:53 +01:00
90b65cad79 Update docs navigation 2025-10-26 13:57:16 +01:00
9e88b63fca Flatten the docs structure 2025-10-26 13:55:05 +01:00
52d8c2a082 Simplify README.md 2025-10-26 13:43:44 +01:00
108a977a9c Merge pull request #72 from lordmathis/refactor/handlers
refactor: Extract common helper functions in API handlers
2025-10-26 12:07:42 +01:00
969fee837f Fix instance name retrieval 2025-10-26 11:34:45 +01:00
4e587953d8 Refactor llama server command handlers to use a common execution function 2025-10-26 11:00:10 +01:00
356c5be2c6 Improve comments 2025-10-26 10:34:36 +01:00
836e918fc5 Rename ProxyToInstance to InstanceProxy for clarity in routing 2025-10-26 10:22:37 +01:00
a7593e9a58 Split LlamaCppProxy handler 2025-10-26 10:21:40 +01:00
9259763054 Add getInstance method to handlers 2025-10-26 09:54:24 +01:00
94dce4c9bb Implement helper response handling functions 2025-10-26 00:12:33 +02:00
a3f9213f04 Implement ensureInstanceRunning helper 2025-10-25 23:44:21 +02:00
de5a38e7fd Refactor command parsing 2025-10-25 20:23:08 +02:00
ea6c76cc96 Update multi valued flags in backends 2025-10-25 19:02:46 +02:00
bd6436840e Implement common ParseCommand interface 2025-10-25 18:41:46 +02:00
0a7420c9f9 Merge pull request #71 from lordmathis/refactor/proxy
refactor: Move all proxy handling to instance package
2025-10-25 16:32:32 +02:00
c038aac91b Remove redundant UpdateLastRequestTime calls 2025-10-25 16:09:57 +02:00
7d9b983f93 Don't strip remote llama-cpp proxy prefix 2025-10-25 16:02:09 +02:00
889df3cb79 Add API key header for remote instances in proxy build 2025-10-25 14:14:39 +02:00
ff719f3ef9 Remove remote instance proxy handling from handlers 2025-10-25 14:07:11 +02:00
6a973fae2d Fix tests 2025-10-25 00:14:42 +02:00
58f8861d17 Switch manager to global app config 2025-10-25 00:14:12 +02:00
eff59a86fd Remove proxy, logger and process init from UnmarshalJSON 2025-10-24 23:41:33 +02:00
174d1772d6 Implement remote proxy handling in instance 2025-10-24 23:16:45 +02:00
4bbf45f0b9 Merge pull request #70 from lordmathis/refactor/manager
refactor: Split instance manager into single focus structs
2025-10-22 20:21:15 +02:00
a9fb0d613d Validate instance name in openai proxy 2025-10-22 18:55:57 +02:00
3b8bc658e3 Add name validation to backend handlers 2025-10-22 18:50:51 +02:00
c6053f6afd Remove old validation tests 2025-10-22 18:50:38 +02:00
c794e4f98b Move instance name validation to handlers 2025-10-22 18:40:39 +02:00
0f2c14d3ed Validate instance names to prevent injection attacks 2025-10-22 00:02:23 +02:00
13f3bed5fe Add URL encoding for instance name in API calls in webui 2025-10-21 23:36:26 +02:00
7c2c02ab2f Use url escape instead for instance name param 2025-10-21 23:24:27 +02:00
e0289ff42f Add instance name validation for URL safety and corresponding tests 2025-10-21 23:16:20 +02:00
bc025bbe28 Fix instance name validation 2025-10-21 22:57:23 +02:00
c6ebe47511 Fix path validation false positive 2025-10-21 22:47:41 +02:00
9bb106a1ce Remove deprecated operation mutex in instanceManager 2025-10-21 22:38:00 +02:00
bac18b5626 Unexport factory functions 2025-10-21 22:37:10 +02:00
2b51b4a47f Simplify manager tests 2025-10-21 22:30:08 +02:00
c44712e813 Remove redundant instance manager tests 2025-10-21 22:15:12 +02:00
6afe120a0e Implement more manager tests 2025-10-21 22:07:10 +02:00
4d05fcea46 Improve manager tests 2025-10-21 21:39:01 +02:00
7c64ab9cc6 Make StartInstance and StopInstance idempotent 2025-10-21 18:49:49 +02:00
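
The name-validation commits in this cluster guard against injection and path tricks. A conservative whitelist check along these lines is the usual approach; the exact pattern below is an assumption:

```go
package validation

import (
	"fmt"
	"regexp"
)

// validName permits only URL- and filesystem-safe characters, which rules
// out path traversal ("../"), shell metacharacters, and URL injection.
var validName = regexp.MustCompile(`^[a-zA-Z0-9_-]{1,64}$`)

func validateInstanceName(name string) error {
	if !validName.MatchString(name) {
		return fmt.Errorf("invalid instance name %q", name)
	}
	return nil
}
```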
62c431a041 Merge pull request #69 from lordmathis/dependabot/npm_and_yarn/webui/npm_and_yarn-fd296dbd23
Bump vite from 7.1.5 to 7.1.11 in /webui in the npm_and_yarn group across 1 directory
2025-10-21 18:40:46 +02:00
dependabot[bot]
e5f1b7c056 Bump vite in /webui in the npm_and_yarn group across 1 directory
Bumps the npm_and_yarn group with 1 update in the /webui directory: [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `vite` from 7.1.5 to 7.1.11
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.1.11/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 7.1.11
  dependency-type: direct:development
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-21 00:23:03 +00:00
a2d4622486 Refactor instance locking mechanism to use per-instance locks for concurrency 2025-10-20 22:59:31 +02:00
d923732aba Delete unused code 2025-10-20 22:27:22 +02:00
1ae28a0b09 Unexport member struct methods 2025-10-20 22:22:09 +02:00
c537bc48b8 Refactor API path handling in remoteManager to use a constant for base path 2025-10-20 22:00:06 +02:00
ffb4b49c94 Split manager into multiple structs 2025-10-20 21:55:50 +02:00
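
The per-instance locking commit above replaces a single manager-wide mutex. A sketch of a named-lock registry that achieves this (the shape is assumed):

```go
package manager

import "sync"

// lockRegistry hands out one mutex per instance name, so operations on
// different instances no longer serialize behind one manager lock.
type lockRegistry struct {
	mu    sync.Mutex
	locks map[string]*sync.Mutex
}

func (r *lockRegistry) lockFor(name string) *sync.Mutex {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.locks == nil {
		r.locks = make(map[string]*sync.Mutex)
	}
	if l, ok := r.locks[name]; ok {
		return l
	}
	l := &sync.Mutex{}
	r.locks[name] = l
	return l
}
```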
91d956203d Merge pull request #68 from lordmathis/refactor/backend-options
refactor: Move all backend type switching to backends package
2025-10-19 21:04:09 +02:00
b25ad48605 Refactor backend options marshaling/unmarshaling 2025-10-19 20:48:05 +02:00
d8e0da9cf8 Refactor backend options to implement common interface and streamline validation 2025-10-19 20:36:57 +02:00
f42f000539 Implement mlx and vllm tests and remove redundant code 2025-10-19 19:45:31 +02:00
72fe780e31 Simplify instance tests 2025-10-19 19:14:32 +02:00
55a9450077 Fix instance tests 2025-10-19 19:08:38 +02:00
72586fc627 Simplify config tests 2025-10-19 19:06:06 +02:00
6a91fe13e0 Fix local node override tests 2025-10-19 18:59:59 +02:00
51a7ac590e Fix preventing local proxy usage for remote instances 2025-10-19 18:55:56 +02:00
82f4f7beed Ensure local node is defined in LoadConfig by adding default config if missing 2025-10-19 18:47:02 +02:00
ec65ba8968 Add debug files to .gitignore 2025-10-19 18:39:46 +02:00
867380a06d Remove GetBackendSettings method from config 2025-10-19 18:32:05 +02:00
3500971f03 Fix JSON marshaling of backend options by using a pointer 2025-10-19 18:27:22 +02:00
9da2433a7c Refactor instance and manager tests to use BackendOptions structure 2025-10-19 18:07:14 +02:00
55f671c354 Refactor backend options handling and validation 2025-10-19 17:41:08 +02:00
2a7010d0e1 Flatten backends package structure 2025-10-19 15:50:42 +02:00
f209bc88b6 Update .gitignore and launch configuration for dev environment 2025-10-19 15:50:30 +02:00
3fffcc5b37 Merge pull request #67 from lordmathis/refactor/instance-split
refactor: Split instance struct into status, options, logger, proxy and process for better maintenance
2025-10-18 13:23:50 +02:00
851c73f058 Add tests for status change callback and options preservation 2025-10-18 13:19:01 +02:00
8ac4b370c9 Unexport struct methods 2025-10-18 11:25:26 +02:00
a7740000d2 Refactor instance creation to initialize logger, proxy, and process only for local instances 2025-10-18 10:39:04 +02:00
b13f8c471d Split off process struct 2025-10-18 10:28:15 +02:00
3f834004a8 Rename NewInstance to New 2025-10-18 00:34:18 +02:00
113b51eda2 Refactor instance node handling to use a map 2025-10-18 00:33:16 +02:00
7bf0809122 Fix test compilation after merge
Update instance tests to use correct type names:
- CreateInstanceOptions -> Options
- InstanceStatus -> Status

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:13:53 +02:00
a1ffdb02a4 Merge main into refactor/instance-split
Resolved conflicts in:
- pkg/instance/instance.go: Combined remote detection logic from main with refactored structure
- pkg/manager/manager_test.go: Updated manager initialization to include localNodeName parameter
- pkg/manager/remote_ops.go: Removed stripNodesFromOptions function that was deleted in main
- pkg/manager/remote_ops_test.go: Removed file that was deleted in main

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 00:10:09 +02:00
eb5abae173 Merge pull request #66 from lordmathis/fix/disable-node-edit
fix: Prevent node change on update
2025-10-16 22:37:59 +02:00
696a2cb18b Prevent node change on update 2025-10-16 22:35:29 +02:00
e7402f0029 Merge pull request #65 from lordmathis/fix/local-node
fix: Detect local instances based on local node in nodes array
2025-10-16 22:28:01 +02:00
5c9a397746 Fix get local proxy 2025-10-16 22:11:29 +02:00
e97ca727d1 Clarify node configuration in docs 2025-10-16 21:50:06 +02:00
9f3c01384b Remove stripNodesFromOptions function 2025-10-16 21:29:27 +02:00
c5097e59be Fix local instance detection 2025-10-16 21:26:04 +02:00
4b30791be2 Refactor instance options structure and related code 2025-10-16 20:53:24 +02:00
a96ed4d797 Fix status json tag static check 2025-10-16 20:22:12 +02:00
5afc22924f Refactor Status struct 2025-10-16 20:15:22 +02:00
e0ec00d141 Remove redundant instance prefix from status 2025-10-16 19:40:03 +02:00
80ca0cbd4f Rename Process to Instance 2025-10-16 19:38:44 +02:00
964c6345ef Refactor backend host/port retrieval and remove redundant code for health checks 2025-10-14 22:16:26 +02:00
92a76bc84b Move proxy to separate struct 2025-10-14 22:01:09 +02:00
02909c5153 Remove redundant instance prefix from logger 2025-10-14 19:46:43 +02:00
ef3478e2a3 Move logging to separate struct 2025-10-14 19:32:15 +02:00
cf20f304b3 Merge pull request #61 from lordmathis/fix/docs-formatting
fix: Add MkDocs hook to fix line endings in markdown files
2025-10-09 23:28:09 +02:00
72eba48b80 Add MkDocs hook to fix line endings in markdown files 2025-10-09 23:23:17 +02:00
c3037f914d Merge pull request #60 from lordmathis/lordmathis-patch-1
Update docs.yaml
2025-10-09 22:31:38 +02:00
81266b4bc4 Update docs.yaml 2025-10-09 22:29:23 +02:00
a31af94e7b Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
2025-10-09 22:23:27 +02:00
9ee0a184b3 Re-validate instance name in DeleteInstance for improved security 2025-10-09 22:18:53 +02:00
5436c28a1f Add instance name validation before deletion for security 2025-10-09 22:10:40 +02:00
73b9dd5bc7 Rename workflows for consistency 2025-10-09 21:53:14 +02:00
f61e8dad5c Add User Docs badge to README 2025-10-09 21:51:38 +02:00
ab2770bdd9 Add documentation for remote node deployment and configuration 2025-10-09 21:50:39 +02:00
e7a6a7003e Skip remote instances in checkAllTimeouts and EvictLRUInstance methods 2025-10-09 21:13:38 +02:00
2b950ee649 Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data 2025-10-09 20:39:21 +02:00
b965b77c18 Prevent remote instances from using local proxy in GetProxy method 2025-10-09 20:24:54 +02:00
8a16a195de Fix getting remote instance logs 2025-10-09 20:22:32 +02:00
9684a8a09b Enhance instance management to preserve local state for remote instances 2025-10-09 19:34:52 +02:00
9d5f01d4ae Auto-select first node in InstanceSettingsCard if none is selected 2025-10-09 19:13:58 +02:00
e281708b20 Enhance auto-start logic to differentiate between remote and local instances 2025-10-09 18:56:23 +02:00
8d9b0c0621 Initialize timeProvider and logger in UnmarshalJSON for Process 2025-10-09 18:56:12 +02:00
6c1a76691d Improve cleanup of options in InstanceDialog to skip empty strings and arrays 2025-10-09 18:49:36 +02:00
5d958ed283 Fix backend_options cleanup to exclude empty arrays in InstanceDialog 2025-10-09 18:38:33 +02:00
56b95d1243 Refactor InstanceSettingsCard and API types to use NodesMap 2025-10-08 19:52:39 +02:00
688b815ca7 Add LocalNode configuration 2025-10-08 19:43:53 +02:00
7f6725da96 Refactor NodeConfig handling to use a map 2025-10-08 19:24:24 +02:00
3418735204 Add stripNodesFromOptions function to prevent routing loops in remote requests 2025-10-07 20:27:31 +02:00
2f1cf5acdc Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests 2025-10-07 19:57:21 +02:00
01380e6641 Update instance manager tests to use empty NodeConfig slice 2025-10-07 19:18:13 +02:00
6298b03636 Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling 2025-10-07 18:57:08 +02:00
aae3f84d49 Implement caching for remote instance proxies and enhance proxy request handling 2025-10-07 18:44:23 +02:00
554796391b Remove test config file 2025-10-07 18:05:30 +02:00
16b28bac05 Merge branch 'main' into feat/multi-host 2025-10-07 18:04:24 +02:00
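
The multi-node commits move node configuration from a slice to a map with a designated local node. A sketch of what that shape implies for remote-instance detection; the field names are assumed:

```go
package config

type NodeConfig struct {
	Address string `json:"address,omitempty"`
	APIKey  string `json:"api_key,omitempty"`
}

type Config struct {
	LocalNode string                `json:"local_node"`
	Nodes     map[string]NodeConfig `json:"nodes"`
}

// isRemote reports whether an instance pinned to nodeName must be proxied
// to another llamactl node instead of being run locally (cf. the "Detect
// local instances based on local node" fix above).
func (c Config) isRemote(nodeName string) bool {
	return nodeName != "" && nodeName != c.LocalNode
}
```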
1892dc8315 Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy
feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`
2025-10-06 20:23:44 +02:00
Anuruth Lertpiya
997bd1b063 Changed status code to StatusBadRequest (400) when an invalid model name is requested. 2025-10-05 14:53:20 +00:00
Anuruth Lertpiya
fa43f9e967 Added support for proxying llama.cpp native API endpoints via /llama-cpp/{name}/ 2025-10-05 14:28:33 +00:00
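
A hedged sketch of the `/llama-cpp/{name}/` passthrough, assuming a chi-style router; llamactl's real handler wiring may differ:

```go
package server

import (
	"net/http"
	"net/http/httputil"
	"strings"

	"github.com/go-chi/chi/v5"
)

// llamaCppProxy forwards llama.cpp-native endpoints (e.g. /slots, /infill)
// to the named instance. Unknown names are a client error, per the
// StatusBadRequest commit above.
func llamaCppProxy(lookup func(string) (*httputil.ReverseProxy, bool)) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		name := chi.URLParam(r, "name")
		proxy, ok := lookup(name)
		if !ok {
			http.Error(w, "unknown instance", http.StatusBadRequest)
			return
		}
		// Strip the management prefix before handing the request to the
		// local backend; a later commit above ("Don't strip remote
		// llama-cpp proxy prefix") keeps it for remote instances.
		r.URL.Path = strings.TrimPrefix(r.URL.Path, "/llama-cpp/"+name)
		proxy.ServeHTTP(w, r)
	}
}
```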
db9eebeb8b Merge pull request #56 from lordmathis/fix/body-already-read
Fix double read of json response when content-length header is missing
2025-10-04 22:28:22 +02:00
bd062f8ca0 Mock Response.clone for tests 2025-10-04 22:22:25 +02:00
8ebdb1a183 Fix double read of json response when content-length header is missing 2025-10-04 22:16:28 +02:00
7272212081 Merge pull request #55 from lordmathis/fix/auto-restart
fix: Set status to Stopped for instances with auto-restart disabled
2025-10-04 21:45:12 +02:00
035e184789 Merge branch 'main' into fix/auto-restart 2025-10-04 21:22:50 +02:00
d15976e7aa Implement auto-stop for instances with auto-restart disabled and add corresponding tests 2025-10-04 21:17:55 +02:00
4fa75d9801 Merge pull request #52 from BobbyL2k/feat/config-cors-headers
feat: Added support for configuring access-control-request-headers for CORS
2025-10-04 20:45:27 +02:00
Anuruth Lertpiya
0e1bc8a352 Added support for configuring CORS headers 2025-10-04 09:13:40 +00:00
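
A sketch of configurable CORS request headers, with the allow-list coming from config rather than being hard-coded. The middleware shape and the wildcard origin are assumptions:

```go
package server

import (
	"net/http"
	"strings"
)

// corsMiddleware answers preflight requests using the configured
// access-control-request-headers allow-list.
func corsMiddleware(allowedHeaders []string) func(http.Handler) http.Handler {
	allow := strings.Join(allowedHeaders, ", ")
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Access-Control-Allow-Origin", "*")
			w.Header().Set("Access-Control-Allow-Headers", allow)
			if r.Method == http.MethodOptions {
				w.WriteHeader(http.StatusNoContent)
				return
			}
			next.ServeHTTP(w, r)
		})
	}
}
```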
b728a7c6b2 Fix fetchNodes call to ensure proper handling of promise 2025-10-03 10:53:29 +02:00
a491f29483 Add node selection functionality to InstanceSettingsCard and define Node API 2025-10-02 23:18:33 +02:00
670f8ff81b Split up handlers 2025-10-02 23:11:20 +02:00
da56456504 Add node management endpoints to handle listing and retrieving node details 2025-10-02 22:51:41 +02:00
c30053e51c Enhance instance loading to support remote instances and handle node configuration 2025-10-01 22:59:45 +02:00
347c58e15f Enhance instance manager to persist remote instances and update tracking on modifications 2025-10-01 22:58:57 +02:00
2ed67eb672 Add remote instance proxying functionality to handler 2025-10-01 22:17:19 +02:00
0188f82306 Implement remote instance creation and deletion in instance manager 2025-10-01 22:05:18 +02:00
e0f176de10 Enhance instance manager to support remote instance management and update related tests 2025-10-01 20:25:06 +02:00
2759be65a5 Add remote instance management functionality and configuration support 2025-09-30 21:09:05 +02:00
1e5e86d2c3 Merge pull request #50 from lordmathis/feat/docker-image
feat: Add Dockerfiles for running llamactl in docker
2025-09-29 21:26:23 +02:00
25d3d70707 Update README and installation guide to reflect Dockerfile paths and add source build instructions 2025-09-29 21:18:13 +02:00
e54cfd006d Add Dockerfile for building from source 2025-09-29 21:17:40 +02:00
7d39e7ee86 Move docker stuff to a dedicated folder 2025-09-29 21:16:51 +02:00
222d913b4a Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support
Added support for serving behind a reverse proxy
2025-09-29 20:32:11 +02:00
Anuruth Lertpiya
03a7a5d139 Update configuration.md with reverse proxy related information 2025-09-29 13:54:15 +00:00
Anuruth Lertpiya
e50660c379 Fixed broken webui tests 2025-09-29 13:38:24 +00:00
Anuruth Lertpiya
5906d89f8d Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend
  - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths
- Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path
2025-09-29 12:43:10 +00:00
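
The commit body above explains the reverse-proxy changes; the per-backend response-headers part can be sketched as a small wrapper (the config shape is assumed) that lets users set `X-Accel-Buffering: no` so nginx does not buffer streaming responses:

```go
package server

import "net/http"

// withResponseHeaders applies configured per-backend response headers,
// e.g. {"X-Accel-Buffering": "no"} to disable nginx stream buffering.
func withResponseHeaders(h http.Handler, headers map[string]string) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		for key, value := range headers {
			w.Header().Set(key, value)
		}
		h.ServeHTTP(w, r)
	})
}
```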
cb2d95139f Setup data dir in Docker and docker-compose 2025-09-28 22:17:38 +02:00
889a8707e7 Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands 2025-09-28 22:17:38 +02:00
070c91787d Add environment variable for llamactl command in Dockerfile 2025-09-28 22:17:38 +02:00
169ee422ec Update README and installation guide to clarify Docker support and CUDA configuration 2025-09-28 22:17:38 +02:00
bb0176b7f5 Update Dockerfile to use server-cuda image for improved performance 2025-09-28 22:17:38 +02:00
291ec7995f Update Docker run commands to use cached directories and remove unnecessary environment variables 2025-09-28 22:17:38 +02:00
b940b38e46 Initial support for docker 2025-09-28 22:17:38 +02:00
92cb57e816 Merge pull request #48 from lordmathis/fix/command-environment
fix: Pass host environment to instances
2025-09-28 21:40:50 +02:00
0ecd55c354 Start with host environment for instances 2025-09-28 21:37:48 +02:00
b4c17194eb Merge pull request #47 from lordmathis/fix/nil-context
fix: Initialize context before building command
2025-09-28 20:59:30 +02:00
808092decf Initialize context in Start method for command execution 2025-09-28 20:51:11 +02:00
12bbf34236 Merge pull request #46 from lordmathis/feat/env-vars
feat: Add support for passing env vars to instances
2025-09-28 15:42:02 +02:00
9a7255a52d Refactor Docker support section in README for clarity and conciseness 2025-09-28 15:31:50 +02:00
97a7c9a4e3 Detail env var support in docs 2025-09-28 15:29:43 +02:00
fa9335663a Parse backend env vars from env vars 2025-09-28 15:22:01 +02:00
d092518114 Update documentation 2025-09-28 15:10:35 +02:00
ffa0a0c161 Remove ZodFormField and BasicInstanceFields components 2025-09-28 14:42:10 +02:00
1fbf809a2d Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard 2025-09-28 14:42:10 +02:00
c984d95723 Add environment variable support to instance options and command building 2025-09-28 14:42:10 +02:00
50e1355205 Add environment field to BackendSettings for improved configuration 2025-09-28 14:42:10 +02:00
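
Taken together with the "Start with host environment for instances" fix just above, environment handling can be sketched as: inherit `os.Environ`, then layer per-instance variables on top. The option name is assumed, and a real implementation would de-duplicate keys:

```go
package main

import (
	"os"
	"os/exec"
)

// buildEnv starts from the host environment (the fix above) and appends
// per-instance variables (a real implementation would de-duplicate keys).
func buildEnv(instanceEnv map[string]string) []string {
	env := os.Environ()
	for key, value := range instanceEnv {
		env = append(env, key+"="+value)
	}
	return env
}

func main() {
	cmd := exec.Command("llama-server", "--port", "8081")
	cmd.Env = buildEnv(map[string]string{"CUDA_VISIBLE_DEVICES": "0"})
	_ = cmd.Start()
}
```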
7994fd05b3 Merge pull request #44 from BobbyL2k/fix/rel-dir-config
fix: InstancesDir and LogsDir not being relative path to DataDir when not set
2025-09-27 21:33:00 +02:00
Anuruth Lertpiya
f496a28f04 fix: InstancesDir and LogsDir not being relative path to DataDir when not set 2025-09-27 18:14:25 +00:00
f9371e876d Merge pull request #43 from BobbyL2k/fix/config-path
fix: llamactl reads config file per documentation
2025-09-27 19:32:13 +02:00
Anuruth Lertpiya
3a979da815 fix: llamactl reads config file per documentation
- Added logging to track config file reading operations
- llamactl now properly reads config files from the expected locations ("llamactl.yaml" and "config.yaml" under current directory)
2025-09-27 17:03:54 +00:00
a824f066ec Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
2025-09-25 23:07:24 +02:00
2cd9d374a7 Add Docker badge to UI 2025-09-25 23:04:24 +02:00
031d6c7017 Update Docker command arguments for llama-server and vllm with volume mounts 2025-09-25 22:51:51 +02:00
282344af23 Fix docker command args building 2025-09-25 22:51:40 +02:00
bc9e0535c3 Refactor command building and argument handling 2025-09-25 22:05:46 +02:00
2d925b473d Add Docker support documentation and configuration for backends 2025-09-24 22:15:21 +02:00
ba0f877185 Fix tests 2025-09-24 21:35:44 +02:00
840a7bc650 Add Docker command handling for backend options and refactor command building 2025-09-24 21:34:54 +02:00
76ac93bedc Implement Docker command handling for Llama, MLX, and vLLM backends 2025-09-24 21:31:58 +02:00
72d2a601c8 Update Docker args in LoadConfig and tests to include 'run --rm' prefix 2025-09-24 21:27:51 +02:00
9a56660f68 Refactor backend configuration to use structured settings and update environment variable handling 2025-09-24 20:31:20 +02:00
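
The dockerized-backend commits mention a "run --rm" prefix and volume mounts. A sketch of that argument assembly, with the flag set inferred from the commit messages and the image name purely illustrative:

```go
package main

import (
	"fmt"
	"os/exec"
)

// dockerArgs wraps a backend's args in "docker run --rm" with volume
// mounts, matching the "'run --rm' prefix" commit above.
func dockerArgs(image string, mounts []string, backendArgs []string) []string {
	args := []string{"run", "--rm"}
	for _, m := range mounts {
		args = append(args, "-v", m)
	}
	args = append(args, image)
	return append(args, backendArgs...)
}

func main() {
	args := dockerArgs(
		"ghcr.io/ggml-org/llama.cpp:server", // illustrative image name
		[]string{"./models:/models"},
		[]string{"--model", "/models/llama-7b.gguf"},
	)
	fmt.Println(exec.Command("docker", args...).String())
}
```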
164 changed files with 22329 additions and 10552 deletions

.dockerignore (new file, 45 lines)

@@ -0,0 +1,45 @@
# Git and version control
.git/
.gitignore

# Documentation
*.md
docs/

# Development files
.vscode/
.idea/

# Build artifacts
webui/node_modules/
webui/dist/
webui/.next/
*.log
*.tmp

# Data directories
data/
models/
logs/

# Test files
*_test.go
**/*_test.go

# CI/CD
.github/

# Local configuration
llamactl.yaml
config.yaml
.env
.env.local

# OS files
.DS_Store
Thumbs.db

# Backup files
*.bak
*.backup
*~
.github/dependabot.yml (new file, 40 lines)

@@ -0,0 +1,40 @@
version: 2
updates:
  # Go modules
  - package-ecosystem: "gomod"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
    open-pull-requests-limit: 5
    commit-message:
      prefix: "chore"
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-patch"]
    groups:
      go-dependencies:
        patterns:
          - "*"
    labels:
      - "dependencies"

  # npm dependencies for webui
  - package-ecosystem: "npm"
    directory: "/webui"
    schedule:
      interval: "weekly"
      day: "monday"
    open-pull-requests-limit: 5
    commit-message:
      prefix: "chore"
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-patch"]
    groups:
      npm-production:
        dependency-type: "production"
      npm-development:
        dependency-type: "development"
    labels:
      - "dependencies"

.github/workflows/docs.yaml

@@ -1,4 +1,4 @@
-name: Build and Deploy Documentation
+name: User Docs
 on:
   push:

.github/workflows/release.yaml

@@ -45,15 +45,23 @@ jobs:
build:
name: Build Binaries
needs: build-webui
runs-on: ubuntu-latest
runs-on: ${{ matrix.runner }}
strategy:
matrix:
goos: [linux, windows, darwin]
goarch: [amd64, arm64]
exclude:
# Windows ARM64 support is limited
- goos: windows
include:
- goos: linux
goarch: amd64
runner: ubuntu-latest
- goos: linux
goarch: arm64
runner: ubuntu-latest
cc: aarch64-linux-gnu-gcc
- goos: darwin
goarch: arm64
runner: macos-latest
- goos: windows
goarch: amd64
runner: windows-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -70,11 +78,19 @@ jobs:
name: webui-dist
path: webui/dist/
- name: Install cross-compilation tools (Linux ARM64 only)
if: matrix.cc != ''
run: |
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu
- name: Build binary
env:
GOOS: ${{ matrix.goos }}
GOARCH: ${{ matrix.goarch }}
CGO_ENABLED: 0
CGO_ENABLED: 1
CC: ${{ matrix.cc }}
shell: bash
run: |
# Set binary extension for Windows
BINARY_NAME="llamactl"
@@ -91,8 +107,10 @@ jobs:
ARCHIVE_OS="macos"
fi
ARCHIVE_NAME="llamactl-${{ github.ref_name }}-${ARCHIVE_OS}-${{ matrix.goarch }}"
if [ "${{ matrix.goos }}" = "windows" ]; then
zip "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
# Use 7z on Windows (pre-installed)
7z a "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
echo "ASSET_PATH=${ARCHIVE_NAME}.zip" >> $GITHUB_ENV
else
tar -czf "${ARCHIVE_NAME}.tar.gz" "${BINARY_NAME}"
@@ -179,4 +197,4 @@ jobs:
with:
files: assets/checksums.txt
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
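The matrix above assigns each target its own runner and, for Linux ARM64, a cross compiler so the `CGO_ENABLED=1` build can link native code. A local reproduction of that leg, assuming the same Debian/Ubuntu toolchain the workflow installs:

```bash
# Mirror the CI cross-compile step for linux/arm64 (toolchain as installed by the workflow)
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu
GOOS=linux GOARCH=arm64 CGO_ENABLED=1 CC=aarch64-linux-gnu-gcc \
  go build -o llamactl ./cmd/server
```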

.gitignore vendored (13 lines changed)

@@ -34,4 +34,15 @@ go.work.sum
node_modules/
dist/
__pycache__/
__pycache__/
site/
# Dev config
llamactl.dev.yaml
# Debug files
__debug*
# Binary
llamactl-*

.vscode/launch.json vendored (5 lines changed)

@@ -9,11 +9,12 @@
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/server/main.go",
"program": "${workspaceFolder}/cmd/server",
"env": {
"GO_ENV": "development",
"LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
"LLAMACTL_CONFIG_PATH": "${workspaceFolder}/llamactl.dev.yaml"
},
"console": "integratedTerminal",
}
]
}


@@ -86,7 +86,7 @@ go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
swag init -g cmd/server/main.go
```
## Pull Request Guidelines

README.md (252 lines changed)

@@ -1,104 +1,35 @@
# llamactl
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
## Features
### 🚀 Easy Model Management
- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
- **State Persistence**: Ensure instances remain intact across server restarts
### 🔗 Universal Compatibility
- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
### 🌐 User-Friendly Interface
- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
- **API Key Authentication**: Separate keys for management vs inference access
### ⚡ Smart Operations
- **Instance Monitoring**: Health checks, auto-restart, log management
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
📚 **[Full Documentation →](https://llamactl.org)**
![Dashboard Screenshot](docs/images/dashboard.png)
## Features
**🚀 Easy Model Management**
- **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality)
- **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits
- **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs
**🔗 Flexible Integration**
- **OpenAI API Compatible**: Drop-in replacement - route requests to different models by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Ready**: Run backends in containers with full GPU support
**🌐 Distributed Deployment**
- **Remote Instances**: Deploy instances on remote hosts
- **Central Management**: Manage everything from a single dashboard with automatic routing
## Quick Start
```bash
# 1. Install backend (one-time setup)
# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# 3. Start the server
llamactl
# Access dashboard at http://localhost:8080
```
## Usage
### Create and manage instances via web dashboard:
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Set model path and backend-specific options
5. Start or stop the instance
### Or use the REST API:
```bash
# Create llama.cpp instance
curl -X POST localhost:8080/api/v1/instances/my-7b-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
# Create MLX instance (macOS)
curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
# Create vLLM instance
curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
-H "Authorization: Bearer your-key" \
-d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
# Use with OpenAI SDK
curl -X POST localhost:8080/v1/chat/completions \
-H "Authorization: Bearer your-key" \
-d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
## Installation
### Option 1: Download Binary (Recommended)
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
1. Install a backend (llama.cpp, MLX, or vLLM) - see [Prerequisites](#prerequisites) below
2. [Download llamactl](#installation) for your platform
3. Run `llamactl` and open http://localhost:8080
4. Create an instance and start inferencing!
## Prerequisites
@@ -112,6 +43,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)
brew install llama.cpp
# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```
**For MLX backend (macOS only):**
@@ -139,9 +71,76 @@ python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
# Or use Docker - no local installation required
```
### Docker Support
llamactl can run backends in Docker containers, eliminating the need for local backend installation:
```yaml
backends:
llama-cpp:
docker:
enabled: true
vllm:
docker:
enabled: true
```
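When `docker.enabled` is true, llamactl starts the backend inside a container instead of invoking a local binary. Going by the default container arguments listed in the Configuration section below, the effective invocation is roughly equivalent to the following (the model path is a placeholder):

```bash
# Illustrative only: approximate llama.cpp container launch with the default args
docker run --rm --network host --gpus all \
  -v ~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp \
  ghcr.io/ggml-org/llama.cpp:server --model /path/to/model.gguf
```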
## Installation
### Option 1: Download Binary (Recommended)
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from the releases page:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Docker (No local backend installation required)
```bash
# Clone repository and build Docker images
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
mkdir -p data/llamacpp data/vllm models
# Build and start llamactl with llama.cpp CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
# Build and start llamactl with vLLM CUDA backend
docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
# Build from source using multi-stage build
docker build -f docker/Dockerfile.source -t llamactl:source .
```
**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
### Option 3: Build from Source
Requires Go 1.24+ and Node.js 22+
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
cd webui && npm ci && npm run build && cd ..
go build -o llamactl ./cmd/server
```
## Usage
1. Open http://localhost:8080
2. Click "Create Instance"
3. Choose backend type (llama.cpp, MLX, or vLLM)
4. Configure your model and options (ports and API keys are auto-assigned)
5. Start the instance and use it with any OpenAI-compatible client (see the example below)
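A minimal request against a started instance, adapted from the REST API examples above (the key and instance name are placeholders):

```bash
curl -X POST localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-inference-key" \
  -d '{"model": "my-instance", "messages": [{"role": "user", "content": "Hello!"}]}'
```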
## Configuration
llamactl works out of the box with sensible defaults.
@@ -151,32 +150,63 @@ server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "-v", "~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp"]
environment: {} # Environment variables for the container
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", "-v", "~/.local/share/llamactl/huggingface:/root/.cache/huggingface"]
environment: {} # Environment variables for the container
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
data_dir: ~/.local/share/llamactl # Main data directory (database, instances, logs), default varies by OS
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
port_range: [8000, 9000] # Port range for instances
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent) [deprecated]
logs_dir: ~/.local/share/llamactl/logs # Logs directory (platform dependent)
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
log_rotation_enabled: true # Enable log rotation (default: true)
log_rotation_max_size: 100 # Max log file size in MB before rotation (default: 100)
log_rotation_compress: false # Compress rotated log files (default: false)
database:
path: ~/.local/share/llamactl/llamactl.db # Database file path (platform dependent)
max_open_connections: 25 # Maximum open database connections
max_idle_connections: 5 # Maximum idle database connections
connection_max_lifetime: 5m # Connection max lifetime
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_inference_auth: true # Require auth for inference endpoints; API keys are created in the web UI
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
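Because every key above has a default, an override file only needs the values you change. A hypothetical minimal override, written to the current directory where llamactl looks for `llamactl.yaml`:

```bash
# Hypothetical minimal override: change only the listen port
cat > llamactl.yaml <<'EOF'
server:
  port: 9090
EOF
llamactl   # picks up ./llamactl.yaml automatically
```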


@@ -1,972 +0,0 @@
// Package apidocs Code generated by swaggo/swag. DO NOT EDIT
package apidocs
import "github.com/swaggo/swag"
const docTemplate = `{
"schemes": {{ marshal .Schemes }},
"swagger": "2.0",
"info": {
"description": "{{escape .Description}}",
"title": "{{.Title}}",
"contact": {},
"license": {
"name": "MIT License",
"url": "https://opensource.org/license/mit/"
},
"version": "{{.Version}}"
},
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all instances managed by the server",
"tags": [
"instances"
],
"summary": "List all instances",
"responses": {
"200": {
"description": "List of instances",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/instance.Process"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the details of a specific instance by name",
"tags": [
"instances"
],
"summary": "Get details of a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"put": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Updates the configuration of a specific instance by name",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Update an instance's configuration",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
"responses": {
"200": {
"description": "Updated instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
],
"summary": "Delete an instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"204": {
"description": "No Content"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/logs": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
],
"summary": "Get logs from a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Number of lines to retrieve (default: all lines)",
"name": "lines",
"in": "query"
}
],
"responses": {
"200": {
"description": "Instance logs",
"schema": {
"type": "string"
}
},
"400": {
"description": "Invalid name format or lines parameter",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/proxy": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/restart": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Restarts a specific instance by name",
"tags": [
"instances"
],
"summary": "Restart a running instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Restarted instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/start": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Starts a specific instance by name",
"tags": [
"instances"
],
"summary": "Start a stopped instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Started instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/stop": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops a specific instance by name",
"tags": [
"instances"
],
"summary": "Stop a running instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Stopped instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the ` + "`" + `Authorization` + "`" + ` header.",
"consumes": [
"application/json"
],
"tags": [
"openai"
],
"summary": "OpenAI-compatible proxy endpoint",
"responses": {
"200": {
"description": "OpenAI response"
},
"400": {
"description": "Invalid request body or instance name",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of instances in a format compatible with OpenAI API",
"tags": [
"openai"
],
"summary": "List instances in OpenAI-compatible format",
"responses": {
"200": {
"description": "List of OpenAI-compatible instances",
"schema": {
"$ref": "#/definitions/server.OpenAIListInstancesResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llamactl command",
"tags": [
"version"
],
"summary": "Get llamactl version",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"restart_delay": {
"description": "seconds",
"type": "integer"
}
}
},
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": {
"type": "object",
"properties": {
"created": {
"description": "Creation time",
"type": "integer"
},
"name": {
"type": "string"
},
"status": {
"description": "Status",
"allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
}
}
},
"server.OpenAIInstance": {
"type": "object",
"properties": {
"created": {
"type": "integer"
},
"id": {
"type": "string"
},
"object": {
"type": "string"
},
"owned_by": {
"type": "string"
}
}
},
"server.OpenAIListInstancesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/definitions/server.OpenAIInstance"
}
},
"object": {
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}`
// SwaggerInfo holds exported Swagger Info so clients can modify it
var SwaggerInfo = &swag.Spec{
Version: "1.0",
Host: "",
BasePath: "/api/v1",
Schemes: []string{},
Title: "llamactl API",
Description: "llamactl is a control server for managing Llama Server instances.",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
LeftDelim: "{{",
RightDelim: "}}",
}
func init() {
swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo)
}


@@ -1,947 +0,0 @@
{
"swagger": "2.0",
"info": {
"description": "llamactl is a control server for managing Llama Server instances.",
"title": "llamactl API",
"contact": {},
"license": {
"name": "MIT License",
"url": "https://opensource.org/license/mit/"
},
"version": "1.0"
},
"basePath": "/api/v1",
"paths": {
"/backends/llama-cpp/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"backends"
],
"summary": "List available devices for llama server",
"responses": {
"200": {
"description": "List of devices",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"backends"
],
"summary": "Get help for llama server",
"responses": {
"200": {
"description": "Help text",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/llama-cpp/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a llama-server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse llama-server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/llama-cpp/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"backends"
],
"summary": "Get version of llama server",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/backends/mlx/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses MLX-LM server command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse mlx_lm.server command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/backends/vllm/parse-command": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Parses a vLLM serve command string into instance options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"backends"
],
"summary": "Parse vllm serve command",
"parameters": [
{
"description": "Command to parse",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.ParseCommandRequest"
}
}
],
"responses": {
"200": {
"description": "Parsed options",
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
},
"400": {
"description": "Invalid request or command",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/instances": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all instances managed by the server",
"tags": [
"instances"
],
"summary": "List all instances",
"responses": {
"200": {
"description": "List of instances",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/instance.Process"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the details of a specific instance by name",
"tags": [
"instances"
],
"summary": "Get details of a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"put": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Updates the configuration of a specific instance by name",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Update an instance's configuration",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
"responses": {
"200": {
"description": "Updated instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/instance.CreateInstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
],
"summary": "Delete an instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"204": {
"description": "No Content"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/logs": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
],
"summary": "Get logs from a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Number of lines to retrieve (default: all lines)",
"name": "lines",
"in": "query"
}
],
"responses": {
"200": {
"description": "Instance logs",
"schema": {
"type": "string"
}
},
"400": {
"description": "Invalid name format or lines parameter",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/proxy": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/restart": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Restarts a specific instance by name",
"tags": [
"instances"
],
"summary": "Restart a running instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Restarted instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/start": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Starts a specific instance by name",
"tags": [
"instances"
],
"summary": "Start a stopped instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Started instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/stop": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops a specific instance by name",
"tags": [
"instances"
],
"summary": "Stop a running instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Stopped instance details",
"schema": {
"$ref": "#/definitions/instance.Process"
}
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.",
"consumes": [
"application/json"
],
"tags": [
"openai"
],
"summary": "OpenAI-compatible proxy endpoint",
"responses": {
"200": {
"description": "OpenAI response"
},
"400": {
"description": "Invalid request body or instance name",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/v1/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of instances in a format compatible with OpenAI API",
"tags": [
"openai"
],
"summary": "List instances in OpenAI-compatible format",
"responses": {
"200": {
"description": "List of OpenAI-compatible instances",
"schema": {
"$ref": "#/definitions/server.OpenAIListInstancesResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llamactl command",
"tags": [
"version"
],
"summary": "Get llamactl version",
"responses": {
"200": {
"description": "Version information",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
}
},
"definitions": {
"backends.BackendType": {
"type": "string",
"enum": [
"llama_cpp",
"mlx_lm",
"vllm"
],
"x-enum-varnames": [
"BackendTypeLlamaCpp",
"BackendTypeMlxLm",
"BackendTypeVllm"
]
},
"instance.CreateInstanceOptions": {
"type": "object",
"properties": {
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"backend_options": {
"type": "object",
"additionalProperties": {}
},
"backend_type": {
"$ref": "#/definitions/backends.BackendType"
},
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"restart_delay": {
"description": "seconds",
"type": "integer"
}
}
},
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": {
"type": "object",
"properties": {
"created": {
"description": "Creation time",
"type": "integer"
},
"name": {
"type": "string"
},
"status": {
"description": "Status",
"allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
}
}
},
"server.OpenAIInstance": {
"type": "object",
"properties": {
"created": {
"type": "integer"
},
"id": {
"type": "string"
},
"object": {
"type": "string"
},
"owned_by": {
"type": "string"
}
}
},
"server.OpenAIListInstancesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/definitions/server.OpenAIInstance"
}
},
"object": {
"type": "string"
}
}
},
"server.ParseCommandRequest": {
"type": "object",
"properties": {
"command": {
"type": "string"
}
}
}
}
}


@@ -1,606 +0,0 @@
basePath: /api/v1
definitions:
backends.BackendType:
enum:
- llama_cpp
- mlx_lm
- vllm
type: string
x-enum-varnames:
- BackendTypeLlamaCpp
- BackendTypeMlxLm
- BackendTypeVllm
instance.CreateInstanceOptions:
properties:
auto_restart:
description: Auto restart
type: boolean
backend_options:
additionalProperties: {}
type: object
backend_type:
$ref: '#/definitions/backends.BackendType'
idle_timeout:
description: Idle timeout
type: integer
max_restarts:
type: integer
on_demand_start:
description: On demand start
type: boolean
restart_delay:
description: seconds
type: integer
type: object
instance.InstanceStatus:
enum:
- 0
- 1
- 2
type: integer
x-enum-varnames:
- Stopped
- Running
- Failed
instance.Process:
properties:
created:
description: Creation time
type: integer
name:
type: string
status:
allOf:
- $ref: '#/definitions/instance.InstanceStatus'
description: Status
type: object
server.OpenAIInstance:
properties:
created:
type: integer
id:
type: string
object:
type: string
owned_by:
type: string
type: object
server.OpenAIListInstancesResponse:
properties:
data:
items:
$ref: '#/definitions/server.OpenAIInstance'
type: array
object:
type: string
type: object
server.ParseCommandRequest:
properties:
command:
type: string
type: object
info:
contact: {}
description: llamactl is a control server for managing Llama Server instances.
license:
name: MIT License
url: https://opensource.org/license/mit/
title: llamactl API
version: "1.0"
paths:
/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
responses:
"200":
description: List of devices
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- backends
/backends/llama-cpp/help:
get:
description: Returns the help text for the llama server command
responses:
"200":
description: Help text
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- backends
/backends/llama-cpp/parse-command:
post:
consumes:
- application/json
description: Parses a llama-server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse llama-server command
tags:
- backends
/backends/llama-cpp/version:
get:
description: Returns the version of the llama server command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- backends
/backends/mlx/parse-command:
post:
consumes:
- application/json
description: Parses MLX-LM server command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse mlx_lm.server command
tags:
- backends
/backends/vllm/parse-command:
post:
consumes:
- application/json
description: Parses a vLLM serve command string into instance options
parameters:
- description: Command to parse
in: body
name: request
required: true
schema:
$ref: '#/definitions/server.ParseCommandRequest'
produces:
- application/json
responses:
"200":
description: Parsed options
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
"400":
description: Invalid request or command
schema:
additionalProperties:
type: string
type: object
security:
- ApiKeyAuth: []
summary: Parse vllm serve command
tags:
- backends
/instances:
get:
description: Returns a list of all instances managed by the server
responses:
"200":
description: List of instances
schema:
items:
$ref: '#/definitions/instance.Process'
type: array
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List all instances
tags:
- instances
/instances/{name}:
delete:
description: Stops and removes a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"204":
description: No Content
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Delete an instance
tags:
- instances
get:
description: Returns the details of a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get details of a specific instance
tags:
- instances
post:
consumes:
- application/json
description: Creates a new instance with the provided configuration options
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: Instance configuration options
in: body
name: options
required: true
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid request body
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Create and start a new instance
tags:
- instances
put:
consumes:
- application/json
description: Updates the configuration of a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: Instance configuration options
in: body
name: options
required: true
schema:
$ref: '#/definitions/instance.CreateInstanceOptions'
responses:
"200":
description: Updated instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Update an instance's configuration
tags:
- instances
/instances/{name}/logs:
get:
description: Returns the logs from a specific instance by name with optional
line limit
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: 'Number of lines to retrieve (default: all lines)'
in: query
name: lines
type: string
responses:
"200":
description: Instance logs
schema:
type: string
"400":
description: Invalid name format or lines parameter
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get logs from a specific instance
tags:
- instances
/instances/{name}/proxy:
get:
description: Forwards HTTP requests to the llama-server instance running on
a specific port
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Request successfully proxied to instance
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
"503":
description: Instance is not running
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
tags:
- instances
post:
description: Forwards HTTP requests to the llama-server instance running on
a specific port
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Request successfully proxied to instance
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
"503":
description: Instance is not running
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
tags:
- instances
/instances/{name}/restart:
post:
description: Restarts a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Restarted instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Restart a running instance
tags:
- instances
/instances/{name}/start:
post:
description: Starts a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Started instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Start a stopped instance
tags:
- instances
/instances/{name}/stop:
post:
description: Stops a specific instance by name
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Stopped instance details
schema:
$ref: '#/definitions/instance.Process'
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Stop a running instance
tags:
- instances
/v1/:
post:
consumes:
- application/json
description: Handles all POST requests to /v1/*, routing to the appropriate
instance based on the request body. Requires API key authentication via the
`Authorization` header.
responses:
"200":
description: OpenAI response
"400":
description: Invalid request body or instance name
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: OpenAI-compatible proxy endpoint
tags:
- openai
/v1/models:
get:
description: Returns a list of instances in a format compatible with OpenAI
API
responses:
"200":
description: List of OpenAI-compatible instances
schema:
$ref: '#/definitions/server.OpenAIListInstancesResponse'
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List instances in OpenAI-compatible format
tags:
- openai
/version:
get:
description: Returns the version of the llamactl command
responses:
"200":
description: Version information
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get llamactl version
tags:
- version
swagger: "2.0"


@@ -1,14 +1,18 @@
package main
import (
"context"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)
// version is set at build time using -ldflags "-X main.version=1.0.0"
@@ -22,6 +26,9 @@ var buildTime string = "unknown"
// @license.name MIT License
// @license.url https://opensource.org/license/mit/
// @basePath /api/v1
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-API-Key
func main() {
// --version flag to print the version
@@ -35,8 +42,7 @@ func main() {
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
log.Printf("Error loading config: %v\nUsing default configuration.", err)
}
// Set version information
@@ -44,24 +50,50 @@ func main() {
cfg.CommitHash = commitHash
cfg.BuildTime = buildTime
// Create the data directory if it doesn't exist
// Create data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
// Create the main data directory
if err := os.MkdirAll(cfg.DataDir, 0755); err != nil {
log.Printf("Error creating data directory %s: %v\nData persistence may not be available.", cfg.DataDir, err)
}
// Create instances directory
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
log.Printf("Error creating instances directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
}
// Create logs directory
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
}
}
// Initialize the instance manager
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
// Initialize database
db, err := database.Open(&database.Config{
Path: cfg.Database.Path,
MaxOpenConnections: cfg.Database.MaxOpenConnections,
MaxIdleConnections: cfg.Database.MaxIdleConnections,
ConnMaxLifetime: cfg.Database.ConnMaxLifetime,
})
if err != nil {
log.Fatalf("Failed to open database: %v", err)
}
// Run database migrations
if err := database.RunMigrations(db); err != nil {
log.Fatalf("Failed to run database migrations: %v", err)
}
// Migrate from JSON files if needed (one-time migration)
if err := migrateFromJSON(&cfg, db); err != nil {
log.Printf("Warning: Failed to migrate from JSON: %v", err)
}
// Initialize the instance manager with dependency injection
instanceManager := manager.New(&cfg, db)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)
handler := server.NewHandler(instanceManager, cfg, db)
// Setup the router with the handler
r := server.SetupRouter(handler)
@@ -78,7 +110,7 @@ func main() {
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
log.Printf("Error starting server: %v\n", err)
}
}()
@@ -86,14 +118,23 @@ func main() {
<-stop
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
// Create shutdown context with timeout
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer shutdownCancel()
// Shutdown HTTP server gracefully
if err := server.Shutdown(shutdownCtx); err != nil {
log.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}
// Wait for all instances to stop
// Stop all instances and cleanup
instanceManager.Shutdown()
if err := db.Close(); err != nil {
log.Printf("Error closing database: %v\n", err)
}
fmt.Println("Exiting llamactl.")
}


@@ -0,0 +1,87 @@
package main
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
)
// migrateFromJSON migrates instances from JSON files to SQLite database
// This is a one-time migration that runs on first startup with existing JSON files.
// Migrated files are moved to a migrated subdirectory to avoid re-importing.
func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
instancesDir := cfg.Instances.InstancesDir
if instancesDir == "" {
return nil // No instances directory configured
}
// Check if instances directory exists
if _, err := os.Stat(instancesDir); os.IsNotExist(err) {
return nil // No instances directory, nothing to migrate
}
// Find all JSON files
files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
if err != nil {
return fmt.Errorf("failed to list instance files: %w", err)
}
if len(files) == 0 {
return nil // No JSON files to migrate
}
log.Printf("Migrating %d instances from JSON to SQLite...", len(files))
// Create migrated directory
migratedDir := filepath.Join(instancesDir, "migrated")
if err := os.MkdirAll(migratedDir, 0755); err != nil {
return fmt.Errorf("failed to create migrated directory: %w", err)
}
// Migrate each JSON file
var migrated int
for _, file := range files {
if err := migrateJSONFile(file, db); err != nil {
log.Printf("Failed to migrate %s: %v", file, err)
continue
}
// Move the file to the migrated directory
destPath := filepath.Join(migratedDir, filepath.Base(file))
if err := os.Rename(file, destPath); err != nil {
log.Printf("Warning: Failed to move %s to migrated directory: %v", file, err)
// Don't fail the migration if we can't move the file
}
migrated++
}
log.Printf("Successfully migrated %d/%d instances to SQLite", migrated, len(files))
return nil
}
// migrateJSONFile migrates a single JSON file to the database
func migrateJSONFile(filename string, db database.InstanceStore) error {
data, err := os.ReadFile(filename)
if err != nil {
return fmt.Errorf("failed to read file: %w", err)
}
var inst instance.Instance
if err := json.Unmarshal(data, &inst); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
if err := db.Save(&inst); err != nil {
return fmt.Errorf("failed to save instance to database: %w", err)
}
log.Printf("Migrated instance %s from JSON to SQLite", inst.Name)
return nil
}


@@ -0,0 +1,23 @@
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.source Normal file (64 lines)

@@ -0,0 +1,64 @@
# WebUI build stage
FROM node:20-alpine AS webui-builder
WORKDIR /webui
# Copy webui package files
COPY webui/package*.json ./
# Install dependencies
RUN npm ci
# Copy webui source
COPY webui/ ./
# Build webui
RUN npm run build
# Go build stage
FROM golang:1.24-alpine AS builder
# Install build dependencies
RUN apk add --no-cache git ca-certificates
# Set working directory
WORKDIR /build
# Copy go mod files
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY cmd/ ./cmd/
COPY pkg/ ./pkg/
COPY docs/ ./docs/
COPY webui/webui.go ./webui/
# Copy built webui from webui-builder
COPY --from=webui-builder /webui/dist ./webui/dist
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
# Final stage
FROM alpine:latest
# Install runtime dependencies
RUN apk --no-cache add ca-certificates
# Create data directory
RUN mkdir -p /data
# Set working directory
WORKDIR /data
# Copy binary from builder
COPY --from=builder /build/llamactl /usr/local/bin/llamactl
# Expose the default port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/Dockerfile.vllm Normal file (20 lines)

@@ -0,0 +1,20 @@
FROM vllm/vllm-openai:latest
# Install curl for downloading llamactl
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Download and install the latest llamactl release
RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
mv llamactl /usr/local/bin/ && \
chmod +x /usr/local/bin/llamactl
# Set working directory
RUN mkdir -p /data
WORKDIR /data
# Expose the default llamactl port
EXPOSE 8080
# Set llamactl as the entrypoint
ENTRYPOINT ["llamactl"]

docker/docker-compose.yml Normal file (56 lines)

@@ -0,0 +1,56 @@
version: '3.8'
services:
llamactl-llamacpp:
build:
context: ..
dockerfile: docker/Dockerfile.llamacpp
image: llamactl:llamacpp-cuda
container_name: llamactl-llamacpp
ports:
- "8080:8080"
volumes:
- ./data/llamacpp:/data
- ./models:/models # Mount models directory
- ~/.cache/llama.cpp:/root/.cache/llama.cpp # Llama.cpp cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Docker mode for nested containers (disabled by default; enable if needed)
- LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped
llamactl-vllm:
build:
context: ..
dockerfile: docker/Dockerfile.vllm
image: llamactl:vllm-cuda
container_name: llamactl-vllm
ports:
- "8081:8080" # Use different port to avoid conflicts
volumes:
- ./data/vllm:/data
- ./models:/models # Mount models directory
- ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
environment:
# Set data directory for persistence
- LLAMACTL_DATA_DIR=/data
# Docker mode for nested containers (disabled by default; enable if needed)
- LLAMACTL_VLLM_DOCKER_ENABLED=false
# vLLM specific environment variables
- CUDA_VISIBLE_DEVICES=all
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
restart: unless-stopped


@@ -1,5 +1,6 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
mike==2.0.0
mkdocs-material==9.6.22
mkdocs==1.6.1
pymdown-extensions==10.16.1
mkdocs-git-revision-date-localized-plugin==1.4.7
mike==2.1.3
neoteroi-mkdocs==1.1.3

docs/api-reference.md Normal file (1 line)

@@ -0,0 +1 @@
[OAD(swagger.yaml)]

docs/configuration.md Normal file (326 lines)

@@ -0,0 +1,326 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
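For example, an environment variable beats the same setting from a config file (values here are hypothetical):
```bash
# config.yaml sets port: 8080, but the environment variable wins for this run
LLAMACTL_PORT=9090 llamactl
```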
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
allowed_headers: ["*"] # Allowed CORS headers (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
data_dir: ~/.local/share/llamactl # Main data directory (database, instances, logs), default varies by OS
instances:
port_range: [8000, 9000] # Port range for instances
configs_dir: data_dir/instances # Instance configs directory
logs_dir: data_dir/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
database:
path: data_dir/llamactl.db # Database file path
max_open_connections: 25 # Maximum open database connections
max_idle_connections: 5 # Maximum idle database connections
connection_max_lifetime: 5m # Connection max lifetime
auth:
require_inference_auth: true # Require auth for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration for multi-node deployment
main: # Default local node (empty config)
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence, first found is used):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
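For example:
```bash
# Point llamactl at an explicit config file location
LLAMACTL_CONFIG_PATH=/etc/llamactl/config.yaml llamactl
```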
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
allowed_headers: ["*"] # CORS allowed headers (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama-cpp:
command: "llama-server"
args: []
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {}
response_headers: {} # Additional response headers to send with responses
vllm:
command: "vllm"
args: ["serve"]
environment: {} # Environment variables for the backend process
docker:
enabled: false # Enable Docker runtime (default: false)
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {}
response_headers: {} # Additional response headers to send with responses
mlx:
command: "mlx_lm.server"
args: []
environment: {} # Environment variables for the backend process
# MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
```
**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
- `enabled`: Boolean flag to enable Docker runtime
- `image`: Docker image to use
- `args`: Additional arguments passed to `docker run`
- `environment`: Environment variables for the container (optional)
> If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required for NGINX to stream responses without buffering.
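A minimal sketch of that header in config, assuming the llama.cpp backend is the one behind NGINX:
```yaml
backends:
  llama-cpp:
    response_headers:
      X-Accel-Buffering: "no"
```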
**Environment Variables:**
**LlamaCpp Backend:**
- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
**MLX Backend:**
- `LLAMACTL_MLX_COMMAND` - MLX executable command
- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
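As an illustration of the formats above (paths and values are hypothetical):
```bash
# Backend command override plus backend process environment
export LLAMACTL_LLAMACPP_COMMAND="/opt/llama.cpp/llama-server"
export LLAMACTL_LLAMACPP_ENV="CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8"
# Response headers use ; as the pair separator
export LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no"
```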
### Data Directory Configuration
```yaml
data_dir: "~/.local/share/llamactl" # Main data directory for database, instances, and logs (default varies by OS)
```
**Environment Variables:**
- `LLAMACTL_DATA_DIRECTORY` - Main data directory path
**Default Data Directory by Platform:**
- **Linux**: `~/.local/share/llamactl`
- **macOS**: `~/Library/Application Support/llamactl`
- **Windows**: `%LOCALAPPDATA%\llamactl` or `%PROGRAMDATA%\llamactl`
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
configs_dir: "instances" # Directory for instance configs, default: data_dir/instances
logs_dir: "logs" # Directory for instance logs, default: data_dir/logs
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
log_rotation_enabled: true # Enable log rotation (default: true)
log_rotation_max_size: 100 # Max log file size in MB before rotation (default: 100)
log_rotation_compress: false # Compress rotated log files (default: false)
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
- `LLAMACTL_LOG_ROTATION_ENABLED` - Enable log rotation (true/false)
- `LLAMACTL_LOG_ROTATION_MAX_SIZE` - Max log file size in MB
- `LLAMACTL_LOG_ROTATION_COMPRESS` - Compress rotated logs (true/false)
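For example (values are illustrative):
```bash
# Both "8000-9000" and "8000,9000" are accepted for the port range
export LLAMACTL_INSTANCE_PORT_RANGE="8000-9000"
export LLAMACTL_MAX_RUNNING_INSTANCES=2
export LLAMACTL_DEFAULT_AUTO_RESTART=false
```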
### Database Configuration
```yaml
database:
path: "llamactl.db" # Database file path, default: data_dir/llamactl.db
max_open_connections: 25 # Maximum open database connections (default: 25)
max_idle_connections: 5 # Maximum idle database connections (default: 5)
connection_max_lifetime: 5m # Connection max lifetime (default: 5m)
```
**Environment Variables:**
- `LLAMACTL_DATABASE_PATH` - Database file path (relative to data_dir or absolute)
- `LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS` - Maximum open database connections
- `LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS` - Maximum idle database connections
- `LLAMACTL_DATABASE_CONN_MAX_LIFETIME` - Connection max lifetime (e.g., "5m", "1h")
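For example, to keep the database outside the data directory (the path is illustrative):
```bash
# An absolute path bypasses the data_dir-relative default
export LLAMACTL_DATABASE_PATH=/var/lib/llamactl/llamactl.db
export LLAMACTL_DATABASE_CONN_MAX_LIFETIME=1h
```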
### Authentication Configuration
llamactl supports two types of authentication:
- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Managing Inference API Keys:**
Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:
1. Open the web UI and log in with a management API key
2. Navigate to **Settings → API Keys**
3. Click **Create API Key**
4. Configure the key:
- **Name**: A descriptive name for the key
- **Expiration**: Optional expiration date
- **Permissions**: Grant access to all instances or specific instances only
5. Copy the generated key - it won't be shown again
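Once created, the key is sent as a bearer token on inference requests (the key below is a placeholder):
```bash
curl http://localhost:8080/v1/models \
  -H "Authorization: Bearer llamactl-your-key-here"
```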
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
### Remote Node Configuration
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
```yaml
local_node: "main" # Name of the local node (default: "main")
nodes: # Node configuration map
main: # Local node (empty address means local)
address: "" # Not used for local node
api_key: "" # Not used for local node
worker1: # Remote worker node
address: "http://192.168.1.10:8080"
api_key: "worker1-api-key" # Management API key for authentication
```
**Node Configuration Fields:**
- `local_node`: Specifies which node in the `nodes` map represents the local node. The name must match exactly how other nodes refer to this node in their own configurations.
- `nodes`: Map of node configurations
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
- `api_key`: Management API key for authenticating with the remote node
**Environment Variables:**
- `LLAMACTL_LOCAL_NODE` - Name of the local node

docs/css/css-v1.1.3.css Normal file (1814 lines; diff suppressed because it is too large)

docs/docs.go Normal file (2228 lines; diff suppressed because it is too large)

docs/fix_line_endings.py Normal file (60 lines)

@@ -0,0 +1,60 @@
"""
MkDocs hook to fix line endings for proper rendering.
Automatically adds two spaces at the end of lines that need line breaks.
"""
import re
def on_page_markdown(markdown, page, config, **kwargs):
"""
Fix line endings in markdown content for proper MkDocs rendering.
Adds two spaces at the end of lines that need line breaks.
"""
lines = markdown.split('\n')
processed_lines = []
in_code_block = False
for i, line in enumerate(lines):
stripped = line.strip()
# Track code blocks
if stripped.startswith('```'):
in_code_block = not in_code_block
processed_lines.append(line)
continue
# Skip processing inside code blocks
if in_code_block:
processed_lines.append(line)
continue
# Skip empty lines
if not stripped:
processed_lines.append(line)
continue
# Skip lines that shouldn't have line breaks:
# - Headers (# ## ###)
# - Blockquotes (>)
# - Table rows (|)
# - Lines already ending with two spaces
# - YAML front matter and HTML tags
# - Standalone punctuation lines
if (stripped.startswith('#') or
stripped.startswith('>') or
'|' in stripped or
line.endswith(' ') or
stripped.startswith('---') or
stripped.startswith('<') or
stripped.endswith('>') or
stripped in ('.', '!', '?', ':', ';', '```', '---', ',')):
processed_lines.append(line)
continue
# Add two spaces to lines that end with regular text or most punctuation
if stripped and not in_code_block:
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)
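Note that a hook like the one above only takes effect if it is registered in `mkdocs.yml`; a minimal sketch, assuming the hook support introduced in MkDocs 1.4+ (the pinned mkdocs 1.6.1 supports it):
```yaml
hooks:
  - docs/fix_line_endings.py
```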


@@ -1,166 +0,0 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
backends:
llama_executable: llama-server # Path to llama-server executable
mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
vllm_executable: vllm # Path to vllm executable
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Backend Configuration
```yaml
backends:
llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server")
mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
vllm_executable: "vllm" # Path to vllm executable (default: "vllm")
```
**Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.


@@ -1,105 +0,0 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install via pip (requires Python 3.8+, GPU required)
pip install vllm
# Or in a virtual environment (recommended)
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
# For production deployments, consider container-based installation
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!


@@ -1,175 +0,0 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Login with the management API key. By default it is generated during server startup. Copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or identifier for your chosen backend
- **Additional Options**: Backend-specific parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
}
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
}
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
}
}
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/instances
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/instances/my-model \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf"
}
}'
# Start an instance
curl -X POST http://localhost:8080/api/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Manage instances [Managing Instances](../user-guide/managing-instances.md)
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

Binary image files changed (not shown): 69 KiB → 66 KiB and 31 KiB → 45 KiB.


@@ -14,20 +14,20 @@ Welcome to the Llamactl documentation!
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
- [Installation Guide](installation.md) - Get Llamactl up and running
- [Configuration Guide](configuration.md) - Detailed configuration options
- [Quick Start](quick-start.md) - Your first steps with Llamactl
- [Managing Instances](managing-instances.md) - Instance lifecycle management
- [API Reference](api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Check the [Troubleshooting](troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
- Review the [Configuration Guide](configuration.md) for advanced settings
## License

docs/installation.md Normal file (174 lines)

@@ -0,0 +1,174 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
### Backend Dependencies
llamactl supports multiple backends. Install at least one:
**For llama.cpp backend (all platforms):**
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source (see the llama.cpp docs).
**For MLX backend (macOS only):**
MLX provides optimized inference on Apple Silicon. Install MLX-LM:
```bash
# Install via pip (requires Python 3.8+)
pip install mlx-lm
# Or in a virtual environment (recommended)
python -m venv mlx-env
source mlx-env/bin/activate
pip install mlx-lm
```
Note: The MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
**For vLLM backend:**
vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
```bash
# Install in a virtual environment
python -m venv vllm-env
source vllm-env/bin/activate
pip install vllm
```
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Docker
llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
**Available Dockerfiles (CUDA):**
- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
**Using Docker Compose**
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Create directories for data and models
mkdir -p data/llamacpp data/vllm models
# Start llamactl with llama.cpp backend
docker-compose -f docker/docker-compose.yml up -d llamactl-llamacpp
# Or start llamactl with vLLM backend
docker-compose -f docker/docker-compose.yml up -d llamactl-vllm
```
Access the dashboard at:
- llamactl with llama.cpp: http://localhost:8080
- llamactl with vLLM: http://localhost:8081
**Using Docker Build and Run**
1. llamactl with llama.cpp CUDA:
```bash
docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
docker run -d \
--name llamactl-llamacpp \
--gpus all \
-p 8080:8080 \
-v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
llamactl:llamacpp-cuda
```
2. llamactl with vLLM CUDA:
```bash
docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
docker run -d \
--name llamactl-vllm \
--gpus all \
-p 8080:8080 \
-v ~/.cache/huggingface:/root/.cache/huggingface \
llamactl:vllm-cuda
```
3. llamactl built from source:
```bash
docker build -f docker/Dockerfile.source -t llamactl:source .
docker run -d \
--name llamactl \
-p 8080:8080 \
llamactl:source
```
### Option 3: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Remote Node Installation
For deployments with remote nodes:
- Install llamactl on each node using any of the methods above
- Configure API keys for authentication between nodes
- Ensure node names are consistent across all configurations
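As a sketch, the worker machine's own config names itself consistently with how other nodes refer to it (the node name and key are hypothetical; see the [Configuration](configuration.md) guide):
```yaml
# Worker's config.yaml: local_node must match the name
# the central node uses for this machine
local_node: "worker1"
auth:
  management_keys:
    - "worker1-api-key"
```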
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.

docs/managing-instances.md Normal file (254 lines)

@@ -0,0 +1,254 @@
# Managing Instances
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](images/dashboard.png)
### Authentication
Llamactl uses a **Management API Key** to authenticate requests to the management API (creating, starting, stopping instances). All curl examples below use `<token>` as a placeholder - replace this with your actual Management API Key.
By default, authentication is required. If you don't configure a management API key in your configuration file, llamactl will auto-generate one and print it to the terminal on startup. See the [Configuration](configuration.md) guide for details.
For Web UI access:
1. Navigate to the web UI
2. Enter your Management API Key
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
**Via Web UI**
![Create Instance Screenshot](images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. *Optional*: Click **"Import"** to load a previously exported configuration
**Instance Settings:**
3. Enter a unique **Instance Name** (required)
4. **Select Node**: Choose which node to deploy the instance to
5. Configure **Auto Restart** settings:
- Enable automatic restart on failure
- Set max restarts and delay between attempts
6. Configure basic instance options:
- **Idle Timeout**: Minutes before stopping idle instance
- **On Demand Start**: Start instance only when needed
**Backend Configuration:**
7. **Select Backend Type**:
- **Llama Server**: For GGUF models using llama-server
- **MLX LM**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
8. *Optional*: Click **"Parse Command"** to import settings from an existing backend command
9. Configure **Execution Context**:
- **Enable Docker**: Run backend in Docker container
- **Command Override**: Custom path to backend executable
- **Environment Variables**: Custom environment variables
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
10. Configure **Basic Backend Options** (varies by backend):
- **llama.cpp**: Model path, threads, context size, GPU layers, etc.
- **MLX**: Model identifier, temperature, max tokens, etc.
- **vLLM**: Model identifier, tensor parallel size, GPU memory utilization, etc.
11. *Optional*: Expand **Advanced Backend Options** for additional settings
12. *Optional*: Add **Extra Args** as key-value pairs for custom command-line arguments
13. Click **"Create"** to save the instance
**Via API**
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32,
"flash_attn": "on"
},
"auto_restart": true,
"max_restarts": 3,
"docker_enabled": false,
"command_override": "/opt/llama-server-dev",
"nodes": ["main"]
}'
# Create vLLM instance with environment variables
curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1"
},
"nodes": ["worker1", "worker2"]
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
},
"nodes": ["main"]
}'
```
## Start Instance
**Via Web UI**
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" once the backend is up
**Via API**
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/start \
-H "Authorization: Bearer <token>"
```
## Stop Instance
**Via Web UI**
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
**Via API**
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/stop \
-H "Authorization: Bearer <token>"
```
## Edit Instance
**Via Web UI**
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
**Via API**
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/v1/instances/{name} \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## Export Instance
**Via Web UI**
1. Click the **"More actions"** button (three dots) on an instance card
2. Click **"Export"** to download the instance configuration as a JSON file
## View Logs
**Via Web UI**
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
**Via API**
Check instance status in real-time:
```bash
# Get instance logs
curl http://localhost:8080/api/v1/instances/{name}/logs \
-H "Authorization: Bearer <token>"
```
## Delete Instance
**Via Web UI**
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
**Via API**
```bash
curl -X DELETE http://localhost:8080/api/v1/instances/{name} \
-H "Authorization: Bearer <token>"
```
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Proxy requests to the instance
curl http://localhost:8080/api/v1/instances/{name}/proxy/ \
-H "Authorization: Bearer <token>"
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health
**Via Web UI**
1. The health status badge is displayed on each instance card
**Via API**
Check the health status of your instances:
```bash
curl http://localhost:8080/api/v1/instances/{name}/proxy/health \
-H "Authorization: Bearer <token>"
```

docs/quick-start.md Normal file (274 lines)

@@ -0,0 +1,274 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
**Before you begin:** Ensure you have at least one backend installed (llama.cpp, MLX, or vLLM). See the [Installation Guide](installation.md#prerequisites) for backend setup.
## Core Concepts
Before you start, let's clarify a few key terms:
- **Instance**: A running backend server that serves a specific model. Each instance has a unique name and runs independently.
- **Backend**: The inference engine that actually runs the model (llama.cpp, MLX, or vLLM). You need at least one backend installed before creating instances.
- **Node**: In multi-machine setups, a node represents one machine. Most users will just use the default "main" node for single-machine deployments.
- **Proxy Architecture**: Llamactl acts as a proxy in front of your instances. You make requests to llamactl (e.g., `http://localhost:8080/v1/chat/completions`), and it routes them to the appropriate backend instance. This means you don't need to track individual instance ports or endpoints.
## Authentication
Llamactl uses two types of API keys:
- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.
By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.
## Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
```
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ MANAGEMENT AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Management API Key:
sk-management-...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• This key is auto-generated and will change on restart
• For production, add explicit management_keys to your configuration
• Copy this key before it disappears from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Llamactl server listening on 0.0.0.0:8080
```
Copy the **Management API Key** from the terminal - you'll need it to access the web UI.
By default, Llamactl will start on `http://localhost:8080`.
## Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Login with the management API key from the terminal output.
You should see the Llamactl web interface.
## Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups)
- **Backend Type**: Choose from llama.cpp, MLX, or vLLM
- **Model**: Model path or huggingface repo
- **Additional Options**: Backend-specific parameters
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.
!!! note "Remote Node Deployment"
If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
3. Click "Create Instance"
## Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Create an Inference API Key
To make inference requests to your instances, you'll need an inference API key:
1. In the web UI, click the **Settings** icon (gear icon in the top-right)
2. Navigate to the **API Keys** tab
3. Click **Create API Key**
4. Configure your key:
- **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
- **Expiration**: Optionally set an expiration date for the key
- **Permissions**: Choose whether the key can access all instances or only specific ones
5. Click **Create**
6. **Copy the generated key** - it will only be shown once!
The key will look like: `llamactl-...`
You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
## Example Configurations
Here are basic example configurations for each backend:
**llama.cpp backend:**
```json
{
"name": "llama2-7b",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/llama-2-7b-chat.gguf",
"threads": 4,
"ctx_size": 2048,
"gpu_layers": 32
},
"nodes": ["main"]
}
```
**MLX backend (macOS only):**
```json
{
"name": "mistral-mlx",
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"max_tokens": 2048
},
"nodes": ["main"]
}
```
**vLLM backend:**
```json
{
"name": "dialogpt-vllm",
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"nodes": ["main"]
}
```
**Remote node deployment example:**
```json
{
"name": "distributed-model",
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}
```
## Docker Support
Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
```yaml
backends:
vllm:
command: "vllm"
args: ["serve"]
docker:
enabled: true
image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances (authenticate with your management API key)
curl http://localhost:8080/api/v1/instances \
  -H "Authorization: Bearer <management-api-key>"
# Create a new llama.cpp instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <management-api-key>" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'
# Start an instance
curl -X POST http://localhost:8080/api/v1/instances/my-model/start \
  -H "Authorization: Bearer <management-api-key>"
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="your-inference-api-key" # Inference API key created in the web UI (any value if auth is disabled)
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
!!! note "API Key"
If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
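If you are already using the OpenAI Python client from the section above, the same listing is available through the client; a short sketch reusing that setup:
```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="your-inference-api-key")
for model in client.models.list():
    print(model.id)  # instance names appear as model IDs
```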
## Next Steps
- Learn how to manage instances in the [Managing Instances](managing-instances.md) guide
- Explore the [API Reference](api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

docs/swagger.json (new file, 2203 lines; diff suppressed because it is too large)

docs/swagger.yaml (new file, 1441 lines; diff suppressed because it is too large)

docs/troubleshooting.md (new file, 193 lines)

@@ -0,0 +1,193 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration prevents llamactl from starting
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Instance Fails to Start
**Problem:** Instance fails to start or immediately stops
**Solutions:**
1. **Check instance logs** to see the actual error:
```bash
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
2. **Verify backend is installed:**
- **llama.cpp**: Ensure `llama-server` is in PATH
- **MLX**: Ensure `mlx-lm` Python package is installed
- **vLLM**: Ensure `vllm` Python package is installed
3. **Check model path and format:**
- Use absolute paths to model files
- Verify model format matches backend (GGUF for llama.cpp, etc.)
4. **Verify backend command configuration:**
- Check that the backend `command` is correctly configured in the global config
- For virtual environments, specify the full path to the command (e.g., `/path/to/venv/bin/mlx_lm.server`); see the YAML sketch after this list
- See the [Configuration Guide](configuration.md) for backend configuration details
- Test the backend directly (see [Backend-Specific Issues](#backend-specific-issues) below)
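For example, pointing the MLX backend at a virtual environment might look like the following. This is a minimal sketch; the `mlx` backend key is assumed from the default configuration layout:
```yaml
backends:
  mlx:
    command: "/path/to/venv/bin/mlx_lm.server"
```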
### Backend-Specific Issues
**Problem:** Model loading, memory, GPU, or performance issues
Most model-specific issues (memory, GPU configuration, performance tuning) are backend-specific and should be resolved by consulting the respective backend documentation:
**llama.cpp:**
- [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp)
- [llama-server README](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
**MLX:**
- [MLX-LM GitHub](https://github.com/ml-explore/mlx-lm)
- [MLX-LM Server Guide](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
**vLLM:**
- [vLLM Documentation](https://docs.vllm.ai/en/stable/)
- [OpenAI Compatible Server](https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html)
- [vllm serve Command](https://docs.vllm.ai/en/stable/cli/serve.html#vllm-serve)
**Testing backends directly:**
Testing your model and configuration directly with the backend helps determine if the issue is with llamactl or the backend itself:
```bash
# llama.cpp
llama-server --model /path/to/model.gguf --port 8081
# MLX
mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081
# vLLM
vllm serve microsoft/DialoGPT-medium --port 8081
```
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests fail with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure management API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
```
For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key.
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Remote Node Issues
### Node Configuration
**Problem:** Remote instances not appearing or cannot be managed
**Solutions:**
1. **Verify node configuration:**
```yaml
local_node: "main" # Must match a key in nodes map
nodes:
main:
address: "" # Empty for local node
worker1:
address: "http://worker1.internal:8080"
api_key: "secure-key" # Must match worker1's management key
```
2. **Check node name consistency:**
- `local_node` on each node must match what other nodes call it
- Node names are case-sensitive
3. **Test remote node connectivity:**
```bash
curl -H "Authorization: Bearer remote-node-key" \
http://remote-node:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

docs/user-guide/api-reference.md (deleted)

@@ -1,508 +0,0 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/server/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/server/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/server/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopped",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (default: all lines; passing `-1` also explicitly returns all)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing instance name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"model": "/models/llama-2-7b.gguf"
}'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Backend-Specific Endpoints
### Parse Commands
Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
#### Parse Llama.cpp Command
Parse a llama-server command string into instance options.
```http
POST /api/v1/backends/llama-cpp/parse-command
```
**Request Body:**
```json
{
"command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
}
```
**Response:**
```json
{
"backend_type": "llama_cpp",
"llama_server_options": {
"model": "/path/to/model.gguf",
"ctx_size": 2048,
"port": 8080
}
}
```
#### Parse MLX-LM Command
Parse an MLX-LM server command string into instance options.
```http
POST /api/v1/backends/mlx/parse-command
```
**Request Body:**
```json
{
"command": "mlx_lm.server --model /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "mlx_lm",
"mlx_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
#### Parse vLLM Command
Parse a vLLM serve command string into instance options.
```http
POST /api/v1/backends/vllm/parse-command
```
**Request Body:**
```json
{
"command": "vllm serve /path/to/model --port 8080"
}
```
**Response:**
```json
{
"backend_type": "vllm",
"vllm_server_options": {
"model": "/path/to/model",
"port": 8080
}
}
```
**Error Responses for Parse Commands:**
- `400 Bad Request`: Invalid request body, empty command, or parse error
- `500 Internal Server Error`: Encoding error
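These parse endpoints pair naturally with instance creation: parse an existing command line, then create an instance from the result. Below is a minimal sketch in Python, assuming a local server with authentication disabled; note that the parsed options come back under a backend-specific key (`llama_server_options` here) and must be remapped to `backend_options` for the create call:
```python
import json
import urllib.request

BASE = "http://localhost:8080/api/v1"

def post(path: str, body: dict) -> dict:
    req = urllib.request.Request(
        BASE + path, data=json.dumps(body).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read().decode())

# Turn an existing llama-server invocation into structured options
parsed = post("/backends/llama-cpp/parse-command",
              {"command": "llama-server -m /path/to/model.gguf -c 2048"})

# Remap to the shape the create endpoint expects, then create the instance
body = {"backend_type": parsed["backend_type"],
        "backend_options": parsed["llama_server_options"]}
print(post("/instances/parsed-model", body))
```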
## Auto-Generated Documentation
The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.

docs/user-guide/managing-instances.md (deleted)

@@ -1,223 +0,0 @@
# Managing Instances
Learn how to effectively manage your llama.cpp, MLX, and vLLM instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
4. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
5. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
6. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
7. Click **"Create"** to save the instance
### Via API
```bash
# Create llama.cpp instance with local model file
curl -X POST http://localhost:8080/api/instances/my-llama-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
}
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "mlx_lm",
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
}
}'
```
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" when ready
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Check instance status in real-time:
```bash
# Get instance logs
curl http://localhost:8080/api/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
```bash
# Proxy a request to the instance root
curl http://localhost:8080/api/instances/{name}/proxy/
```
All backends provide OpenAI-compatible endpoints. Check the respective documentation:
- [llama-server docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
- [MLX-LM docs](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
- [vLLM docs](https://docs.vllm.ai/en/latest/)
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```

docs/user-guide/troubleshooting.md (deleted)

@@ -1,160 +0,0 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

go.mod (14 changed lines)

@@ -3,10 +3,14 @@ module llamactl
go 1.24.5
require (
github.com/DeRuina/timberjack v1.3.9
github.com/go-chi/chi/v5 v5.2.2
github.com/go-chi/cors v1.2.2
github.com/golang-migrate/migrate/v4 v4.19.1
github.com/mattn/go-sqlite3 v1.14.24
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.5
golang.org/x/crypto v0.46.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -17,10 +21,12 @@ require (
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/swaggo/files v1.0.1 // indirect
golang.org/x/mod v0.26.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/tools v0.35.0 // indirect
golang.org/x/mod v0.29.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.39.0 // indirect
golang.org/x/tools v0.38.0 // indirect
)

go.sum (40 changed lines)

@@ -1,7 +1,11 @@
github.com/DeRuina/timberjack v1.3.9 h1:6UXZ1I7ExPGTX/1UNYawR58LlOJUHKBPiYC7WQ91eBo=
github.com/DeRuina/timberjack v1.3.9/go.mod h1:RLoeQrwrCGIEF8gO5nV5b/gMD0QIy7bzQhBUgpp1EqE=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE=
@@ -14,18 +18,26 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0=
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
@@ -39,25 +51,29 @@ github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=
golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -68,8 +84,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

mkdocs.yml

@@ -25,8 +25,8 @@ theme:
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.tabs.sticky
- toc.integrate
- navigation.top
- search.highlight
- search.share
@@ -49,14 +49,12 @@ markdown_extensions:
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
- Installation: installation.md
- Quick Start: quick-start.md
- Configuration: configuration.md
- Managing Instances: managing-instances.md
- API Reference: api-reference.md
- Troubleshooting: troubleshooting.md
plugins:
- search
@@ -66,9 +64,12 @@ plugins:
css_dir: css
javascript_dir: js
canonical_version: null
- neoteroi.mkdocsoad:
use_pymdownx: true
hooks:
- docs/readme_sync.py
- docs/fix_line_endings.py
extra:
version:
@@ -77,3 +78,6 @@ extra:
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl
extra_css:
- css/css-v1.1.3.css

pkg/auth/hash.go (new file, 73 lines)

@@ -0,0 +1,73 @@
package auth
import (
"crypto/rand"
"crypto/subtle"
"encoding/base64"
"fmt"
"strings"
"golang.org/x/crypto/argon2"
)
const (
// Argon2 parameters
time uint32 = 1
memory uint32 = 64 * 1024 // 64 MB
threads uint8 = 4
keyLen uint32 = 32
saltLen uint32 = 16
)
// HashKey hashes an API key using Argon2id
func HashKey(plainTextKey string) (string, error) {
// Generate random salt
salt := make([]byte, saltLen)
if _, err := rand.Read(salt); err != nil {
return "", fmt.Errorf("failed to generate salt: %w", err)
}
// Derive key using Argon2id
hash := argon2.IDKey([]byte(plainTextKey), salt, time, memory, threads, keyLen)
// Format: $argon2id$v=19$m=65536,t=1,p=4$<base64-salt>$<base64-hash>
saltB64 := base64.RawStdEncoding.EncodeToString(salt)
hashB64 := base64.RawStdEncoding.EncodeToString(hash)
return fmt.Sprintf("$argon2id$v=19$m=%d,t=%d,p=%d$%s$%s", memory, time, threads, saltB64, hashB64), nil
}
// VerifyKey verifies a plain-text key against an Argon2id hash
func VerifyKey(plainTextKey, hash string) bool {
// Parse the hash format
parts := strings.Split(hash, "$")
if len(parts) != 6 || parts[1] != "argon2id" {
return false
}
// Extract parameters
var version, time, memory, threads int
if _, err := fmt.Sscanf(parts[2], "v=%d", &version); err != nil || version != 19 {
return false
}
if _, err := fmt.Sscanf(parts[3], "m=%d,t=%d,p=%d", &memory, &time, &threads); err != nil {
return false
}
// Decode salt and hash
salt, err := base64.RawStdEncoding.DecodeString(parts[4])
if err != nil {
return false
}
expectedHash, err := base64.RawStdEncoding.DecodeString(parts[5])
if err != nil {
return false
}
// Compute hash of the provided key
computedHash := argon2.IDKey([]byte(plainTextKey), salt, uint32(time), uint32(memory), uint8(threads), uint32(len(expectedHash)))
// Compare hashes using constant-time comparison
return subtle.ConstantTimeCompare(computedHash, expectedHash) == 1
}
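For interoperability testing, a hash produced by HashKey can be checked outside Go with any Argon2id implementation that accepts the same parameters. Below is a sketch using the third-party argon2-cffi package (an assumption; it is not part of this repository). Note that Go's RawStdEncoding is unpadded base64, so padding must be restored before decoding:
```python
import base64
from argon2.low_level import Type, hash_secret_raw  # pip install argon2-cffi

def verify(plain_key: str, stored: str) -> bool:
    # stored: $argon2id$v=19$m=65536,t=1,p=4$<b64 salt>$<b64 hash>
    _, alg, _version, params, salt_b64, hash_b64 = stored.split("$")
    assert alg == "argon2id"
    m, t, p = (int(kv.split("=")[1]) for kv in params.split(","))
    pad = lambda s: s + "=" * (-len(s) % 4)  # restore base64 padding
    salt = base64.b64decode(pad(salt_b64))
    expected = base64.b64decode(pad(hash_b64))
    got = hash_secret_raw(plain_key.encode(), salt, time_cost=t,
                          memory_cost=m, parallelism=p,
                          hash_len=len(expected), type=Type.ID)
    # The Go side compares in constant time; plain == is fine for offline tests
    return got == expected
```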

pkg/auth/key.go (new file, 46 lines)

@@ -0,0 +1,46 @@
package auth
import (
"crypto/rand"
"encoding/hex"
"fmt"
)
type PermissionMode string
const (
PermissionModeAllowAll PermissionMode = "allow_all"
PermissionModePerInstance PermissionMode = "per_instance"
)
type APIKey struct {
ID int
KeyHash string
Name string
UserID string
PermissionMode PermissionMode
ExpiresAt *int64
CreatedAt int64
UpdatedAt int64
LastUsedAt *int64
}
type KeyPermission struct {
KeyID int
InstanceID int
}
// GenerateKey generates a cryptographically secure API key with the given prefix
func GenerateKey(prefix string) (string, error) {
// Generate 32 random bytes
bytes := make([]byte, 32)
_, err := rand.Read(bytes)
if err != nil {
return "", fmt.Errorf("failed to generate random bytes: %w", err)
}
// Convert to hex (64 characters)
hexStr := hex.EncodeToString(bytes)
return fmt.Sprintf("%s-%s", prefix, hexStr), nil
}
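The resulting keys are simply `<prefix>-<64 hex characters>`. For generating test fixtures outside Go, the equivalent one-liner in Python (a sketch, not part of llamactl):
```python
import secrets

def generate_key(prefix: str) -> str:
    return f"{prefix}-{secrets.token_hex(32)}"  # 32 random bytes -> 64 hex chars

print(generate_key("llamactl"))  # e.g. llamactl-3f9a...
```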


@@ -1,10 +1,276 @@
package backends
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/validation"
"maps"
)
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
BackendTypeMlxLm BackendType = "mlx_lm"
BackendTypeVllm BackendType = "vllm"
// BackendTypeMlxVlm BackendType = "mlx_vlm" // Future expansion
)
type backend interface {
BuildCommandArgs() []string
BuildDockerArgs() []string
GetPort() int
SetPort(int)
GetHost() string
Validate() error
ParseCommand(string) (any, error)
}
var backendConstructors = map[BackendType]func() backend{
BackendTypeLlamaCpp: func() backend { return &LlamaServerOptions{} },
BackendTypeMlxLm: func() backend { return &MlxServerOptions{} },
BackendTypeVllm: func() backend { return &VllmServerOptions{} },
}
type Options struct {
BackendType BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Backend-specific options
LlamaServerOptions *LlamaServerOptions `json:"-"`
MlxServerOptions *MlxServerOptions `json:"-"`
VllmServerOptions *VllmServerOptions `json:"-"`
}
func (o *Options) UnmarshalJSON(data []byte) error {
type Alias Options
aux := &struct {
*Alias
}{
Alias: (*Alias)(o),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Create backend from constructor map
if o.BackendOptions != nil {
constructor, exists := backendConstructors[o.BackendType]
if !exists {
return fmt.Errorf("unsupported backend type: %s", o.BackendType)
}
backend := constructor()
optionsData, err := json.Marshal(o.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
if err := json.Unmarshal(optionsData, backend); err != nil {
return fmt.Errorf("failed to unmarshal backend options: %w", err)
}
// Store in the appropriate typed field for backward compatibility
o.setBackendOptions(backend)
}
return nil
}
func (o *Options) MarshalJSON() ([]byte, error) {
// Get backend and marshal it
var backendOptions map[string]any
backend := o.getBackend()
if backend != nil {
optionsData, err := json.Marshal(backend)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
// Create a new map to avoid concurrent map writes
backendOptions = make(map[string]any)
if err := json.Unmarshal(optionsData, &backendOptions); err != nil {
return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err)
}
}
return json.Marshal(&struct {
BackendType BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
}{
BackendType: o.BackendType,
BackendOptions: backendOptions,
})
}
// setBackendOptions stores the backend in the appropriate typed field
func (o *Options) setBackendOptions(bcknd backend) {
switch v := bcknd.(type) {
case *LlamaServerOptions:
o.LlamaServerOptions = v
case *MlxServerOptions:
o.MlxServerOptions = v
case *VllmServerOptions:
o.VllmServerOptions = v
}
}
func (o *Options) getBackendSettings(backendConfig *config.BackendConfig) *config.BackendSettings {
switch o.BackendType {
case BackendTypeLlamaCpp:
return &backendConfig.LlamaCpp
case BackendTypeMlxLm:
return &backendConfig.MLX
case BackendTypeVllm:
return &backendConfig.VLLM
default:
return nil
}
}
// getBackend returns the actual backend implementation
func (o *Options) getBackend() backend {
switch o.BackendType {
case BackendTypeLlamaCpp:
return o.LlamaServerOptions
case BackendTypeMlxLm:
return o.MlxServerOptions
case BackendTypeVllm:
return o.VllmServerOptions
default:
return nil
}
}
// isDockerEnabled checks if Docker is enabled with an optional override
func (o *Options) isDockerEnabled(backend *config.BackendSettings, dockerEnabledOverride *bool) bool {
// Check if backend supports Docker
if backend.Docker == nil {
return false
}
// MLX doesn't support Docker
if o.BackendType == BackendTypeMlxLm {
return false
}
// Check for instance-level override
if dockerEnabledOverride != nil {
return *dockerEnabledOverride
}
// Fall back to config value
return backend.Docker.Enabled
}
func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig, dockerEnabled *bool) bool {
backendSettings := o.getBackendSettings(backendConfig)
return o.isDockerEnabled(backendSettings, dockerEnabled)
}
// GetCommand builds the command to run the backend
func (o *Options) GetCommand(backendConfig *config.BackendConfig, dockerEnabled *bool, commandOverride string) string {
backendSettings := o.getBackendSettings(backendConfig)
// Determine if Docker is enabled
useDocker := o.isDockerEnabled(backendSettings, dockerEnabled)
if useDocker {
return "docker"
}
// Check for command override (only applies when not in Docker mode)
if commandOverride != "" {
return commandOverride
}
// Fall back to config command
return backendSettings.Command
}
// buildCommandArgs builds command line arguments for the backend
func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig, dockerEnabled *bool) []string {
var args []string
backendSettings := o.getBackendSettings(backendConfig)
backend := o.getBackend()
if backend == nil {
return args
}
if o.isDockerEnabled(backendSettings, dockerEnabled) {
// For Docker, start with Docker args
args = append(args, backendSettings.Docker.Args...)
args = append(args, backendSettings.Docker.Image)
args = append(args, backend.BuildDockerArgs()...)
} else {
// For native execution, start with backend args
args = append(args, backendSettings.Args...)
args = append(args, backend.BuildCommandArgs()...)
}
return args
}
// BuildEnvironment builds the environment variables for the backend process
func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, dockerEnabled *bool, environment map[string]string) map[string]string {
backendSettings := o.getBackendSettings(backendConfig)
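// Precedence, lowest to highest: backend env, Docker env, per-instance env; later maps.Copy calls overwrite earlier keys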
env := map[string]string{}
if backendSettings.Environment != nil {
maps.Copy(env, backendSettings.Environment)
}
if o.isDockerEnabled(backendSettings, dockerEnabled) {
if backendSettings.Docker.Environment != nil {
maps.Copy(env, backendSettings.Docker.Environment)
}
}
if environment != nil {
maps.Copy(env, environment)
}
return env
}
func (o *Options) GetPort() int {
backend := o.getBackend()
if backend != nil {
return backend.GetPort()
}
return 0
}
func (o *Options) SetPort(port int) {
backend := o.getBackend()
if backend != nil {
backend.SetPort(port)
}
}
func (o *Options) GetHost() string {
backend := o.getBackend()
if backend != nil {
return backend.GetHost()
}
return "localhost"
}
func (o *Options) GetResponseHeaders(backendConfig *config.BackendConfig) map[string]string {
backendSettings := o.getBackendSettings(backendConfig)
return backendSettings.ResponseHeaders
}
// ValidateInstanceOptions performs validation based on backend type
func (o *Options) ValidateInstanceOptions() error {
backend := o.getBackend()
if backend == nil {
return validation.ValidationError(fmt.Errorf("backend options cannot be nil for backend type %s", o.BackendType))
}
return backend.Validate()
}


@@ -1,13 +1,15 @@
package backends
import (
"fmt"
"llamactl/pkg/config"
"reflect"
"strconv"
"strings"
)
// BuildCommandArgs converts a struct to command line arguments
func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
func BuildCommandArgs(options any, multipleFlags map[string]struct{}) []string {
var args []string
v := reflect.ValueOf(options).Elem()
@@ -26,9 +28,10 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
continue
}
// Get flag name from JSON tag
flagName := strings.Split(jsonTag, ",")[0]
flagName = strings.ReplaceAll(flagName, "_", "-")
// Get flag name from JSON tag (snake_case)
jsonFieldName := strings.Split(jsonTag, ",")[0]
// Convert to kebab-case for CLI flags
flagName := strings.ReplaceAll(jsonFieldName, "_", "-")
switch field.Kind() {
case reflect.Bool:
@@ -49,7 +52,8 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
if multipleFlags[flagName] {
// Use jsonFieldName (snake_case) for multipleFlags lookup
if _, isMultiValue := multipleFlags[jsonFieldName]; isMultiValue {
// Multiple flags: --flag value1 --flag value2
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
@@ -68,3 +72,43 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
return args
}
// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
// Start with configured Docker arguments (should include "run", "--rm", etc.)
dockerArgs := make([]string, len(backendConfig.Docker.Args))
copy(dockerArgs, backendConfig.Docker.Args)
// Add environment variables
for key, value := range backendConfig.Docker.Environment {
dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
}
// Add image name
dockerArgs = append(dockerArgs, backendConfig.Docker.Image)
// Add backend args and instance args
dockerArgs = append(dockerArgs, backendConfig.Args...)
dockerArgs = append(dockerArgs, instanceArgs...)
return "docker", dockerArgs, nil
}
// convertExtraArgsToFlags converts map[string]string to command flags
// Empty values become boolean flags: {"flag": ""} → ["--flag"]
// Non-empty values: {"flag": "value"} → ["--flag", "value"]
func convertExtraArgsToFlags(extraArgs map[string]string) []string {
var args []string
for key, value := range extraArgs {
if value == "" {
// Boolean flag
args = append(args, "--"+key)
} else {
// Value flag
args = append(args, "--"+key, value)
}
}
return args
}


@@ -1,12 +1,25 @@
package llamacpp
package backends
import (
"encoding/json"
"llamactl/pkg/backends"
"fmt"
"llamactl/pkg/validation"
"reflect"
"strconv"
)
// llamaMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
var llamaMultiValuedFlags = map[string]struct{}{
"override_tensor": {},
"override_kv": {},
"lora": {},
"lora_scaled": {},
"control_vector": {},
"control_vector_scaled": {},
"dry_sequence_breaker": {},
"logit_bias": {},
}
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -27,7 +40,7 @@ type LlamaServerOptions struct {
BatchSize int `json:"batch_size,omitempty"`
UBatchSize int `json:"ubatch_size,omitempty"`
Keep int `json:"keep,omitempty"`
FlashAttn bool `json:"flash_attn,omitempty"`
FlashAttn string `json:"flash_attn,omitempty"`
NoPerf bool `json:"no_perf,omitempty"`
Escape bool `json:"escape,omitempty"`
NoEscape bool `json:"no_escape,omitempty"`
@@ -173,6 +186,10 @@ type LlamaServerOptions struct {
FIMQwen7BDefault bool `json:"fim_qwen_7b_default,omitempty"`
FIMQwen7BSpec bool `json:"fim_qwen_7b_spec,omitempty"`
FIMQwen14BSpec bool `json:"fim_qwen_14b_spec,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/llama.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
@@ -195,6 +212,15 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// Copy to our struct
*o = LlamaServerOptions(temp)
// Track which fields we've processed
processedFields := make(map[string]bool)
// Get all known canonical field names from struct tags
knownFields := getKnownFieldNames(o)
for field := range knownFields {
processedFields[field] = true
}
// Handle alternative field names
fieldMappings := map[string]string{
// Common params
@@ -206,7 +232,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict N
"n-predict": "predict", // --n-predict N
"n_predict": "predict", // -n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
@@ -220,7 +246,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
"n_gpu_layers": "gpu_layers", // --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
@@ -236,9 +262,9 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"log-verbose": "verbose", // --log-verbose
"log_verbose": "verbose", // --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"log-verbosity": "verbosity", // --log-verbosity N
"log_verbosity": "verbosity", // --log-verbosity N
// Sampling params
"s": "seed", // -s, --seed SEED
@@ -255,21 +281,23 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"rerank": "reranking", // --reranking
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"draft": "draft-max", // -draft, --draft-max N
"draft-n": "draft-max", // --draft-n-max N
"draft-n-min": "draft_min", // --draft-n-min N
"draft": "draft_max", // -draft, --draft-max N
"draft_n": "draft_max", // --draft-n-max N
"draft_n_min": "draft_min", // --draft-n-min N
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"n_gpu_layers_draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
}
// Process alternative field names
// Process alternative field names and mark them as processed
for altName, canonicalName := range fieldMappings {
processedFields[altName] = true // Mark alternatives as known
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
@@ -280,32 +308,59 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
setFieldValue(field, value)
}
}
}
// Collect unknown fields into ExtraArgs
if o.ExtraArgs == nil {
o.ExtraArgs = make(map[string]string)
}
for key, value := range raw {
if !processedFields[key] {
o.ExtraArgs[key] = fmt.Sprintf("%v", value)
}
}
return nil
}
func (o *LlamaServerOptions) GetPort() int {
return o.Port
}
func (o *LlamaServerOptions) SetPort(port int) {
o.Port = port
}
func (o *LlamaServerOptions) GetHost() string {
return o.Host
}
func (o *LlamaServerOptions) Validate() error {
if o == nil {
return validation.ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validation.ValidateStructStrings(o, ""); err != nil {
return err
}
// Basic network validation for port
if o.Port < 0 || o.Port > 65535 {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
@@ -316,41 +371,33 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
// Llama uses multiple flags for arrays by default (not comma-separated)
multipleFlags := map[string]bool{
"override-tensor": true,
"override-kv": true,
"lora": true,
"lora-scaled": true,
"control-vector": true,
"control-vector-scaled": true,
"dry-sequence-breaker": true,
"logit-bias": true,
}
return backends.BuildCommandArgs(o, multipleFlags)
// Use package-level llamaMultiValuedFlags variable
args := BuildCommandArgs(o, llamaMultiValuedFlags)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
func (o *LlamaServerOptions) BuildDockerArgs() []string {
// For llama, Docker args are the same as normal args
return o.BuildCommandArgs()
}
// ParseCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
func (o *LlamaServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"llama-server"}
var subcommandNames []string // Llama has no subcommands
multiValuedFlags := map[string]bool{
"override_tensor": true,
"override_kv": true,
"lora": true,
"lora_scaled": true,
"control_vector": true,
"control_vector_scaled": true,
"dry_sequence_breaker": true,
"logit_bias": true,
}
// Use package-level llamaMultiValuedFlags variable
var llamaOptions LlamaServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
if err := parseCommand(command, executableNames, subcommandNames, llamaMultiValuedFlags, &llamaOptions); err != nil {
return nil, err
}

pkg/backends/llama_test.go (new file, 628 lines)

@@ -0,0 +1,628 @@
package backends_test
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/testutil"
"reflect"
"testing"
)
func TestLlamaCppBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options backends.LlamaServerOptions
expected []string
excluded []string
}{
{
name: "verbose true",
options: backends.LlamaServerOptions{
Verbose: true,
},
expected: []string{"--verbose"},
},
{
name: "verbose false",
options: backends.LlamaServerOptions{
Verbose: false,
},
excluded: []string{"--verbose"},
},
{
name: "multiple booleans",
options: backends.LlamaServerOptions{
Verbose: true,
Mlock: false,
NoMmap: true,
},
expected: []string{"--verbose", "--no-mmap"},
excluded: []string{"--mlock"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
args := tt.options.BuildCommandArgs()
for _, expectedArg := range tt.expected {
if !testutil.Contains(args, expectedArg) {
t.Errorf("Expected argument %q not found in %v", expectedArg, args)
}
}
for _, excludedArg := range tt.excluded {
if testutil.Contains(args, excludedArg) {
t.Errorf("Excluded argument %q found in %v", excludedArg, args)
}
}
})
}
}
func TestLlamaCppBuildCommandArgs_ZeroValues(t *testing.T) {
options := backends.LlamaServerOptions{
Port: 0, // Should be excluded
Threads: 0, // Should be excluded
Temperature: 0, // Should be excluded
Model: "", // Should be excluded
Verbose: false, // Should be excluded
}
args := options.BuildCommandArgs()
// Zero values should not appear in arguments
excludedArgs := []string{
"--port", "0",
"--threads", "0",
"--temperature", "0",
"--model", "",
"--verbose",
}
for _, excludedArg := range excludedArgs {
if testutil.Contains(args, excludedArg) {
t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
}
}
}
func TestLlamaCppBuildCommandArgs_ArrayFields(t *testing.T) {
options := backends.LlamaServerOptions{
Lora: []string{"adapter1.bin", "adapter2.bin"},
OverrideTensor: []string{"tensor1", "tensor2", "tensor3"},
DrySequenceBreaker: []string{".", "!", "?"},
}
args := options.BuildCommandArgs()
// Check that each array value appears with its flag
expectedOccurrences := map[string][]string{
"--lora": {"adapter1.bin", "adapter2.bin"},
"--override-tensor": {"tensor1", "tensor2", "tensor3"},
"--dry-sequence-breaker": {".", "!", "?"},
}
for flag, values := range expectedOccurrences {
for _, value := range values {
if !testutil.ContainsFlagWithValue(args, flag, value) {
t.Errorf("Expected %s %s, not found in %v", flag, value, args)
}
}
}
}
func TestLlamaCppBuildCommandArgs_EmptyArrays(t *testing.T) {
options := backends.LlamaServerOptions{
Lora: []string{}, // Empty array should not generate args
OverrideTensor: []string{}, // Empty array should not generate args
}
args := options.BuildCommandArgs()
excludedArgs := []string{"--lora", "--override-tensor"}
for _, excludedArg := range excludedArgs {
if testutil.Contains(args, excludedArg) {
t.Errorf("Empty array should not generate argument %q in %v", excludedArg, args)
}
}
}
func TestLlamaCppUnmarshalJSON_StandardFields(t *testing.T) {
jsonData := `{
"model": "/path/to/model.gguf",
"port": 8080,
"host": "localhost",
"verbose": true,
"ctx_size": 4096,
"gpu_layers": 32,
"temp": 0.7
}`
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
if options.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", options.Model)
}
if options.Port != 8080 {
t.Errorf("Expected port 8080, got %d", options.Port)
}
if options.Host != "localhost" {
t.Errorf("Expected host 'localhost', got %q", options.Host)
}
if !options.Verbose {
t.Error("Expected verbose to be true")
}
if options.CtxSize != 4096 {
t.Errorf("Expected ctx_size 4096, got %d", options.CtxSize)
}
if options.GPULayers != 32 {
t.Errorf("Expected gpu_layers 32, got %d", options.GPULayers)
}
if options.Temperature != 0.7 {
t.Errorf("Expected temperature 0.7, got %f", options.Temperature)
}
}
func TestLlamaCppUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
tests := []struct {
name string
jsonData string
checkFn func(backends.LlamaServerOptions) error
}{
{
name: "threads alternatives",
jsonData: `{"t": 4, "tb": 2}`,
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Threads != 4 {
return fmt.Errorf("expected threads 4, got %d", opts.Threads)
}
if opts.ThreadsBatch != 2 {
return fmt.Errorf("expected threads_batch 2, got %d", opts.ThreadsBatch)
}
return nil
},
},
{
name: "context size alternatives",
jsonData: `{"c": 2048}`,
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.CtxSize != 2048 {
return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
return nil
},
},
{
name: "gpu layers alternatives",
jsonData: `{"ngl": 16}`,
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.GPULayers != 16 {
return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
return nil
},
},
{
name: "model alternatives",
jsonData: `{"m": "/path/model.gguf"}`,
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Model != "/path/model.gguf" {
return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
}
return nil
},
},
{
name: "temperature alternatives",
jsonData: `{"temp": 0.8}`,
checkFn: func(opts backends.LlamaServerOptions) error {
if opts.Temperature != 0.8 {
return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
}
return nil
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(tt.jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
if err := tt.checkFn(options); err != nil {
t.Error(err)
}
})
}
}
func TestLlamaCppUnmarshalJSON_InvalidJSON(t *testing.T) {
invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(invalidJSON), &options)
if err == nil {
t.Error("Expected error for invalid JSON")
}
}
func TestLlamaCppUnmarshalJSON_ArrayFields(t *testing.T) {
jsonData := `{
"lora": ["adapter1.bin", "adapter2.bin"],
"override_tensor": ["tensor1", "tensor2"],
"dry_sequence_breaker": [".", "!", "?"]
}`
var options backends.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
expectedLora := []string{"adapter1.bin", "adapter2.bin"}
if !reflect.DeepEqual(options.Lora, expectedLora) {
t.Errorf("Expected lora %v, got %v", expectedLora, options.Lora)
}
expectedTensors := []string{"tensor1", "tensor2"}
if !reflect.DeepEqual(options.OverrideTensor, expectedTensors) {
t.Errorf("Expected override_tensor %v, got %v", expectedTensors, options.OverrideTensor)
}
expectedBreakers := []string{".", "!", "?"}
if !reflect.DeepEqual(options.DrySequenceBreaker, expectedBreakers) {
t.Errorf("Expected dry_sequence_breaker %v, got %v", expectedBreakers, options.DrySequenceBreaker)
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.LlamaServerOptions)
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
},
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
}
if opts.CtxSize != 4096 {
t.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
},
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/model.gguf" {
t.Errorf("expected model '/path/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", opts.GPULayers)
}
if !opts.Verbose {
t.Errorf("expected verbose to be true")
}
},
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.APIKey != "sk-1234567890abcdef" {
t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", opts.APIKey)
}
},
},
{
name: "multiple value types",
command: "llama-server --model /test/model.gguf --n-gpu-layers 32 --temp 0.7 --verbose --no-mmap",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
if opts.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", opts.Temperature)
}
if !opts.Verbose {
t.Errorf("expected verbose to be true")
}
if !opts.NoMmap {
t.Errorf("expected no_mmap to be true")
}
},
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
if tt.validate != nil {
tt.validate(t, result)
}
})
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
var opts backends.LlamaServerOptions
resultAny, err := opts.ParseCommand(command)
result, _ := resultAny.(*backends.LlamaServerOptions)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
func TestLlamaCppBuildCommandArgs_ExtraArgs(t *testing.T) {
options := backends.LlamaServerOptions{
Model: "/models/test.gguf",
ExtraArgs: map[string]string{
"flash-attn": "", // boolean flag
"log-file": "/logs/test.log", // value flag
},
}
args := options.BuildCommandArgs()
// Check that extra args are present
if !testutil.Contains(args, "--flash-attn") {
t.Error("Expected --flash-attn flag not found")
}
if !testutil.Contains(args, "--log-file") || !testutil.Contains(args, "/logs/test.log") {
t.Error("Expected --log-file flag or value not found")
}
}
func TestParseLlamaCommand_ExtraArgs(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.LlamaServerOptions)
}{
{
name: "extra args with known fields",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32 --unknown-flag value --another-bool-flag",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/path/to/model.gguf" {
t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["unknown_flag"]; !ok || val != "value" {
t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", val)
}
if val, ok := opts.ExtraArgs["another_bool_flag"]; !ok || val != "true" {
t.Errorf("expected extra_args[another_bool_flag]='true', got '%s'", val)
}
},
},
{
name: "extra args with alternative field names",
command: "llama-server -m /model.gguf -ngl 16 --custom-arg test --new-feature",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
// Check that alternative names worked for known fields
if opts.Model != "/model.gguf" {
t.Errorf("expected model '/model.gguf', got '%s'", opts.Model)
}
if opts.GPULayers != 16 {
t.Errorf("expected gpu_layers 16, got %d", opts.GPULayers)
}
// Check that unknown args went to ExtraArgs
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["custom_arg"]; !ok || val != "test" {
t.Errorf("expected extra_args[custom_arg]='test', got '%s'", val)
}
if val, ok := opts.ExtraArgs["new_feature"]; !ok || val != "true" {
t.Errorf("expected extra_args[new_feature]='true', got '%s'", val)
}
},
},
{
name: "only extra args",
command: "llama-server --experimental-feature --beta-mode enabled",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["experimental_feature"]; !ok || val != "true" {
t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", val)
}
if val, ok := opts.ExtraArgs["beta_mode"]; !ok || val != "enabled" {
t.Errorf("expected extra_args[beta_mode]='enabled', got '%s'", val)
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.LlamaServerOptions
result, err := opts.ParseCommand(tt.command)
if tt.expectErr && err == nil {
t.Error("expected error but got none")
return
}
if !tt.expectErr && err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if !tt.expectErr && tt.validate != nil {
llamaOpts, ok := result.(*backends.LlamaServerOptions)
if !ok {
t.Fatal("result is not *LlamaServerOptions")
}
tt.validate(t, llamaOpts)
}
})
}
}
func TestLlamaCppGetCommand_WithOverrides(t *testing.T) {
tests := []struct {
name string
dockerInConfig bool
dockerEnabled *bool
commandOverride string
expected string
}{
{
name: "no overrides - use config command",
dockerInConfig: false,
dockerEnabled: nil,
commandOverride: "",
expected: "/usr/bin/llama-server",
},
{
name: "override to enable docker",
dockerInConfig: false,
dockerEnabled: boolPtr(true),
commandOverride: "",
expected: "docker",
},
{
name: "override to disable docker",
dockerInConfig: true,
dockerEnabled: boolPtr(false),
commandOverride: "",
expected: "/usr/bin/llama-server",
},
{
name: "command override",
dockerInConfig: false,
dockerEnabled: nil,
commandOverride: "/custom/llama-server",
expected: "/custom/llama-server",
},
{
name: "docker takes precedence over command override",
dockerInConfig: false,
dockerEnabled: boolPtr(true),
commandOverride: "/custom/llama-server",
expected: "docker",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "/usr/bin/llama-server",
Docker: &config.DockerSettings{
Enabled: tt.dockerInConfig,
Image: "test-image",
},
},
}
opts := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "test-model.gguf",
},
}
result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride)
if result != tt.expected {
t.Errorf("GetCommand() = %v, want %v", result, tt.expected)
}
})
}
}
// Helper function to create bool pointer
func boolPtr(b bool) *bool {
return &b
}


@@ -1,511 +0,0 @@
package llamacpp_test
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends/llamacpp"
"reflect"
"slices"
"testing"
)
func TestBuildCommandArgs_BasicFields(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
Host: "localhost",
Verbose: true,
CtxSize: 4096,
GPULayers: 32,
}
args := options.BuildCommandArgs()
// Check individual arguments
expectedPairs := map[string]string{
"--model": "/path/to/model.gguf",
"--port": "8080",
"--host": "localhost",
"--ctx-size": "4096",
"--gpu-layers": "32",
}
for flag, expectedValue := range expectedPairs {
if !containsFlagWithValue(args, flag, expectedValue) {
t.Errorf("Expected %s %s, not found in %v", flag, expectedValue, args)
}
}
// Check standalone boolean flag
if !contains(args, "--verbose") {
t.Errorf("Expected --verbose flag not found in %v", args)
}
}
func TestBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options llamacpp.LlamaServerOptions
expected []string
excluded []string
}{
{
name: "verbose true",
options: llamacpp.LlamaServerOptions{
Verbose: true,
},
expected: []string{"--verbose"},
},
{
name: "verbose false",
options: llamacpp.LlamaServerOptions{
Verbose: false,
},
excluded: []string{"--verbose"},
},
{
name: "multiple booleans",
options: llamacpp.LlamaServerOptions{
Verbose: true,
FlashAttn: true,
Mlock: false,
NoMmap: true,
},
expected: []string{"--verbose", "--flash-attn", "--no-mmap"},
excluded: []string{"--mlock"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
args := tt.options.BuildCommandArgs()
for _, expectedArg := range tt.expected {
if !contains(args, expectedArg) {
t.Errorf("Expected argument %q not found in %v", expectedArg, args)
}
}
for _, excludedArg := range tt.excluded {
if contains(args, excludedArg) {
t.Errorf("Excluded argument %q found in %v", excludedArg, args)
}
}
})
}
}
func TestBuildCommandArgs_NumericFields(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Port: 8080,
Threads: 4,
CtxSize: 2048,
GPULayers: 16,
Temperature: 0.7,
TopK: 40,
TopP: 0.9,
}
args := options.BuildCommandArgs()
expectedPairs := map[string]string{
"--port": "8080",
"--threads": "4",
"--ctx-size": "2048",
"--gpu-layers": "16",
"--temp": "0.7",
"--top-k": "40",
"--top-p": "0.9",
}
for flag, expectedValue := range expectedPairs {
if !containsFlagWithValue(args, flag, expectedValue) {
t.Errorf("Expected %s %s, not found in %v", flag, expectedValue, args)
}
}
}
func TestBuildCommandArgs_ZeroValues(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Port: 0, // Should be excluded
Threads: 0, // Should be excluded
Temperature: 0, // Should be excluded
Model: "", // Should be excluded
Verbose: false, // Should be excluded
}
args := options.BuildCommandArgs()
// Zero values should not appear in arguments
excludedArgs := []string{
"--port", "0",
"--threads", "0",
"--temperature", "0",
"--model", "",
"--verbose",
}
for _, excludedArg := range excludedArgs {
if contains(args, excludedArg) {
t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
}
}
}
func TestBuildCommandArgs_ArrayFields(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Lora: []string{"adapter1.bin", "adapter2.bin"},
OverrideTensor: []string{"tensor1", "tensor2", "tensor3"},
DrySequenceBreaker: []string{".", "!", "?"},
}
args := options.BuildCommandArgs()
// Check that each array value appears with its flag
expectedOccurrences := map[string][]string{
"--lora": {"adapter1.bin", "adapter2.bin"},
"--override-tensor": {"tensor1", "tensor2", "tensor3"},
"--dry-sequence-breaker": {".", "!", "?"},
}
for flag, values := range expectedOccurrences {
for _, value := range values {
if !containsFlagWithValue(args, flag, value) {
t.Errorf("Expected %s %s, not found in %v", flag, value, args)
}
}
}
}
func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
options := llamacpp.LlamaServerOptions{
Lora: []string{}, // Empty array should not generate args
OverrideTensor: []string{}, // Empty array should not generate args
}
args := options.BuildCommandArgs()
excludedArgs := []string{"--lora", "--override-tensor"}
for _, excludedArg := range excludedArgs {
if contains(args, excludedArg) {
t.Errorf("Empty array should not generate argument %q in %v", excludedArg, args)
}
}
}
func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
// Test snake_case to kebab-case conversion
options := llamacpp.LlamaServerOptions{
CtxSize: 4096,
GPULayers: 32,
ThreadsBatch: 2,
FlashAttn: true,
TopK: 40,
TopP: 0.9,
}
args := options.BuildCommandArgs()
// Check that field names are properly converted
expectedFlags := []string{
"--ctx-size", // ctx_size -> ctx-size
"--gpu-layers", // gpu_layers -> gpu-layers
"--threads-batch", // threads_batch -> threads-batch
"--flash-attn", // flash_attn -> flash-attn
"--top-k", // top_k -> top-k
"--top-p", // top_p -> top-p
}
for _, flag := range expectedFlags {
if !contains(args, flag) {
t.Errorf("Expected flag %q not found in %v", flag, args)
}
}
}
func TestUnmarshalJSON_StandardFields(t *testing.T) {
jsonData := `{
"model": "/path/to/model.gguf",
"port": 8080,
"host": "localhost",
"verbose": true,
"ctx_size": 4096,
"gpu_layers": 32,
"temp": 0.7
}`
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
if options.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", options.Model)
}
if options.Port != 8080 {
t.Errorf("Expected port 8080, got %d", options.Port)
}
if options.Host != "localhost" {
t.Errorf("Expected host 'localhost', got %q", options.Host)
}
if !options.Verbose {
t.Error("Expected verbose to be true")
}
if options.CtxSize != 4096 {
t.Errorf("Expected ctx_size 4096, got %d", options.CtxSize)
}
if options.GPULayers != 32 {
t.Errorf("Expected gpu_layers 32, got %d", options.GPULayers)
}
if options.Temperature != 0.7 {
t.Errorf("Expected temperature 0.7, got %f", options.Temperature)
}
}
func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
tests := []struct {
name string
jsonData string
checkFn func(llamacpp.LlamaServerOptions) error
}{
{
name: "threads alternatives",
jsonData: `{"t": 4, "tb": 2}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Threads != 4 {
return fmt.Errorf("expected threads 4, got %d", opts.Threads)
}
if opts.ThreadsBatch != 2 {
return fmt.Errorf("expected threads_batch 2, got %d", opts.ThreadsBatch)
}
return nil
},
},
{
name: "context size alternatives",
jsonData: `{"c": 2048}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.CtxSize != 2048 {
return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
}
return nil
},
},
{
name: "gpu layers alternatives",
jsonData: `{"ngl": 16}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.GPULayers != 16 {
return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
}
return nil
},
},
{
name: "model alternatives",
jsonData: `{"m": "/path/model.gguf"}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Model != "/path/model.gguf" {
return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
}
return nil
},
},
{
name: "temperature alternatives",
jsonData: `{"temp": 0.8}`,
checkFn: func(opts llamacpp.LlamaServerOptions) error {
if opts.Temperature != 0.8 {
return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
}
return nil
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(tt.jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
if err := tt.checkFn(options); err != nil {
t.Error(err)
}
})
}
}
func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(invalidJSON), &options)
if err == nil {
t.Error("Expected error for invalid JSON")
}
}
func TestUnmarshalJSON_ArrayFields(t *testing.T) {
jsonData := `{
"lora": ["adapter1.bin", "adapter2.bin"],
"override_tensor": ["tensor1", "tensor2"],
"dry_sequence_breaker": [".", "!", "?"]
}`
var options llamacpp.LlamaServerOptions
err := json.Unmarshal([]byte(jsonData), &options)
if err != nil {
t.Fatalf("Unmarshal failed: %v", err)
}
expectedLora := []string{"adapter1.bin", "adapter2.bin"}
if !reflect.DeepEqual(options.Lora, expectedLora) {
t.Errorf("Expected lora %v, got %v", expectedLora, options.Lora)
}
expectedTensors := []string{"tensor1", "tensor2"}
if !reflect.DeepEqual(options.OverrideTensor, expectedTensors) {
t.Errorf("Expected override_tensor %v, got %v", expectedTensors, options.OverrideTensor)
}
expectedBreakers := []string{".", "!", "?"}
if !reflect.DeepEqual(options.DrySequenceBreaker, expectedBreakers) {
t.Errorf("Expected dry_sequence_breaker %v, got %v", expectedBreakers, options.DrySequenceBreaker)
}
}
func TestParseLlamaCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "llama-server --model /path/to/model.gguf --gpu-layers 32",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model.gguf --ctx-size 4096",
expectErr: false,
},
{
name: "mixed flag formats",
command: "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
expectErr: false,
},
{
name: "quoted strings",
command: `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `llama-server --model test.gguf --api-key "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "llama-server ---model test.gguf",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := llamacpp.ParseLlamaCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseLlamaCommandValues(t *testing.T) {
command := "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.gguf" {
t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
}
if result.GPULayers != 32 {
t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
}
if result.Temperature != 0.7 {
t.Errorf("expected temperature 0.7, got %f", result.Temperature)
}
if !result.Verbose {
t.Errorf("expected verbose to be true")
}
if !result.NoMmap {
t.Errorf("expected no_mmap to be true")
}
}
func TestParseLlamaCommandArrays(t *testing.T) {
command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
result, err := llamacpp.ParseLlamaCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if len(result.Lora) != 2 {
t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
}
expected := []string{"adapter1.bin", "adapter2.bin"}
for i, v := range expected {
if result.Lora[i] != v {
t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
}
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag {
// Check if there's a next argument and it matches the expected value
if i+1 < len(args) && args[i+1] == value {
return true
}
}
}
return false
}

pkg/backends/mlx.go (new file, 148 lines)

@@ -0,0 +1,148 @@
package backends
import (
"encoding/json"
"fmt"
"llamactl/pkg/validation"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/mlx.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to collect unknown fields into ExtraArgs
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to capture all fields
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions MlxServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = MlxServerOptions(temp)
// Get all known canonical field names from struct tags
knownFields := getKnownFieldNames(o)
// Collect unknown fields into ExtraArgs
if o.ExtraArgs == nil {
o.ExtraArgs = make(map[string]string)
}
for key, value := range raw {
if !knownFields[key] {
o.ExtraArgs[key] = fmt.Sprintf("%v", value)
}
}
return nil
}
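Note the contrast with the llama implementation above: there, alternative field spellings are matched during unmarshaling, while here only canonical json tag names count as known. A sketch ("rope-scaling" is a made-up key):
data := []byte(`{"model": "/m.mlx", "max_tokens": 512, "rope-scaling": "linear"}`)
var opts MlxServerOptions
_ = json.Unmarshal(data, &opts)
// "model" and "max_tokens" populate struct fields; "rope-scaling" is unknown,
// so opts.ExtraArgs == map[string]string{"rope-scaling": "linear"}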
func (o *MlxServerOptions) GetPort() int {
return o.Port
}
func (o *MlxServerOptions) SetPort(port int) {
o.Port = port
}
func (o *MlxServerOptions) GetHost() string {
return o.Host
}
func (o *MlxServerOptions) Validate() error {
if o == nil {
return validation.ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
}
if err := validation.ValidateStructStrings(o, ""); err != nil {
return err
}
// Basic network validation for port
if o.Port < 0 || o.Port > 65535 {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
return nil
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]struct{}{} // MLX doesn't currently have []string fields
args := BuildCommandArgs(o, multipleFlags)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
func (o *MlxServerOptions) BuildDockerArgs() []string {
return []string{}
}
// ParseCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func (o *MlxServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]struct{}{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := parseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}
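Usage sketch, matching the MLX parsing tests later in this diff:
var o MlxServerOptions
res, err := o.ParseCommand("mlx_lm.server --model /test/model.mlx --port 8080 --trust-remote-code")
if err != nil {
	// handle parse error
}
opts := res.(*MlxServerOptions)
// opts.Model == "/test/model.mlx", opts.Port == 8080, opts.TrustRemoteCode == true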


@@ -1,56 +0,0 @@
package mlx
import (
"llamactl/pkg/backends"
)
type MlxServerOptions struct {
// Basic connection options
Model string `json:"model,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
// Model and adapter options
AdapterPath string `json:"adapter_path,omitempty"`
DraftModel string `json:"draft_model,omitempty"`
NumDraftTokens int `json:"num_draft_tokens,omitempty"`
TrustRemoteCode bool `json:"trust_remote_code,omitempty"`
// Logging and templates
LogLevel string `json:"log_level,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
UseDefaultChatTemplate bool `json:"use_default_chat_template,omitempty"`
ChatTemplateArgs string `json:"chat_template_args,omitempty"` // JSON string
// Sampling defaults
Temp float64 `json:"temp,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
return backends.BuildCommandArgs(o, multipleFlags)
}
// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
// Supports multiple formats:
// 1. Full command: "mlx_lm.server --model model/path"
// 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
// 3. Args only: "--model model/path --host 0.0.0.0"
// 4. Multiline commands with backslashes
func ParseMlxCommand(command string) (*MlxServerOptions, error) {
executableNames := []string{"mlx_lm.server"}
var subcommandNames []string // MLX has no subcommands
multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
var mlxOptions MlxServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
return nil, err
}
return &mlxOptions, nil
}


@@ -1,157 +0,0 @@
package mlx_test
import (
"llamactl/pkg/backends/mlx"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
},
{
name: "quoted strings",
command: `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := mlx.ParseMlxCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseMlxCommandValues(t *testing.T) {
command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
result, err := mlx.ParseMlxCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
}
if result.Port != 8080 {
t.Errorf("expected port 8080, got %d", result.Port)
}
if result.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", result.Temp)
}
if !result.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if result.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := &mlx.MlxServerOptions{
Model: "/test/model.mlx",
Host: "127.0.0.1",
Port: 8080,
Temp: 0.7,
TopP: 0.9,
TopK: 40,
MaxTokens: 2048,
TrustRemoteCode: true,
LogLevel: "DEBUG",
ChatTemplate: "custom template",
}
args := options.BuildCommandArgs()
// Check that all expected flags are present
expectedFlags := map[string]string{
"--model": "/test/model.mlx",
"--host": "127.0.0.1",
"--port": "8080",
"--log-level": "DEBUG",
"--chat-template": "custom template",
"--temp": "0.7",
"--top-p": "0.9",
"--top-k": "40",
"--max-tokens": "2048",
}
for i := 0; i < len(args); i++ {
if args[i] == "--trust-remote-code" {
continue // Boolean flag with no value
}
if args[i] == "--use-default-chat-template" {
continue // Boolean flag with no value
}
if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
if args[i+1] != expectedValue {
t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
}
}
}
// Check boolean flags
foundTrustRemoteCode := false
for _, arg := range args {
if arg == "--trust-remote-code" {
foundTrustRemoteCode = true
}
}
if !foundTrustRemoteCode {
t.Errorf("expected --trust-remote-code flag to be present")
}
}

pkg/backends/mlx_test.go (new file, 331 lines)

@@ -0,0 +1,331 @@
package backends_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/testutil"
"testing"
)
func TestParseMlxCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.MlxServerOptions)
}{
{
name: "basic command",
command: "mlx_lm.server --model /path/to/model --host 0.0.0.0",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.Model != "/path/to/model" {
t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
}
if opts.Host != "0.0.0.0" {
t.Errorf("expected host '0.0.0.0', got '%s'", opts.Host)
}
},
},
{
name: "args only",
command: "--model /path/to/model --port 8080",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.Model != "/path/to/model" {
t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("expected port 8080, got %d", opts.Port)
}
},
},
{
name: "mixed flag formats",
command: "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.Model != "/path/model" {
t.Errorf("expected model '/path/model', got '%s'", opts.Model)
}
if opts.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", opts.Temp)
}
if !opts.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
},
},
{
name: "multiple value types",
command: "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.Model != "/test/model.mlx" {
t.Errorf("expected model '/test/model.mlx', got '%s'", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("expected port 8080, got %d", opts.Port)
}
if opts.Temp != 0.7 {
t.Errorf("expected temp 0.7, got %f", opts.Temp)
}
if !opts.TrustRemoteCode {
t.Errorf("expected trust_remote_code to be true")
}
if opts.LogLevel != "DEBUG" {
t.Errorf("expected log_level 'DEBUG', got '%s'", opts.LogLevel)
}
},
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `mlx_lm.server --model test.mlx --chat-template "unterminated`,
expectErr: true,
},
{
name: "malformed flag",
command: "mlx_lm.server ---model test.mlx",
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.MlxServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.MlxServerOptions)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
if tt.validate != nil {
tt.validate(t, result)
}
})
}
}
func TestMlxBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options backends.MlxServerOptions
expected []string
excluded []string
}{
{
name: "trust_remote_code true",
options: backends.MlxServerOptions{
TrustRemoteCode: true,
},
expected: []string{"--trust-remote-code"},
},
{
name: "trust_remote_code false",
options: backends.MlxServerOptions{
TrustRemoteCode: false,
},
excluded: []string{"--trust-remote-code"},
},
{
name: "multiple booleans",
options: backends.MlxServerOptions{
TrustRemoteCode: true,
UseDefaultChatTemplate: true,
},
expected: []string{"--trust-remote-code", "--use-default-chat-template"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
args := tt.options.BuildCommandArgs()
for _, expectedArg := range tt.expected {
if !testutil.Contains(args, expectedArg) {
t.Errorf("Expected argument %q not found in %v", expectedArg, args)
}
}
for _, excludedArg := range tt.excluded {
if testutil.Contains(args, excludedArg) {
t.Errorf("Excluded argument %q found in %v", excludedArg, args)
}
}
})
}
}
func TestMlxBuildCommandArgs_ZeroValues(t *testing.T) {
options := backends.MlxServerOptions{
Port: 0, // Should be excluded
TopK: 0, // Should be excluded
Temp: 0, // Should be excluded
Model: "", // Should be excluded
LogLevel: "", // Should be excluded
TrustRemoteCode: false, // Should be excluded
}
args := options.BuildCommandArgs()
// Zero values should not appear in arguments
excludedArgs := []string{
"--port", "0",
"--top-k", "0",
"--temp", "0",
"--model", "",
"--log-level", "",
"--trust-remote-code",
}
for _, excludedArg := range excludedArgs {
if testutil.Contains(args, excludedArg) {
t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
}
}
}
func TestParseMlxCommand_ExtraArgs(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.MlxServerOptions)
}{
{
name: "extra args with known fields",
command: "mlx_lm.server --model /path/to/model --port 8080 --unknown-flag value --new-bool-flag",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.Model != "/path/to/model" {
t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("expected port 8080, got %d", opts.Port)
}
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["unknown_flag"]; !ok || val != "value" {
t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", val)
}
if val, ok := opts.ExtraArgs["new_bool_flag"]; !ok || val != "true" {
t.Errorf("expected extra_args[new_bool_flag]='true', got '%s'", val)
}
},
},
{
name: "only extra args",
command: "mlx_lm.server --experimental-feature --custom-param test",
expectErr: false,
validate: func(t *testing.T, opts *backends.MlxServerOptions) {
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["experimental_feature"]; !ok || val != "true" {
t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", val)
}
if val, ok := opts.ExtraArgs["custom_param"]; !ok || val != "test" {
t.Errorf("expected extra_args[custom_param]='test', got '%s'", val)
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.MlxServerOptions
result, err := opts.ParseCommand(tt.command)
if tt.expectErr && err == nil {
t.Error("expected error but got none")
return
}
if !tt.expectErr && err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if !tt.expectErr && tt.validate != nil {
mlxOpts, ok := result.(*backends.MlxServerOptions)
if !ok {
t.Fatal("result is not *MlxServerOptions")
}
tt.validate(t, mlxOpts)
}
})
}
}
func TestMlxGetCommand_NoDocker(t *testing.T) {
// MLX backend should never use Docker
backendConfig := &config.BackendConfig{
MLX: config.BackendSettings{
Command: "/usr/bin/mlx-server",
Docker: &config.DockerSettings{
Enabled: true, // Even if enabled in config
Image: "test-image",
},
},
}
opts := backends.Options{
BackendType: backends.BackendTypeMlxLm,
MlxServerOptions: &backends.MlxServerOptions{
Model: "test-model",
},
}
tests := []struct {
name string
dockerEnabled *bool
commandOverride string
expected string
}{
{
name: "ignores docker in config",
dockerEnabled: nil,
commandOverride: "",
expected: "/usr/bin/mlx-server",
},
{
name: "ignores docker override",
dockerEnabled: boolPtr(true),
commandOverride: "",
expected: "/usr/bin/mlx-server",
},
{
name: "respects command override",
dockerEnabled: nil,
commandOverride: "/custom/mlx-server",
expected: "/custom/mlx-server",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride)
if result != tt.expected {
t.Errorf("GetCommand() = %v, want %v", result, tt.expected)
}
})
}
}


@@ -4,13 +4,14 @@ import (
"encoding/json"
"fmt"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
)
// ParseCommand parses a command string into a target struct
func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
// parseCommand parses a command string into a target struct
func parseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]struct{}, target any) error {
// Normalize multiline commands
command = normalizeCommand(command)
if command == "" {
@@ -125,7 +126,7 @@ func extractArgs(command string, executableNames []string, subcommandNames []str
}
// parseFlags parses command line flags into a map
func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
func parseFlags(args []string, multiValuedFlags map[string]struct{}) (map[string]any, error) {
options := make(map[string]any)
for i := 0; i < len(args); i++ {
@@ -163,7 +164,7 @@ func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any
if hasValue {
// Handle multi-valued flags
if multiValuedFlags[flagName] {
if _, isMultiValue := multiValuedFlags[flagName]; isMultiValue {
if existing, ok := options[flagName].([]string); ok {
options[flagName] = append(existing, value)
} else {
@@ -211,3 +212,65 @@ func parseValue(value string) any {
// Return as string
return value
}
// setFieldValue sets a field value using reflection, handling type conversions
// Used by UnmarshalJSON implementations to handle alternative field names
func setFieldValue(field reflect.Value, value any) {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
case reflect.Slice:
// Handle string slices
if field.Type().Elem().Kind() == reflect.String {
if slice, ok := value.([]any); ok {
strSlice := make([]string, 0, len(slice))
for _, v := range slice {
if s, ok := v.(string); ok {
strSlice = append(strSlice, s)
}
}
field.Set(reflect.ValueOf(strSlice))
}
}
}
}
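An in-package sketch of the conversions this performs (JSON numbers decode as float64, and numeric strings are accepted for int and float fields):
type sketch struct {
	Threads int
	Temp    float64
}
var s sketch
v := reflect.ValueOf(&s).Elem()
setFieldValue(v.FieldByName("Threads"), float64(8)) // float64 from JSON -> int
setFieldValue(v.FieldByName("Temp"), "0.7")         // numeric string -> float64
// s.Threads == 8, s.Temp == 0.7; values of mismatched types are silently ignored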
// getKnownFieldNames extracts all known field names from struct json tags
// Used by UnmarshalJSON implementations to identify unknown fields for ExtraArgs
func getKnownFieldNames(v any) map[string]bool {
fields := make(map[string]bool)
t := reflect.TypeOf(v).Elem()
for i := 0; i < t.NumField(); i++ {
field := t.Field(i)
jsonTag := field.Tag.Get("json")
if jsonTag != "" && jsonTag != "-" {
// Handle "name,omitempty" format
name := strings.Split(jsonTag, ",")[0]
fields[name] = true
}
}
return fields
}
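For example, with a hypothetical struct:
type sketch struct {
	Model    string `json:"model,omitempty"`
	Port     int    `json:"port,omitempty"`
	Internal bool   `json:"-"`
}
known := getKnownFieldNames(&sketch{})
// known == map[string]bool{"model": true, "port": true}; "-" and untagged fields are skipped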


@@ -1,9 +1,24 @@
package vllm
package backends
import (
"llamactl/pkg/backends"
"encoding/json"
"fmt"
"llamactl/pkg/validation"
)
// vllmMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Based on vLLM's CLI argument definitions with action='append' or List types
// Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
var vllmMultiValuedFlags = map[string]struct{}{
"api_key": {}, // --api-key (action='append')
"allowed_origins": {}, // --allowed-origins (List type)
"allowed_methods": {}, // --allowed-methods (List type)
"allowed_headers": {}, // --allowed-headers (List type)
"middleware": {}, // --middleware (action='append')
"lora_modules": {}, // --lora-modules (custom LoRAParserAction, accepts multiple)
"prompt_adapters": {}, // --prompt-adapters (similar to lora-modules, accepts multiple)
}
type VllmServerOptions struct {
// Basic connection options (auto-assigned by llamactl)
Host string `json:"host,omitempty"`
@@ -128,60 +143,141 @@ type VllmServerOptions struct {
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/vllm.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to collect unknown fields into ExtraArgs
func (o *VllmServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to capture all fields
var raw map[string]any
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions VllmServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = VllmServerOptions(temp)
// Get all known canonical field names from struct tags
knownFields := getKnownFieldNames(o)
// Collect unknown fields into ExtraArgs
if o.ExtraArgs == nil {
o.ExtraArgs = make(map[string]string)
}
for key, value := range raw {
if !knownFields[key] {
o.ExtraArgs[key] = fmt.Sprintf("%v", value)
}
}
return nil
}
func (o *VllmServerOptions) GetPort() int {
return o.Port
}
func (o *VllmServerOptions) SetPort(port int) {
o.Port = port
}
func (o *VllmServerOptions) GetHost() string {
return o.Host
}
func (o *VllmServerOptions) Validate() error {
if o == nil {
return validation.ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validation.ValidateStructStrings(o, ""); err != nil {
return err
}
// Basic network validation for port
if o.Port < 0 || o.Port > 65535 {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
return nil
}
// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
var args []string
// Add model as positional argument if specified
// Add model as positional argument if specified (for native execution)
if o.Model != "" {
args = append(args, o.Model)
}
// Create a copy of the options without the Model field to avoid including it as --model flag
// Create a copy without Model field to avoid --model flag
optionsCopy := *o
optionsCopy.Model = "" // Clear model field so it won't be included as a flag
optionsCopy.Model = ""
multipleFlags := map[string]bool{
"api-key": true,
"allowed-origins": true,
"allowed-methods": true,
"allowed-headers": true,
"middleware": true,
}
// Use package-level multipleFlags variable
// Build the rest of the arguments as flags
flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
flagArgs := BuildCommandArgs(&optionsCopy, vllmMultiValuedFlags)
args = append(args, flagArgs...)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
func (o *VllmServerOptions) BuildDockerArgs() []string {
var args []string
// Use package-level multipleFlags variable
flagArgs := BuildCommandArgs(o, vllmMultiValuedFlags)
args = append(args, flagArgs...)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
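The observable difference between the two builders, assuming the shared BuildCommandArgs helper renders a non-empty Model field as a "--model" flag (as the llama tests in this PR suggest):
o := VllmServerOptions{Model: "test-model", TensorParallelSize: 2}
native := o.BuildCommandArgs()
// native begins with the positional "test-model" and contains no "--model" flag
docker := o.BuildDockerArgs()
// docker instead carries "--model" "test-model" alongside the other flags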
// ParseCommand parses a vLLM serve command string into VllmServerOptions
// Supports multiple formats:
// 1. Full command: "vllm serve --model MODEL_NAME --other-args"
// 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
// 3. Serve only: "serve --model MODEL_NAME --other-args"
// 4. Args only: "--model MODEL_NAME --other-args"
// 5. Multiline commands with backslashes
func ParseVllmCommand(command string) (*VllmServerOptions, error) {
func (o *VllmServerOptions) ParseCommand(command string) (any, error) {
executableNames := []string{"vllm"}
subcommandNames := []string{"serve"}
multiValuedFlags := map[string]bool{
"middleware": true,
"api_key": true,
"allowed_origins": true,
"allowed_methods": true,
"allowed_headers": true,
"lora_modules": true,
"prompt_adapters": true,
}
var vllmOptions VllmServerOptions
if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
if err := parseCommand(command, executableNames, subcommandNames, vllmMultiValuedFlags, &vllmOptions); err != nil {
return nil, err
}
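A parsing sketch for repeated flags, the same mechanism the llama tests exercise with --lora; AllowedOrigins is one of the multi-valued fields registered above:
var o VllmServerOptions
res, err := o.ParseCommand("vllm serve test-model --allowed-origins http://a.example --allowed-origins http://b.example")
if err != nil {
	// handle parse error
}
opts := res.(*VllmServerOptions)
// opts.Model == "test-model"
// opts.AllowedOrigins == []string{"http://a.example", "http://b.example"}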


@@ -1,153 +0,0 @@
package vllm_test
import (
"llamactl/pkg/backends/vllm"
"slices"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := vllm.ParseVllmCommand(tt.command)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
}
})
}
}
func TestParseVllmCommandValues(t *testing.T) {
command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
result, err := vllm.ParseVllmCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", result.Model)
}
if result.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
}
if result.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
}
if !result.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
}
}
func TestBuildCommandArgs(t *testing.T) {
options := vllm.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !contains(args, "--host") {
t.Errorf("Expected --host not found in %v", args)
}
if !contains(args, "--port") {
t.Errorf("Expected --port not found in %v", args)
}
// Check array handling (multiple flags)
allowedOriginsCount := 0
for i := range args {
if args[i] == "--allowed-origins" {
allowedOriginsCount++
}
}
if allowedOriginsCount != 2 {
t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
}
}
// Helper functions
func contains(slice []string, item string) bool {
return slices.Contains(slice, item)
}
func containsFlagWithValue(args []string, flag, value string) bool {
for i, arg := range args {
if arg == flag && i+1 < len(args) && args[i+1] == value {
return true
}
}
return false
}

pkg/backends/vllm_test.go Normal file

@@ -0,0 +1,414 @@
package backends_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/testutil"
"testing"
)
func TestParseVllmCommand(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.VllmServerOptions)
}{
{
name: "basic vllm serve command",
command: "vllm serve microsoft/DialoGPT-medium",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "microsoft/DialoGPT-medium" {
t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
}
},
},
{
name: "serve only command",
command: "serve microsoft/DialoGPT-medium",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "microsoft/DialoGPT-medium" {
t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
}
},
},
{
name: "positional model with flags",
command: "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "microsoft/DialoGPT-medium" {
t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
}
if opts.TensorParallelSize != 2 {
t.Errorf("expected tensor_parallel_size 2, got %d", opts.TensorParallelSize)
}
},
},
{
name: "model with path",
command: "vllm serve /path/to/model --gpu-memory-utilization 0.8",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "/path/to/model" {
t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
}
if opts.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
}
},
},
{
name: "multiple value types",
command: "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "test-model" {
t.Errorf("expected model 'test-model', got '%s'", opts.Model)
}
if opts.TensorParallelSize != 4 {
t.Errorf("expected tensor_parallel_size 4, got %d", opts.TensorParallelSize)
}
if opts.GPUMemoryUtilization != 0.8 {
t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
}
if !opts.EnableLogOutputs {
t.Errorf("expected enable_log_outputs true, got %v", opts.EnableLogOutputs)
}
},
},
{
name: "empty command",
command: "",
expectErr: true,
},
{
name: "unterminated quote",
command: `vllm serve "unterminated`,
expectErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.VllmServerOptions
resultAny, err := opts.ParseCommand(tt.command)
result, _ := resultAny.(*backends.VllmServerOptions)
if tt.expectErr {
if err == nil {
t.Errorf("expected error but got none")
}
return
}
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if result == nil {
t.Errorf("expected result but got nil")
return
}
if tt.validate != nil {
tt.validate(t, result)
}
})
}
}
func TestParseVllmCommandArrays(t *testing.T) {
command := "vllm serve test-model --middleware auth.py --middleware=cors.py --api-key key1 --api-key key2"
var opts backends.VllmServerOptions
resultAny, err := opts.ParseCommand(command)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
result, ok := resultAny.(*backends.VllmServerOptions)
if !ok {
t.Fatalf("expected *VllmServerOptions, got %T", resultAny)
}
expectedMiddleware := []string{"auth.py", "cors.py"}
if len(result.Middleware) != len(expectedMiddleware) {
t.Errorf("expected %d middleware items, got %d", len(expectedMiddleware), len(result.Middleware))
}
for i, v := range expectedMiddleware {
if i >= len(result.Middleware) {
t.Errorf("missing middleware[%d]=%s", i, v)
continue
}
if result.Middleware[i] != v {
t.Errorf("expected middleware[%d]=%s got %s", i, v, result.Middleware[i])
}
}
expectedAPIKeys := []string{"key1", "key2"}
if len(result.APIKey) != len(expectedAPIKeys) {
t.Errorf("expected %d api keys, got %d", len(expectedAPIKeys), len(result.APIKey))
}
for i, v := range expectedAPIKeys {
if i >= len(result.APIKey) {
t.Errorf("missing api_key[%d]=%s", i, v)
continue
}
if result.APIKey[i] != v {
t.Errorf("expected api_key[%d]=%s got %s", i, v, result.APIKey[i])
}
}
}
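The test above pins down two parser behaviors worth calling out: repeated flags and the "--flag=value" form accumulate into the same slice field, and flag names are normalized (dashes become underscores) to match the keys of the multi-valued set. The same round trip, condensed into a runnable sketch:

package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends"
)

func main() {
	var opts backends.VllmServerOptions
	result, err := opts.ParseCommand(
		"vllm serve test-model --middleware auth.py --middleware=cors.py --api-key key1 --api-key key2")
	if err != nil {
		log.Fatal(err)
	}
	parsed := result.(*backends.VllmServerOptions)
	fmt.Println(parsed.Middleware) // [auth.py cors.py]
	fmt.Println(parsed.APIKey)     // [key1 key2]
}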
func TestVllmBuildCommandArgs_BooleanFields(t *testing.T) {
tests := []struct {
name string
options backends.VllmServerOptions
expected []string
excluded []string
}{
{
name: "enable_log_outputs true",
options: backends.VllmServerOptions{
EnableLogOutputs: true,
},
expected: []string{"--enable-log-outputs"},
},
{
name: "enable_log_outputs false",
options: backends.VllmServerOptions{
EnableLogOutputs: false,
},
excluded: []string{"--enable-log-outputs"},
},
{
name: "multiple booleans",
options: backends.VllmServerOptions{
EnableLogOutputs: true,
TrustRemoteCode: true,
EnablePrefixCaching: true,
DisableLogStats: false,
},
expected: []string{"--enable-log-outputs", "--trust-remote-code", "--enable-prefix-caching"},
excluded: []string{"--disable-log-stats"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
args := tt.options.BuildCommandArgs()
for _, expectedArg := range tt.expected {
if !testutil.Contains(args, expectedArg) {
t.Errorf("Expected argument %q not found in %v", expectedArg, args)
}
}
for _, excludedArg := range tt.excluded {
if testutil.Contains(args, excludedArg) {
t.Errorf("Excluded argument %q found in %v", excludedArg, args)
}
}
})
}
}
func TestVllmBuildCommandArgs_ZeroValues(t *testing.T) {
options := backends.VllmServerOptions{
Port: 0, // Should be excluded
TensorParallelSize: 0, // Should be excluded
GPUMemoryUtilization: 0, // Should be excluded
Model: "", // Should be excluded (positional arg)
Host: "", // Should be excluded
EnableLogOutputs: false, // Should be excluded
}
args := options.BuildCommandArgs()
// Zero values should not appear in arguments
excludedArgs := []string{
"--port", "0",
"--tensor-parallel-size", "0",
"--gpu-memory-utilization", "0",
"--host", "",
"--enable-log-outputs",
}
for _, excludedArg := range excludedArgs {
if testutil.Contains(args, excludedArg) {
t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
}
}
// Model should not be present as positional arg when empty
if len(args) > 0 && args[0] == "" {
t.Errorf("Empty model should not be present as positional argument")
}
}
func TestVllmBuildCommandArgs_ArrayFields(t *testing.T) {
options := backends.VllmServerOptions{
AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
AllowedMethods: []string{"GET", "POST"},
Middleware: []string{"middleware1", "middleware2", "middleware3"},
}
args := options.BuildCommandArgs()
// Check that each array value appears with its flag
expectedOccurrences := map[string][]string{
"--allowed-origins": {"http://localhost:3000", "https://example.com"},
"--allowed-methods": {"GET", "POST"},
"--middleware": {"middleware1", "middleware2", "middleware3"},
}
for flag, values := range expectedOccurrences {
for _, value := range values {
if !testutil.ContainsFlagWithValue(args, flag, value) {
t.Errorf("Expected %s %s, not found in %v", flag, value, args)
}
}
}
}
func TestVllmBuildCommandArgs_EmptyArrays(t *testing.T) {
options := backends.VllmServerOptions{
AllowedOrigins: []string{}, // Empty array should not generate args
Middleware: []string{}, // Empty array should not generate args
}
args := options.BuildCommandArgs()
excludedArgs := []string{"--allowed-origins", "--middleware"}
for _, excludedArg := range excludedArgs {
if testutil.Contains(args, excludedArg) {
t.Errorf("Empty array should not generate argument %q in %v", excludedArg, args)
}
}
}
func TestVllmBuildCommandArgs_PositionalModel(t *testing.T) {
options := backends.VllmServerOptions{
Model: "microsoft/DialoGPT-medium",
Port: 8080,
Host: "localhost",
TensorParallelSize: 2,
GPUMemoryUtilization: 0.8,
EnableLogOutputs: true,
}
args := options.BuildCommandArgs()
// Check that model is the first positional argument (not a --model flag)
if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
}
// Check that --model flag is NOT present (since model should be positional)
if testutil.Contains(args, "--model") {
t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
}
// Check other flags
if !testutil.ContainsFlagWithValue(args, "--tensor-parallel-size", "2") {
t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
}
if !testutil.ContainsFlagWithValue(args, "--gpu-memory-utilization", "0.8") {
t.Errorf("Expected --gpu-memory-utilization 0.8 not found in %v", args)
}
if !testutil.Contains(args, "--enable-log-outputs") {
t.Errorf("Expected --enable-log-outputs not found in %v", args)
}
if !testutil.ContainsFlagWithValue(args, "--host", "localhost") {
t.Errorf("Expected --host localhost not found in %v", args)
}
if !testutil.ContainsFlagWithValue(args, "--port", "8080") {
t.Errorf("Expected --port 8080 not found in %v", args)
}
}
func TestParseVllmCommand_ExtraArgs(t *testing.T) {
tests := []struct {
name string
command string
expectErr bool
validate func(*testing.T, *backends.VllmServerOptions)
}{
{
name: "extra args with known fields",
command: "vllm serve llama-model --tensor-parallel-size 2 --unknown-flag value --new-bool-flag",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "llama-model" {
t.Errorf("expected model 'llama-model', got '%s'", opts.Model)
}
if opts.TensorParallelSize != 2 {
t.Errorf("expected tensor_parallel_size 2, got %d", opts.TensorParallelSize)
}
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["unknown_flag"]; !ok || val != "value" {
t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", val)
}
if val, ok := opts.ExtraArgs["new_bool_flag"]; !ok || val != "true" {
t.Errorf("expected extra_args[new_bool_flag]='true', got '%s'", val)
}
},
},
{
name: "only extra args",
command: "vllm serve model --experimental-feature --custom-param test",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["experimental_feature"]; !ok || val != "true" {
t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", val)
}
if val, ok := opts.ExtraArgs["custom_param"]; !ok || val != "test" {
t.Errorf("expected extra_args[custom_param]='test', got '%s'", val)
}
},
},
{
name: "extra args without model positional",
command: "vllm serve --model my-model --new-feature enabled --beta-flag",
expectErr: false,
validate: func(t *testing.T, opts *backends.VllmServerOptions) {
if opts.Model != "my-model" {
t.Errorf("expected model 'my-model', got '%s'", opts.Model)
}
if opts.ExtraArgs == nil {
t.Fatal("expected extra_args to be non-nil")
}
if val, ok := opts.ExtraArgs["new_feature"]; !ok || val != "enabled" {
t.Errorf("expected extra_args[new_feature]='enabled', got '%s'", val)
}
if val, ok := opts.ExtraArgs["beta_flag"]; !ok || val != "true" {
t.Errorf("expected extra_args[beta_flag]='true', got '%s'", val)
}
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var opts backends.VllmServerOptions
result, err := opts.ParseCommand(tt.command)
if tt.expectErr && err == nil {
t.Error("expected error but got none")
return
}
if !tt.expectErr && err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if !tt.expectErr && tt.validate != nil {
vllmOpts, ok := result.(*backends.VllmServerOptions)
if !ok {
t.Fatal("result is not *VllmServerOptions")
}
tt.validate(t, vllmOpts)
}
})
}
}
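Taken together with the convertExtraArgsToFlags call at the top of this diff, these tests imply a round trip: unknown flags are parsed into ExtraArgs under underscored keys, and BuildCommandArgs re-emits them as flags. A hedged sketch of that round trip; the ordering and exact rendering of the re-emitted flags is an assumption, not something these tests assert:

package main

import (
	"fmt"
	"log"

	"llamactl/pkg/backends"
)

func main() {
	var opts backends.VllmServerOptions
	result, err := opts.ParseCommand("vllm serve my-model --experimental-feature --custom-param test")
	if err != nil {
		log.Fatal(err)
	}
	parsed := result.(*backends.VllmServerOptions)
	fmt.Println(parsed.ExtraArgs)
	// map[custom_param:test experimental_feature:true]
	fmt.Println(parsed.BuildCommandArgs())
	// e.g. [my-model --custom-param test --experimental-feature] (ordering assumed)
}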


@@ -1,164 +1,62 @@
package config
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// BackendConfig contains backend executable configurations
type BackendConfig struct {
// Path to llama-server executable (llama.cpp backend)
LlamaExecutable string `yaml:"llama_executable"`
// Path to mlx_lm executable (MLX-LM backend)
MLXLMExecutable string `yaml:"mlx_lm_executable"`
// Path to vllm executable (vLLM backend)
VllmExecutable string `yaml:"vllm_executable"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
DataDir string `yaml:"data_dir"`
// Instance config directory override
InstancesDir string `yaml:"configs_dir"`
// Logs directory override
LogsDir string `yaml:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
// Enable LRU eviction of least recently used instances
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Backends: BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
VllmExecutable: "vllm",
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
defaultDataDir := getDefaultDataDir()
cfg := getDefaultConfig(defaultDataDir)
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// If local node is not defined in nodes, add it with default config
if _, ok := cfg.Nodes[cfg.LocalNode]; !ok {
cfg.Nodes[cfg.LocalNode] = NodeConfig{}
}
// 3. Override with environment variables
loadEnvVars(&cfg)
// Log warning if deprecated inference keys are present
if len(cfg.Auth.InferenceKeys) > 0 {
log.Println("⚠️ Config-based inference keys are no longer supported and will be ignored.")
log.Println(" Please create inference keys in web UI or via management API.")
}
// Set default directories if not specified
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.DataDir, "instances")
} else {
// Log deprecation warning if using custom instances dir
log.Println("⚠️ Instances directory is deprecated and will be removed in future versions. Instances are persisted in the database.")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.DataDir, "logs")
}
if cfg.Database.Path == "" {
cfg.Database.Path = filepath.Join(cfg.DataDir, "llamactl.db")
}
// Validate port range
if cfg.Instances.PortRange[0] <= 0 || cfg.Instances.PortRange[1] <= 0 || cfg.Instances.PortRange[0] >= cfg.Instances.PortRange[1] {
return AppConfig{}, fmt.Errorf("invalid port range: %v", cfg.Instances.PortRange)
}
return cfg, nil
}
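A sketch of the three-layer precedence in action, written in the same style as the package's tests (the temp file and values are arbitrary):

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"

	"llamactl/pkg/config"
)

func main() {
	dir, err := os.MkdirTemp("", "llamactl-demo")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	// Layer 2: the config file overrides the built-in default port (8080).
	path := filepath.Join(dir, "config.yaml")
	if err := os.WriteFile(path, []byte("server:\n  port: 9090\n"), 0o644); err != nil {
		log.Fatal(err)
	}

	// Layer 3: the environment overrides the file.
	os.Setenv("LLAMACTL_PORT", "7070")
	defer os.Unsetenv("LLAMACTL_PORT")

	cfg, err := config.LoadConfig(path)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Server.Port) // 7070
}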
@@ -179,6 +77,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
log.Printf("Read config at %s", path)
return nil
}
}
@@ -186,203 +85,30 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
// SanitizedCopy returns a copy of the AppConfig with sensitive information removed
func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
// Deep copy via JSON marshal/unmarshal to avoid concurrent map access
data, err := json.Marshal(cfg)
if err != nil {
log.Printf("Failed to marshal config for sanitization: %v", err)
return AppConfig{}, err
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.Instances.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
var sanitized AppConfig
if err := json.Unmarshal(data, &sanitized); err != nil {
log.Printf("Failed to unmarshal config for sanitization: %v", err)
return AppConfig{}, err
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Backends.LlamaExecutable = llamaExec
}
if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
cfg.Backends.MLXLMExecutable = mlxLMExec
}
if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
cfg.Backends.VllmExecutable = vllmExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
// Clear sensitive information
sanitized.Auth.InferenceKeys = []string{}
sanitized.Auth.ManagementKeys = []string{}
// Clear API keys from nodes
for nodeName, node := range sanitized.Nodes {
node.APIKey = ""
sanitized.Nodes[nodeName] = node
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
return sanitized, nil
}
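A hedged usage sketch for SanitizedCopy: serve only the sanitized copy from any config-inspection endpoint so auth keys and node API keys never leave the process. The handler name and route here are hypothetical:

package main

import (
	"encoding/json"
	"log"
	"net/http"

	"llamactl/pkg/config"
)

// configHandler is a hypothetical endpoint exposing the running config.
func configHandler(cfg *config.AppConfig) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		sanitized, err := cfg.SanitizedCopy()
		if err != nil {
			http.Error(w, "failed to sanitize config", http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(sanitized)
	}
}

func main() {
	cfg, err := config.LoadConfig("") // empty path falls back to default locations
	if err != nil {
		log.Fatal(err)
	}
	http.Handle("/api/v1/config", configHandler(&cfg))
	log.Fatal(http.ListenAndServe(":8081", nil))
}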


@@ -7,6 +7,20 @@ import (
"testing"
)
// getBackendSettings resolves backend settings for the given backend type
func getBackendSettings(bc *config.BackendConfig, backendType string) config.BackendSettings {
switch backendType {
case "llama-cpp":
return bc.LlamaCpp
case "vllm":
return bc.VLLM
case "mlx":
return bc.MLX
default:
return config.BackendSettings{}
}
}
func TestLoadConfig_Defaults(t *testing.T) {
// Test loading config when no file exists and no env vars set
cfg, err := config.LoadConfig("nonexistent-file.yaml")
@@ -64,8 +78,8 @@ server:
port: 9090
instances:
port_range: [7000, 8000]
logs_dir: "/custom/logs"
max_instances: 5
logs_dir: "/custom/logs"
llama_executable: "/usr/bin/llama-server"
default_auto_restart: false
default_max_restarts: 10
@@ -117,7 +131,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
"LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000",
"LLAMACTL_LOGS_DIR": "/env/logs",
"LLAMACTL_MAX_INSTANCES": "20",
"LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server",
"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
"LLAMACTL_DEFAULT_MAX_RESTARTS": "7",
"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +163,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
if cfg.Instances.MaxInstances != 20 {
t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
}
if cfg.Backends.LlamaExecutable != "/env/llama-server" {
t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
if cfg.Backends.LlamaCpp.Command != "llama-server" {
t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
}
if cfg.Instances.DefaultAutoRestart {
t.Error("Expected auto restart to be false")
@@ -206,30 +219,6 @@ instances:
}
}
func TestLoadConfig_InvalidYAML(t *testing.T) {
// Create a temporary config file with invalid YAML
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "invalid-config.yaml")
invalidContent := `
server:
host: "localhost"
port: not-a-number
instances:
[invalid yaml structure
`
err := os.WriteFile(configFile, []byte(invalidContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
_, err = config.LoadConfig(configFile)
if err == nil {
t.Error("Expected LoadConfig to return error for invalid YAML")
}
}
func TestParsePortRange(t *testing.T) {
tests := []struct {
name string
@@ -258,94 +247,256 @@ func TestParsePortRange(t *testing.T) {
}
}
func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
// Test that environment variables are properly converted to correct types
testCases := []struct {
envVar string
envValue string
checkFn func(*config.AppConfig) bool
desc string
}{
{
envVar: "LLAMACTL_PORT",
envValue: "invalid-port",
checkFn: func(c *config.AppConfig) bool { return c.Server.Port == 8080 }, // Should keep default
desc: "invalid port number should keep default",
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "custom-llama",
Args: []string{"--verbose"},
Docker: &config.DockerSettings{
Enabled: true,
Image: "custom-llama:latest",
Args: []string{"--gpus", "all"},
Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
},
},
{
envVar: "LLAMACTL_MAX_INSTANCES",
envValue: "not-a-number",
checkFn: func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
desc: "invalid max instances should keep default",
VLLM: config.BackendSettings{
Command: "custom-vllm",
Args: []string{"serve", "--debug"},
},
{
envVar: "LLAMACTL_DEFAULT_AUTO_RESTART",
envValue: "invalid-bool",
checkFn: func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
desc: "invalid boolean should keep default",
},
{
envVar: "LLAMACTL_INSTANCE_PORT_RANGE",
envValue: "invalid-range",
checkFn: func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
desc: "invalid port range should keep default",
MLX: config.BackendSettings{
Command: "custom-mlx",
Args: []string{},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
os.Setenv(tc.envVar, tc.envValue)
defer os.Unsetenv(tc.envVar)
// Test llama-cpp with Docker
settings := getBackendSettings(bc, "llama-cpp")
if settings.Command != "custom-llama" {
t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
}
if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
}
if settings.Docker == nil || !settings.Docker.Enabled {
t.Error("Expected Docker to be enabled")
}
if settings.Docker.Image != "custom-llama:latest" {
t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Test vLLM without Docker
settings = getBackendSettings(bc, "vllm")
if settings.Command != "custom-vllm" {
t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
}
if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
}
if settings.Docker != nil && settings.Docker.Enabled {
t.Error("Expected Docker to be disabled or nil")
}
if !tc.checkFn(&cfg) {
t.Errorf("Test failed: %s", tc.desc)
}
})
// Test MLX
settings = getBackendSettings(bc, "mlx")
if settings.Command != "custom-mlx" {
t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
}
}
func TestLoadConfig_PartialFile(t *testing.T) {
// Test that partial config files work correctly (missing sections should use defaults)
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "partial-config.yaml")
// Only specify server config, instances should use defaults
configContent := `
server:
host: "partial-host"
port: 7777
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
"LLAMACTL_LLAMACPP_COMMAND": "env-llama",
"LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4",
"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
"LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest",
"LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all",
"LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
"LLAMACTL_VLLM_COMMAND": "env-vllm",
"LLAMACTL_VLLM_DOCKER_ENABLED": "false",
"LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest",
"LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
"LLAMACTL_MLX_COMMAND": "env-mlx",
}
cfg, err := config.LoadConfig(configFile)
// Set env vars and ensure cleanup
for key, value := range envVars {
os.Setenv(key, value)
defer os.Unsetenv(key)
}
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
// Server config should be from file
if cfg.Server.Host != "partial-host" {
t.Errorf("Expected host 'partial-host', got %q", cfg.Server.Host)
// Verify llama-cpp environment overrides
if cfg.Backends.LlamaCpp.Command != "env-llama" {
t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
}
if cfg.Server.Port != 7777 {
t.Errorf("Expected port 7777, got %d", cfg.Server.Port)
expectedArgs := []string{"--verbose", "--threads", "4"}
if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
}
if !cfg.Backends.LlamaCpp.Docker.Enabled {
t.Error("Expected llama Docker to be enabled")
}
if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
}
expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
}
if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
}
if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
}
// Instances config should be defaults
if cfg.Instances.PortRange != [2]int{8000, 9000} {
t.Errorf("Expected default port range [8000, 9000], got %v", cfg.Instances.PortRange)
// Verify vLLM environment overrides
if cfg.Backends.VLLM.Command != "env-vllm" {
t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
}
if cfg.Instances.MaxInstances != -1 {
t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
if cfg.Backends.VLLM.Docker.Enabled {
t.Error("Expected vLLM Docker to be disabled")
}
if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
}
// Verify MLX environment overrides
if cfg.Backends.MLX.Command != "env-mlx" {
t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
}
}
func TestLoadConfig_LocalNode(t *testing.T) {
t.Run("default local node", func(t *testing.T) {
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "main" {
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
}
})
t.Run("local node from file", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "worker1"
nodes:
worker1:
address: ""
worker2:
address: "http://192.168.1.10:8080"
api_key: "test-key"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "worker1" {
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
}
// Verify nodes map (worker1 + worker2; the default "main" is not added when local_node is overridden)
if len(cfg.Nodes) != 2 {
t.Errorf("Expected 2 nodes (worker1 + worker2), got %d", len(cfg.Nodes))
}
// Verify local node exists and is empty
localNode, exists := cfg.Nodes["worker1"]
if !exists {
t.Error("Expected local node 'worker1' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
if localNode.APIKey != "" {
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
}
// Verify remote node
remoteNode, exists := cfg.Nodes["worker2"]
if !exists {
t.Error("Expected remote node 'worker2' to exist in nodes map")
}
if remoteNode.Address != "http://192.168.1.10:8080" {
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
}
// Verify the default "main" node was not added (local_node is overridden)
_, exists = cfg.Nodes["main"]
if exists {
t.Error("Default 'main' node should not exist when local_node is overridden")
}
})
t.Run("custom local node name in config", func(t *testing.T) {
tempDir := t.TempDir()
configFile := filepath.Join(tempDir, "test-config.yaml")
configContent := `
local_node: "primary"
nodes:
primary:
address: ""
worker1:
address: "http://192.168.1.10:8080"
`
err := os.WriteFile(configFile, []byte(configContent), 0644)
if err != nil {
t.Fatalf("Failed to write test config file: %v", err)
}
cfg, err := config.LoadConfig(configFile)
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "primary" {
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
}
// Verify nodes map (primary + worker1)
if len(cfg.Nodes) != 2 {
t.Errorf("Expected 2 nodes (primary + worker1), got %d", len(cfg.Nodes))
}
localNode, exists := cfg.Nodes["primary"]
if !exists {
t.Error("Expected local node 'primary' to exist in nodes map")
}
if localNode.Address != "" {
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
}
})
t.Run("local node from environment variable", func(t *testing.T) {
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
cfg, err := config.LoadConfig("nonexistent-file.yaml")
if err != nil {
t.Fatalf("LoadConfig failed: %v", err)
}
if cfg.LocalNode != "custom-node" {
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
}
})
}

pkg/config/defaults.go Normal file

@@ -0,0 +1,154 @@
package config
import (
"os"
"path/filepath"
"runtime"
"time"
)
func getDefaultConfig(dataDir string) AppConfig {
return AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
DataDir: dataDir,
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(dataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(dataDir, "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
// NOTE: an empty string is used as a placeholder, since InstancesDir
// is resolved relative to DataDir when not explicitly set.
InstancesDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
LogsDir: "", // Will be set to data_dir/logs if empty
LogRotationEnabled: true,
LogRotationMaxSize: 100,
LogRotationCompress: false,
},
Database: DatabaseConfig{
Path: "", // Will be set to data_dir/llamactl.db if empty
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
}
// getDefaultDataDir returns platform-specific default data directory
func getDefaultDataDir() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Check ./llamactl.yaml and ./config.yaml first as default config file candidates
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
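loadConfigFile (shown only partially in this diff) presumably walks these candidates in order and reads the first one that exists when no explicit path is given. A plausible in-package sketch of that loop; the helper name findConfigFile is hypothetical:

// findConfigFile returns the first existing candidate path, or "" if none exist.
func findConfigFile() string {
	for _, loc := range getDefaultConfigLocations() {
		if _, err := os.Stat(loc); err == nil {
			return loc
		}
	}
	return ""
}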

pkg/config/env.go Normal file

@@ -0,0 +1,325 @@
package config
import (
"os"
"strconv"
"strings"
"time"
)
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
// Log rotation config
if logRotationEnabled := os.Getenv("LLAMACTL_LOG_ROTATION_ENABLED"); logRotationEnabled != "" {
if b, err := strconv.ParseBool(logRotationEnabled); err == nil {
cfg.Instances.LogRotationEnabled = b
}
}
if logRotationMaxSize := os.Getenv("LLAMACTL_LOG_ROTATION_MAX_SIZE"); logRotationMaxSize != "" {
if m, err := strconv.Atoi(logRotationMaxSize); err == nil {
cfg.Instances.LogRotationMaxSize = m
}
}
if logRotationCompress := os.Getenv("LLAMACTL_LOG_ROTATION_COMPRESS"); logRotationCompress != "" {
if b, err := strconv.ParseBool(logRotationCompress); err == nil {
cfg.Instances.LogRotationCompress = b
}
}
}
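Every override above is optional and individually type-checked; a value that fails to parse is silently dropped, so the default (or file value) survives. A sketch in the style of the package's tests:

package main

import (
	"fmt"
	"log"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Comma-separated KEY=value pairs populate Docker environment maps.
	os.Setenv("LLAMACTL_VLLM_DOCKER_ENV", "CUDA_VISIBLE_DEVICES=1,HF_HOME=/models")
	// Durations use Go syntax via time.ParseDuration.
	os.Setenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME", "1h")
	// Invalid values are ignored; the default port (8080) is kept.
	os.Setenv("LLAMACTL_PORT", "not-a-number")

	cfg, err := config.LoadConfig("nonexistent-file.yaml")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Server.Port)                                 // 8080
	fmt.Println(cfg.Database.ConnMaxLifetime)                    // 1h0m0s
	fmt.Println(cfg.Backends.VLLM.Docker.Environment["HF_HOME"]) // /models
}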
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
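Accepted forms and the invalid-input sentinel, at a glance:

package main

import (
	"fmt"

	"llamactl/pkg/config"
)

func main() {
	fmt.Println(config.ParsePortRange("8000-9000"))  // [8000 9000]
	fmt.Println(config.ParsePortRange("8000, 9000")) // [8000 9000] (whitespace is trimmed)
	fmt.Println(config.ParsePortRange("oops"))       // [0 0], the sentinel for invalid input
}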
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided headers map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
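Both helpers share the KEY=value pairing but differ in separator: env pairs split on ",", while headers split on ";", presumably so header values themselves may contain commas. An in-package sketch (the helpers are unexported; the header names are illustrative):

func exampleParsing() {
	env := map[string]string{}
	parseEnvVars("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4", env)
	// env == map[CUDA_VISIBLE_DEVICES:0 OMP_NUM_THREADS:4]

	headers := map[string]string{}
	parseHeaders("X-Served-By=llamactl;Cache-Control=no-store, no-cache", headers)
	// headers == map[Cache-Control:no-store, no-cache X-Served-By:llamactl]
}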

pkg/config/types.go Normal file

@@ -0,0 +1,149 @@
package config
import "time"
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Instance config directory override (relative to data_dir if not absolute)
InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction of idle instances when the running-instance limit is reached
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Log rotation enabled
LogRotationEnabled bool `yaml:"log_rotation_enabled" default:"true"`
// Maximum log file size in MB before rotation
LogRotationMaxSize int `yaml:"log_rotation_max_size" default:"100"`
// Whether to compress rotated log files
LogRotationCompress bool `yaml:"log_rotation_compress" default:"false"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
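
Since every struct above carries yaml tags, a config file maps onto AppConfig directly. A minimal loading sketch, assuming gopkg.in/yaml.v3 (the project's actual loader may differ):

package main

import (
    "fmt"
    "log"

    "gopkg.in/yaml.v3"

    "llamactl/pkg/config"
)

func main() {
    raw := []byte(`
server:
  host: 127.0.0.1
  port: 8080
instances:
  max_instances: 5
  logs_dir: /var/log/llamactl
local_node: main
`)
    var cfg config.AppConfig
    if err := yaml.Unmarshal(raw, &cfg); err != nil {
        log.Fatalf("parse config: %v", err)
    }
    fmt.Println(cfg.Server.Host, cfg.Server.Port, cfg.Instances.MaxInstances) // 127.0.0.1 8080 5
}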

pkg/database/apikeys.go Normal file

@@ -0,0 +1,211 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
"time"
)
// CreateKey inserts a new API key with permissions (transactional)
func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions []auth.KeyPermission) error {
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("failed to begin transaction: %w", err)
}
defer tx.Rollback()
// Insert the API key
query := `
INSERT INTO api_keys (key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
`
var expiresAt sql.NullInt64
if key.ExpiresAt != nil {
expiresAt = sql.NullInt64{Int64: *key.ExpiresAt, Valid: true}
}
result, err := tx.ExecContext(ctx, query,
key.KeyHash, key.Name, key.UserID, key.PermissionMode,
expiresAt, key.CreatedAt, key.UpdatedAt,
)
if err != nil {
return fmt.Errorf("failed to insert API key: %w", err)
}
keyID, err := result.LastInsertId()
if err != nil {
return fmt.Errorf("failed to get last insert ID: %w", err)
}
key.ID = int(keyID)
// Insert permissions if per-instance mode
if key.PermissionMode == auth.PermissionModePerInstance {
for _, perm := range permissions {
query := `
INSERT INTO key_permissions (key_id, instance_id)
VALUES (?, ?)
`
_, err := tx.ExecContext(ctx, query, key.ID, perm.InstanceID)
if err != nil {
return fmt.Errorf("failed to insert permission for instance %d: %w", perm.InstanceID, err)
}
}
}
return tx.Commit()
}
// GetKeyByID retrieves an API key by ID
func (db *sqliteDB) GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error) {
query := `
SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
FROM api_keys
WHERE id = ?
`
var key auth.APIKey
var expiresAt sql.NullInt64
var lastUsedAt sql.NullInt64
err := db.QueryRowContext(ctx, query, id).Scan(
&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
)
if err != nil {
if err == sql.ErrNoRows {
return nil, fmt.Errorf("API key not found")
}
return nil, fmt.Errorf("failed to query API key: %w", err)
}
if expiresAt.Valid {
key.ExpiresAt = &expiresAt.Int64
}
if lastUsedAt.Valid {
key.LastUsedAt = &lastUsedAt.Int64
}
return &key, nil
}
// GetUserKeys retrieves all API keys for a user
func (db *sqliteDB) GetUserKeys(ctx context.Context, userID string) ([]*auth.APIKey, error) {
query := `
SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
FROM api_keys
WHERE user_id = ?
ORDER BY created_at DESC
`
rows, err := db.QueryContext(ctx, query, userID)
if err != nil {
return nil, fmt.Errorf("failed to query API keys: %w", err)
}
defer rows.Close()
var keys []*auth.APIKey
for rows.Next() {
var key auth.APIKey
var expiresAt sql.NullInt64
var lastUsedAt sql.NullInt64
err := rows.Scan(
&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
)
if err != nil {
return nil, fmt.Errorf("failed to scan API key: %w", err)
}
if expiresAt.Valid {
key.ExpiresAt = &expiresAt.Int64
}
if lastUsedAt.Valid {
key.LastUsedAt = &lastUsedAt.Int64
}
keys = append(keys, &key)
}
return keys, nil
}
// GetActiveKeys retrieves all non-expired API keys
func (db *sqliteDB) GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error) {
query := `
SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
FROM api_keys
WHERE expires_at IS NULL OR expires_at > ?
ORDER BY created_at DESC
`
now := time.Now().Unix()
rows, err := db.QueryContext(ctx, query, now)
if err != nil {
return nil, fmt.Errorf("failed to query active API keys: %w", err)
}
defer rows.Close()
var keys []*auth.APIKey
for rows.Next() {
var key auth.APIKey
var expiresAt sql.NullInt64
var lastUsedAt sql.NullInt64
err := rows.Scan(
&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
)
if err != nil {
return nil, fmt.Errorf("failed to scan API key: %w", err)
}
if expiresAt.Valid {
key.ExpiresAt = &expiresAt.Int64
}
if lastUsedAt.Valid {
key.LastUsedAt = &lastUsedAt.Int64
}
keys = append(keys, &key)
}
return keys, nil
}
// DeleteKey removes an API key (cascades to permissions)
func (db *sqliteDB) DeleteKey(ctx context.Context, id int) error {
query := `DELETE FROM api_keys WHERE id = ?`
result, err := db.ExecContext(ctx, query, id)
if err != nil {
return fmt.Errorf("failed to delete API key: %w", err)
}
rowsAffected, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("failed to get rows affected: %w", err)
}
if rowsAffected == 0 {
return fmt.Errorf("API key not found")
}
return nil
}
// TouchKey updates the last_used_at timestamp
func (db *sqliteDB) TouchKey(ctx context.Context, id int) error {
query := `UPDATE api_keys SET last_used_at = ?, updated_at = ? WHERE id = ?`
now := time.Now().Unix()
_, err := db.ExecContext(ctx, query, now, now, id)
if err != nil {
return fmt.Errorf("failed to update last used timestamp: %w", err)
}
return nil
}
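
A usage sketch for the key store, written against the AuthStore interface from database.go (field values are illustrative, and auth.APIKey's timestamp fields are assumed to be Unix int64s, matching how they are scanned above):

package main

import (
    "context"
    "fmt"
    "time"

    "llamactl/pkg/auth"
    "llamactl/pkg/database"
)

// createScopedKey grants a new key inference access to a single instance.
func createScopedKey(ctx context.Context, store database.AuthStore, instanceID int) error {
    now := time.Now().Unix()
    key := &auth.APIKey{
        KeyHash:        "sha256:placeholder", // hash of the raw key; hashing happens elsewhere
        Name:           "ci-inference",
        UserID:         "user-123",
        PermissionMode: auth.PermissionModePerInstance,
        CreatedAt:      now,
        UpdatedAt:      now,
    }
    // CreateKey inserts the key and its permission rows in one transaction
    // and sets key.ID from the auto-increment column.
    if err := store.CreateKey(ctx, key, []auth.KeyPermission{{InstanceID: instanceID}}); err != nil {
        return err
    }
    ok, err := store.HasPermission(ctx, key.ID, instanceID)
    if err != nil {
        return err
    }
    fmt.Printf("key %d can reach instance %d: %v\n", key.ID, instanceID, ok)
    return nil
}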

pkg/database/database.go Normal file

@@ -0,0 +1,127 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
"llamactl/pkg/instance"
"log"
"path/filepath"
"time"
_ "github.com/mattn/go-sqlite3"
)
// InstanceStore defines interface for instance persistence operations
type InstanceStore interface {
Save(inst *instance.Instance) error
Delete(name string) error
LoadAll() ([]*instance.Instance, error)
Close() error
}
// AuthStore defines the interface for authentication operations
type AuthStore interface {
CreateKey(ctx context.Context, key *auth.APIKey, permissions []auth.KeyPermission) error
GetUserKeys(ctx context.Context, userID string) ([]*auth.APIKey, error)
GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error)
GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error)
DeleteKey(ctx context.Context, id int) error
TouchKey(ctx context.Context, id int) error
GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPermission, error)
HasPermission(ctx context.Context, keyID, instanceID int) (bool, error)
}
// Config contains database configuration settings
type Config struct {
// Database file path (relative to data_dir or absolute)
Path string
// Connection settings
MaxOpenConnections int
MaxIdleConnections int
ConnMaxLifetime time.Duration
}
// sqliteDB wraps database connection with configuration
type sqliteDB struct {
*sql.DB
config *Config
}
// Open creates a new database connection with provided configuration
func Open(config *Config) (*sqliteDB, error) {
if config == nil {
return nil, fmt.Errorf("database config cannot be nil")
}
if config.Path == "" {
return nil, fmt.Errorf("database path cannot be empty")
}
// Note: the database directory itself is created by the manager when
// auto_create_dirs is enabled; here we only log the resolved location
dbDir := filepath.Dir(config.Path)
if dbDir != "." && dbDir != "/" {
log.Printf("Database will be created at: %s", config.Path)
}
// Open SQLite database with proper options
// - _journal_mode=WAL: Write-Ahead Logging for better concurrency
// - _busy_timeout=5000: Wait up to 5 seconds if database is locked
// - _foreign_keys=1: Enable foreign key constraints
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=5000&_foreign_keys=1", config.Path)
sqlDB, err := sql.Open("sqlite3", dsn)
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
}
// Configure connection pool
if config.MaxOpenConnections > 0 {
sqlDB.SetMaxOpenConns(config.MaxOpenConnections)
}
if config.MaxIdleConnections > 0 {
sqlDB.SetMaxIdleConns(config.MaxIdleConnections)
}
if config.ConnMaxLifetime > 0 {
sqlDB.SetConnMaxLifetime(config.ConnMaxLifetime)
}
// Verify database connection
if err := sqlDB.Ping(); err != nil {
sqlDB.Close()
return nil, fmt.Errorf("failed to ping database: %w", err)
}
log.Printf("Database connection established: %s", config.Path)
return &sqliteDB{
DB: sqlDB,
config: config,
}, nil
}
// Close closes database connection
func (db *sqliteDB) Close() error {
if db.DB != nil {
log.Println("Closing database connection")
// Checkpoint WAL to merge changes back to main database file
if _, err := db.DB.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
log.Printf("Warning: Failed to checkpoint WAL: %v", err)
}
return db.DB.Close()
}
return nil
}
// HealthCheck verifies that database is accessible
func (db *sqliteDB) HealthCheck() error {
if db.DB == nil {
return fmt.Errorf("database connection is nil")
}
return db.DB.Ping()
}
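
Wiring this together at startup might look like the following sketch (the path and pool sizes are illustrative; RunMigrations is defined in the migrations file below):

package main

import (
    "log"
    "time"

    "llamactl/pkg/database"
)

func main() {
    db, err := database.Open(&database.Config{
        Path:               "/var/lib/llamactl/llamactl.db",
        MaxOpenConnections: 4,
        MaxIdleConnections: 2,
        ConnMaxLifetime:    time.Hour,
    })
    if err != nil {
        log.Fatalf("open database: %v", err)
    }
    defer db.Close()

    // Apply any pending schema migrations before serving traffic.
    if err := database.RunMigrations(db); err != nil {
        log.Fatalf("migrate: %v", err)
    }
}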

pkg/database/instances.go Normal file

@@ -0,0 +1,328 @@
package database
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"log"
"time"
)
// instanceRow represents a row in the instances table
type instanceRow struct {
ID int
Name string
Status string
CreatedAt int64
UpdatedAt int64
OptionsJSON string
OwnerUserID sql.NullString
}
// Create inserts a new instance into the database
func (db *sqliteDB) Create(ctx context.Context, inst *instance.Instance) error {
if inst == nil {
return fmt.Errorf("instance cannot be nil")
}
opts := inst.GetOptions()
if opts == nil {
return fmt.Errorf("instance options cannot be nil")
}
// Convert instance to database row
row, err := db.instanceToRow(inst)
if err != nil {
return fmt.Errorf("failed to convert instance to row: %w", err)
}
// Insert into database
query := `
INSERT INTO instances (
name, status, created_at, updated_at, options_json, owner_user_id
) VALUES (?, ?, ?, ?, ?, ?)
`
result, err := db.DB.ExecContext(ctx, query,
row.Name, row.Status, row.CreatedAt, row.UpdatedAt, row.OptionsJSON, row.OwnerUserID,
)
if err != nil {
return fmt.Errorf("failed to insert instance: %w", err)
}
// Get the auto-generated ID and set it on the instance
id, err := result.LastInsertId()
if err != nil {
return fmt.Errorf("failed to get last insert ID: %w", err)
}
inst.ID = int(id)
return nil
}
// GetByName retrieves an instance by name
func (db *sqliteDB) GetByName(ctx context.Context, name string) (*instance.Instance, error) {
query := `
SELECT id, name, status, created_at, updated_at, options_json, owner_user_id
FROM instances
WHERE name = ?
`
var row instanceRow
err := db.DB.QueryRowContext(ctx, query, name).Scan(
&row.ID, &row.Name, &row.Status, &row.CreatedAt, &row.UpdatedAt, &row.OptionsJSON, &row.OwnerUserID,
)
if err == sql.ErrNoRows {
return nil, fmt.Errorf("instance not found: %s", name)
}
if err != nil {
return nil, fmt.Errorf("failed to query instance: %w", err)
}
return db.rowToInstance(&row)
}
// GetAll retrieves all instances from the database
func (db *sqliteDB) GetAll(ctx context.Context) ([]*instance.Instance, error) {
query := `
SELECT id, name, status, created_at, updated_at, options_json, owner_user_id
FROM instances
ORDER BY created_at ASC
`
rows, err := db.DB.QueryContext(ctx, query)
if err != nil {
return nil, fmt.Errorf("failed to query instances: %w", err)
}
defer rows.Close()
var instances []*instance.Instance
for rows.Next() {
var row instanceRow
err := rows.Scan(
&row.ID, &row.Name, &row.Status, &row.CreatedAt, &row.UpdatedAt, &row.OptionsJSON, &row.OwnerUserID,
)
if err != nil {
log.Printf("Failed to scan instance row: %v", err)
continue
}
inst, err := db.rowToInstance(&row)
if err != nil {
log.Printf("Failed to convert row to instance: %v", err)
continue
}
instances = append(instances, inst)
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("error iterating rows: %w", err)
}
return instances, nil
}
// Update updates an existing instance
func (db *sqliteDB) Update(ctx context.Context, inst *instance.Instance) error {
if inst == nil {
return fmt.Errorf("instance cannot be nil")
}
opts := inst.GetOptions()
if opts == nil {
return fmt.Errorf("instance options cannot be nil")
}
// Convert instance to database row
row, err := db.instanceToRow(inst)
if err != nil {
return fmt.Errorf("failed to convert instance to row: %w", err)
}
// Update in database
query := `
UPDATE instances SET
status = ?, updated_at = ?, options_json = ?
WHERE name = ?
`
result, err := db.DB.ExecContext(ctx, query,
row.Status, row.UpdatedAt, row.OptionsJSON, row.Name,
)
if err != nil {
return fmt.Errorf("failed to update instance: %w", err)
}
rowsAffected, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("failed to get rows affected: %w", err)
}
if rowsAffected == 0 {
return fmt.Errorf("instance not found: %s", inst.Name)
}
return nil
}
// UpdateStatus updates only the status of an instance (optimized operation)
func (db *sqliteDB) UpdateStatus(ctx context.Context, name string, status instance.Status) error {
// Convert status to string
statusJSON, err := status.MarshalJSON()
if err != nil {
return fmt.Errorf("failed to marshal status: %w", err)
}
var statusStr string
if err := json.Unmarshal(statusJSON, &statusStr); err != nil {
return fmt.Errorf("failed to unmarshal status string: %w", err)
}
query := `
UPDATE instances SET
status = ?,
updated_at = ?
WHERE name = ?
`
result, err := db.DB.ExecContext(ctx, query, statusStr, time.Now().Unix(), name)
if err != nil {
return fmt.Errorf("failed to update instance status: %w", err)
}
rowsAffected, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("failed to get rows affected: %w", err)
}
if rowsAffected == 0 {
return fmt.Errorf("instance not found: %s", name)
}
return nil
}
// DeleteInstance removes an instance from the database
func (db *sqliteDB) DeleteInstance(ctx context.Context, name string) error {
query := `DELETE FROM instances WHERE name = ?`
result, err := db.DB.ExecContext(ctx, query, name)
if err != nil {
return fmt.Errorf("failed to delete instance: %w", err)
}
rowsAffected, err := result.RowsAffected()
if err != nil {
return fmt.Errorf("failed to get rows affected: %w", err)
}
if rowsAffected == 0 {
return fmt.Errorf("instance not found: %s", name)
}
return nil
}
// instanceToRow converts an Instance to a database row
func (db *sqliteDB) instanceToRow(inst *instance.Instance) (*instanceRow, error) {
opts := inst.GetOptions()
if opts == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
// Marshal options to JSON using the existing MarshalJSON method
optionsJSON, err := json.Marshal(opts)
if err != nil {
return nil, fmt.Errorf("failed to marshal options: %w", err)
}
// Convert status to string
statusJSON, err := inst.GetStatus().MarshalJSON()
if err != nil {
return nil, fmt.Errorf("failed to marshal status: %w", err)
}
var statusStr string
if err := json.Unmarshal(statusJSON, &statusStr); err != nil {
return nil, fmt.Errorf("failed to unmarshal status string: %w", err)
}
return &instanceRow{
Name: inst.Name,
Status: statusStr,
CreatedAt: inst.Created,
UpdatedAt: time.Now().Unix(),
OptionsJSON: string(optionsJSON),
}, nil
}
// rowToInstance converts a database row to an Instance
func (db *sqliteDB) rowToInstance(row *instanceRow) (*instance.Instance, error) {
// Unmarshal options from JSON using the existing UnmarshalJSON method
var opts instance.Options
if err := json.Unmarshal([]byte(row.OptionsJSON), &opts); err != nil {
return nil, fmt.Errorf("failed to unmarshal options: %w", err)
}
// Build complete instance JSON with all fields
instanceJSON, err := json.Marshal(map[string]any{
"id": row.ID,
"name": row.Name,
"created": row.CreatedAt,
"status": row.Status,
"options": json.RawMessage(row.OptionsJSON),
})
if err != nil {
return nil, fmt.Errorf("failed to marshal instance: %w", err)
}
// Unmarshal into a complete Instance
var inst instance.Instance
if err := json.Unmarshal(instanceJSON, &inst); err != nil {
return nil, fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Instance.UnmarshalJSON does not restore BackendOptions and Nodes (both carry json:"-" tags),
// so set the options again explicitly to ensure they are fully populated
inst.SetOptions(&opts)
return &inst, nil
}
// Database interface implementation
// Save saves an instance to the database (insert or update)
func (db *sqliteDB) Save(inst *instance.Instance) error {
ctx := context.Background()
// Try to get existing instance
existing, err := db.GetByName(ctx, inst.Name)
if err != nil {
// Instance doesn't exist, create it
return db.Create(ctx, inst)
}
// Instance exists, update it
if existing != nil {
return db.Update(ctx, inst)
}
return db.Create(ctx, inst)
}
// Delete removes an instance from the database
func (db *sqliteDB) Delete(name string) error {
ctx := context.Background()
return db.DeleteInstance(ctx, name)
}
// LoadAll loads all instances from the database
func (db *sqliteDB) LoadAll() ([]*instance.Instance, error) {
ctx := context.Background()
return db.GetAll(ctx)
}
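
The exported Save/Delete/LoadAll trio is what the instance manager consumes; a round-trip sketch against the InstanceStore interface (constructing a real *instance.Instance needs instance.New and an AppConfig, omitted here):

package main

import (
    "log"

    "llamactl/pkg/database"
    "llamactl/pkg/instance"
)

// persistAndReload shows the upsert-then-hydrate cycle: Save inserts on the
// first call and updates afterwards, and LoadAll rebuilds instances
// (including their options) from options_json.
func persistAndReload(store database.InstanceStore, inst *instance.Instance) error {
    if err := store.Save(inst); err != nil {
        return err
    }
    all, err := store.LoadAll()
    if err != nil {
        return err
    }
    log.Printf("loaded %d instances", len(all))
    return nil
}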


@@ -0,0 +1,78 @@
package database
import (
"embed"
"fmt"
"log"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
//go:embed migrations/*.sql
var migrationFiles embed.FS
// RunMigrations applies all pending database migrations
func RunMigrations(db *sqliteDB) error {
if db == nil || db.DB == nil {
return fmt.Errorf("database connection is nil")
}
// Create migration source from embedded files
sourceDriver, err := iofs.New(migrationFiles, "migrations")
if err != nil {
return fmt.Errorf("failed to create migration source: %w", err)
}
// Create database driver for migrations
dbDriver, err := sqlite3.WithInstance(db.DB, &sqlite3.Config{})
if err != nil {
return fmt.Errorf("failed to create database driver: %w", err)
}
// Create migrator
migrator, err := migrate.NewWithInstance("iofs", sourceDriver, "sqlite3", dbDriver)
if err != nil {
return fmt.Errorf("failed to create migrator: %w", err)
}
// Get current version
currentVersion, dirty, err := migrator.Version()
if err != nil && err != migrate.ErrNilVersion {
return fmt.Errorf("failed to get current migration version: %w", err)
}
if dirty {
return fmt.Errorf("database is in dirty state at version %d - manual intervention required", currentVersion)
}
// Run migrations
log.Printf("Running database migrations (current version: %v)", currentVersionString(currentVersion, err))
if err := migrator.Up(); err != nil {
if err == migrate.ErrNoChange {
log.Println("Database schema is up to date")
return nil
}
return fmt.Errorf("failed to run migrations: %w", err)
}
// Get new version
newVersion, _, err := migrator.Version()
if err != nil {
log.Printf("Migrations completed (unable to determine new version: %v)", err)
} else {
log.Printf("Migrations completed successfully (new version: %d)", newVersion)
}
return nil
}
// currentVersionString returns a string representation of the current version
func currentVersionString(version uint, err error) string {
if err == migrate.ErrNilVersion {
return "none"
}
return fmt.Sprintf("%d", version)
}
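
golang-migrate's iofs source reads paired up/down files from the embedded migrations directory, named {version}_{title}.up.sql and {version}_{title}.down.sql. The two SQL files below are one such pair; the on-disk layout is roughly (file names illustrative):

pkg/database/migrations/
    000001_initial_schema.up.sql
    000001_initial_schema.down.sql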


@@ -0,0 +1,11 @@
-- Drop API key related indexes and tables first
DROP INDEX IF EXISTS idx_key_permissions_instance_id;
DROP INDEX IF EXISTS idx_api_keys_expires_at;
DROP INDEX IF EXISTS idx_api_keys_user_id;
DROP TABLE IF EXISTS key_permissions;
DROP TABLE IF EXISTS api_keys;
-- Drop instance related indexes and tables
DROP INDEX IF EXISTS idx_instances_status;
DROP INDEX IF EXISTS idx_instances_name;
DROP TABLE IF EXISTS instances;


@@ -0,0 +1,60 @@
-- -----------------------------------------------------------------------------
-- Instances Table: Central configuration and state for LLM backends
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS instances (
-- Primary identification
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
-- Instance state
status TEXT NOT NULL CHECK(status IN ('stopped', 'running', 'failed', 'restarting', 'shutting_down')) DEFAULT 'stopped',
-- Timestamps (created_at stored as Unix timestamp for compatibility with existing JSON format)
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
-- All instance options stored as a single JSON blob
options_json TEXT NOT NULL,
-- Future: OIDC user ID for ownership
owner_user_id TEXT NULL
);
-- -----------------------------------------------------------------------------
-- Indexes for performance
-- -----------------------------------------------------------------------------
CREATE UNIQUE INDEX IF NOT EXISTS idx_instances_name ON instances(name);
CREATE INDEX IF NOT EXISTS idx_instances_status ON instances(status);
-- -----------------------------------------------------------------------------
-- API Keys Table: Database-backed inference API keys
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS api_keys (
id INTEGER PRIMARY KEY AUTOINCREMENT,
key_hash TEXT NOT NULL,
name TEXT NOT NULL,
user_id TEXT NOT NULL,
permission_mode TEXT NOT NULL CHECK(permission_mode IN ('allow_all', 'per_instance')) DEFAULT 'per_instance',
expires_at INTEGER NULL,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_used_at INTEGER NULL
);
-- -----------------------------------------------------------------------------
-- Key Permissions Table: Per-instance permissions for API keys
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS key_permissions (
key_id INTEGER NOT NULL,
instance_id INTEGER NOT NULL,
PRIMARY KEY (key_id, instance_id),
FOREIGN KEY (key_id) REFERENCES api_keys (id) ON DELETE CASCADE,
FOREIGN KEY (instance_id) REFERENCES instances (id) ON DELETE CASCADE
);
-- -----------------------------------------------------------------------------
-- Indexes for API keys and permissions
-- -----------------------------------------------------------------------------
CREATE INDEX IF NOT EXISTS idx_api_keys_user_id ON api_keys(user_id);
CREATE INDEX IF NOT EXISTS idx_api_keys_expires_at ON api_keys(expires_at);
CREATE INDEX IF NOT EXISTS idx_key_permissions_instance_id ON key_permissions(instance_id);


@@ -0,0 +1,57 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
)
// GetPermissions retrieves all permissions for a key
func (db *sqliteDB) GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPermission, error) {
query := `
SELECT key_id, instance_id
FROM key_permissions
WHERE key_id = ?
ORDER BY instance_id
`
rows, err := db.QueryContext(ctx, query, keyID)
if err != nil {
return nil, fmt.Errorf("failed to query key permissions: %w", err)
}
defer rows.Close()
var permissions []auth.KeyPermission
for rows.Next() {
var perm auth.KeyPermission
err := rows.Scan(&perm.KeyID, &perm.InstanceID)
if err != nil {
return nil, fmt.Errorf("failed to scan key permission: %w", err)
}
permissions = append(permissions, perm)
}
return permissions, nil
}
// HasPermission checks if key has inference permission for instance
func (db *sqliteDB) HasPermission(ctx context.Context, keyID, instanceID int) (bool, error) {
query := `
SELECT 1
FROM key_permissions
WHERE key_id = ? AND instance_id = ?
`
var exists int
err := db.QueryRowContext(ctx, query, keyID, instanceID).Scan(&exists)
if err != nil {
if err == sql.ErrNoRows {
// No permission record found, deny access
return false, nil
}
return false, fmt.Errorf("failed to check key permission: %w", err)
}
return true, nil
}
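
How these two lookups are typically combined on an authorization path, sketched against the AuthStore interface (the handler shape is illustrative, not the project's actual middleware):

package main

import (
    "context"
    "fmt"

    "llamactl/pkg/auth"
    "llamactl/pkg/database"
)

// authorizeInference denies per-instance keys that lack a permission row;
// allow_all keys bypass the lookup entirely.
func authorizeInference(ctx context.Context, store database.AuthStore, key *auth.APIKey, instanceID int) error {
    if key.PermissionMode != auth.PermissionModePerInstance {
        return nil
    }
    ok, err := store.HasPermission(ctx, key.ID, instanceID)
    if err != nil {
        return err
    }
    if !ok {
        return fmt.Errorf("key %d is not permitted to use instance %d", key.ID, instanceID)
    }
    return nil
}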


@@ -1,257 +1,331 @@
package instance
import (
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"sync/atomic"
"time"
"llamactl/pkg/config"
)
// TimeProvider interface allows for testing with mock time
type TimeProvider interface {
Now() time.Time
}
// Instance represents a running instance of llama server
type Instance struct {
ID int `json:"id"`
Name string `json:"name"`
Created int64 `json:"created,omitempty"` // Unix timestamp when instance was created
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
// Global configuration
globalInstanceSettings *config.InstancesConfig
globalBackendSettings *config.BackendConfig
globalNodesConfig map[string]config.NodeConfig
localNodeName string `json:"-"` // Name of the local node for remote detection
// Status
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
status *status `json:"-"`
options *options `json:"-"`
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
// Timeout management
lastRequestTime atomic.Int64 // Unix timestamp of last request
timeProvider TimeProvider `json:"-"` // Time provider for testing
// Components (can be nil for remote instances)
process *process `json:"-"`
proxy *proxy `json:"-"`
logger *logger `json:"-"`
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// New creates a new instance with the given name, global config, options, and status change callback
func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusChange func(oldStatus, newStatus Status)) *Instance {
globalInstanceSettings := &globalConfig.Instances
globalBackendSettings := &globalConfig.Backends
globalNodesConfig := globalConfig.Nodes
localNodeName := globalConfig.LocalNode
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalInstanceSettings)
opts.validateAndApplyDefaults(name, globalInstanceSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
// Create status wrapper
status := newStatus(Stopped)
status.onStatusChange = onStatusChange
return &Process{
// Create options wrapper
options := newOptions(opts)
instance := &Instance{
ID: 0, // Will be set by database
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
globalBackendSettings: globalBackendSettings,
logger: logger,
timeProvider: realTimeProvider{},
globalNodesConfig: globalNodesConfig,
localNodeName: localNodeName,
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
status: status,
}
}
func (i *Process) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
var err error
instance.proxy, err = newProxy(instance)
if err != nil {
log.Println("Warning: Failed to create proxy for instance", instance.Name, "-", err)
}
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Port
}
// Only create logger, proxy, and process for local instances
if !instance.IsRemote() {
logRotationConfig := &LogRotationConfig{
Enabled: globalInstanceSettings.LogRotationEnabled,
MaxSize: globalInstanceSettings.LogRotationMaxSize,
Compress: globalInstanceSettings.LogRotationCompress,
}
instance.logger = newLogger(
name,
globalInstanceSettings.LogsDir,
logRotationConfig,
)
instance.process = newProcess(instance)
}
return 0
return instance
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
return i.options.LlamaServerOptions.Host
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
return i.options.MlxServerOptions.Host
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
return i.options.VllmServerOptions.Host
}
}
// Start starts the instance
func (i *Instance) Start() error {
if i.process == nil {
return fmt.Errorf("instance %s has no process component (remote instances cannot be started locally)", i.Name)
}
return ""
return i.process.start()
}
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
// Stop stops the instance
func (i *Instance) Stop() error {
if i.process == nil {
return fmt.Errorf("instance %s has no process component (remote instances cannot be stopped locally)", i.Name)
}
return i.process.stop()
}
if options == nil {
// Restart restarts the instance
func (i *Instance) Restart() error {
if i.process == nil {
return fmt.Errorf("instance %s has no process component (remote instances cannot be restarted locally)", i.Name)
}
return i.process.restart()
}
// WaitForHealthy waits for the instance to become healthy
func (i *Instance) WaitForHealthy(timeout int) error {
if i.process == nil {
return fmt.Errorf("instance %s has no process component (remote instances cannot be health checked locally)", i.Name)
}
return i.process.waitForHealthy(timeout)
}
// GetOptions returns the current options
func (i *Instance) GetOptions() *Options {
if i.options == nil {
return nil
}
return i.options.get()
}
// GetStatus returns the current status
func (i *Instance) GetStatus() Status {
if i.status == nil {
return Stopped
}
return i.status.get()
}
// SetStatus sets the status
func (i *Instance) SetStatus(s Status) {
if i.status != nil {
i.status.set(s)
}
}
// IsRunning returns true if the status is Running
func (i *Instance) IsRunning() bool {
if i.status == nil {
return false
}
return i.status.isRunning()
}
// SetOptions sets the options
func (i *Instance) SetOptions(opts *Options) {
if opts == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
// Preserve the original nodes to prevent changing instance location
if i.options != nil && i.options.get() != nil {
opts.Nodes = i.options.get().Nodes
}
// Validate and copy options
opts.validateAndApplyDefaults(i.Name, i.globalInstanceSettings)
if i.options != nil {
i.options.set(opts)
}
i.options = options
// Clear the proxy so it gets recreated with new options
i.proxy = nil
if i.proxy != nil {
i.proxy.clear()
}
}
// SetTimeProvider sets a custom time provider for testing
func (i *Process) SetTimeProvider(tp TimeProvider) {
i.timeProvider = tp
func (i *Instance) SetTimeProvider(tp TimeProvider) {
if i.proxy != nil {
i.proxy.setTimeProvider(tp)
}
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy != nil {
return i.proxy, nil
}
func (i *Instance) GetHost() string {
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
return "localhost"
}
return i.options.GetHost()
}
func (i *Instance) GetPort() int {
if i.options == nil {
return 0
}
return i.options.GetPort()
}
func (i *Instance) IsRemote() bool {
opts := i.GetOptions()
if opts == nil {
return false
}
var host string
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
if i.options.LlamaServerOptions != nil {
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if i.options.MlxServerOptions != nil {
host = i.options.MlxServerOptions.Host
port = i.options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if i.options.VllmServerOptions != nil {
host = i.options.VllmServerOptions.Host
port = i.options.VllmServerOptions.Port
}
// If no nodes specified, it's a local instance
if len(opts.Nodes) == 0 {
return false
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
// If the local node is in the nodes map, treat it as a local instance
if _, isLocal := opts.Nodes[i.localNodeName]; isLocal {
return false
}
proxy := httputil.NewSingleHostReverseProxy(targetURL)
// Otherwise, it's a remote instance
return true
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
// GetLogs retrieves the last n lines of logs from the instance
func (i *Instance) GetLogs(num_lines int) (string, error) {
if i.logger == nil {
return "", fmt.Errorf("instance %s has no logger (remote instances don't have logs)", i.Name)
}
return i.logger.getLogs(num_lines)
}
// LastRequestTime returns the last request time as a Unix timestamp
func (i *Instance) LastRequestTime() int64 {
if i.proxy == nil {
return 0
}
return i.proxy.getLastRequestTime()
}
// UpdateLastRequestTime updates the last request access time for the instance via proxy
func (i *Instance) UpdateLastRequestTime() {
if i.proxy != nil {
i.proxy.updateLastRequestTime()
}
}
// ShouldTimeout checks if the instance should timeout based on idle time
func (i *Instance) ShouldTimeout() bool {
if i.proxy == nil {
return false
}
return i.proxy.shouldTimeout()
}
// GetInflightRequests returns the current number of inflight requests
func (i *Instance) GetInflightRequests() int32 {
if i.proxy == nil {
return 0
}
return i.proxy.getInflightRequests()
}
// ServeHTTP serves HTTP requests through the proxy with request tracking and shutdown handling
func (i *Instance) ServeHTTP(w http.ResponseWriter, r *http.Request) error {
if i.proxy == nil {
return fmt.Errorf("instance %s has no proxy component", i.Name)
}
return i.proxy.serveHTTP(w, r)
}
func (i *Instance) getCommand() string {
opts := i.GetOptions()
if opts == nil {
return ""
}
return opts.BackendOptions.GetCommand(i.globalBackendSettings, opts.DockerEnabled, opts.CommandOverride)
}
func (i *Instance) buildCommandArgs() []string {
opts := i.GetOptions()
if opts == nil {
return nil
}
i.proxy = proxy
return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings, opts.DockerEnabled)
}
return i.proxy, nil
func (i *Instance) buildEnvironment() map[string]string {
opts := i.GetOptions()
if opts == nil {
return nil
}
return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.DockerEnabled, opts.Environment)
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Process) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Use anonymous struct to avoid recursion
type Alias Process
func (i *Instance) MarshalJSON() ([]byte, error) {
return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
ID int `json:"id"`
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
ID: i.ID,
Name: i.Name,
Status: i.status,
Created: i.Created,
Options: i.options,
})
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Process) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias Process
func (i *Instance) UnmarshalJSON(data []byte) error {
// Explicitly deserialize to match MarshalJSON format
aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
}
ID int `json:"id"`
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
}{}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
i.options = aux.Options
}
// Set the fields
i.ID = aux.ID
i.Name = aux.Name
i.Created = aux.Created
i.status = aux.Status
i.options = aux.Options
return nil
}
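
A lifecycle sketch for the refactored API, mirroring the configuration shape used in the tests below (the model path is illustrative, and Start really spawns the backend process):

package main

import (
    "log"

    "llamactl/pkg/backends"
    "llamactl/pkg/config"
    "llamactl/pkg/instance"
)

func main() {
    globalConfig := &config.AppConfig{
        Backends: config.BackendConfig{
            LlamaCpp: config.BackendSettings{Command: "llama-server"},
        },
        Instances: config.InstancesConfig{LogsDir: "/tmp/llamactl"},
        Nodes:     map[string]config.NodeConfig{},
        LocalNode: "main",
    }
    opts := &instance.Options{
        BackendOptions: backends.Options{
            BackendType: backends.BackendTypeLlamaCpp,
            LlamaServerOptions: &backends.LlamaServerOptions{
                Model: "/models/demo.gguf",
                Port:  8080,
            },
        },
    }
    inst := instance.New("demo", globalConfig, opts, nil)
    if err := inst.Start(); err != nil { // fails for remote instances or a missing binary
        log.Fatal(err)
    }
    log.Printf("instance %s running on port %d", inst.Name, inst.GetPort())
}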


@@ -3,38 +3,53 @@ package instance_test
import (
"encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"testing"
"time"
)
func TestNewInstance(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
Instances: config.InstancesConfig{
DefaultAutoRestart: true,
LogsDir: "/tmp/test",
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst := instance.New("test-instance", globalConfig, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -45,8 +60,8 @@ func TestNewInstance(t *testing.T) {
// Check that options were properly set with defaults applied
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -62,94 +77,89 @@ func TestNewInstance(t *testing.T) {
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
// Test that explicit values override defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
optionsWithOverrides := &instance.Options{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst2 := instance.New("test-override", globalConfig, optionsWithOverrides, mockOnStatusChange)
opts2 := inst2.GetOptions()
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
if opts2.AutoRestart == nil || *opts2.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
if opts2.MaxRestarts == nil || *opts2.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts2.MaxRestarts)
}
}
func TestSetOptions(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
Instances: config.InstancesConfig{
DefaultAutoRestart: true,
LogsDir: "/tmp/test",
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
initialOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
inst := instance.New("test-instance", globalConfig, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
newOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
},
}
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
@@ -161,83 +171,35 @@ func TestSetOptions(t *testing.T) {
}
}
func TestGetProxy(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.New("test-instance", globalConfig, options, nil)
instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
data, err := json.Marshal(inst)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
// Verify by unmarshaling and checking key fields
var result map[string]any
err = json.Unmarshal(data, &result)
if err != nil {
if err := json.Unmarshal(data, &result); err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
@@ -247,37 +209,9 @@ func TestMarshalJSON(t *testing.T) {
if result["status"] != "stopped" {
t.Errorf("Expected status 'stopped', got %v", result["status"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
if result["options"] == nil {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
// Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
}
}
func TestUnmarshalJSON(t *testing.T) {
@@ -295,7 +229,7 @@ func TestUnmarshalJSON(t *testing.T) {
}
}`
var inst instance.Process
var inst instance.Instance
err := json.Unmarshal([]byte(jsonData), &inst)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
@@ -312,14 +246,14 @@ func TestUnmarshalJSON(t *testing.T) {
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
if opts.BackendOptions.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendOptions.BackendType)
}
if opts.LlamaServerOptions == nil {
if opts.BackendOptions.LlamaServerOptions == nil {
t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -332,7 +266,7 @@ func TestUnmarshalJSON(t *testing.T) {
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
func TestCreateOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
@@ -363,30 +297,45 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
},
}
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
Instances: config.InstancesConfig{
LogsDir: "/tmp/test",
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
options := &instance.Options{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
instance := instance.New("test", globalConfig, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {
@@ -403,3 +352,295 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
})
}
}
func TestStatusChangeCallback(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
var callbackOldStatus, callbackNewStatus instance.Status
callbackCalled := false
onStatusChange := func(oldStatus, newStatus instance.Status) {
callbackOldStatus = oldStatus
callbackNewStatus = newStatus
callbackCalled = true
}
inst := instance.New("test", globalConfig, options, onStatusChange)
inst.SetStatus(instance.Running)
if !callbackCalled {
t.Error("Expected status change callback to be called")
}
if callbackOldStatus != instance.Stopped {
t.Errorf("Expected old status Stopped, got %v", callbackOldStatus)
}
if callbackNewStatus != instance.Running {
t.Errorf("Expected new status Running, got %v", callbackNewStatus)
}
}
func TestSetOptions_NodesPreserved(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
tests := []struct {
name string
initialNodes map[string]struct{}
updateNodes map[string]struct{}
expectedNodes map[string]struct{}
}{
{
name: "nil nodes preserved as nil",
initialNodes: nil,
updateNodes: map[string]struct{}{"worker1": {}},
expectedNodes: nil,
},
{
name: "empty nodes preserved as empty",
initialNodes: map[string]struct{}{},
updateNodes: map[string]struct{}{"worker1": {}},
expectedNodes: map[string]struct{}{},
},
{
name: "existing nodes preserved",
initialNodes: map[string]struct{}{"worker1": {}, "worker2": {}},
updateNodes: map[string]struct{}{"worker3": {}},
expectedNodes: map[string]struct{}{"worker1": {}, "worker2": {}},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.Options{
Nodes: tt.initialNodes,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
inst := instance.New("test", globalConfig, options, nil)
// Attempt to update nodes (should be ignored)
updateOptions := &instance.Options{
Nodes: tt.updateNodes,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
},
},
}
inst.SetOptions(updateOptions)
opts := inst.GetOptions()
// Verify nodes are preserved
if len(opts.Nodes) != len(tt.expectedNodes) {
t.Errorf("Expected %d nodes, got %d", len(tt.expectedNodes), len(opts.Nodes))
}
for node := range tt.expectedNodes {
if _, exists := opts.Nodes[node]; !exists {
t.Errorf("Expected node %s to exist", node)
}
}
// Verify other options were updated
if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model to be updated to '/path/to/new-model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
}
})
}
}
func TestProcessErrorCases(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
inst := instance.New("test", globalConfig, options, nil)
// Stop when not running should return error
err := inst.Stop()
if err == nil {
t.Error("Expected error when stopping non-running instance")
}
// Simulate running state
inst.SetStatus(instance.Running)
// Start when already running should return error
err = inst.Start()
if err == nil {
t.Error("Expected error when starting already running instance")
}
}
func TestRemoteInstanceOperations(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{
"remote-node": {Address: "http://remote-node:8080"},
},
LocalNode: "main",
}
options := &instance.Options{
Nodes: map[string]struct{}{"remote-node": {}}, // Remote instance
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
inst := instance.New("remote-test", globalConfig, options, nil)
if !inst.IsRemote() {
t.Error("Expected instance to be remote")
}
// Start should fail for remote instance
if err := inst.Start(); err == nil {
t.Error("Expected error when starting remote instance")
}
// Stop should fail for remote instance
if err := inst.Stop(); err == nil {
t.Error("Expected error when stopping remote instance")
}
// Restart should fail for remote instance
if err := inst.Restart(); err == nil {
t.Error("Expected error when restarting remote instance")
}
// GetLogs should fail for remote instance
if _, err := inst.GetLogs(10); err == nil {
t.Error("Expected error when getting logs for remote instance")
}
}
func TestIdleTimeout(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{Command: "llama-server"},
},
Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
t.Run("not running never times out", func(t *testing.T) {
timeout := 1
inst := instance.New("test", globalConfig, &instance.Options{
IdleTimeout: &timeout,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}, nil)
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
})
t.Run("no timeout configured", func(t *testing.T) {
inst := instance.New("test", globalConfig, &instance.Options{
IdleTimeout: nil, // No timeout
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}, nil)
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Error("Instance with no timeout configured should not timeout")
}
})
t.Run("timeout exceeded", func(t *testing.T) {
timeout := 1 // 1 minute
inst := instance.New("test", globalConfig, &instance.Options{
IdleTimeout: &timeout,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Host: "localhost",
Port: 8080,
},
},
}, nil)
inst.SetStatus(instance.Running)
// Use mock time provider
mockTime := &mockTimeProvider{currentTime: time.Now().Unix()}
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.currentTime = time.Now().Add(2 * time.Minute).Unix()
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when idle time exceeds configured timeout")
}
})
}
// mockTimeProvider for timeout testing
type mockTimeProvider struct {
currentTime int64 // Unix timestamp
}
func (m *mockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime, 0)
}


@@ -1,374 +0,0 @@
package instance
import (
"context"
"fmt"
"log"
"net/http"
"os/exec"
"runtime"
"syscall"
"time"
"llamactl/pkg/backends"
)
// Start starts the llama server instance and returns an error if it fails.
func (i *Process) Start() error {
i.mu.Lock()
defer i.mu.Unlock()
if i.IsRunning() {
return fmt.Errorf("instance %s is already running", i.Name)
}
// Safety check: ensure options are valid
if i.options == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Reset restart counter when manually starting (not during auto-restart)
// We can detect auto-restart by checking if restartCancel is set
if i.restartCancel == nil {
i.restarts = 0
}
// Initialize last request time to current time when starting
i.lastRequestTime.Store(i.timeProvider.Now().Unix())
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
args := i.options.BuildCommandArgs()
i.ctx, i.cancel = context.WithCancel(context.Background())
var executable string
// Get executable from global configuration
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
executable = i.globalBackendSettings.LlamaExecutable
case backends.BackendTypeMlxLm:
executable = i.globalBackendSettings.MLXLMExecutable
case backends.BackendTypeVllm:
executable = i.globalBackendSettings.VllmExecutable
default:
return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
}
i.cmd = exec.CommandContext(i.ctx, executable, args...)
if runtime.GOOS != "windows" {
setProcAttrs(i.cmd)
}
var err error
i.stdout, err = i.cmd.StdoutPipe()
if err != nil {
i.logger.Close()
return fmt.Errorf("failed to get stdout pipe: %w", err)
}
i.stderr, err = i.cmd.StderrPipe()
if err != nil {
i.stdout.Close()
i.logger.Close()
return fmt.Errorf("failed to get stderr pipe: %w", err)
}
if err := i.cmd.Start(); err != nil {
return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
}
i.SetStatus(Running)
// Create channel for monitor completion signaling
i.monitorDone = make(chan struct{})
go i.logger.readOutput(i.stdout)
go i.logger.readOutput(i.stderr)
go i.monitorProcess()
return nil
}
// Stop terminates the subprocess
func (i *Process) Stop() error {
i.mu.Lock()
if !i.IsRunning() {
// Even if not running, cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
log.Printf("Cancelled pending restart for instance %s", i.Name)
}
i.mu.Unlock()
return fmt.Errorf("instance %s is not running", i.Name)
}
// Cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
}
// Set status to stopped first to signal intentional stop
i.SetStatus(Stopped)
// Clean up the proxy
i.proxy = nil
// Get the monitor done channel before releasing the lock
monitorDone := i.monitorDone
i.mu.Unlock()
// Stop the process with SIGINT if cmd exists
if i.cmd != nil && i.cmd.Process != nil {
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
}
}
// If no process exists, we can return immediately
if i.cmd == nil || monitorDone == nil {
i.logger.Close()
return nil
}
select {
case <-monitorDone:
// Process exited normally
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if i.cmd != nil && i.cmd.Process != nil {
killErr := i.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
}
log.Printf("Instance %s did not stop in time, force killed", i.Name)
// Wait a bit more for the monitor to finish after force kill
select {
case <-monitorDone:
// Monitor completed after force kill
case <-time.After(2 * time.Second):
log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
}
}
}
i.logger.Close()
return nil
}
func (i *Process) LastRequestTime() int64 {
return i.lastRequestTime.Load()
}
func (i *Process) WaitForHealthy(timeout int) error {
if !i.IsRunning() {
return fmt.Errorf("instance %s is not running", i.Name)
}
if timeout <= 0 {
timeout = 30 // Default to 30 seconds if no timeout is specified
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
defer cancel()
// Get instance options to build the health check URL
opts := i.GetOptions()
if opts == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Build the health check URL directly
var host string
var port int
switch opts.BackendType {
case backends.BackendTypeLlamaCpp:
if opts.LlamaServerOptions != nil {
host = opts.LlamaServerOptions.Host
port = opts.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if opts.MlxServerOptions != nil {
host = opts.MlxServerOptions.Host
port = opts.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if opts.VllmServerOptions != nil {
host = opts.VllmServerOptions.Host
port = opts.VllmServerOptions.Port
}
}
if host == "" {
host = "localhost"
}
healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
Timeout: 5 * time.Second, // 5 second timeout per request
}
// Helper function to check health directly
checkHealth := func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
if err != nil {
return false
}
resp, err := client.Do(req)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// Try immediate check first
if checkHealth() {
return nil // Instance is healthy
}
// If immediate check failed, start polling
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
case <-ticker.C:
if checkHealth() {
return nil // Instance is healthy
}
// Continue polling
}
}
}
func (i *Process) monitorProcess() {
defer func() {
i.mu.Lock()
if i.monitorDone != nil {
close(i.monitorDone)
i.monitorDone = nil
}
i.mu.Unlock()
}()
err := i.cmd.Wait()
i.mu.Lock()
// Check if the instance was intentionally stopped
if !i.IsRunning() {
i.mu.Unlock()
return
}
i.SetStatus(Stopped)
i.logger.Close()
// Cancel any existing restart context since we're handling a new exit
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
}
// Log the exit
if err != nil {
log.Printf("Instance %s crashed with error: %v", i.Name, err)
// Handle restart while holding the lock, then release it
i.handleRestart()
} else {
log.Printf("Instance %s exited cleanly", i.Name)
i.mu.Unlock()
}
}
// handleRestart manages the restart process while holding the lock
func (i *Process) handleRestart() {
// Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart {
i.SetStatus(Failed)
i.mu.Unlock()
return
}
i.restarts++
log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
i.Name, i.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
// Create a cancellable context for the restart delay
restartCtx, cancel := context.WithCancel(context.Background())
i.restartCancel = cancel
// Release the lock before sleeping
i.mu.Unlock()
// Use context-aware sleep so it can be cancelled
select {
case <-time.After(time.Duration(restartDelay) * time.Second):
// Sleep completed normally, continue with restart
case <-restartCtx.Done():
// Restart was cancelled
log.Printf("Restart cancelled for instance %s", i.Name)
return
}
// Restart the instance
if err := i.Start(); err != nil {
log.Printf("Failed to restart instance %s: %v", i.Name, err)
} else {
log.Printf("Successfully restarted instance %s", i.Name)
// Clear the cancel function
i.mu.Lock()
i.restartCancel = nil
i.mu.Unlock()
}
}
// validateRestartConditions checks if the instance should be restarted and returns the parameters
func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
if i.options == nil {
log.Printf("Instance %s not restarting: options are nil", i.Name)
return false, 0, 0
}
if i.options.AutoRestart == nil || !*i.options.AutoRestart {
log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
return false, 0, 0
}
if i.options.MaxRestarts == nil {
log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
return false, 0, 0
}
if i.options.RestartDelay == nil {
log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
return false, 0, 0
}
// Values are already validated during unmarshaling/SetOptions
maxRestarts = *i.options.MaxRestarts
restartDelay = *i.options.RestartDelay
if i.restarts >= maxRestarts {
log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, maxRestarts)
return false, 0, 0
}
return true, maxRestarts, restartDelay
}

pkg/instance/logger.go Normal file

@@ -0,0 +1,150 @@
package instance
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"sync"
"time"
timber "github.com/DeRuina/timberjack"
)
// LogRotationConfig contains log rotation settings for instances
type LogRotationConfig struct {
Enabled bool
MaxSize int
Compress bool
}
type logger struct {
name string
logDir string
logFile *timber.Logger
logFilePath string
mu sync.RWMutex
cfg *LogRotationConfig
}
func newLogger(name, logDir string, cfg *LogRotationConfig) *logger {
return &logger{
name: name,
logDir: logDir,
cfg: cfg,
}
}
func (l *logger) create() error {
l.mu.Lock()
defer l.mu.Unlock()
if l.logDir == "" {
return fmt.Errorf("logDir empty for instance %s", l.name)
}
if err := os.MkdirAll(l.logDir, 0755); err != nil {
return fmt.Errorf("failed to create log directory: %w", err)
}
logPath := fmt.Sprintf("%s/%s.log", l.logDir, l.name)
l.logFilePath = logPath
// Build the timber logger
t := &timber.Logger{
Filename: logPath,
MaxSize: l.cfg.MaxSize,
MaxBackups: 0, // No limit on backups
// Compression: "gzip" if Compress is true, else "none"
Compression: func() string {
if l.cfg.Compress {
return "gzip"
}
return "none"
}(),
FileMode: 0644,
LocalTime: true,
}
// If rotation is disabled, set MaxSize to 0 so no rotation occurs
if !l.cfg.Enabled {
t.MaxSize = 0
}
l.logFile = t
// Write a startup marker
ts := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(t, "\n=== Instance %s started at %s ===\n", l.name, ts)
return nil
}
func (l *logger) readOutput(rc io.ReadCloser) {
defer rc.Close()
scanner := bufio.NewScanner(rc)
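// Note: bufio.Scanner's default buffer caps a single line at 64 KiB;
// a longer line ends this loop early (scanner.Err() is not checked here).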
for scanner.Scan() {
line := scanner.Text()
if lg := l.logFile; lg != nil {
fmt.Fprintln(lg, line)
}
}
}
func (l *logger) close() {
l.mu.Lock()
defer l.mu.Unlock()
lg := l.logFile
if lg == nil {
return
}
ts := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(lg, "=== Instance %s stopped at %s ===\n\n", l.name, ts)
_ = lg.Close()
l.logFile = nil
}
// getLogs retrieves the last n lines of logs from the instance
func (l *logger) getLogs(num_lines int) (string, error) {
l.mu.RLock()
defer l.mu.RUnlock()
if l.logFilePath == "" {
return "", fmt.Errorf("log file not created for instance %s", l.name)
}
file, err := os.Open(l.logFilePath)
if err != nil {
return "", fmt.Errorf("failed to open log file: %w", err)
}
defer file.Close()
if num_lines <= 0 {
content, err := io.ReadAll(file)
if err != nil {
return "", fmt.Errorf("failed to read log file: %w", err)
}
return string(content), nil
}
var lines []string
scanner := bufio.NewScanner(file)
// Read all lines into a slice
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
if err := scanner.Err(); err != nil {
return "", fmt.Errorf("error reading file: %w", err)
}
// Return the last N lines
start := max(len(lines)-num_lines, 0)
return strings.Join(lines[start:], "\n"), nil
}
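// --- Illustrative only, not part of this diff: a minimal in-package sketch of
// wiring the rotating logger above. The names are the real unexported API; the
// values are made up, and MaxSize is presumed to be in megabytes, as in
// lumberjack-style rotators.
func exampleLoggerUsage() (string, error) {
	cfg := &LogRotationConfig{Enabled: true, MaxSize: 100, Compress: true}
	l := newLogger("demo", "/tmp/llamactl-logs", cfg)
	if err := l.create(); err != nil {
		return "", err
	}
	defer l.close()
	// getLogs(0) returns the whole file; here we keep only the last 50 lines.
	return l.getLogs(50)
}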


@@ -1,118 +0,0 @@
package instance
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"time"
)
type InstanceLogger struct {
name string
logDir string
logFile *os.File
logFilePath string
}
func NewInstanceLogger(name string, logDir string) *InstanceLogger {
return &InstanceLogger{
name: name,
logDir: logDir,
}
}
// Create creates and opens the log files for stdout and stderr
func (i *InstanceLogger) Create() error {
if i.logDir == "" {
return fmt.Errorf("logDir is empty for instance %s", i.name)
}
// Set up instance logs
logPath := i.logDir + "/" + i.name + ".log"
i.logFilePath = logPath
if err := os.MkdirAll(i.logDir, 0755); err != nil {
return fmt.Errorf("failed to create log directory: %w", err)
}
logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
if err != nil {
return fmt.Errorf("failed to create stdout log file: %w", err)
}
i.logFile = logFile
// Write a startup marker to the log file
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
return nil
}
// GetLogs retrieves the last n lines of logs from the instance
func (i *Process) GetLogs(num_lines int) (string, error) {
i.mu.RLock()
logFileName := i.logger.logFilePath
i.mu.RUnlock()
if logFileName == "" {
return "", fmt.Errorf("log file not created for instance %s", i.Name)
}
file, err := os.Open(logFileName)
if err != nil {
return "", fmt.Errorf("failed to open log file: %w", err)
}
defer file.Close()
if num_lines <= 0 {
content, err := io.ReadAll(file)
if err != nil {
return "", fmt.Errorf("failed to read log file: %w", err)
}
return string(content), nil
}
var lines []string
scanner := bufio.NewScanner(file)
// Read all lines into a slice
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
if err := scanner.Err(); err != nil {
return "", fmt.Errorf("error reading file: %w", err)
}
// Return the last N lines
start := max(len(lines)-num_lines, 0)
return strings.Join(lines[start:], "\n"), nil
}
// closeLogFile closes the log files
func (i *InstanceLogger) Close() {
if i.logFile != nil {
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
i.logFile.Close()
i.logFile = nil
}
}
// readOutput reads from the given reader and writes lines to the log file
func (i *InstanceLogger) readOutput(reader io.ReadCloser) {
defer reader.Close()
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if i.logFile != nil {
fmt.Fprintln(i.logFile, line)
i.logFile.Sync() // Ensure data is written to disk
}
}
}


@@ -4,14 +4,16 @@ import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/validation"
"log"
"maps"
"slices"
"sync"
)
type CreateInstanceOptions struct {
// Options contains the actual configuration (exported - this is the public API).
type Options struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
@@ -20,21 +22,84 @@ type CreateInstanceOptions struct {
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// Execution context overrides
DockerEnabled *bool `json:"docker_enabled,omitempty"`
CommandOverride string `json:"command_override,omitempty"`
// Backend-specific options
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
VllmServerOptions *vllm.VllmServerOptions `json:"-"`
// Assigned nodes
Nodes map[string]struct{} `json:"-"`
// Backend options
BackendOptions backends.Options `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// options wraps Options with thread-safe access (unexported).
type options struct {
mu sync.RWMutex
opts *Options
}
// newOptions creates a new options wrapper with the given Options
func newOptions(opts *Options) *options {
return &options{
opts: opts,
}
}
// get returns the current options pointer; callers must treat the result as read-only
func (o *options) get() *Options {
o.mu.RLock()
defer o.mu.RUnlock()
return o.opts
}
// set updates the options
func (o *options) set(opts *Options) {
o.mu.Lock()
defer o.mu.Unlock()
o.opts = opts
}
func (o *options) GetHost() string {
o.mu.RLock()
defer o.mu.RUnlock()
return o.opts.BackendOptions.GetHost()
}
func (o *options) GetPort() int {
o.mu.RLock()
defer o.mu.RUnlock()
return o.opts.BackendOptions.GetPort()
}
// MarshalJSON implements json.Marshaler for options wrapper
func (o *options) MarshalJSON() ([]byte, error) {
o.mu.RLock()
defer o.mu.RUnlock()
return o.opts.MarshalJSON()
}
// UnmarshalJSON implements json.Unmarshaler for options wrapper
func (o *options) UnmarshalJSON(data []byte) error {
o.mu.Lock()
defer o.mu.Unlock()
if o.opts == nil {
o.opts = &Options{}
}
return o.opts.UnmarshalJSON(data)
}
// UnmarshalJSON implements custom JSON unmarshaling for Options
func (c *Options) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
type Alias Options
aux := &struct {
Nodes []string `json:"nodes,omitempty"`
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
Alias: (*Alias)(c),
@@ -44,113 +109,97 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
// Convert nodes array to map
if len(aux.Nodes) > 0 {
c.Nodes = make(map[string]struct{}, len(aux.Nodes))
for _, node := range aux.Nodes {
c.Nodes[node] = struct{}{}
}
case backends.BackendTypeMlxLm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
}
c.MlxServerOptions = &mlx.MlxServerOptions{}
if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal MLX options: %w", err)
}
}
case backends.BackendTypeVllm:
if c.BackendOptions != nil {
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
// Create backend options struct and unmarshal
c.BackendOptions = backends.Options{
BackendType: aux.BackendType,
BackendOptions: aux.BackendOptions,
}
c.VllmServerOptions = &vllm.VllmServerOptions{}
if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
// Marshal the backend options to JSON for proper unmarshaling
backendJson, err := json.Marshal(struct {
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
}{
BackendType: aux.BackendType,
BackendOptions: aux.BackendOptions,
})
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
// Unmarshal into the backends.Options struct to trigger its custom unmarshaling
if err := json.Unmarshal(backendJson, &c.BackendOptions); err != nil {
return fmt.Errorf("failed to unmarshal backend options: %w", err)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
// MarshalJSON implements custom JSON marshaling for Options
func (c *Options) MarshalJSON() ([]byte, error) {
type Alias Options
// Make a copy of the struct
temp := *c
// Copy environment map to avoid concurrent access issues
if temp.Environment != nil {
envCopy := make(map[string]string, len(temp.Environment))
maps.Copy(envCopy, temp.Environment)
temp.Environment = envCopy
}
aux := &struct {
Nodes []string `json:"nodes,omitempty"` // Output as JSON array
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
Alias: (*Alias)(c),
Alias: (*Alias)(&temp),
}
// Convert backend-specific options back to BackendOptions map for JSON
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
data, err := json.Marshal(c.MlxServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
data, err := json.Marshal(c.VllmServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
// Convert nodes map to array (sorted for consistency)
if len(c.Nodes) > 0 {
aux.Nodes = make([]string, 0, len(c.Nodes))
for node := range c.Nodes {
aux.Nodes = append(aux.Nodes, node)
}
// Sort for consistent output
slices.Sort(aux.Nodes)
}
// Set backend type
aux.BackendType = c.BackendOptions.BackendType
// Marshal the backends.Options struct to get the properly formatted backend options
backendData, err := json.Marshal(&c.BackendOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
// Unmarshal into a temporary struct to extract the backend_options map
var tempBackend struct {
BackendOptions map[string]any `json:"backend_options,omitempty"`
}
if err := json.Unmarshal(backendData, &tempBackend); err != nil {
return nil, fmt.Errorf("failed to unmarshal backend data: %w", err)
}
aux.BackendOptions = tempBackend.BackendOptions
return json.Marshal(aux)
}
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// validateAndApplyDefaults validates the instance options and applies constraints
func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
@@ -167,6 +216,28 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
*c.IdleTimeout = 0
}
// Validate docker_enabled and command_override relationship
if c.DockerEnabled != nil && *c.DockerEnabled && c.CommandOverride != "" {
log.Printf("Instance %s: command_override cannot be set when docker_enabled is true, ignoring command_override", name)
c.CommandOverride = "" // Clear invalid configuration
}
// Validate command_override if set
if c.CommandOverride != "" {
if err := validation.ValidateStringForInjection(c.CommandOverride); err != nil {
log.Printf("Instance %s: invalid command_override: %v, clearing value", name, err)
c.CommandOverride = "" // Clear invalid value
}
}
// Validate docker_enabled for MLX backend
if c.BackendOptions.BackendType == backends.BackendTypeMlxLm {
if c.DockerEnabled != nil && *c.DockerEnabled {
log.Printf("Instance %s: docker_enabled is not supported for MLX backend, ignoring", name)
c.DockerEnabled = nil // Clear invalid configuration
}
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
@@ -187,25 +258,3 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
}
}
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
return c.LlamaServerOptions.BuildCommandArgs()
}
case backends.BackendTypeMlxLm:
if c.MlxServerOptions != nil {
return c.MlxServerOptions.BuildCommandArgs()
}
case backends.BackendTypeVllm:
if c.VllmServerOptions != nil {
// Prepend "serve" as first argument
args := []string{"serve"}
args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
return args
}
}
return []string{}
}
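// --- Sketch, not part of this diff: a request body in the shape the new
// Options.UnmarshalJSON accepts. The "llama_cpp" string is an assumption for
// the JSON form of backends.BackendTypeLlamaCpp; check the backends package.
func exampleOptionsUnmarshal() (*Options, error) {
	data := []byte(`{
		"auto_restart": true,
		"idle_timeout": 10,
		"nodes": ["worker1"],
		"backend_type": "llama_cpp",
		"backend_options": {"model": "/path/to/model.gguf"}
	}`)
	var opts Options
	if err := json.Unmarshal(data, &opts); err != nil {
		return nil, err
	}
	return &opts, nil
}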

pkg/instance/process.go Normal file

@@ -0,0 +1,428 @@
package instance
import (
"context"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"runtime"
"sync"
"syscall"
"time"
)
// process manages the OS process lifecycle for a local instance.
// process owns its complete lifecycle including auto-restart logic.
type process struct {
instance *Instance // Back-reference for SetStatus, GetOptions
mu sync.RWMutex
cmd *exec.Cmd
ctx context.Context
cancel context.CancelFunc
stdout io.ReadCloser
stderr io.ReadCloser
restarts int
restartCancel context.CancelFunc
monitorDone chan struct{}
}
// newProcess creates a new process component for the given instance
func newProcess(instance *Instance) *process {
return &process{
instance: instance,
}
}
// start starts the OS process and returns an error if it fails.
func (p *process) start() error {
p.mu.Lock()
defer p.mu.Unlock()
if p.instance.IsRunning() {
return fmt.Errorf("instance %s is already running", p.instance.Name)
}
// Safety check: ensure options are valid
if p.instance.options == nil {
return fmt.Errorf("instance %s has no options set", p.instance.Name)
}
// Reset restart counter when manually starting (not during auto-restart)
// We can detect auto-restart by checking if restartCancel is set
if p.restartCancel == nil {
p.restarts = 0
}
// Initialize last request time to current time when starting
if p.instance.proxy != nil {
p.instance.proxy.updateLastRequestTime()
}
// Create context before building command (needed for CommandContext)
p.ctx, p.cancel = context.WithCancel(context.Background())
// Create log files
if err := p.instance.logger.create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
// Build command using backend-specific methods
cmd, cmdErr := p.buildCommand()
if cmdErr != nil {
return fmt.Errorf("failed to build command: %w", cmdErr)
}
p.cmd = cmd
if runtime.GOOS != "windows" {
setProcAttrs(p.cmd)
}
var err error
p.stdout, err = p.cmd.StdoutPipe()
if err != nil {
p.instance.logger.close()
return fmt.Errorf("failed to get stdout pipe: %w", err)
}
p.stderr, err = p.cmd.StderrPipe()
if err != nil {
p.stdout.Close()
p.instance.logger.close()
return fmt.Errorf("failed to get stderr pipe: %w", err)
}
if err := p.cmd.Start(); err != nil {
return fmt.Errorf("failed to start instance %s: %w", p.instance.Name, err)
}
p.instance.SetStatus(Running)
// Create channel for monitor completion signaling
p.monitorDone = make(chan struct{})
go p.instance.logger.readOutput(p.stdout)
go p.instance.logger.readOutput(p.stderr)
go p.monitorProcess()
return nil
}
// stop terminates the subprocess without restarting
func (p *process) stop() error {
p.mu.Lock()
if !p.instance.IsRunning() {
// Even if not running, cancel any pending restart
if p.restartCancel != nil {
p.restartCancel()
p.restartCancel = nil
log.Printf("Cancelled pending restart for instance %s", p.instance.Name)
}
p.mu.Unlock()
return fmt.Errorf("instance %s is not running", p.instance.Name)
}
// Cancel any pending restart
if p.restartCancel != nil {
p.restartCancel()
p.restartCancel = nil
}
// Set status to ShuttingDown first to reject new requests
p.instance.SetStatus(ShuttingDown)
// Get the monitor done channel before releasing the lock
monitorDone := p.monitorDone
p.mu.Unlock()
// Wait for inflight requests to complete (max 30 seconds)
log.Printf("Instance %s shutting down, waiting for inflight requests to complete...", p.instance.Name)
deadline := time.Now().Add(30 * time.Second)
for time.Now().Before(deadline) {
inflight := p.instance.GetInflightRequests()
if inflight == 0 {
break
}
time.Sleep(100 * time.Millisecond)
}
// Now set status to stopped to signal intentional stop
p.instance.SetStatus(Stopped)
// Stop the process with SIGINT if cmd exists
if p.cmd != nil && p.cmd.Process != nil {
if err := p.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", p.instance.Name, err)
}
}
// If no process exists, we can return immediately
if p.cmd == nil || monitorDone == nil {
p.instance.logger.close()
return nil
}
select {
case <-monitorDone:
// Process exited normally
log.Printf("Instance %s shut down gracefully", p.instance.Name)
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if p.cmd != nil && p.cmd.Process != nil {
killErr := p.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", p.instance.Name, killErr)
}
log.Printf("Instance %s did not stop in time, force killed", p.instance.Name)
// Wait a bit more for the monitor to finish after force kill
select {
case <-monitorDone:
// Monitor completed after force kill
case <-time.After(2 * time.Second):
log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", p.instance.Name)
}
}
}
p.instance.logger.close()
return nil
}
// restart manually restarts the process (resets restart counter)
func (p *process) restart() error {
// Stop the process first
if err := p.stop(); err != nil {
// If it's not running, that's ok - we'll just start it
if err.Error() != fmt.Sprintf("instance %s is not running", p.instance.Name) {
return fmt.Errorf("failed to stop instance during restart: %w", err)
}
}
// Reset restart counter for manual restart
p.mu.Lock()
p.restarts = 0
p.mu.Unlock()
// Start the process
return p.start()
}
// waitForHealthy waits for the process to become healthy
func (p *process) waitForHealthy(timeout int) error {
if !p.instance.IsRunning() {
return fmt.Errorf("instance %s is not running", p.instance.Name)
}
if timeout <= 0 {
timeout = 30 // Default to 30 seconds if no timeout is specified
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
defer cancel()
// Get host/port from instance
host := p.instance.options.GetHost()
port := p.instance.options.GetPort()
healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
// Create a dedicated HTTP client for health checks
client := &http.Client{
Timeout: 5 * time.Second, // 5 second timeout per request
}
// Helper function to check health directly
checkHealth := func() bool {
req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
if err != nil {
return false
}
resp, err := client.Do(req)
if err != nil {
return false
}
defer resp.Body.Close()
return resp.StatusCode == http.StatusOK
}
// Try immediate check first
if checkHealth() {
return nil // Instance is healthy
}
// If immediate check failed, start polling
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", p.instance.Name, timeout)
case <-ticker.C:
if checkHealth() {
return nil // Instance is healthy
}
// Continue polling
}
}
}
// monitorProcess monitors the OS process and handles crashes/exits
func (p *process) monitorProcess() {
defer func() {
p.mu.Lock()
if p.monitorDone != nil {
close(p.monitorDone)
p.monitorDone = nil
}
p.mu.Unlock()
}()
err := p.cmd.Wait()
p.mu.Lock()
// Check if the instance was intentionally stopped
if !p.instance.IsRunning() {
p.mu.Unlock()
return
}
p.instance.SetStatus(Stopped)
p.instance.logger.close()
// Cancel any existing restart context since we're handling a new exit
if p.restartCancel != nil {
p.restartCancel()
p.restartCancel = nil
}
// Log the exit
if err != nil {
log.Printf("Instance %s crashed with error: %v", p.instance.Name, err)
// Handle auto-restart logic
p.handleAutoRestart(err)
} else {
log.Printf("Instance %s exited cleanly", p.instance.Name)
p.mu.Unlock()
}
}
// shouldAutoRestart checks if the process should auto-restart
func (p *process) shouldAutoRestart() bool {
opts := p.instance.GetOptions()
if opts == nil {
log.Printf("Instance %s not restarting: options are nil", p.instance.Name)
return false
}
if opts.AutoRestart == nil || !*opts.AutoRestart {
log.Printf("Instance %s not restarting: AutoRestart is disabled", p.instance.Name)
return false
}
if opts.MaxRestarts == nil {
log.Printf("Instance %s not restarting: MaxRestarts is nil", p.instance.Name)
return false
}
maxRestarts := *opts.MaxRestarts
if p.restarts >= maxRestarts {
log.Printf("Instance %s exceeded max restart attempts (%d)", p.instance.Name, maxRestarts)
return false
}
return true
}
// handleAutoRestart manages the auto-restart process
func (p *process) handleAutoRestart(err error) {
// Check if should restart
if !p.shouldAutoRestart() {
p.instance.SetStatus(Failed)
p.mu.Unlock()
return
}
// Get restart parameters
opts := p.instance.GetOptions()
if opts.RestartDelay == nil {
log.Printf("Instance %s not restarting: RestartDelay is nil", p.instance.Name)
p.instance.SetStatus(Failed)
p.mu.Unlock()
return
}
restartDelay := *opts.RestartDelay
maxRestarts := *opts.MaxRestarts
p.restarts++
// Set status to Restarting instead of leaving as Stopped
p.instance.SetStatus(Restarting)
log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
p.instance.Name, p.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
// Create a cancellable context for the restart delay
restartCtx, cancel := context.WithCancel(context.Background())
p.restartCancel = cancel
// Release the lock before sleeping
p.mu.Unlock()
// Use context-aware sleep so it can be cancelled
select {
case <-time.After(time.Duration(restartDelay) * time.Second):
// Sleep completed normally, continue with restart
case <-restartCtx.Done():
// Restart was cancelled
log.Printf("Restart cancelled for instance %s", p.instance.Name)
return
}
// Restart the instance
if err := p.start(); err != nil {
log.Printf("Failed to restart instance %s: %v", p.instance.Name, err)
} else {
log.Printf("Successfully restarted instance %s", p.instance.Name)
// Clear the cancel function
p.mu.Lock()
p.restartCancel = nil
p.mu.Unlock()
}
}
// buildCommand builds the command to execute using backend-specific logic
func (p *process) buildCommand() (*exec.Cmd, error) {
// Build the environment variables
env := p.instance.buildEnvironment()
// Get the command to execute
command := p.instance.getCommand()
// Build command arguments
args := p.instance.buildCommandArgs()
// Create the exec.Cmd
cmd := exec.CommandContext(p.ctx, command, args...)
// Start with host environment variables
cmd.Env = os.Environ()
// Add/override with backend-specific environment variables
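// (os/exec keeps only the last value for duplicate keys, so these
// backend-specific entries override the host environment)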
for k, v := range env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
}
return cmd, nil
}

pkg/instance/proxy.go Normal file

@@ -0,0 +1,234 @@
package instance
import (
"fmt"
"net/http"
"net/http/httputil"
"net/url"
"sync"
"sync/atomic"
"time"
)
// TimeProvider interface allows for testing with mock time
type TimeProvider interface {
Now() time.Time
}
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// proxy manages HTTP reverse proxy and request tracking for an instance.
type proxy struct {
instance *Instance
targetURL *url.URL
apiKey string // For remote instances
responseHeaders map[string]string
mu sync.RWMutex
proxy *httputil.ReverseProxy
proxyOnce sync.Once
proxyErr error
lastRequestTime atomic.Int64
inflightRequests atomic.Int32
timeProvider TimeProvider
}
// newProxy creates a new Proxy for the given instance
func newProxy(instance *Instance) (*proxy, error) {
p := &proxy{
instance: instance,
timeProvider: realTimeProvider{},
}
var err error
options := instance.GetOptions()
if options == nil {
return nil, fmt.Errorf("instance %s has no options set", instance.Name)
}
if instance.IsRemote() {
// Take one remote node as the target for now (map iteration order is unspecified)
var nodeName string
for node := range options.Nodes {
nodeName = node
break
}
if nodeName == "" {
return nil, fmt.Errorf("instance %s has no remote nodes defined", p.instance.Name)
}
node, ok := p.instance.globalNodesConfig[nodeName]
if !ok {
return nil, fmt.Errorf("remote node %s is not defined", nodeName)
}
p.targetURL, err = url.Parse(node.Address)
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for remote instance %s: %w", p.instance.Name, err)
}
p.apiKey = node.APIKey
} else {
// Get host/port from process
host := p.instance.options.GetHost()
port := p.instance.options.GetPort()
if port == 0 {
return nil, fmt.Errorf("instance %s has no port assigned", p.instance.Name)
}
p.targetURL, err = url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", p.instance.Name, err)
}
// Get response headers from backend config
p.responseHeaders = options.BackendOptions.GetResponseHeaders(p.instance.globalBackendSettings)
}
return p, nil
}
// get returns the reverse proxy for this instance, creating it if needed.
// Uses sync.Once to ensure thread-safe one-time initialization.
func (p *proxy) get() (*httputil.ReverseProxy, error) {
// sync.Once guarantees buildProxy() is called exactly once
// Other callers block until first initialization completes
p.proxyOnce.Do(func() {
p.proxy, p.proxyErr = p.build()
})
return p.proxy, p.proxyErr
}
// build creates the reverse proxy based on instance options
func (p *proxy) build() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(p.targetURL)
// Modify the request before sending it to the backend
originalDirector := proxy.Director
proxy.Director = func(req *http.Request) {
originalDirector(req)
// Add API key header for remote instances
if p.instance.IsRemote() && p.apiKey != "" {
req.Header.Set("Authorization", "Bearer "+p.apiKey)
}
// Update last request time
p.updateLastRequestTime()
}
if !p.instance.IsRemote() {
// Add custom headers to the backend response
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from backend response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range p.responseHeaders {
resp.Header.Set(key, value)
}
return nil
}
}
return proxy, nil
}
// serveHTTP handles HTTP requests with inflight tracking
func (p *proxy) serveHTTP(w http.ResponseWriter, r *http.Request) error {
// Get the reverse proxy
reverseProxy, err := p.get()
if err != nil {
return err
}
// Track inflight requests
p.incInflightRequests()
defer p.decInflightRequests()
// Serve the request
reverseProxy.ServeHTTP(w, r)
return nil
}
// clear resets the proxy, allowing it to be recreated when options change.
func (p *proxy) clear() {
p.mu.Lock()
defer p.mu.Unlock()
p.proxy = nil
p.proxyErr = nil
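// Resetting the Once below lets the next get() call rebuild the proxy
// against the instance's updated options.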
p.proxyOnce = sync.Once{}
}
// updateLastRequestTime updates the last request access time for the instance
func (p *proxy) updateLastRequestTime() {
lastRequestTime := p.timeProvider.Now().Unix()
p.lastRequestTime.Store(lastRequestTime)
}
// getLastRequestTime returns the last request time as a Unix timestamp
func (p *proxy) getLastRequestTime() int64 {
return p.lastRequestTime.Load()
}
// shouldTimeout checks if the instance should timeout based on idle time
func (p *proxy) shouldTimeout() bool {
if !p.instance.IsRunning() {
return false
}
options := p.instance.GetOptions()
if options == nil || options.IdleTimeout == nil || *options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := p.lastRequestTime.Load()
idleTimeoutMinutes := *options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (p.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}
// setTimeProvider sets a custom time provider for testing
func (p *proxy) setTimeProvider(tp TimeProvider) {
p.timeProvider = tp
}
// incInflightRequests increments the inflight request counter
func (p *proxy) incInflightRequests() {
p.inflightRequests.Add(1)
}
// decInflightRequests decrements the inflight request counter
func (p *proxy) decInflightRequests() {
p.inflightRequests.Add(-1)
}
// getInflightRequests returns the current number of inflight requests
func (p *proxy) getInflightRequests() int32 {
return p.inflightRequests.Load()
}
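// --- Sketch with assumed handler wiring, not part of this diff: the HTTP
// layer can delegate straight to serveHTTP so that lazy proxy construction
// and inflight tracking apply to every forwarded request.
func exampleProxyHandler(p *proxy) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if err := p.serveHTTP(w, r); err != nil {
			http.Error(w, err.Error(), http.StatusBadGateway)
		}
	}
}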


@@ -3,48 +3,38 @@ package instance
import (
"encoding/json"
"log"
"sync"
)
// Enum for instance status
type InstanceStatus int
// Status is the enum for status values (exported).
type Status int
const (
Stopped InstanceStatus = iota
Stopped Status = iota
Running
Failed
Restarting
ShuttingDown
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
var nameToStatus = map[string]Status{
"stopped": Stopped,
"running": Running,
"failed": Failed,
"restarting": Restarting,
"shutting_down": ShuttingDown,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
var statusToName = map[Status]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
Restarting: "restarting",
ShuttingDown: "shutting_down",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
// Status enum JSON marshaling methods
func (s Status) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
@@ -52,8 +42,8 @@ func (s InstanceStatus) MarshalJSON() ([]byte, error) {
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
// UnmarshalJSON implements json.Unmarshaler for Status enum
func (s *Status) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
@@ -68,3 +58,61 @@ func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
*s = status
return nil
}
// status represents the instance status with thread-safe access (unexported).
type status struct {
mu sync.RWMutex
s Status
// Callback for status changes
onStatusChange func(oldStatus, newStatus Status)
}
// newStatus creates a new status wrapper with the given initial status
func newStatus(initial Status) *status {
return &status{
s: initial,
}
}
// get returns the current status
func (st *status) get() Status {
st.mu.RLock()
defer st.mu.RUnlock()
return st.s
}
// set updates the status and triggers the onStatusChange callback if set
func (st *status) set(newStatus Status) {
st.mu.Lock()
oldStatus := st.s
st.s = newStatus
callback := st.onStatusChange
st.mu.Unlock()
// Call the callback outside the lock to avoid potential deadlocks
if callback != nil {
callback(oldStatus, newStatus)
}
}
// isRunning returns true if the status is Running
func (st *status) isRunning() bool {
st.mu.RLock()
defer st.mu.RUnlock()
return st.s == Running
}
// MarshalJSON implements json.Marshaler for status wrapper
func (st *status) MarshalJSON() ([]byte, error) {
st.mu.RLock()
defer st.mu.RUnlock()
return st.s.MarshalJSON()
}
// UnmarshalJSON implements json.Unmarshaler for status wrapper
func (st *status) UnmarshalJSON(data []byte) error {
st.mu.Lock()
defer st.mu.Unlock()
return st.s.UnmarshalJSON(data)
}
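// --- In-package sketch, not part of this diff: because set() invokes the
// callback outside the lock, a callback may safely call back into the status
// wrapper without deadlocking.
func exampleStatusCallback() {
	st := newStatus(Stopped)
	st.onStatusChange = func(oldS, newS Status) {
		log.Printf("status changed: %s -> %s", statusToName[oldS], statusToName[newS])
	}
	st.set(Running) // logs "status changed: stopped -> running"
}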


@@ -1,28 +0,0 @@
package instance
// UpdateLastRequestTime updates the last request access time for the instance via proxy
func (i *Process) UpdateLastRequestTime() {
i.mu.Lock()
defer i.mu.Unlock()
lastRequestTime := i.timeProvider.Now().Unix()
i.lastRequestTime.Store(lastRequestTime)
}
func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := i.lastRequestTime.Load()
idleTimeoutMinutes := *i.options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}


@@ -1,250 +0,0 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
idleTimeout *int
}{
{"nil timeout", nil},
{"zero timeout", testutil.IntPtr(0)},
{"negative timeout", testutil.IntPtr(-5)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
}
})
}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
backendConfig := &config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
inputTimeout *int
expectedTimeout int
}{
{"default value when nil", nil, 0},
{"positive value", testutil.IntPtr(10), 10},
{"zero value", testutil.IntPtr(0), 0},
{"negative value gets corrected", testutil.IntPtr(-5), 0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
}
})
}
}
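
The table-driven cases above build optional *int values with testutil.IntPtr. The testutil package is not part of this diff, but the helper is presumably just a pointer constructor, sketched here:

package testutil

// IntPtr returns a pointer to i, handy for optional *int
// configuration fields in table-driven tests.
func IntPtr(i int) *int {
    return &i
}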

pkg/manager/lifecycle.go (new file, 152 lines)

@@ -0,0 +1,152 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
"sync"
"time"
)
// lifecycleManager handles background timeout checking and LRU eviction.
// Shutdown is coordinated so the timeout checker fully exits before
// instance cleanup begins, preventing a shutdown race.
type lifecycleManager struct {
registry *instanceRegistry
manager InstanceManager // For calling Stop/Evict operations
ticker *time.Ticker
checkInterval time.Duration
enableLRU bool
shutdownChan chan struct{}
shutdownDone chan struct{}
shutdownOnce sync.Once
}
// newLifecycleManager creates a new lifecycle manager.
func newLifecycleManager(
registry *instanceRegistry,
manager InstanceManager,
checkInterval time.Duration,
enableLRU bool,
) *lifecycleManager {
if checkInterval <= 0 {
checkInterval = 5 * time.Minute // Default to 5 minutes
}
return &lifecycleManager{
registry: registry,
manager: manager,
ticker: time.NewTicker(checkInterval),
checkInterval: checkInterval,
enableLRU: enableLRU,
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
}
}
// start begins the timeout checking loop in a goroutine.
func (l *lifecycleManager) start() {
go l.timeoutCheckLoop()
}
// stop gracefully stops the lifecycle manager.
// It ensures the timeout checker completes before instance cleanup begins.
func (l *lifecycleManager) stop() {
l.shutdownOnce.Do(func() {
close(l.shutdownChan)
<-l.shutdownDone // Wait for checker to finish (prevents shutdown race)
l.ticker.Stop()
})
}
// timeoutCheckLoop is the main loop that periodically checks for timeouts.
func (l *lifecycleManager) timeoutCheckLoop() {
defer close(l.shutdownDone) // Signal completion
for {
select {
case <-l.ticker.C:
l.checkTimeouts()
case <-l.shutdownChan:
return // Exit goroutine on shutdown
}
}
}
// checkTimeouts checks all instances for timeout and stops those that have timed out.
func (l *lifecycleManager) checkTimeouts() {
// Get all instances from registry
instances := l.registry.list()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range instances {
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
// Only check running instances
if !l.registry.isRunning(inst.Name) {
continue
}
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := l.manager.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
// evictLRU finds and stops the least recently used running instance.
// It is called when the max running instances limit is reached.
func (l *lifecycleManager) evictLRU() error {
if !l.enableLRU {
return fmt.Errorf("LRU eviction is not enabled")
}
// Get all running instances
runningInstances := l.registry.listRunning()
var lruInstance *instance.Instance
for _, inst := range runningInstances {
// Skip remote instances - they are managed by their respective nodes
if inst.IsRemote() {
continue
}
// Skip instances without idle timeout
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue
}
if lruInstance == nil || inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict the LRU instance
log.Printf("Evicting LRU instance %s", lruInstance.Name)
_, err := l.manager.StopInstance(lruInstance.Name)
return err
}
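
The shutdownChan/shutdownDone/shutdownOnce trio used above is a general pattern: close one channel to request shutdown, wait on a second channel that the goroutine closes on exit, and wrap the sequence in sync.Once so repeated stop() calls are safe. A minimal standalone sketch of the same handshake (illustrative names only):

package main

import (
    "fmt"
    "sync"
    "time"
)

type worker struct {
    stopCh   chan struct{}
    doneCh   chan struct{}
    stopOnce sync.Once
}

func newWorker() *worker {
    w := &worker{stopCh: make(chan struct{}), doneCh: make(chan struct{})}
    go func() {
        defer close(w.doneCh) // signal completion to stop()
        t := time.NewTicker(10 * time.Millisecond)
        defer t.Stop()
        for {
            select {
            case <-t.C:
                // periodic work would run here
            case <-w.stopCh:
                return
            }
        }
    }()
    return w
}

func (w *worker) stop() {
    w.stopOnce.Do(func() {
        close(w.stopCh)
        <-w.doneCh // wait: the loop has fully exited before we return
    })
}

func main() {
    w := newWorker()
    w.stop()
    w.stop() // idempotent: second call is a no-op
    fmt.Println("worker stopped cleanly")
}

The key line is <-w.doneCh: stop() returns only after the loop has fully exited, which is exactly what lets the manager above begin instance cleanup without racing the timeout checker.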


@@ -0,0 +1,220 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestInstanceTimeoutLogic(t *testing.T) {
testManager := createTestManager(t)
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
inst := createInstanceWithTimeout(t, testManager, "timeout-test", "/path/to/model.gguf", &idleTimeout)
// Test timeout logic with mock time provider
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
defer inst.SetStatus(instance.Stopped)
// Update last request time
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
}
func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
testManager := createTestManager(t)
defer testManager.Shutdown()
noTimeoutInst := createInstanceWithTimeout(t, testManager, "no-timeout-test", "/path/to/model.gguf", nil)
mockTime := NewMockTimeProvider(time.Now())
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running)
defer noTimeoutInst.SetStatus(instance.Stopped)
noTimeoutInst.UpdateLastRequestTime()
// Advance time significantly
mockTime.SetTime(mockTime.Now().Add(24 * time.Hour))
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager(t)
defer manager.Shutdown()
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
validTimeout := 1
inst1 := createInstanceWithTimeout(t, manager, "instance-1", "/path/to/model1.gguf", &validTimeout)
inst2 := createInstanceWithTimeout(t, manager, "instance-2", "/path/to/model2.gguf", &validTimeout)
inst3 := createInstanceWithTimeout(t, manager, "instance-3", "/path/to/model3.gguf", &validTimeout)
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
defer func() {
// Clean up - ensure all instances are stopped
for _, inst := range []*instance.Instance{inst1, inst2, inst3} {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
if err := manager.EvictLRUInstance(); err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
}
func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
manager := createTestManager(t)
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
}
func TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances(t *testing.T) {
manager := createTestManager(t)
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
// Only timeout-enabled instances should be eligible for eviction
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(t, manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(t, manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(t, manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Instance{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
}
// Helper function to create instances with different timeout configurations
func createInstanceWithTimeout(t *testing.T, manager manager.InstanceManager, name, model string, timeout *int) *instance.Instance {
t.Helper()
options := &instance.Options{
IdleTimeout: timeout,
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: model,
},
},
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
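
For inst.SetTimeProvider to accept this mock, the instance package presumably exposes a one-method clock interface along these lines (an assumption; the interface definition is not part of this diff, and realTime is a hypothetical production implementation):

// TimeProvider abstracts the clock so tests can control time.
type TimeProvider interface {
    Now() time.Time
}

// realTime would back the production path with the real clock.
type realTime struct{}

func (realTime) Now() time.Time { return time.Now() }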


@@ -1,298 +1,296 @@
package manager
import (
"encoding/json"
"context"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*instance.Process, error)
CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetInstance(name string) (*instance.Process, error)
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
ListInstances() ([]*instance.Instance, error)
CreateInstance(name string, options *instance.Options) (*instance.Instance, error)
GetInstance(name string) (*instance.Instance, error)
UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
StartInstance(name string) (*instance.Instance, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
StopInstance(name string) (*instance.Instance, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
RestartInstance(name string) (*instance.Instance, error)
GetInstanceLogs(name string, numLines int) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
backendsConfig config.BackendConfig
// Components (each with own synchronization)
registry *instanceRegistry
ports *portAllocator
db database.InstanceStore
remote *remoteManager
lifecycle *lifecycleManager
// Timeout checker
timeoutChecker *time.Ticker
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
// Configuration
globalConfig *config.AppConfig
// Synchronization
instanceLocks sync.Map // map[string]*sync.Mutex - per-instance locks for concurrent operations
shutdownOnce sync.Once
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
backendsConfig: backendsConfig,
// New creates a new instance of InstanceManager with dependency injection.
func New(globalConfig *config.AppConfig, db database.InstanceStore) InstanceManager {
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
if globalConfig.Instances.TimeoutCheckInterval <= 0 {
globalConfig.Instances.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
// Initialize components
registry := newInstanceRegistry()
// Initialize port allocator
portRange := globalConfig.Instances.PortRange
ports := newPortAllocator(portRange[0], portRange[1])
// Initialize remote manager
remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)
// Create manager instance
im := &instanceManager{
registry: registry,
ports: ports,
db: db,
remote: remote,
globalConfig: globalConfig,
}
// Initialize lifecycle manager (needs reference to manager for Stop/Evict operations)
checkInterval := time.Duration(globalConfig.Instances.TimeoutCheckInterval) * time.Minute
im.lifecycle = newLifecycleManager(registry, im, checkInterval, true)
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
// Start the timeout checker goroutine after initialization is complete
go func() {
defer close(im.shutdownDone)
for {
select {
case <-im.timeoutChecker.C:
im.checkAllTimeouts()
case <-im.shutdownChan:
return // Exit goroutine on shutdown
}
}
}()
// Start the lifecycle manager
im.lifecycle.start()
return im
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}
// persistInstance saves an instance to its JSON file
func (im *instanceManager) persistInstance(instance *instance.Process) error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
tempPath := instancePath + ".tmp"
// Serialize instance to JSON
jsonData, err := json.MarshalIndent(instance, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
}
// Write to temporary file first
if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
}
// Atomic rename
if err := os.Rename(tempPath, instancePath); err != nil {
os.Remove(tempPath) // Clean up temp file
return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
}
return nil
// persistInstance saves an instance using the persistence layer
func (im *instanceManager) persistInstance(inst *instance.Instance) error {
return im.db.Save(inst)
}
func (im *instanceManager) Shutdown() {
im.mu.Lock()
im.shutdownOnce.Do(func() {
// 1. Stop lifecycle manager (stops timeout checker)
im.lifecycle.stop()
// Check if already shutdown
if im.isShutdown {
im.mu.Unlock()
return
}
im.isShutdown = true
// 2. Get running instances (no lock needed - registry handles it)
running := im.registry.listRunning()
// Signal the timeout checker to stop
close(im.shutdownChan)
// Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
// Stop instances without holding the manager lock
var wg sync.WaitGroup
wg.Add(len(runningInstances))
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
// Attempt to stop the instance gracefully
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
// 3. Stop local instances concurrently
var wg sync.WaitGroup
for _, inst := range running {
if inst.IsRemote() {
continue // Skip remote instances
}
}(runningNames[i], inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
wg.Add(1)
go func(inst *instance.Instance) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", inst.Name)
if err := inst.Stop(); err != nil {
log.Printf("Error stopping instance %s: %v\n", inst.Name, err)
}
}(inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
})
}
// loadInstances restores all instances from disk
// loadInstances restores all instances from the persistence layer
func (im *instanceManager) loadInstances() error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
// Check if instances directory exists
if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
return nil // No instances directory, start fresh
}
// Read all JSON files from instances directory
files, err := os.ReadDir(im.instancesConfig.InstancesDir)
// Load all instances from persistence
instances, err := im.db.LoadAll()
if err != nil {
return fmt.Errorf("failed to read instances directory: %w", err)
return fmt.Errorf("failed to load instances: %w", err)
}
loadedCount := 0
for _, file := range files {
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
if len(instances) == 0 {
return nil
}
// Process each loaded instance
for _, persistedInst := range instances {
if err := im.loadInstance(persistedInst); err != nil {
log.Printf("Failed to load instance %s: %v", persistedInst.Name, err)
continue
}
instanceName := strings.TrimSuffix(file.Name(), ".json")
instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
if err := im.loadInstance(instanceName, instancePath); err != nil {
log.Printf("Failed to load instance %s: %v", instanceName, err)
continue
}
loadedCount++
}
if loadedCount > 0 {
log.Printf("Loaded %d instances from persistence", loadedCount)
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
}
log.Printf("Loaded %d instances from persistence", len(instances))
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
return nil
}
// loadInstance loads a single instance from its JSON file
func (im *instanceManager) loadInstance(name, path string) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read instance file: %w", err)
// loadInstance loads a single persisted instance and adds it to the registry
func (im *instanceManager) loadInstance(persistedInst *instance.Instance) error {
name := persistedInst.Name
options := persistedInst.GetOptions()
// Check if this is a remote instance (local node not in the Nodes set)
var isRemote bool
var nodeName string
if options != nil {
if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
// Get the first node from the set
for node := range options.Nodes {
nodeName = node
isRemote = true
break
}
}
}
var persistedInstance instance.Process
if err := json.Unmarshal(data, &persistedInstance); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Validate the instance name matches the filename
if persistedInstance.Name != name {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
var statusCallback func(oldStatus, newStatus instance.Status)
if !isRemote {
// Only set status callback for local instances
statusCallback = func(oldStatus, newStatus instance.Status) {
im.onStatusChange(name, oldStatus, newStatus)
}
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
inst := instance.New(name, im.globalConfig, options, statusCallback)
// Restore persisted fields that instance.New doesn't set
inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status)
inst.ID = persistedInst.ID
inst.Created = persistedInst.Created
inst.SetStatus(persistedInst.GetStatus())
// Check for port conflicts and add to maps
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
// Handle remote instance mapping
if isRemote {
// Map instance to node in remote manager
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return fmt.Errorf("failed to set instance node: %w", err)
}
} else {
// Allocate port for local instances
if inst.GetPort() > 0 {
port := inst.GetPort()
if err := im.ports.allocateSpecific(port, name); err != nil {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use: %w", name, port, err)
}
}
im.ports[port] = true
}
im.instances[name] = inst
// Add instance to registry
if err := im.registry.add(inst); err != nil {
return fmt.Errorf("failed to add instance to registry: %w", err)
}
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
// For instances with auto-restart disabled, it sets their status to Stopped
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
instances := im.registry.list()
var instancesToStart []*instance.Instance
var instancesToStop []*instance.Instance
for _, inst := range instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
inst.GetOptions().AutoRestart != nil {
if *inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
} else {
// Instance was running but auto-restart is disabled, mark as stopped
instancesToStop = append(instancesToStop, inst)
}
}
}
im.mu.RUnlock()
// Stop instances that have auto-restart disabled
for _, inst := range instancesToStop {
log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
inst.SetStatus(instance.Stopped)
im.registry.markStopped(inst.Name)
}
// Start instances that have auto-restart enabled
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
im.registry.markStopped(inst.Name)
// Check if this is a remote instance
if node, exists := im.remote.getNodeForInstance(inst.Name); exists && node != nil {
// Remote instance - use remote manager with context
ctx := context.Background()
if _, err := im.remote.startInstance(ctx, node, inst.Name); err != nil {
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
}
} else {
// Local instance - call Start() directly
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
func (im *instanceManager) onStatusChange(name string, _, newStatus instance.Status) {
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
im.registry.markRunning(name)
} else {
delete(im.runningInstances, name)
im.registry.markStopped(name)
}
}
// getNodeForInstance returns the node configuration for a remote instance
// Returns nil if the instance is not remote or the node is not found
func (im *instanceManager) getNodeForInstance(inst *instance.Instance) *config.NodeConfig {
if !inst.IsRemote() {
return nil
}
// Check if we have a node mapping in remote manager
if nodeConfig, exists := im.remote.getNodeForInstance(inst.Name); exists {
return nodeConfig
}
return nil
}
// lockInstance returns the lock for a specific instance, creating one if needed.
// This allows concurrent operations on different instances while preventing
// concurrent operations on the same instance.
func (im *instanceManager) lockInstance(name string) *sync.Mutex {
lock, _ := im.instanceLocks.LoadOrStore(name, &sync.Mutex{})
return lock.(*sync.Mutex)
}
// unlockAndCleanup unlocks the instance lock and removes it from the map.
// This should only be called when deleting an instance to prevent memory leaks.
func (im *instanceManager) unlockAndCleanup(name string) {
if lock, ok := im.instanceLocks.Load(name); ok {
lock.(*sync.Mutex).Unlock()
im.instanceLocks.Delete(name)
}
}
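
lockInstance relies on sync.Map.LoadOrStore to lazily create exactly one mutex per instance name: operations on different instances run in parallel, while operations on the same instance serialize. A self-contained sketch of the idiom (names are illustrative):

package main

import (
    "fmt"
    "sync"
)

type lockTable struct {
    locks sync.Map // map[string]*sync.Mutex
}

// lock returns the mutex for name, creating it on first use. LoadOrStore
// guarantees concurrent callers for the same name share one mutex.
func (lt *lockTable) lock(name string) *sync.Mutex {
    m, _ := lt.locks.LoadOrStore(name, &sync.Mutex{})
    return m.(*sync.Mutex)
}

func main() {
    lt := &lockTable{}
    var wg sync.WaitGroup
    counter := 0
    for i := 0; i < 100; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            mu := lt.lock("instance-a") // same name -> same mutex
            mu.Lock()
            counter++
            mu.Unlock()
        }()
    }
    wg.Wait()
    fmt.Println(counter) // 100: increments on the same instance were serialized
}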


@@ -3,86 +3,67 @@ package manager_test
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"strings"
"sync"
"testing"
"time"
)
func TestNewInstanceManager(t *testing.T) {
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
mgr := manager.NewInstanceManager(backendConfig, cfg)
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestPersistence(t *testing.T) {
func TestManager_PersistsAndLoadsInstances(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
// Use file-based database for this test since we need to persist across connections
appConfig.Database.Path = tempDir + "/test.db"
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
// Create instance and check database was created
db1, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
if err := database.RunMigrations(db1); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(backendConfig, cfg)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
manager1 := manager.New(appConfig, db1)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
_, err := manager1.CreateInstance("test-instance", options)
_, err = manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Check that JSON file was created
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Shutdown first manager to close database connection
manager1.Shutdown()
// Test loading instances from disk
manager2 := manager.NewInstanceManager(backendConfig, cfg)
// Load instances from database
db2, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db2); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
manager2 := manager.New(appConfig, db2)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -94,25 +75,70 @@ func TestPersistence(t *testing.T) {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
// Test port map populated from loaded instances (port conflict should be detected)
_, err = manager2.CreateInstance("new-instance", options) // Same port
if err == nil || !strings.Contains(err.Error(), "port") {
t.Errorf("Expected port conflict error, got: %v", err)
manager2.Shutdown()
}
func TestDeleteInstance_RemovesFromDatabase(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
mgr := manager.New(appConfig, db)
defer mgr.Shutdown()
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
// Test file deletion on instance deletion
err = manager2.DeleteInstance("test-instance")
_, err = mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Verify instance exists
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Fatalf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = mgr.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
// Verify instance was deleted from database
instances, err = mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances after deletion, got %d", len(instances))
}
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
// Test concurrent operations
@@ -124,10 +150,12 @@ func TestConcurrentAccess(t *testing.T) {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
@@ -138,7 +166,7 @@ func TestConcurrentAccess(t *testing.T) {
}
// Concurrent list operations
for i := 0; i < 3; i++ {
for range 3 {
wg.Add(1)
go func() {
defer wg.Done()
@@ -157,43 +185,57 @@ func TestConcurrentAccess(t *testing.T) {
}
}
func TestShutdown(t *testing.T) {
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
// Helper functions for test configuration
func createTestAppConfig(instancesDir string) *config.AppConfig {
// Use 'sh -c "sleep 999999"' as a test command instead of 'llama-server'
// The shell ignores all additional arguments passed after the command
return &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
MLX: config.BackendSettings{
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: instancesDir,
MaxInstances: 10,
MaxRunningInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
LogsDir: instancesDir,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
},
Database: config.DatabaseConfig{
Path: ":memory:",
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
_, err := mgr.CreateInstance("test-instance", options)
}
func createTestManager(t *testing.T) manager.InstanceManager {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
t.Fatalf("Failed to open database: %v", err)
}
// Shutdown should not panic
mgr.Shutdown()
// Multiple shutdowns should not panic
mgr.Shutdown()
}
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(backendConfig, cfg)
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
return manager.New(appConfig, db)
}


@@ -1,159 +1,350 @@
package manager
import (
"context"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
"log"
)
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Instance, remoteInst *instance.Instance) {
if localInst == nil || remoteInst == nil {
return
}
remoteOptions := remoteInst.GetOptions()
if remoteOptions == nil {
return
}
// Update the local instance with all remote data
localInst.SetOptions(remoteOptions)
localInst.SetStatus(remoteInst.GetStatus())
localInst.Created = remoteInst.Created
}
// ListInstances returns a list of all instances managed by the instance manager.
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
func (im *instanceManager) ListInstances() ([]*instance.Instance, error) {
instances := im.registry.list()
// Update remote instances with live state
ctx := context.Background()
for _, inst := range instances {
if node := im.getNodeForInstance(inst); node != nil {
remoteInst, err := im.remote.getInstance(ctx, node, inst.Name)
if err != nil {
// Don't fail the entire list operation due to one remote failure
continue
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
}
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
func (im *instanceManager) CreateInstance(name string, options *instance.Options) (*instance.Instance, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
name, err := validation.ValidateInstanceName(name)
err := options.BackendOptions.ValidateInstanceOptions()
if err != nil {
return nil, err
}
err = validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil {
// Check if instance with this name already exists (must be globally unique)
if _, exists := im.registry.get(name); exists {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
// Check if this is a remote instance (local node not in the Nodes set)
if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
// Get the first node from the set
var nodeName string
for node := range options.Nodes {
nodeName = node
break
}
// Create the remote instance on the remote node
ctx := context.Background()
nodeConfig, exists := im.remote.getNodeForInstance(nodeName)
if !exists {
// Try to set the node if it doesn't exist yet
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return nil, fmt.Errorf("node %s not found", nodeName)
}
nodeConfig, _ = im.remote.getNodeForInstance(name)
}
remoteInst, err := im.remote.createInstance(ctx, nodeConfig, name, options)
if err != nil {
return nil, err
}
// Create a local stub that preserves the Nodes field for tracking
// We keep the original options (with Nodes) so IsRemote() works correctly
inst := instance.New(name, im.globalConfig, options, nil)
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Map instance to node
if err := im.remote.setInstanceNode(name, nodeName); err != nil {
return nil, fmt.Errorf("failed to map instance to node: %w", err)
}
// Add to registry (doesn't count towards local limits)
if err := im.registry.add(inst); err != nil {
return nil, fmt.Errorf("failed to add instance to registry: %w", err)
}
// Persist the remote instance locally for tracking across restarts
if err := im.persistInstance(inst); err != nil {
// Rollback: remove from registry
im.registry.remove(name)
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
}
return inst, nil
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
// Local instance creation
// Check max instances limit for local instances only
totalInstances := im.registry.count()
remoteCount := 0
for _, inst := range im.registry.list() {
if inst.IsRemote() {
remoteCount++
}
}
localInstanceCount := totalInstances - remoteCount
if localInstanceCount >= im.globalConfig.Instances.MaxInstances && im.globalConfig.Instances.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.globalConfig.Instances.MaxInstances)
}
// Assign and validate port for backend-specific options
currentPort := im.getPortFromOptions(options)
var allocatedPort int
if currentPort == 0 {
// Allocate a port if not specified
allocatedPort, err = im.ports.allocate(name)
if err != nil {
return nil, fmt.Errorf("failed to allocate port: %w", err)
}
im.setPortInOptions(options, allocatedPort)
} else {
// Use the specified port
if err := im.ports.allocateSpecific(currentPort, name); err != nil {
return nil, fmt.Errorf("port %d is already in use: %w", currentPort, err)
}
allocatedPort = currentPort
}
statusCallback := func(oldStatus, newStatus instance.Status) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
inst := instance.New(name, im.globalConfig, options, statusCallback)
// Add to registry
if err := im.registry.add(inst); err != nil {
// Rollback: release port
im.ports.release(allocatedPort)
return nil, fmt.Errorf("failed to add instance to registry: %w", err)
}
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return inst, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
// For remote instances, this fetches the live state from the remote node and updates the local stub.
func (im *instanceManager) GetInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
// Check if instance is remote and fetch live state
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.getInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Return the local stub (preserving Nodes field)
return inst, nil
}
return inst, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) UpdateInstance(name string, options *instance.Options) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.updateInstance(ctx, node, name, options)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
// Persist the updated remote instance locally
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
}
return inst, nil
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := validation.ValidateInstanceOptions(options)
err := options.BackendOptions.ValidateInstanceOptions()
if err != nil {
return nil, err
}
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Handle port changes
oldPort := inst.GetPort()
newPort := im.getPortFromOptions(options)
var allocatedPort int
if newPort != oldPort {
// Port is changing - need to release old and allocate new
if newPort == 0 {
// Auto-allocate new port
allocatedPort, err = im.ports.allocate(name)
if err != nil {
return nil, fmt.Errorf("failed to allocate new port: %w", err)
}
im.setPortInOptions(options, allocatedPort)
} else {
// Use specified port
if err := im.ports.allocateSpecific(newPort, name); err != nil {
return nil, fmt.Errorf("failed to allocate port %d: %w", newPort, err)
}
allocatedPort = newPort
}
// Release old port
if oldPort > 0 {
if err := im.ports.release(oldPort); err != nil {
// Rollback new port allocation
im.ports.release(allocatedPort)
return nil, fmt.Errorf("failed to release old port %d: %w", oldPort, err)
}
}
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
wasRunning := inst.IsRunning()
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
inst.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
return inst, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
instance, exists := im.instances[name]
inst, exists := im.registry.get(name)
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
err := im.remote.deleteInstance(ctx, node, name)
if err != nil {
return err
}
// Clean up local tracking
im.remote.removeInstance(name)
im.registry.remove(name)
// Delete the instance's persistence
if err := im.db.Delete(name); err != nil {
return fmt.Errorf("failed to delete remote instance %s: %w", name, err)
}
return nil
}
// Lock this specific instance and clean up the lock on completion
lock := im.lockInstance(name)
lock.Lock()
defer im.unlockAndCleanup(name)
status := inst.GetStatus()
if status == instance.Running || status == instance.Restarting {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Release port (use ReleaseByInstance for proper cleanup)
im.ports.releaseByInstance(name)
// Delete the instance's config file if persistence is enabled
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
// Remove from registry
if err := im.registry.remove(name); err != nil {
return fmt.Errorf("failed to remove instance from registry: %w", err)
}
// Delete from persistence
if err := im.db.Delete(name); err != nil {
return fmt.Errorf("failed to delete instance from persistence %s: %w", name, err)
}
return nil
@@ -161,156 +352,186 @@ func (im *instanceManager) DeleteInstance(name string) error {
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
func (im *instanceManager) StartInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.startInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Idempotent: if already running, just return success
if inst.IsRunning() {
return inst, nil
}
if err := instance.Start(); err != nil {
// Check max running instances limit for local instances only
if im.IsMaxRunningInstancesReached() {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
}
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return instance, nil
return inst, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
return true
if im.globalConfig.Instances.MaxRunningInstances == -1 {
return false
}
return false
// Count only local running instances (each node has its own limits)
localRunningCount := 0
for _, inst := range im.registry.listRunning() {
if !inst.IsRemote() {
localRunningCount++
}
}
return localRunningCount >= im.globalConfig.Instances.MaxRunningInstances
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) StopInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.stopInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
if err := instance.Stop(); err != nil {
// Lock this specific instance only
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Idempotent: if already stopped, just return success
if !inst.IsRunning() {
return inst, nil
}
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(instance)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
// Persist instance (best-effort, don't fail if persistence fails)
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return instance, nil
return inst, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
func (im *instanceManager) RestartInstance(name string) (*instance.Instance, error) {
inst, exists := im.registry.get(name)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return im.StartInstance(instance.Name)
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
remoteInst, err := im.remote.restartInstance(ctx, node, name)
if err != nil {
return nil, err
}
// Update the local stub with all remote data (preserving Nodes)
im.updateLocalInstanceFromRemote(inst, remoteInst)
return inst, nil
}
// Lock this specific instance for the entire restart operation to ensure atomicity
lock := im.lockInstance(name)
lock.Lock()
defer lock.Unlock()
// Stop the instance
if inst.IsRunning() {
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
}
// Start the instance
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
// Persist the restarted instance
if err := im.persistInstance(inst); err != nil {
log.Printf("Warning: failed to persist instance %s: %v", name, err)
}
return inst, nil
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
inst, exists := im.registry.get(name)
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
// Check if instance is remote and delegate to remote operation
if node := im.getNodeForInstance(inst); node != nil {
ctx := context.Background()
return im.remote.getInstanceLogs(ctx, node, name, numLines)
}
// Get logs from the local instance
return inst.GetLogs(numLines)
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
return options.MlxServerOptions.Port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
return options.VllmServerOptions.Port
}
}
return 0
func (im *instanceManager) getPortFromOptions(options *instance.Options) int {
return options.BackendOptions.GetPort()
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
case backends.BackendTypeMlxLm:
if options.MlxServerOptions != nil {
options.MlxServerOptions.Port = port
}
case backends.BackendTypeVllm:
if options.VllmServerOptions != nil {
options.VllmServerOptions.Port = port
}
}
func (im *instanceManager) setPortInOptions(options *instance.Options, port int) {
options.BackendOptions.SetPort(port)
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
return im.lifecycle.evictLRU()
}


@@ -2,48 +2,23 @@ package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
"time"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
mngr := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
@@ -60,18 +35,53 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
tempDir := t.TempDir()
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
},
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
},
Database: config.DatabaseConfig{
Path: ":memory:",
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
limitedManager := manager.New(appConfig, db)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
@@ -88,33 +98,32 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
}
}
func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
manager := createTestManager(t)
options1 := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
inst1, err := manager.CreateInstance("instance1", options1)
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Try to create instance with same port
options2 := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
},
}
@@ -125,98 +134,21 @@ func TestPortManagement(t *testing.T) {
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
manager := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
_, err := manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
@@ -231,3 +163,141 @@ func TestInstanceOperations(t *testing.T) {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
},
}
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Simulate starting the instance
inst.SetStatus(instance.Running)
// Should fail to delete running instance
err = mgr.DeleteInstance("test-instance")
if err == nil {
t.Error("Expected error when deleting running instance")
}
}
func TestUpdateInstance(t *testing.T) {
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Start the instance (will use 'yes' command from test config)
if err := inst.Start(); err != nil {
t.Fatalf("Failed to start instance: %v", err)
}
// Update running instance with new model
newOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8080,
},
},
}
updated, err := mgr.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
// Should be running after update (was running before, should be restarted)
if !updated.IsRunning() {
t.Errorf("Instance should be running after update, got: %v", updated.GetStatus())
}
if updated.GetOptions().BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model to be updated")
}
}
func TestUpdateInstance_ReleasesOldPort(t *testing.T) {
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
},
}
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
// Update with new port
newOptions := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8081,
},
},
}
updated, err := mgr.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetPort() != 8081 {
t.Errorf("Expected port 8081, got %d", updated.GetPort())
}
// Old port should be released - try to create new instance with old port
options2 := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080,
},
},
}
_, err = mgr.CreateInstance("test-instance-2", options2)
if err != nil {
t.Errorf("Should be able to use old port 8080: %v", err)
}
}
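These tests lean on a createTestManager(t) helper that is not included in this diff. A minimal sketch of such a helper, assuming the same manager.New(appConfig, db) wiring and in-memory database used in TestCreateInstance_FailsWhenMaxInstancesReached above (field values are illustrative):

func createTestManager(t *testing.T) manager.InstanceManager {
	t.Helper()
	appConfig := &config.AppConfig{
		Backends: config.BackendConfig{
			LlamaCpp: config.BackendSettings{Command: "llama-server"}, // real helper may substitute a stub command
		},
		Instances: config.InstancesConfig{
			PortRange:            [2]int{8000, 9000},
			InstancesDir:         t.TempDir(),
			MaxInstances:         10, // illustrative; high enough not to interfere
			TimeoutCheckInterval: 5,
		},
		Database: config.DatabaseConfig{
			Path:               ":memory:",
			MaxOpenConnections: 25,
			MaxIdleConnections: 5,
			ConnMaxLifetime:    5 * time.Minute,
		},
		LocalNode: "main",
		Nodes:     map[string]config.NodeConfig{},
	}
	db, err := database.Open(&database.Config{
		Path:               appConfig.Database.Path,
		MaxOpenConnections: appConfig.Database.MaxOpenConnections,
		MaxIdleConnections: appConfig.Database.MaxIdleConnections,
		ConnMaxLifetime:    appConfig.Database.ConnMaxLifetime,
	})
	if err != nil {
		t.Fatalf("Failed to open database: %v", err)
	}
	if err := database.RunMigrations(db); err != nil {
		t.Fatalf("Failed to run migrations: %v", err)
	}
	return manager.New(appConfig, db)
}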

pkg/manager/ports.go Normal file

@@ -0,0 +1,176 @@
package manager
import (
"fmt"
"math/bits"
"sync"
)
// portAllocator provides efficient port allocation using a bitmap for O(1) operations.
// The bitmap approach prevents unbounded memory growth and simplifies port management.
type portAllocator struct {
mu sync.Mutex
// Bitmap for O(1) allocation/release
// Each bit represents a port (1 = allocated, 0 = free)
bitmap []uint64 // Each uint64 covers 64 ports
// Map port to instance name for cleanup operations
allocated map[int]string
minPort int
maxPort int
rangeSize int
}
// newPortAllocator creates a new port allocator for the given port range.
func newPortAllocator(minPort, maxPort int) *portAllocator {
rangeSize := maxPort - minPort + 1
bitmapSize := (rangeSize + 63) / 64 // Round up to nearest uint64
return &portAllocator{
bitmap: make([]uint64, bitmapSize),
allocated: make(map[int]string),
minPort: minPort,
maxPort: maxPort,
rangeSize: rangeSize,
}
}
// allocate finds and allocates the first available port for the given instance.
// Returns the allocated port or an error if no ports are available.
func (p *portAllocator) allocate(instanceName string) (int, error) {
if instanceName == "" {
return 0, fmt.Errorf("instance name cannot be empty")
}
p.mu.Lock()
defer p.mu.Unlock()
port, err := p.findFirstFreeBit()
if err != nil {
return 0, err
}
p.setBit(port)
p.allocated[port] = instanceName
return port, nil
}
// allocateSpecific allocates a specific port for the given instance.
// Returns an error if the port is already allocated or out of range.
func (p *portAllocator) allocateSpecific(port int, instanceName string) error {
if instanceName == "" {
return fmt.Errorf("instance name cannot be empty")
}
if port < p.minPort || port > p.maxPort {
return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
}
p.mu.Lock()
defer p.mu.Unlock()
if p.isBitSet(port) {
return fmt.Errorf("port %d is already allocated", port)
}
p.setBit(port)
p.allocated[port] = instanceName
return nil
}
// release releases a specific port, making it available for reuse.
// Returns an error if the port is not allocated.
func (p *portAllocator) release(port int) error {
if port < p.minPort || port > p.maxPort {
return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
}
p.mu.Lock()
defer p.mu.Unlock()
if !p.isBitSet(port) {
return fmt.Errorf("port %d is not allocated", port)
}
p.clearBit(port)
delete(p.allocated, port)
return nil
}
// releaseByInstance releases all ports allocated to the given instance.
// This is useful for cleanup when deleting or updating an instance.
// Returns the number of ports released.
func (p *portAllocator) releaseByInstance(instanceName string) int {
if instanceName == "" {
return 0
}
p.mu.Lock()
defer p.mu.Unlock()
portsToRelease := make([]int, 0)
for port, name := range p.allocated {
if name == instanceName {
portsToRelease = append(portsToRelease, port)
}
}
for _, port := range portsToRelease {
p.clearBit(port)
delete(p.allocated, port)
}
return len(portsToRelease)
}
// --- Internal bitmap operations ---
// portToBitPos converts a port number to bitmap array index and bit position.
func (p *portAllocator) portToBitPos(port int) (index int, bit uint) {
offset := port - p.minPort
index = offset / 64
bit = uint(offset % 64)
return
}
// setBit marks a port as allocated in the bitmap.
func (p *portAllocator) setBit(port int) {
index, bit := p.portToBitPos(port)
p.bitmap[index] |= (1 << bit)
}
// clearBit marks a port as free in the bitmap.
func (p *portAllocator) clearBit(port int) {
index, bit := p.portToBitPos(port)
p.bitmap[index] &^= (1 << bit)
}
// isBitSet checks if a port is allocated in the bitmap.
func (p *portAllocator) isBitSet(port int) bool {
index, bit := p.portToBitPos(port)
return (p.bitmap[index] & (1 << bit)) != 0
}
// findFirstFreeBit scans the bitmap to find the first unallocated port.
// Returns the port number or an error if no ports are available.
func (p *portAllocator) findFirstFreeBit() (int, error) {
for i, word := range p.bitmap {
if word != ^uint64(0) { // Not all bits are set (some ports are free)
// Find the first 0 bit in this word
// XOR with all 1s to flip bits, then find first 1 (which was 0)
bit := bits.TrailingZeros64(^word)
port := p.minPort + (i * 64) + bit
// Ensure we don't go beyond maxPort due to bitmap rounding
if port <= p.maxPort {
return port, nil
}
}
}
return 0, fmt.Errorf("no available ports in range [%d-%d]", p.minPort, p.maxPort)
}
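To make the bitmap arithmetic concrete: with minPort = 8000, port 8067 has offset 67, so it lives in bitmap[1] at bit 3 (67 / 64 = 1, 67 % 64 = 3). A short illustrative exercise of the allocator (my own example, not part of the file):

// Example usage of the port allocator defined above.
func examplePortAllocator() {
	p := newPortAllocator(8000, 9000)

	// A fresh allocator hands out the lowest free port.
	portA, _ := p.allocate("instance-a") // 8000

	// Specific ports can be claimed while their bit is still clear.
	_ = p.allocateSpecific(8080, "instance-b")

	// Releasing clears the bit, so the port is immediately reusable.
	_ = p.release(portA)
	portB, _ := p.allocate("instance-c") // 8000 again

	fmt.Println(portA, portB) // 8000 8000
}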

pkg/manager/registry.go Normal file

@@ -0,0 +1,121 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"sync"
)
// instanceRegistry provides thread-safe storage and lookup of instances
// with running state tracking using lock-free sync.Map for status checks.
type instanceRegistry struct {
mu sync.RWMutex
instances map[string]*instance.Instance
running sync.Map // map[string]struct{} - lock-free for status checks
}
// newInstanceRegistry creates a new instance registry.
func newInstanceRegistry() *instanceRegistry {
return &instanceRegistry{
instances: make(map[string]*instance.Instance),
}
}
// Get retrieves an instance by name.
// Returns the instance and true if found, nil and false otherwise.
func (r *instanceRegistry) get(name string) (*instance.Instance, bool) {
r.mu.RLock()
defer r.mu.RUnlock()
inst, exists := r.instances[name]
return inst, exists
}
// List returns a snapshot copy of all instances to prevent external mutation.
func (r *instanceRegistry) list() []*instance.Instance {
r.mu.RLock()
defer r.mu.RUnlock()
result := make([]*instance.Instance, 0, len(r.instances))
for _, inst := range r.instances {
result = append(result, inst)
}
return result
}
// ListRunning returns a snapshot of all currently running instances.
func (r *instanceRegistry) listRunning() []*instance.Instance {
r.mu.RLock()
defer r.mu.RUnlock()
result := make([]*instance.Instance, 0)
for name, inst := range r.instances {
if _, isRunning := r.running.Load(name); isRunning {
result = append(result, inst)
}
}
return result
}
// Add adds a new instance to the registry.
// Returns an error if an instance with the same name already exists.
func (r *instanceRegistry) add(inst *instance.Instance) error {
if inst == nil {
return fmt.Errorf("cannot add nil instance")
}
r.mu.Lock()
defer r.mu.Unlock()
if _, exists := r.instances[inst.Name]; exists {
return fmt.Errorf("instance %s already exists", inst.Name)
}
r.instances[inst.Name] = inst
// Initialize running state if the instance is running
if inst.IsRunning() {
r.running.Store(inst.Name, struct{}{})
}
return nil
}
// Remove removes an instance from the registry.
// Returns an error if the instance doesn't exist.
func (r *instanceRegistry) remove(name string) error {
r.mu.Lock()
defer r.mu.Unlock()
if _, exists := r.instances[name]; !exists {
return fmt.Errorf("instance %s not found", name)
}
delete(r.instances, name)
r.running.Delete(name)
return nil
}
// MarkRunning marks an instance as running using lock-free sync.Map.
func (r *instanceRegistry) markRunning(name string) {
r.running.Store(name, struct{}{})
}
// MarkStopped marks an instance as stopped using lock-free sync.Map.
func (r *instanceRegistry) markStopped(name string) {
r.running.Delete(name)
}
// IsRunning checks if an instance is running using lock-free sync.Map.
func (r *instanceRegistry) isRunning(name string) bool {
_, isRunning := r.running.Load(name)
return isRunning
}
// Count returns the total number of instances in the registry.
func (r *instanceRegistry) count() int {
r.mu.RLock()
defer r.mu.RUnlock()
return len(r.instances)
}
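A brief illustration (not from the file) of the intended call pattern: the RWMutex guards the instances map, while the sync.Map lets hot-path running-state checks and updates avoid that lock entirely:

// Typical lifecycle of one instance through the registry.
func registryLifecycle(r *instanceRegistry, inst *instance.Instance) error {
	if err := r.add(inst); err != nil { // rejects duplicate names
		return err
	}
	r.markRunning(inst.Name)   // lock-free status update
	_ = r.isRunning(inst.Name) // lock-free status check
	for _, running := range r.listRunning() {
		_ = running // snapshot copy; safe to iterate
	}
	r.markStopped(inst.Name)
	return r.remove(inst.Name)
}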

pkg/manager/remote.go Normal file

@@ -0,0 +1,293 @@
package manager
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"net/http"
"net/url"
"sync"
"time"
)
const apiBasePath = "/api/v1/instances/"
// remoteManager handles HTTP operations for remote instances.
type remoteManager struct {
mu sync.RWMutex
client *http.Client
nodeMap map[string]*config.NodeConfig // node name -> node config
instanceToNode map[string]*config.NodeConfig // instance name -> node config
}
// newRemoteManager creates a new remote manager.
func newRemoteManager(nodes map[string]config.NodeConfig, timeout time.Duration) *remoteManager {
if timeout <= 0 {
timeout = 30 * time.Second
}
// Build node config map
nodeMap := make(map[string]*config.NodeConfig)
for name := range nodes {
nodeCopy := nodes[name]
nodeMap[name] = &nodeCopy
}
return &remoteManager{
client: &http.Client{
Timeout: timeout,
},
nodeMap: nodeMap,
instanceToNode: make(map[string]*config.NodeConfig),
}
}
// GetNodeForInstance returns the node configuration for a given instance.
// Returns nil if the instance is not mapped to any node.
func (rm *remoteManager) getNodeForInstance(instanceName string) (*config.NodeConfig, bool) {
rm.mu.RLock()
defer rm.mu.RUnlock()
node, exists := rm.instanceToNode[instanceName]
return node, exists
}
// SetInstanceNode maps an instance to a specific node.
// Returns an error if the node doesn't exist.
func (rm *remoteManager) setInstanceNode(instanceName, nodeName string) error {
rm.mu.Lock()
defer rm.mu.Unlock()
node, exists := rm.nodeMap[nodeName]
if !exists {
return fmt.Errorf("node %s not found", nodeName)
}
rm.instanceToNode[instanceName] = node
return nil
}
// RemoveInstance removes the instance-to-node mapping.
func (rm *remoteManager) removeInstance(instanceName string) {
rm.mu.Lock()
defer rm.mu.Unlock()
delete(rm.instanceToNode, instanceName)
}
// --- HTTP request helpers ---
// makeRemoteRequest creates and executes an HTTP request to a remote node with context support.
func (rm *remoteManager) makeRemoteRequest(ctx context.Context, nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
var reqBody io.Reader
if body != nil {
jsonData, err := json.Marshal(body)
if err != nil {
return nil, fmt.Errorf("failed to marshal request body: %w", err)
}
reqBody = bytes.NewBuffer(jsonData)
}
url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
req, err := http.NewRequestWithContext(ctx, method, url, reqBody)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
if nodeConfig.APIKey != "" {
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
}
resp, err := rm.client.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to execute request: %w", err)
}
return resp, nil
}
// parseRemoteResponse parses an HTTP response and unmarshals the result.
func parseRemoteResponse(resp *http.Response, result any) error {
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
if result != nil {
if err := json.Unmarshal(body, result); err != nil {
return fmt.Errorf("failed to unmarshal response: %w", err)
}
}
return nil
}
// --- Remote CRUD operations ---
// createInstance creates a new instance on a remote node.
func (rm *remoteManager) createInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "POST", path, opts)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// getInstance retrieves an instance by name from a remote node.
func (rm *remoteManager) getInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "GET", path, nil)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// updateInstance updates an existing instance on a remote node.
func (rm *remoteManager) updateInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "PUT", path, opts)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// deleteInstance deletes an instance from a remote node.
func (rm *remoteManager) deleteInstance(ctx context.Context, node *config.NodeConfig, name string) error {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "DELETE", path, nil)
if err != nil {
return err
}
return parseRemoteResponse(resp, nil)
}
// startInstance starts an instance on a remote node.
func (rm *remoteManager) startInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/start", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// stopInstance stops an instance on a remote node.
func (rm *remoteManager) stopInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/stop", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// restartInstance restarts an instance on a remote node.
func (rm *remoteManager) restartInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/restart", apiBasePath, escapedName)
resp, err := rm.makeRemoteRequest(ctx, node, "POST", path, nil)
if err != nil {
return nil, err
}
var inst instance.Instance
if err := parseRemoteResponse(resp, &inst); err != nil {
return nil, err
}
return &inst, nil
}
// getInstanceLogs retrieves logs for an instance from a remote node.
func (rm *remoteManager) getInstanceLogs(ctx context.Context, node *config.NodeConfig, name string, numLines int) (string, error) {
escapedName := url.PathEscape(name)
path := fmt.Sprintf("%s%s/logs?lines=%d", apiBasePath, escapedName, numLines)
resp, err := rm.makeRemoteRequest(ctx, node, "GET", path, nil)
if err != nil {
return "", err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("failed to read response body: %w", err)
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
}
// Logs endpoint returns plain text (Content-Type: text/plain)
return string(body), nil
}
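Tying this back to the manager code earlier in this diff, here is a sketch (illustrative, with an assumed timeout) of how a restart for a remote instance flows through these helpers:

// Restart an instance on whichever node it is mapped to.
func restartOnNode(rm *remoteManager, name string) (*instance.Instance, error) {
	node, ok := rm.getNodeForInstance(name)
	if !ok {
		return nil, fmt.Errorf("instance %s is not mapped to a node", name)
	}
	// Bound the remote call; 30s mirrors the client default above (an assumption).
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	return rm.restartInstance(ctx, node, name)
}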


@@ -1,64 +0,0 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range im.instances {
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
im.mu.RUnlock() // Release read lock before calling StopInstance
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := im.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name, _ := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances without idle timeout
}
if lruInstance == nil {
lruInstance = inst
}
if inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict Instance
_, err := im.StopInstance(lruInstance.Name)
return err
}


@@ -1,332 +0,0 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
backendConfig := config.BackendConfig{
LlamaExecutable: "llama-server",
MLXLMExecutable: "mlx_lm.server",
}
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(backendConfig, cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually - set all to stopped and then shutdown
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}


@@ -1,795 +1,121 @@
package server
import (
"bytes"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/backends/mlx"
"llamactl/pkg/backends/vllm"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"llamactl/pkg/validation"
"log"
"net/http"
"os/exec"
"strconv"
"strings"
"time"
"github.com/go-chi/chi/v5"
)
// errorResponse represents an error response returned by the API
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
// writeError writes a JSON error response with the specified HTTP status code
func writeError(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
if err := json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}); err != nil {
log.Printf("Failed to encode error response: %v", err)
}
}
// writeJSON writes a JSON response with the specified HTTP status code
func writeJSON(w http.ResponseWriter, status int, data any) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
if err := json.NewEncoder(w).Encode(data); err != nil {
log.Printf("Failed to encode JSON response: %v", err)
}
}
// writeText writes a plain text response with the specified HTTP status code
func writeText(w http.ResponseWriter, status int, data string) {
w.Header().Set("Content-Type", "text/plain")
w.WriteHeader(status)
if _, err := w.Write([]byte(data)); err != nil {
log.Printf("Failed to write text response: %v", err)
}
}
// Handler provides HTTP handlers for the llamactl server API
type Handler struct {
InstanceManager manager.InstanceManager
cfg config.AppConfig
httpClient *http.Client
authStore database.AuthStore
authMiddleware *APIAuthMiddleware
}
// NewHandler creates a new Handler instance with the provided instance manager and configuration
func NewHandler(im manager.InstanceManager, cfg config.AppConfig, authStore database.AuthStore) *Handler {
handler := &Handler{
InstanceManager: im,
cfg: cfg,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
authStore: authStore,
}
handler.authMiddleware = NewAPIAuthMiddleware(cfg.Auth, authStore)
return handler
}
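For context, a sketch of wiring the new constructor into a router; the route registration below is an assumption for illustration, not part of this diff:

func newRouter(im manager.InstanceManager, cfg config.AppConfig, store database.AuthStore) http.Handler {
	h := NewHandler(im, cfg, store) // auth middleware is initialized inside NewHandler
	r := chi.NewRouter()
	r.Get("/api/v1/version", h.VersionHandler())
	return r
}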
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags version
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
}
}
// getInstance retrieves an instance by name from the request URL path parameter
func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
return nil, fmt.Errorf("invalid instance name: %w", err)
}
inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil {
return nil, fmt.Errorf("failed to get instance by name: %w", err)
}
return inst, nil
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ensureInstanceRunning ensures that an instance is running by starting it if on-demand start is enabled
// It handles LRU eviction when the maximum number of running instances is reached
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
options := inst.GetOptions()
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
if !allowOnDemand {
return fmt.Errorf("instance is not running and on-demand start is not enabled")
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
return fmt.Errorf("cannot start instance, failed to evict instance: %w", err)
}
} else {
return fmt.Errorf("cannot start instance, maximum number of running instances reached")
}
}
if _, err := h.InstanceManager.StartInstance(inst.Name); err != nil {
return fmt.Errorf("failed to start instance: %w", err)
}
return nil
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Process "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} instance.Process "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} instance.Process "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options instance.CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Process "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(inst); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
if lines == "" {
lines = "-1"
}
num_lines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := inst.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
// @Router /instances/{name}/proxy [post]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
inst, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
proxyPath := r.URL.Path[len(prefix):]
// Ensure the proxy path starts with "/"
if !strings.HasPrefix(proxyPath, "/") {
proxyPath = "/" + proxyPath
}
// Update the last request time for the instance
inst.UpdateLastRequestTime()
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Restore original path for logging purposes
defer func() {
r.URL.Path = originalPath
}()
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
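
GetProxy itself is not shown in this listing. As context, a cached per-instance reverse proxy of this general shape can be built with net/http/httputil; the helper below is a sketch under the assumption that an instance exposes a host and port, not the actual llamactl implementation.

package main

import (
    "fmt"
    "net/http/httputil"
    "net/url"
)

// newInstanceProxy builds a single-host reverse proxy for one instance.
// Hypothetical helper: llamactl's real GetProxy is not part of this listing.
func newInstanceProxy(host string, port int) *httputil.ReverseProxy {
    target := &url.URL{
        Scheme: "http",
        Host:   fmt.Sprintf("%s:%d", host, port),
    }
    return httputil.NewSingleHostReverseProxy(target)
}

func main() {
    proxy := newInstanceProxy("127.0.0.1", 8081)
    _ = proxy // wire into an http.Handler in real use
}
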
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
http.Error(w, "Failed to read request body", http.StatusBadRequest)
return
}
r.Body.Close()
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Instance name is required", http.StatusBadRequest)
return
}
// Route to the appropriate instance based on the model name
inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
}
}
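
To make the routing above concrete, here is a minimal client sketch. It assumes llamactl listens on localhost:8080, that an instance named "llama2-7b" exists, and uses a placeholder API key; the "model" field is what selects the target instance.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    body := []byte(`{"model": "llama2-7b", "messages": [{"role": "user", "content": "Hello"}]}`)
    req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer sk-inference-example") // placeholder key
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    out, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(out))
}
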
// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: llamaOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
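
A quick usage sketch for the endpoint above, assuming llamactl listens on localhost:8080 and that the route is mounted under /api/v1 (the @Router annotation shows only the relative path); the key and model path are placeholders.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    body := []byte(`{"command": "llama-server --model /models/model.gguf --gpu-layers 32"}`)
    req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/backends/llama-cpp/parse-command", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer sk-management-example") // placeholder key
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    out, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(out))
}
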
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
// Currently only the mlx_lm backend type is supported
backendType := backends.BackendTypeMlxLm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
MlxServerOptions: mlxOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
type errorResponse struct {
Error string `json:"error"`
Details string `json:"details,omitempty"`
}
writeError := func(w http.ResponseWriter, status int, code, details string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(status)
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
}
return func(w http.ResponseWriter, r *http.Request) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return
}
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return
}
backendType := backends.BackendTypeVllm
options := &instance.CreateInstanceOptions{
BackendType: backendType,
VllmServerOptions: vllmOptions,
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(options); err != nil {
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
}
}
}
// ensureInstanceRunning starts the instance on demand when allowed and waits until it is healthy
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
opts := inst.GetOptions()
if opts == nil || opts.OnDemandStart == nil || !*opts.OnDemandStart {
return fmt.Errorf("instance is not running")
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
if err := h.InstanceManager.EvictLRUInstance(); err != nil {
return fmt.Errorf("cannot start instance, failed to evict instance: %w", err)
}
} else {
return fmt.Errorf("cannot start instance, maximum number of instances reached")
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(inst.Name); err != nil {
return fmt.Errorf("failed to start instance: %w", err)
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
return fmt.Errorf("instance failed to become healthy: %w", err)
}
return nil
}

pkg/server/handlers_auth.go

@@ -0,0 +1,354 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/auth"
"net/http"
"strconv"
"time"
"github.com/go-chi/chi/v5"
)
// CreateKeyRequest represents the request body for creating a new API key.
type CreateKeyRequest struct {
Name string `json:"name"`
PermissionMode auth.PermissionMode `json:"permission_mode"`
ExpiresAt *int64 `json:"expires_at,omitempty"`
InstanceIDs []int `json:"instance_ids,omitempty"`
}
// CreateKeyResponse represents the response returned when creating a new API key.
type CreateKeyResponse struct {
ID int `json:"id"`
Name string `json:"name"`
UserID string `json:"user_id"`
PermissionMode auth.PermissionMode `json:"permission_mode"`
ExpiresAt *int64 `json:"expires_at"`
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
LastUsedAt *int64 `json:"last_used_at"`
Key string `json:"key"`
}
// KeyResponse represents an API key in responses for list and get operations.
type KeyResponse struct {
ID int `json:"id"`
Name string `json:"name"`
UserID string `json:"user_id"`
PermissionMode auth.PermissionMode `json:"permission_mode"`
ExpiresAt *int64 `json:"expires_at"`
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
LastUsedAt *int64 `json:"last_used_at"`
}
// KeyPermissionResponse represents the permissions for an API key on a specific instance.
type KeyPermissionResponse struct {
InstanceID int `json:"instance_id"`
InstanceName string `json:"instance_name"`
}
// CreateKey godoc
// @Summary Create a new API key
// @Description Creates a new API key with the specified permissions and returns the plain-text key (only shown once)
// @Tags Keys
// @Accept json
// @Produce json
// @Param key body CreateKeyRequest true "API key configuration"
// @Success 201 {object} CreateKeyResponse "Created API key with plain-text key"
// @Failure 400 {string} string "Invalid request body or validation error"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys [post]
func (h *Handler) CreateKey() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req CreateKeyRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_json", "Invalid JSON in request body")
return
}
// Validate request
if req.Name == "" {
writeError(w, http.StatusBadRequest, "invalid_name", "Name is required")
return
}
if len(req.Name) > 100 {
writeError(w, http.StatusBadRequest, "invalid_name", "Name must be 100 characters or less")
return
}
if req.PermissionMode != auth.PermissionModeAllowAll && req.PermissionMode != auth.PermissionModePerInstance {
writeError(w, http.StatusBadRequest, "invalid_permission_mode", "Permission mode must be 'allow_all' or 'per_instance'")
return
}
if req.PermissionMode == auth.PermissionModePerInstance && len(req.InstanceIDs) == 0 {
writeError(w, http.StatusBadRequest, "missing_permissions", "Instance IDs required when permission mode is 'per_instance'")
return
}
if req.ExpiresAt != nil && *req.ExpiresAt <= time.Now().Unix() {
writeError(w, http.StatusBadRequest, "invalid_expires_at", "Expiration time must be in future")
return
}
// Validate instance IDs exist
if req.PermissionMode == auth.PermissionModePerInstance {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
writeError(w, http.StatusInternalServerError, "fetch_instances_failed", fmt.Sprintf("Failed to fetch instances: %v", err))
return
}
instanceIDMap := make(map[int]bool)
for _, inst := range instances {
instanceIDMap[inst.ID] = true
}
for _, instanceID := range req.InstanceIDs {
if !instanceIDMap[instanceID] {
writeError(w, http.StatusBadRequest, "invalid_instance_id", fmt.Sprintf("Instance ID %d does not exist", instanceID))
return
}
}
}
// Generate plain-text key
plainTextKey, err := auth.GenerateKey("llamactl")
if err != nil {
writeError(w, http.StatusInternalServerError, "key_generation_failed", "Failed to generate API key")
return
}
// Hash key
keyHash, err := auth.HashKey(plainTextKey)
if err != nil {
writeError(w, http.StatusInternalServerError, "key_hashing_failed", "Failed to hash API key")
return
}
// Create APIKey struct
now := time.Now().Unix()
apiKey := &auth.APIKey{
KeyHash: keyHash,
Name: req.Name,
UserID: "system",
PermissionMode: req.PermissionMode,
ExpiresAt: req.ExpiresAt,
CreatedAt: now,
UpdatedAt: now,
}
// Convert InstanceIDs to KeyPermissions
var keyPermissions []auth.KeyPermission
for _, instanceID := range req.InstanceIDs {
keyPermissions = append(keyPermissions, auth.KeyPermission{
KeyID: 0, // Will be set by database after key creation
InstanceID: instanceID,
})
}
// Create in database
err = h.authStore.CreateKey(r.Context(), apiKey, keyPermissions)
if err != nil {
writeError(w, http.StatusInternalServerError, "creation_failed", fmt.Sprintf("Failed to create API key: %v", err))
return
}
// Return response with plain-text key (only shown once)
response := CreateKeyResponse{
ID: apiKey.ID,
Name: apiKey.Name,
UserID: apiKey.UserID,
PermissionMode: apiKey.PermissionMode,
ExpiresAt: apiKey.ExpiresAt,
CreatedAt: apiKey.CreatedAt,
UpdatedAt: apiKey.UpdatedAt,
LastUsedAt: apiKey.LastUsedAt,
Key: plainTextKey,
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
json.NewEncoder(w).Encode(response)
}
}
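
A usage sketch for key creation, assuming llamactl on localhost:8080 and a placeholder management key. The permission_mode values "allow_all" and "per_instance" match the validation above; the plain-text key in the response is only returned once, so store it immediately.

package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    body := []byte(`{"name": "ci-key", "permission_mode": "per_instance", "instance_ids": [1]}`)
    req, err := http.NewRequest("POST", "http://localhost:8080/api/v1/auth/keys", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "Bearer sk-management-example") // placeholder key
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    out, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.Status, string(out))
}
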
// ListKeys godoc
// @Summary List all API keys
// @Description Returns a list of all API keys for the system user (excludes key hash and plain-text key)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Success 200 {array} KeyResponse "List of API keys"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys [get]
func (h *Handler) ListKeys() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
keys, err := h.authStore.GetUserKeys(r.Context(), "system")
if err != nil {
writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API keys: %v", err))
return
}
// Remove key_hash from all keys
response := make([]KeyResponse, 0, len(keys))
for _, key := range keys {
response = append(response, KeyResponse{
ID: key.ID,
Name: key.Name,
UserID: key.UserID,
PermissionMode: key.PermissionMode,
ExpiresAt: key.ExpiresAt,
CreatedAt: key.CreatedAt,
UpdatedAt: key.UpdatedAt,
LastUsedAt: key.LastUsedAt,
})
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(response)
}
}
// GetKey godoc
// @Summary Get details of a specific API key
// @Description Returns details for a specific API key by ID (excludes key hash and plain-text key)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Param id path int true "Key ID"
// @Success 200 {object} KeyResponse "API key details"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id} [get]
func (h *Handler) GetKey() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
idStr := chi.URLParam(r, "id")
id, err := strconv.Atoi(idStr)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
return
}
key, err := h.authStore.GetKeyByID(r.Context(), id)
if err != nil {
if err.Error() == "API key not found" {
writeError(w, http.StatusNotFound, "not_found", "API key not found")
return
}
writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API key: %v", err))
return
}
// Remove key_hash from response
response := KeyResponse{
ID: key.ID,
Name: key.Name,
UserID: key.UserID,
PermissionMode: key.PermissionMode,
ExpiresAt: key.ExpiresAt,
CreatedAt: key.CreatedAt,
UpdatedAt: key.UpdatedAt,
LastUsedAt: key.LastUsedAt,
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(response)
}
}
// DeleteKey godoc
// @Summary Delete an API key
// @Description Deletes an API key by ID
// @Tags Keys
// @Security ApiKeyAuth
// @Param id path int true "Key ID"
// @Success 204 "API key deleted successfully"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id} [delete]
func (h *Handler) DeleteKey() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
idStr := chi.URLParam(r, "id")
id, err := strconv.Atoi(idStr)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
return
}
err = h.authStore.DeleteKey(r.Context(), id)
if err != nil {
if err.Error() == "API key not found" {
writeError(w, http.StatusNotFound, "not_found", "API key not found")
return
}
writeError(w, http.StatusInternalServerError, "deletion_failed", fmt.Sprintf("Failed to delete API key: %v", err))
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetKeyPermissions godoc
// @Summary Get API key permissions
// @Description Returns the instance-level permissions for a specific API key (includes instance names)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Param id path int true "Key ID"
// @Success 200 {array} KeyPermissionResponse "List of key permissions"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id}/permissions [get]
func (h *Handler) GetKeyPermissions() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
idStr := chi.URLParam(r, "id")
id, err := strconv.Atoi(idStr)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
return
}
// Verify key exists
_, err = h.authStore.GetKeyByID(r.Context(), id)
if err != nil {
if err.Error() == "API key not found" {
writeError(w, http.StatusNotFound, "not_found", "API key not found")
return
}
writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API key: %v", err))
return
}
permissions, err := h.authStore.GetPermissions(r.Context(), id)
if err != nil {
writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch permissions: %v", err))
return
}
// Get instance names for the permissions
instances, err := h.InstanceManager.ListInstances()
if err != nil {
writeError(w, http.StatusInternalServerError, "fetch_instances_failed", fmt.Sprintf("Failed to fetch instances: %v", err))
return
}
instanceNameMap := make(map[int]string)
for _, inst := range instances {
instanceNameMap[inst.ID] = inst.Name
}
response := make([]KeyPermissionResponse, 0, len(permissions))
for _, perm := range permissions {
response = append(response, KeyPermissionResponse{
InstanceID: perm.InstanceID,
InstanceName: instanceNameMap[perm.InstanceID],
})
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(response)
}
}


@@ -0,0 +1,308 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"net/http"
"os/exec"
"strings"
)
// ParseCommandRequest represents the request body for backend command parsing
type ParseCommandRequest struct {
Command string `json:"command"`
}
// validateLlamaCppInstance validates that the instance specified in the request is a llama.cpp instance
func (h *Handler) validateLlamaCppInstance(r *http.Request) (*instance.Instance, error) {
inst, err := h.getInstance(r)
if err != nil {
return nil, fmt.Errorf("invalid instance: %w", err)
}
options := inst.GetOptions()
if options == nil {
return nil, fmt.Errorf("cannot obtain instance's options")
}
if options.BackendOptions.BackendType != backends.BackendTypeLlamaCpp {
return nil, fmt.Errorf("instance is not a llama.cpp server")
}
return inst, nil
}
// stripLlamaCppPrefix removes the llama.cpp proxy prefix from the request URL path
func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) {
// Strip the "/llama-cpp/<name>" prefix from the request URL
prefix := fmt.Sprintf("/llama-cpp/%s", instName)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
}
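
The prefix strip is a plain strings.TrimPrefix, so the mapping is easy to verify in isolation:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // For an instance named "my-model", "/llama-cpp/my-model/props" is
    // rewritten to "/props" before the request reaches the backend.
    path := strings.TrimPrefix("/llama-cpp/my-model/props", "/llama-cpp/my-model")
    fmt.Println(path) // "/props"
}
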
// LlamaCppUIProxy godoc
// @Summary Proxy requests to llama.cpp UI for the instance
// @Description Proxies requests to the llama.cpp UI for the specified instance
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produce html
// @Param name query string true "Instance Name"
// @Success 200 {string} string "Proxied HTML response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/ [get]
func (h *Handler) LlamaCppUIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
if !inst.IsRemote() && !inst.IsRunning() {
writeError(w, http.StatusBadRequest, "instance is not running", "Instance is not running")
return
}
if !inst.IsRemote() {
h.stripLlamaCppPrefix(r, inst.Name)
}
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
// LlamaCppProxy godoc
// @Summary Proxy requests to llama.cpp server instance
// @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produce json
// @Param name path string true "Instance Name"
// @Success 200 {object} map[string]any "Proxied response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /llama-cpp/{name}/props [get]
// @Router /llama-cpp/{name}/slots [get]
// @Router /llama-cpp/{name}/apply-template [post]
// @Router /llama-cpp/{name}/completion [post]
// @Router /llama-cpp/{name}/detokenize [post]
// @Router /llama-cpp/{name}/embeddings [post]
// @Router /llama-cpp/{name}/infill [post]
// @Router /llama-cpp/{name}/metrics [post]
// @Router /llama-cpp/{name}/props [post]
// @Router /llama-cpp/{name}/reranking [post]
// @Router /llama-cpp/{name}/tokenize [post]
func (h *Handler) LlamaCppProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
return
}
}
if !inst.IsRemote() {
h.stripLlamaCppPrefix(r, inst.Name)
}
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
// parseHelper parses a backend command and returns the parsed options
func parseHelper(w http.ResponseWriter, r *http.Request, backend interface {
ParseCommand(string) (any, error)
}) (any, bool) {
var req ParseCommandRequest
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
return nil, false
}
if strings.TrimSpace(req.Command) == "" {
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
return nil, false
}
// Parse command using the backend's ParseCommand method
parsedOptions, err := backend.ParseCommand(req.Command)
if err != nil {
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
return nil, false
}
return parsedOptions, true
}
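
parseHelper accepts any value satisfying the anonymous ParseCommand interface, which keeps backends decoupled from the HTTP layer. A made-up stand-in illustrating the contract (fakeBackend is not a real llamactl backend):

package example

import (
    "fmt"
    "strings"
)

// fakeBackend shows the contract parseHelper expects: any value with a
// ParseCommand(string) (any, error) method can be passed in.
type fakeBackend struct{}

func (f *fakeBackend) ParseCommand(cmd string) (any, error) {
    fields := strings.Fields(cmd)
    if len(fields) == 0 {
        return nil, fmt.Errorf("empty command")
    }
    return map[string]any{"binary": fields[0], "args": fields[1:]}, nil
}
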
// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
parsedOptions, ok := parseHelper(w, r, &backends.LlamaServerOptions{})
if !ok {
return
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: parsedOptions.(*backends.LlamaServerOptions),
},
}
writeJSON(w, http.StatusOK, options)
}
}
// ParseMlxCommand godoc
// @Summary Parse mlx_lm.server command
// @Description Parses MLX-LM server command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /api/v1/backends/mlx/parse-command [post]
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
parsedOptions, ok := parseHelper(w, r, &backends.MlxServerOptions{})
if !ok {
return
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeMlxLm,
MlxServerOptions: parsedOptions.(*backends.MlxServerOptions),
},
}
writeJSON(w, http.StatusOK, options)
}
}
// ParseVllmCommand godoc
// @Summary Parse vllm serve command
// @Description Parses a vLLM serve command string into instance options
// @Tags Backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.Options "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Router /api/v1/backends/vllm/parse-command [post]
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
parsedOptions, ok := parseHelper(w, r, &backends.VllmServerOptions{})
if !ok {
return
}
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeVllm,
VllmServerOptions: parsedOptions.(*backends.VllmServerOptions),
},
}
writeJSON(w, http.StatusOK, options)
}
}
// executeLlamaServerCommand executes a llama-server command with the specified flag and returns the output
func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
cmd := exec.Command("llama-server", flag)
output, err := cmd.CombinedOutput()
if err != nil {
writeError(w, http.StatusInternalServerError, "command failed", errorMsg+": "+err.Error())
return
}
writeText(w, http.StatusOK, string(output))
}
}
// LlamaServerHelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags Backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/help [get]
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
return h.executeLlamaServerCommand("--help", "Failed to get help")
}
// LlamaServerVersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags Backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/version [get]
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
return h.executeLlamaServerCommand("--version", "Failed to get version")
}
// LlamaServerListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags Backends
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/backends/llama-cpp/devices [get]
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
}


@@ -0,0 +1,358 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"llamactl/pkg/validation"
"net/http"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
)
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags Instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} instance.Instance "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+err.Error())
return
}
writeJSON(w, http.StatusOK, instances)
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags Instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.Options true "Instance configuration options"
// @Success 201 {object} instance.Instance "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
var options instance.Options
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return
}
inst, err := h.InstanceManager.CreateInstance(validatedName, &options)
if err != nil {
writeError(w, http.StatusInternalServerError, "create_failed", "Failed to create instance: "+err.Error())
return
}
writeJSON(w, http.StatusCreated, inst)
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return
}
writeJSON(w, http.StatusOK, inst)
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body instance.Options true "Instance configuration options"
// @Success 200 {object} instance.Instance "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
var options instance.Options
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return
}
inst, err := h.InstanceManager.UpdateInstance(validatedName, &options)
if err != nil {
writeError(w, http.StatusInternalServerError, "update_failed", "Failed to update instance: "+err.Error())
return
}
writeJSON(w, http.StatusOK, inst)
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
inst, err := h.InstanceManager.StartInstance(validatedName)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
writeError(w, http.StatusConflict, "max_instances_reached", err.Error())
return
}
writeError(w, http.StatusInternalServerError, "start_failed", "Failed to start instance: "+err.Error())
return
}
writeJSON(w, http.StatusOK, inst)
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
inst, err := h.InstanceManager.StopInstance(validatedName)
if err != nil {
writeError(w, http.StatusInternalServerError, "stop_failed", "Failed to stop instance: "+err.Error())
return
}
writeJSON(w, http.StatusOK, inst)
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} instance.Instance "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
inst, err := h.InstanceManager.RestartInstance(validatedName)
if err != nil {
writeError(w, http.StatusInternalServerError, "restart_failed", "Failed to restart instance: "+err.Error())
return
}
writeJSON(w, http.StatusOK, inst)
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
if err := h.InstanceManager.DeleteInstance(validatedName); err != nil {
writeError(w, http.StatusInternalServerError, "delete_failed", "Failed to delete instance: "+err.Error())
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
lines := r.URL.Query().Get("lines")
numLines := -1 // Default to all lines
if lines != "" {
parsedLines, err := strconv.Atoi(lines)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_parameter", "Invalid lines parameter: "+err.Error())
return
}
numLines = parsedLines
}
// Use the instance manager which handles both local and remote instances
logs, err := h.InstanceManager.GetInstanceLogs(validatedName, numLines)
if err != nil {
writeError(w, http.StatusInternalServerError, "logs_failed", "Failed to get logs: "+err.Error())
return
}
writeText(w, http.StatusOK, logs)
}
}
// InstanceProxy godoc
// @Summary Proxy requests to a specific instance; does not autostart a stopped instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags Instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /api/v1/instances/{name}/proxy [get]
// @Router /api/v1/instances/{name}/proxy [post]
func (h *Handler) InstanceProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.getInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
if !inst.IsRunning() {
writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
return
}
if !inst.IsRemote() {
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", inst.Name)
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
}
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
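
inst.ServeHTTP itself is not shown in this listing. The sketch below illustrates the inflight-tracking idea the comments describe, as an assumption about its shape rather than the actual implementation:

package example

import (
    "net/http"
    "net/http/httputil"
    "sync"
)

// trackedProxy counts inflight requests so a shutdown can wait for them to
// drain before stopping the instance.
type trackedProxy struct {
    inflight sync.WaitGroup
    proxy    *httputil.ReverseProxy
}

func (t *trackedProxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    t.inflight.Add(1)
    defer t.inflight.Done()
    t.proxy.ServeHTTP(w, r)
}

// drain blocks until all inflight requests have completed.
func (t *trackedProxy) drain() {
    t.inflight.Wait()
}
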


@@ -0,0 +1,70 @@
package server
import (
"net/http"
"github.com/go-chi/chi/v5"
)
// NodeResponse represents a node configuration in API responses
type NodeResponse struct {
Address string `json:"address"`
}
// ListNodes godoc
// @Summary List all configured nodes
// @Description Returns a map of all nodes configured in the server (node name -> node config)
// @Tags Nodes
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/nodes [get]
func (h *Handler) ListNodes() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Convert to sanitized response format (map of name -> NodeResponse)
nodeResponses := make(map[string]NodeResponse, len(h.cfg.Nodes))
for name, node := range h.cfg.Nodes {
nodeResponses[name] = NodeResponse{
Address: node.Address,
}
}
writeJSON(w, http.StatusOK, nodeResponses)
}
}
// GetNode godoc
// @Summary Get details of a specific node
// @Description Returns the details of a specific node by name
// @Tags Nodes
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Node Name"
// @Success 200 {object} NodeResponse "Node details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 404 {string} string "Node not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/nodes/{name} [get]
func (h *Handler) GetNode() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
writeError(w, http.StatusBadRequest, "invalid_request", "Node name cannot be empty")
return
}
nodeConfig, exists := h.cfg.Nodes[name]
if !exists {
writeError(w, http.StatusNotFound, "not_found", "Node not found")
return
}
// Convert to sanitized response format
nodeResponse := NodeResponse{
Address: nodeConfig.Address,
}
writeJSON(w, http.StatusOK, nodeResponse)
}
}


@@ -0,0 +1,141 @@
package server
import (
"bytes"
"encoding/json"
"io"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"net/http"
)
// OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format
type OpenAIListInstancesResponse struct {
Object string `json:"object"`
Data []OpenAIInstance `json:"data"`
}
// OpenAIInstance represents a single instance (model) in OpenAI-compatible format
type OpenAIInstance struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
OwnedBy string `json:"owned_by"`
}
// OpenAIListInstances godoc
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags OpenAI
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/models [get]
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+err.Error())
return
}
openaiInstances := make([]OpenAIInstance, len(instances))
for i, inst := range instances {
openaiInstances[i] = OpenAIInstance{
ID: inst.Name,
Object: "model",
Created: inst.Created,
OwnedBy: "llamactl",
}
}
openaiResponse := OpenAIListInstancesResponse{
Object: "list",
Data: openaiInstances,
}
writeJSON(w, http.StatusOK, openaiResponse)
}
}
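
A client-side sketch for the endpoint above, assuming llamactl on localhost:8080 and a placeholder inference key; each returned ID is an instance name:

package main

import (
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    req, err := http.NewRequest("GET", "http://localhost:8080/v1/models", nil)
    if err != nil {
        panic(err)
    }
    req.Header.Set("Authorization", "Bearer sk-inference-example") // placeholder key
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    var models struct {
        Data []struct {
            ID string `json:"id"`
        } `json:"data"`
    }
    if err := json.NewDecoder(resp.Body).Decode(&models); err != nil {
        panic(err)
    }
    for _, m := range models.Data {
        fmt.Println(m.ID) // each ID is an instance name
    }
}
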
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags OpenAI
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"
// @Failure 400 {string} string "Invalid request body or instance name"
// @Failure 500 {string} string "Internal Server Error"
// @Router /v1/ [post]
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Read the entire body first
bodyBytes, err := io.ReadAll(r.Body)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Failed to read request body")
return
}
r.Body.Close()
// Parse the body to extract instance name
var requestBody map[string]any
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
return
}
// Validate instance name at the entry point
validatedName, err := validation.ValidateInstanceName(modelName)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
return
}
// Route to the appropriate instance based on the model name
inst, err := h.InstanceManager.GetInstance(validatedName)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance_start_failed", err.Error())
return
}
}
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}


@@ -0,0 +1,42 @@
package server
import (
"fmt"
"net/http"
)
// VersionHandler godoc
// @Summary Get llamactl version
// @Description Returns the version of the llamactl command
// @Tags System
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionInfo := fmt.Sprintf("Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
writeText(w, http.StatusOK, versionInfo)
}
}
// ConfigHandler godoc
// @Summary Get server configuration
// @Description Returns the current server configuration (sanitized)
// @Tags System
// @Security ApiKeyAuth
// @Produces application/json
// @Success 200 {object} config.AppConfig "Sanitized configuration"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/config [get]
func (h *Handler) ConfigHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
sanitizedConfig, err := h.cfg.SanitizedCopy()
if err != nil {
writeError(w, http.StatusInternalServerError, "sanitized_copy_error", "Failed to get sanitized config")
return
}
writeJSON(w, http.StatusOK, sanitizedConfig)
}
}


@@ -1,107 +1,76 @@
package server
import (
"crypto/rand"
"context"
"crypto/subtle"
"encoding/hex"
"fmt"
"llamactl/pkg/auth"
"llamactl/pkg/config"
"llamactl/pkg/database"
"log"
"net/http"
"os"
"strings"
"time"
)
- type KeyType int
// contextKey is a custom type for context keys to avoid collisions
type contextKey string
const (
- KeyTypeInference KeyType = iota
- KeyTypeManagement
apiKeyContextKey contextKey = "apiKey"
)
type APIAuthMiddleware struct {
authStore database.AuthStore
requireInferenceAuth bool
- inferenceKeys map[string]bool
requireManagementAuth bool
- managementKeys map[string]bool
managementKeys map[string]bool // Config-based management keys
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
- func NewAPIAuthMiddleware(authCfg config.AuthConfig) *APIAuthMiddleware {
func NewAPIAuthMiddleware(authCfg config.AuthConfig, authStore database.AuthStore) *APIAuthMiddleware {
// Load management keys from config into managementKeys map
managementKeys := make(map[string]bool)
for _, key := range authCfg.ManagementKeys {
managementKeys[key] = true
}
// Handle legacy auto-generation for management keys if none provided and auth is required
var generated bool = false
- inferenceAPIKeys := make(map[string]bool)
- managementAPIKeys := make(map[string]bool)
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
- key := generateAPIKey(KeyTypeManagement)
- managementAPIKeys[key] = true
key, err := auth.GenerateKey("llamactl-mgmt")
if err != nil {
log.Printf("Warning: Failed to generate management key: %v", err)
// Fallback to PID-based key for safety
key = fmt.Sprintf("sk-management-fallback-%d", os.Getpid())
}
managementKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
- for _, key := range authCfg.ManagementKeys {
- managementAPIKeys[key] = true
- }
- if authCfg.RequireInferenceAuth && len(authCfg.InferenceKeys) == 0 {
- key := generateAPIKey(KeyTypeInference)
- inferenceAPIKeys[key] = true
- generated = true
- fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
- fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
- }
- for _, key := range authCfg.InferenceKeys {
- inferenceAPIKeys[key] = true
- }
if generated {
fmt.Printf("%s\n⚠ IMPORTANT\n%s\n", banner, banner)
fmt.Println("• These keys are auto-generated and will change on restart")
fmt.Println("• This key is auto-generated and will change on restart")
fmt.Println("• For production, add explicit keys to your configuration")
fmt.Println("• Copy these keys before they disappear from the terminal")
fmt.Println("• Copy this key before it disappears from the terminal")
fmt.Println(banner)
}
return &APIAuthMiddleware{
authStore: authStore,
requireInferenceAuth: authCfg.RequireInferenceAuth,
- inferenceKeys: inferenceAPIKeys,
requireManagementAuth: authCfg.RequireManagementAuth,
- managementKeys: managementAPIKeys,
managementKeys: managementKeys,
}
}
- // generateAPIKey creates a cryptographically secure API key
- func generateAPIKey(keyType KeyType) string {
- // Generate 32 random bytes (256 bits)
- randomBytes := make([]byte, 32)
- var prefix string
- switch keyType {
- case KeyTypeInference:
- prefix = "sk-inference"
- case KeyTypeManagement:
- prefix = "sk-management"
- default:
- prefix = "sk-unknown"
- }
- if _, err := rand.Read(randomBytes); err != nil {
- log.Printf("Warning: Failed to generate secure random key, using fallback")
- // Fallback to a less secure method if crypto/rand fails
- return fmt.Sprintf("%s-fallback-%d", prefix, os.Getpid())
- }
- // Convert to hex and add prefix
- return fmt.Sprintf("%s-%s", prefix, hex.EncodeToString(randomBytes))
- }
- // AuthMiddleware returns a middleware that checks API keys for the given key type
- func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) http.Handler {
// InferenceAuthMiddleware returns middleware for inference endpoints
func (a *APIAuthMiddleware) InferenceAuthMiddleware() func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
@@ -109,24 +78,74 @@ func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) h
return
}
// Extract API key from request
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
- var isValid bool
- switch keyType {
- case KeyTypeInference:
- // Management keys also work for OpenAI endpoints (higher privilege)
- isValid = a.isValidKey(apiKey, KeyTypeInference) || a.isValidKey(apiKey, KeyTypeManagement)
- case KeyTypeManagement:
- isValid = a.isValidKey(apiKey, KeyTypeManagement)
- default:
- isValid = false
// Try database authentication first
var foundKey *auth.APIKey
if a.requireInferenceAuth && a.authStore != nil {
activeKeys, err := a.authStore.GetActiveKeys(r.Context())
if err != nil {
log.Printf("Failed to get active inference keys: %v", err)
// Continue to management key fallback
} else {
for _, key := range activeKeys {
if auth.VerifyKey(apiKey, key.KeyHash) {
foundKey = key
// Async update last_used_at
go func(keyID int) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := a.authStore.TouchKey(ctx, keyID); err != nil {
log.Printf("Failed to update last used timestamp for key %d: %v", keyID, err)
}
}(key.ID)
break
}
}
}
}
- if !isValid {
// If no database key found, try management key authentication (config-based)
if foundKey == nil {
if !a.isValidManagementKey(apiKey) {
a.unauthorized(w, "Invalid API key")
return
}
// Management key was used, continue without adding APIKey to context
} else {
// Add APIKey to context for permission checking
ctx := context.WithValue(r.Context(), apiKeyContextKey, foundKey)
r = r.WithContext(ctx)
}
next.ServeHTTP(w, r)
})
}
}
// ManagementAuthMiddleware returns middleware for management endpoints
func (a *APIAuthMiddleware) ManagementAuthMiddleware() func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
next.ServeHTTP(w, r)
return
}
// Extract API key from request
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
// Check if key exists in managementKeys map using constant-time comparison
if !a.isValidManagementKey(apiKey) {
a.unauthorized(w, "Invalid API key")
return
}
@@ -136,6 +155,33 @@ func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) h
}
}
// CheckInstancePermission checks if the authenticated key has permission for the instance
func (a *APIAuthMiddleware) CheckInstancePermission(ctx context.Context, instanceID int) error {
// Extract APIKey from context
apiKey, ok := ctx.Value(apiKeyContextKey).(*auth.APIKey)
if !ok {
// No APIKey in context: a management key was used, allow all
return nil
}
// If permission_mode == "allow_all", allow all
if apiKey.PermissionMode == auth.PermissionModeAllowAll {
return nil
}
// Check per-instance permissions
canInfer, err := a.authStore.HasPermission(ctx, apiKey.ID, instanceID)
if err != nil {
return fmt.Errorf("failed to check permission: %w", err)
}
if !canInfer {
return fmt.Errorf("permission denied: key does not have access to this instance")
}
return nil
}
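A hypothetical handler-side helper showing how CheckInstancePermission pairs with the forbidden response below; how instanceID is parsed from the request is left as an assumption:
// guardInstance writes a 403 and returns false when the key lacks access.
func (a *APIAuthMiddleware) guardInstance(w http.ResponseWriter, r *http.Request, instanceID int) bool {
	if err := a.CheckInstancePermission(r.Context(), instanceID); err != nil {
		a.forbidden(w, err.Error())
		return false
	}
	return true
}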
// extractAPIKey extracts the API key from the request
func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
// Check Authorization header: "Bearer sk-..."
@@ -158,20 +204,9 @@ func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
return ""
}
// isValidKey checks if the provided API key is valid for the given key type
func (a *APIAuthMiddleware) isValidKey(providedKey string, keyType KeyType) bool {
var validKeys map[string]bool
switch keyType {
case KeyTypeInference:
validKeys = a.inferenceKeys
case KeyTypeManagement:
validKeys = a.managementKeys
default:
return false
}
for validKey := range validKeys {
// isValidManagementKey checks if the provided API key is a valid management key
func (a *APIAuthMiddleware) isValidManagementKey(providedKey string) bool {
for validKey := range a.managementKeys {
if len(providedKey) == len(validKey) &&
subtle.ConstantTimeCompare([]byte(providedKey), []byte(validKey)) == 1 {
return true
@@ -187,3 +222,11 @@ func (a *APIAuthMiddleware) unauthorized(w http.ResponseWriter, message string)
response := fmt.Sprintf(`{"error": {"message": "%s", "type": "authentication_error"}}`, message)
w.Write([]byte(response))
}
// forbidden sends a forbidden response
func (a *APIAuthMiddleware) forbidden(w http.ResponseWriter, message string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusForbidden)
response := fmt.Sprintf(`{"error": {"message": "%s", "type": "permission_denied"}}`, message)
w.Write([]byte(response))
}
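For reference, the JSON bodies the two helpers above produce have this shape (messages shown are examples drawn from the middleware code):
{"error": {"message": "Invalid API key", "type": "authentication_error"}}
{"error": {"message": "permission denied: key does not have access to this instance", "type": "permission_denied"}}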


@@ -9,107 +9,44 @@ import (
"testing"
)
func TestAuthMiddleware(t *testing.T) {
func TestInferenceAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
// Valid key tests
{
name: "valid inference key for inference",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: server.KeyTypeInference,
name: "valid management key for inference",
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for management",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
// Invalid key tests
{
name: "inference key for management should fail",
keyType: server.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid management key",
keyType: server.KeyTypeManagement,
name: "invalid key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing management key",
keyType: server.KeyTypeManagement,
name: "missing key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
{
name: "OPTIONS request bypasses management auth",
keyType: server.KeyTypeManagement,
name: "OPTIONS request bypasses auth",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: server.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
method: "POST",
@@ -120,10 +57,10 @@ func TestAuthMiddleware(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
RequireInferenceAuth: true,
ManagementKeys: tt.managementKeys,
}
middleware := server.NewAPIAuthMiddleware(cfg)
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
@@ -131,24 +68,17 @@ func TestAuthMiddleware(t *testing.T) {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
// Create test handler
handler := middleware.InferenceAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("AuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
t.Errorf("InferenceAuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
@@ -167,178 +97,171 @@ func TestAuthMiddleware(t *testing.T) {
}
}
func TestGenerateAPIKey(t *testing.T) {
func TestManagementAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
}{
{"inference key generation", server.KeyTypeInference},
{"management key generation", server.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config config.AuthConfig
if tt.keyType == server.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
config.RequireManagementAuth = true
config.ManagementKeys = []string{} // Empty to trigger generation
}
// Create middleware - this should trigger key generation
middleware := server.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
handler.ServeHTTP(recorder, req)
// Should be unauthorized without a key (proving that a key was generated and auth is working)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
}
// Test uniqueness by creating another middleware instance
middleware2 := server.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == server.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
}
// Both should require auth (proving keys were generated for both instances)
if recorder2.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
}
})
}
}
func TestAutoGeneration(t *testing.T) {
tests := []struct {
name string
requireInference bool
requireManagement bool
providedInference []string
providedManagement []string
shouldGenerateInf bool // Whether inference key should be generated
shouldGenerateMgmt bool // Whether management key should be generated
name string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
{
name: "inference auth required, keys provided - no generation",
requireInference: true,
requireManagement: false,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
name: "valid management key",
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "inference auth required, no keys - should auto-generate",
requireInference: true,
requireManagement: false,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: false,
name: "invalid management key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "management auth required, keys provided - no generation",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
name: "missing management key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "management auth required, no keys - should auto-generate",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: true,
},
{
name: "both required, both provided - no generation",
requireInference: true,
requireManagement: true,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "both required, none provided - should auto-generate both",
requireInference: true,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: true,
name: "OPTIONS request bypasses management auth",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
RequireManagementAuth: true,
ManagementKeys: tt.managementKeys,
}
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
if tt.requestKey != "" {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler
handler := middleware.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("ManagementAuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
if recorder.Code == http.StatusUnauthorized {
contentType := recorder.Header().Get("Content-Type")
if contentType != "application/json" {
t.Errorf("Unauthorized response Content-Type = %v, expected application/json", contentType)
}
body := recorder.Body.String()
if !strings.Contains(body, `"type": "authentication_error"`) {
t.Errorf("Unauthorized response missing proper error type: %v", body)
}
}
})
}
}
func TestManagementKeyAutoGeneration(t *testing.T) {
// Test auto-generation for management keys
config := config.AuthConfig{
RequireManagementAuth: true,
ManagementKeys: []string{}, // Empty to trigger generation
}
// Create middleware - this should trigger key generation
middleware := server.NewAPIAuthMiddleware(config, nil)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
handler := middleware.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should be unauthorized without a key (proving that a key was generated and auth is working)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
}
// Test uniqueness by creating another middleware instance
middleware2 := server.NewAPIAuthMiddleware(config, nil)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
handler2 := middleware2.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
// Both should require auth (proving keys were generated for both instances)
if recorder2.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
}
}
func TestAutoGenerationScenarios(t *testing.T) {
tests := []struct {
name string
requireManagement bool
providedManagement []string
shouldGenerate bool
}{
{
name: "management auth required, keys provided - no generation",
requireManagement: true,
providedManagement: []string{"sk-management-provided"},
shouldGenerate: false,
},
{
name: "management auth required, no keys - should auto-generate",
requireManagement: true,
providedManagement: []string{},
shouldGenerate: true,
},
{
name: "management auth not required - no generation",
requireManagement: false,
providedManagement: []string{},
shouldGenerate: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := server.NewAPIAuthMiddleware(cfg)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for inference without key, got status %v", recorder.Code)
}
}
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Test management behavior if management auth is required
if tt.requireManagement {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
@@ -352,3 +275,16 @@ func TestAutoGeneration(t *testing.T) {
})
}
}
func TestConfigBasedInferenceKeysDeprecationWarning(t *testing.T) {
// Config-based inference keys should trigger a deprecation warning (logged, not captured here)
cfg := config.AuthConfig{
InferenceKeys: []string{"sk-inference-old"},
}
// Creating middleware should log a warning, but shouldn't fail
_ = server.NewAPIAuthMiddleware(cfg, nil)
// If we get here without panic, the test passes
// The warning is logged but not returned as an error
}
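A sketch of how that warning could be asserted directly, assuming the middleware logs through the standard log package; the matched substring is an assumption, and the bytes, log, and os imports would need to be added:
func TestDeprecationWarningLogged(t *testing.T) {
	// Redirect the standard logger into a buffer for the duration of the test.
	var buf bytes.Buffer
	log.SetOutput(&buf)
	defer log.SetOutput(os.Stderr)

	cfg := config.AuthConfig{InferenceKeys: []string{"sk-inference-old"}}
	_ = server.NewAPIAuthMiddleware(cfg, nil)

	if !strings.Contains(buf.String(), "deprecated") {
		t.Errorf("expected deprecation warning in logs, got %q", buf.String())
	}
}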

Some files were not shown because too many files have changed in this diff.