92 Commits

Author SHA1 Message Date
80d5d44a0b Add inference api key frontend integration 2025-12-04 23:26:32 +01:00
2d0acc60f2 Fix double dash in generated keys 2025-12-04 23:25:51 +01:00
a1b6f0c1b0 Remove JSON file archiving from migration process 2025-12-04 23:02:06 +01:00
991ce3c678 Remove unnecessary canviewlogs permission 2025-12-04 22:18:29 +01:00
d9c666a245 Update deprication warnings 2025-12-04 21:23:22 +01:00
85cf712b03 Update api docs 2025-12-03 21:25:03 +01:00
5ccf493e04 Add permission checks to proxies 2025-12-03 21:14:44 +01:00
9eee42c673 Initial api key store implementation 2025-12-03 20:31:38 +01:00
470f90076f Merge pull request #99 from lordmathis/feat/db-persistence
feat: Migrate instance persistance to sqlite3
2025-12-02 20:46:55 +01:00
3fd597638b Fix migration 2025-12-02 20:38:52 +01:00
645aa63186 Fix vscode launch params 2025-12-02 20:38:52 +01:00
7c05fd278c Update configuration configs and logs paths 2025-12-02 20:38:52 +01:00
00114caa00 Add db config and move data dir config 2025-12-02 20:38:52 +01:00
7272aa26ec Refactor database interface and migration functions 2025-12-02 20:38:52 +01:00
fec989fee2 Implement SQLite database persistence for instance management 2025-12-02 20:38:52 +01:00
0c11365d7e Merge pull request #97 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-e5d9180762
chore: bump lucide-react from 0.554.0 to 0.555.0 in /webui in the npm-production group
2025-12-02 06:55:22 +01:00
dependabot[bot]
bb88fb2bb2 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.554.0 to 0.555.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.555.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.555.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-02 01:24:30 +00:00
6d049be13e Merge pull request #96 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-e747de9977
chore: bump typescript-eslint from 8.47.0 to 8.48.0 in /webui in the npm-development group
2025-11-25 11:13:54 +01:00
dependabot[bot]
bb0d4863d8 chore: bump typescript-eslint in /webui in the npm-development group
Bumps the npm-development group in /webui with 1 update: [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `typescript-eslint` from 8.47.0 to 8.48.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.48.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: typescript-eslint
  dependency-version: 8.48.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 22:26:17 +00:00
22a747c318 Merge pull request #95 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-143683ff68
chore: bump typescript-eslint from 8.46.4 to 8.47.0 in /webui in the npm-development group
2025-11-17 23:05:49 +01:00
ceef48a125 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-development-143683ff68 2025-11-17 23:03:42 +01:00
db1347a709 Merge pull request #94 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-1d33cd6545
chore: bump lucide-react from 0.553.0 to 0.554.0 in /webui in the npm-production group
2025-11-17 23:02:23 +01:00
dependabot[bot]
e4027722d7 chore: bump typescript-eslint in /webui in the npm-development group
Bumps the npm-development group in /webui with 1 update: [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `typescript-eslint` from 8.46.4 to 8.47.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.47.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: typescript-eslint
  dependency-version: 8.47.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-17 21:43:26 +00:00
dependabot[bot]
8218c042c8 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.553.0 to 0.554.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.554.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.554.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-17 21:42:58 +00:00
efed0f543b Merge pull request #93 from lordmathis/dependabot/npm_and_yarn/webui/js-yaml-4.1.1
chore: bump js-yaml from 4.1.0 to 4.1.1 in /webui
2025-11-16 18:42:04 +01:00
dependabot[bot]
aa0508eb9b chore: bump js-yaml from 4.1.0 to 4.1.1 in /webui
Bumps [js-yaml](https://github.com/nodeca/js-yaml) from 4.1.0 to 4.1.1.
- [Changelog](https://github.com/nodeca/js-yaml/blob/master/CHANGELOG.md)
- [Commits](https://github.com/nodeca/js-yaml/compare/4.1.0...4.1.1)

---
updated-dependencies:
- dependency-name: js-yaml
  dependency-version: 4.1.1
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-16 13:53:06 +00:00
514b1b0e76 Merge pull request #92 from lordmathis/feat/command-override
feat: Add per instance command override
2025-11-15 01:04:01 +01:00
6565be3676 Refactor ConfigContext hooks 2025-11-15 01:02:15 +01:00
ad772a05ce Refactor import statement in App.test.tsx 2025-11-15 00:45:45 +01:00
b594ade8f9 Add mocks for ConfigContext in tests to use default configuration values 2025-11-15 00:45:02 +01:00
2ceeddbce5 Improve instance creation documentation with clearer settings and options 2025-11-15 00:18:55 +01:00
6ed99fccf9 Update swagger api docs 2025-11-14 23:43:14 +01:00
4f8f4b96cd Fix docker_enabled inconsistency 2025-11-14 23:41:16 +01:00
c04c952293 Pass default config values to instance dialog 2025-11-14 23:07:30 +01:00
7544fbb1ce Refactor JSON marshaling in Options to improve thread safety 2025-11-14 21:50:58 +01:00
4f4feacaa8 Remove manual config fields assignment 2025-11-14 20:38:36 +01:00
2c28971e15 Add JSON tags to configuration structs 2025-11-14 20:37:33 +01:00
e77ed0cdef Deep copy config for sanitization 2025-11-14 20:25:40 +01:00
09605d07ab Implement ConfigContext for instance defaults 2025-11-14 19:24:18 +01:00
623e258a2a Add API endpoint to retrieve sanitized server configuration 2025-11-14 18:57:03 +01:00
91d8a9008f Add command override to webui 2025-11-14 18:44:39 +01:00
511889e56d Implement per instance command override on backend 2025-11-14 18:38:31 +01:00
19eb552dc7 Merge pull request #89 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-bfaa8512fe
chore: bump the npm-development group across 1 directory with 14 updates
2025-11-13 21:40:29 +01:00
4bddea2831 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-development-bfaa8512fe 2025-11-13 21:38:39 +01:00
b878dfe8da Merge pull request #91 from lordmathis/fix/node-types
fix: Add missing @types/node to types in tsconfig.json
2025-11-13 21:38:25 +01:00
d600212cd0 Add @types/node to types in tsconfig.json 2025-11-13 21:36:21 +01:00
dependabot[bot]
5837521821 chore: bump the npm-development group across 1 directory with 14 updates
Bumps the npm-development group with 14 updates in the /webui directory:

| Package | From | To |
| --- | --- | --- |
| [@eslint/js](https://github.com/eslint/eslint/tree/HEAD/packages/js) | `9.32.0` | `9.39.1` |
| [@types/eslint__js](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/eslint__js) | `8.42.3` | `9.14.0` |
| [@testing-library/jest-dom](https://github.com/testing-library/jest-dom) | `6.6.3` | `6.9.1` |
| [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) | `24.0.15` | `24.10.1` |
| [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/tree/HEAD/packages/plugin-react) | `4.7.0` | `5.1.1` |
| [@vitest/ui](https://github.com/vitest-dev/vitest/tree/HEAD/packages/ui) | `3.2.4` | `4.0.8` |
| [eslint](https://github.com/eslint/eslint) | `9.32.0` | `9.39.1` |
| [eslint-plugin-react-hooks](https://github.com/facebook/react/tree/HEAD/packages/eslint-plugin-react-hooks) | `5.2.0` | `7.0.1` |
| [jsdom](https://github.com/jsdom/jsdom) | `26.1.0` | `27.2.0` |
| [tw-animate-css](https://github.com/Wombosvideo/tw-animate-css) | `1.3.5` | `1.4.0` |
| [typescript](https://github.com/microsoft/TypeScript) | `5.8.3` | `5.9.3` |
| [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) | `8.38.0` | `8.46.4` |
| [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) | `7.1.11` | `7.2.2` |
| [vitest](https://github.com/vitest-dev/vitest/tree/HEAD/packages/vitest) | `3.2.4` | `4.0.8` |



Updates `@eslint/js` from 9.32.0 to 9.39.1
- [Release notes](https://github.com/eslint/eslint/releases)
- [Commits](https://github.com/eslint/eslint/commits/v9.39.1/packages/js)

Updates `@types/eslint__js` from 8.42.3 to 9.14.0
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/eslint__js)

Updates `@testing-library/jest-dom` from 6.6.3 to 6.9.1
- [Release notes](https://github.com/testing-library/jest-dom/releases)
- [Changelog](https://github.com/testing-library/jest-dom/blob/main/CHANGELOG.md)
- [Commits](https://github.com/testing-library/jest-dom/compare/v6.6.3...v6.9.1)

Updates `@types/eslint__js` from 8.42.3 to 9.14.0
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/eslint__js)

Updates `@types/node` from 24.0.15 to 24.10.1
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `@vitejs/plugin-react` from 4.7.0 to 5.1.1
- [Release notes](https://github.com/vitejs/vite-plugin-react/releases)
- [Changelog](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite-plugin-react/commits/plugin-react@5.1.1/packages/plugin-react)

Updates `@vitest/ui` from 3.2.4 to 4.0.8
- [Release notes](https://github.com/vitest-dev/vitest/releases)
- [Commits](https://github.com/vitest-dev/vitest/commits/v4.0.8/packages/ui)

Updates `eslint` from 9.32.0 to 9.39.1
- [Release notes](https://github.com/eslint/eslint/releases)
- [Commits](https://github.com/eslint/eslint/compare/v9.32.0...v9.39.1)

Updates `eslint-plugin-react-hooks` from 5.2.0 to 7.0.1
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/packages/eslint-plugin-react-hooks/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/HEAD/packages/eslint-plugin-react-hooks)

Updates `jsdom` from 26.1.0 to 27.2.0
- [Release notes](https://github.com/jsdom/jsdom/releases)
- [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md)
- [Commits](https://github.com/jsdom/jsdom/compare/26.1.0...27.2.0)

Updates `tw-animate-css` from 1.3.5 to 1.4.0
- [Release notes](https://github.com/Wombosvideo/tw-animate-css/releases)
- [Commits](https://github.com/Wombosvideo/tw-animate-css/compare/v1.3.5...v1.4.0)

Updates `typescript` from 5.8.3 to 5.9.3
- [Release notes](https://github.com/microsoft/TypeScript/releases)
- [Changelog](https://github.com/microsoft/TypeScript/blob/main/azure-pipelines.release-publish.yml)
- [Commits](https://github.com/microsoft/TypeScript/compare/v5.8.3...v5.9.3)

Updates `typescript-eslint` from 8.38.0 to 8.46.4
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.46.4/packages/typescript-eslint)

Updates `vite` from 7.1.11 to 7.2.2
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.2.2/packages/vite)

Updates `vitest` from 3.2.4 to 4.0.8
- [Release notes](https://github.com/vitest-dev/vitest/releases)
- [Commits](https://github.com/vitest-dev/vitest/commits/v4.0.8/packages/vitest)

---
updated-dependencies:
- dependency-name: "@eslint/js"
  dependency-version: 9.39.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@types/eslint__js"
  dependency-version: 9.14.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@testing-library/jest-dom"
  dependency-version: 6.9.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@types/eslint__js"
  dependency-version: 9.14.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@types/node"
  dependency-version: 24.10.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: "@vitejs/plugin-react"
  dependency-version: 5.1.1
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: "@vitest/ui"
  dependency-version: 4.0.8
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: eslint
  dependency-version: 9.39.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: eslint-plugin-react-hooks
  dependency-version: 7.0.1
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: jsdom
  dependency-version: 27.2.0
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: tw-animate-css
  dependency-version: 1.4.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript
  dependency-version: 5.9.3
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.46.4
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vite
  dependency-version: 7.2.2
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vitest
  dependency-version: 4.0.8
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-13 20:19:01 +00:00
7e71ada904 Merge pull request #88 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-ebddbb6ace
chore: bump the npm-production group in /webui with 5 updates
2025-11-13 21:16:36 +01:00
5335634879 Merge branch 'main' into dependabot/npm_and_yarn/webui/npm-production-ebddbb6ace 2025-11-13 21:11:33 +01:00
15d1e17454 Merge pull request #90 from lordmathis/feat/custom-args
feat: Add support for custom args
2025-11-13 21:10:45 +01:00
72b70918fa Add useEffect to sync internal state with value prop in KeyValueInput 2025-11-13 21:04:20 +01:00
11bfe75a3c Add support for extra args for command parser 2025-11-13 20:41:08 +01:00
ae5358ff65 Change FlashAttn field type to string in LlamaServerOptions 2025-11-12 23:49:34 +01:00
bff8e7d914 Refactor command line argument keys to use snake_case format in LlamaServerOptions 2025-11-12 23:46:15 +01:00
5ad076919e Add extra args test to backend 2025-11-12 23:40:07 +01:00
c022746cd8 Fix import path for EnvVarsInput component 2025-11-12 23:13:09 +01:00
8d92f9b371 Add ExtraArgs support for Llama, Mlx, and Vllm server options 2025-11-12 23:11:22 +01:00
15180a227b Add support for extra arguments in frontend 2025-11-12 22:50:15 +01:00
dependabot[bot]
0708327a16 chore: bump the npm-production group in /webui with 5 updates
Bumps the npm-production group in /webui with 5 updates:

| Package | From | To |
| --- | --- | --- |
| [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) | `0.525.0` | `0.553.0` |
| [react](https://github.com/facebook/react/tree/HEAD/packages/react) | `19.1.0` | `19.2.0` |
| [react-dom](https://github.com/facebook/react/tree/HEAD/packages/react-dom) | `19.1.0` | `19.2.0` |
| [tailwind-merge](https://github.com/dcastil/tailwind-merge) | `3.3.1` | `3.4.0` |
| [zod](https://github.com/colinhacks/zod) | `4.0.5` | `4.1.12` |


Updates `lucide-react` from 0.525.0 to 0.553.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.553.0/packages/lucide-react)

Updates `react` from 19.1.0 to 19.2.0
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/v19.2.0/packages/react)

Updates `react-dom` from 19.1.0 to 19.2.0
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/v19.2.0/packages/react-dom)

Updates `tailwind-merge` from 3.3.1 to 3.4.0
- [Release notes](https://github.com/dcastil/tailwind-merge/releases)
- [Commits](https://github.com/dcastil/tailwind-merge/compare/v3.3.1...v3.4.0)

Updates `zod` from 4.0.5 to 4.1.12
- [Release notes](https://github.com/colinhacks/zod/releases)
- [Commits](https://github.com/colinhacks/zod/compare/v4.0.5...v4.1.12)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.553.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: react
  dependency-version: 19.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: react-dom
  dependency-version: 19.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: tailwind-merge
  dependency-version: 3.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: zod
  dependency-version: 4.1.12
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-12 20:58:32 +00:00
a2740055c2 Merge pull request #87 from lordmathis/chore/update-dependabot
chore: Simplify dependabot configuration
2025-11-12 21:57:07 +01:00
0ddffaa2e6 Simplify dependabot configuration 2025-11-12 21:53:01 +01:00
9a160a5312 Merge pull request #80 from lordmathis/chore/dependabot
chore: Add dependabot configuration for Go and npm dependencies
2025-11-12 21:34:51 +01:00
8861057f11 Add dependabot configuration for Go and npm dependencies 2025-11-12 21:31:47 +01:00
34edb8a2e5 Merge pull request #78 from lordmathis/feat/inflight-requests
feat: Wait for inflight requests to finish before shutting down an instance
2025-10-30 18:08:55 +01:00
560850f86d Add shutdown state checks in HTTP handlers 2025-10-30 18:00:59 +01:00
c340439306 Add support for 'shutting_down' state in HealthBadge and health service 2025-10-29 00:09:18 +01:00
77c0e22fd0 Use instance's ServeHTTP in handlers 2025-10-29 00:01:29 +01:00
d65c5ab717 Wait for inflight requests before stopping 2025-10-29 00:00:56 +01:00
2b94244c8a Replace GetProxy with ServeHttp in instance 2025-10-29 00:00:02 +01:00
2e5644db53 Implement inflight request tracking 2025-10-28 23:59:02 +01:00
7ee22fee51 Implement shutting down status 2025-10-28 23:53:11 +01:00
e5baedb776 Merge pull request #76 from lordmathis/feat/import-export
feat: Ad support for instance import and export on frontend
2025-10-27 20:46:48 +01:00
e6205b930e Document import and export features 2025-10-27 20:44:28 +01:00
f9eb424690 Fix concurrent map write issue in MarshalJSON by initializing BackendOptions 2025-10-27 20:36:42 +01:00
5b84b64623 Fix some typescript issues 2025-10-27 20:36:31 +01:00
7813a5f2be Move import instance configuration to InstanceDialog component 2025-10-27 20:17:18 +01:00
a00c9b82a6 Add import functionality for instance configuration from JSON file 2025-10-27 20:11:22 +01:00
cbfa6bd48f Fix export functionality to exclude computed field from JSON output 2025-10-27 19:59:43 +01:00
bee0f72c10 Add export functionality to InstanceCard component 2025-10-27 19:55:07 +01:00
a5d8f541f0 Merge pull request #75 from lordmathis/fix/delete-instance
fix: Prevent restarting instance from getting deleted
2025-10-27 19:27:58 +01:00
dfcc16083c Update test configuration to use 'sh -c "sleep 999999"' command 2025-10-27 19:25:13 +01:00
6ec2919049 Fix instance start simulation in TestUpdateInstance 2025-10-27 19:14:54 +01:00
d6a6f377fc Fix logger race condition 2025-10-27 19:06:06 +01:00
cd9a71d9fc Update test configuration to use 'yes' command instead of 'sleep' 2025-10-27 18:54:20 +01:00
2c4cc5a69a Fix manager tests 2025-10-27 18:47:17 +01:00
b1fc1d2dc8 Add InstancesDir to test configuration for instance management 2025-10-27 18:38:23 +01:00
08c47a16a0 Fix operations tests 2025-10-27 18:35:16 +01:00
219db7abce Move port range validation to config 2025-10-27 18:23:49 +01:00
14131a6274 Remove redundant code 2025-10-27 18:18:25 +01:00
e65f4f1641 Remove unsupported error wrapping from log.Printf 2025-10-27 18:01:58 +01:00
5ef0654cdd Use %w for error wrapping in log messages across multiple files 2025-10-27 17:54:39 +01:00
1814772fa2 Fix instance deletion check to account for restarting status 2025-10-27 17:42:27 +01:00
100 changed files with 7389 additions and 2193 deletions

40
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
version: 2
updates:
# Go modules
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
day: "monday"
open-pull-requests-limit: 5
commit-message:
prefix: "chore"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]
groups:
go-dependencies:
patterns:
- "*"
labels:
- "dependencies"
# npm dependencies for webui
- package-ecosystem: "npm"
directory: "/webui"
schedule:
interval: "weekly"
day: "monday"
open-pull-requests-limit: 5
commit-message:
prefix: "chore"
ignore:
- dependency-name: "*"
update-types: ["version-update:semver-patch"]
groups:
npm-production:
dependency-type: "production"
npm-development:
dependency-type: "development"
labels:
- "dependencies"

5
.gitignore vendored
View File

@@ -42,4 +42,7 @@ site/
llamactl.dev.yaml
# Debug files
__debug*
__debug*
# Binary
llamactl-*

2
.vscode/launch.json vendored
View File

@@ -9,7 +9,7 @@
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/server/main.go",
"program": "${workspaceFolder}/cmd/server",
"env": {
"GO_ENV": "development",
"LLAMACTL_CONFIG_PATH": "${workspaceFolder}/llamactl.dev.yaml"

View File

@@ -179,21 +179,28 @@ backends:
args: []
environment: {} # Environment variables for the backend process
data_dir: ~/.local/share/llamactl # Main data directory (database, instances, logs), default varies by OS
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
port_range: [8000, 9000] # Port range for instances
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent)
logs_dir: ~/.local/share/llamactl/logs # Logs directory (platform dependent)
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
database:
path: ~/.local/share/llamactl/llamactl.db # Database file path (platform dependent)
max_open_connections: 25 # Maximum open database connections
max_idle_connections: 5 # Maximum idle database connections
connection_max_lifetime: 5m # Connection max lifetime
auth:
require_inference_auth: true # Require auth for inference endpoints

View File

@@ -3,8 +3,10 @@ package main
import (
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/manager"
"llamactl/pkg/server"
"log"
"net/http"
"os"
"os/signal"
@@ -38,8 +40,7 @@ func main() {
configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
cfg, err := config.LoadConfig(configPath)
if err != nil {
fmt.Printf("Error loading config: %v\n", err)
fmt.Println("Using default configuration.")
log.Printf("Error loading config: %v\nUsing default configuration.", err)
}
// Set version information
@@ -47,24 +48,50 @@ func main() {
cfg.CommitHash = commitHash
cfg.BuildTime = buildTime
// Create the data directory if it doesn't exist
// Create data directory if it doesn't exist
if cfg.Instances.AutoCreateDirs {
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
fmt.Println("Persistence will not be available.")
// Create the main data directory
if err := os.MkdirAll(cfg.DataDir, 0755); err != nil {
log.Printf("Error creating data directory %s: %v\nData persistence may not be available.", cfg.DataDir, err)
}
// Create instances directory
if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
log.Printf("Error creating instances directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
}
// Create logs directory
if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
fmt.Println("Instance logs will not be available.")
log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
}
}
// Initialize the instance manager
instanceManager := manager.New(&cfg)
// Initialize database
db, err := database.Open(&database.Config{
Path: cfg.Database.Path,
MaxOpenConnections: cfg.Database.MaxOpenConnections,
MaxIdleConnections: cfg.Database.MaxIdleConnections,
ConnMaxLifetime: cfg.Database.ConnMaxLifetime,
})
if err != nil {
log.Fatalf("Failed to open database: %v", err)
}
// Run database migrations
if err := database.RunMigrations(db); err != nil {
log.Fatalf("Failed to run database migrations: %v", err)
}
// Migrate from JSON files if needed (one-time migration)
if err := migrateFromJSON(&cfg, db); err != nil {
log.Printf("Warning: Failed to migrate from JSON: %v", err)
}
// Initialize the instance manager with dependency injection
instanceManager := manager.New(&cfg, db)
// Create a new handler with the instance manager
handler := server.NewHandler(instanceManager, cfg)
handler := server.NewHandler(instanceManager, cfg, db)
// Setup the router with the handler
r := server.SetupRouter(handler)
@@ -81,7 +108,7 @@ func main() {
go func() {
fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
fmt.Printf("Error starting server: %v\n", err)
log.Printf("Error starting server: %v\n", err)
}
}()
@@ -90,7 +117,7 @@ func main() {
fmt.Println("Shutting down server...")
if err := server.Close(); err != nil {
fmt.Printf("Error shutting down server: %v\n", err)
log.Printf("Error shutting down server: %v\n", err)
} else {
fmt.Println("Server shut down gracefully.")
}

View File

@@ -0,0 +1,82 @@
package main
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
)
// migrateFromJSON imports legacy per-instance JSON files from the configured
// instances directory into the given instance store.
//
// The import is skipped (nil is returned) when no instances directory is
// configured, when that directory does not exist, when the store already
// contains at least one instance, or when the directory holds no *.json files.
// A file that fails to import is logged and skipped; it does not abort the
// remaining files and does not make this function return an error.
//
// An error is returned only when the store cannot be queried or the glob
// pattern cannot be evaluated.
func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
	dir := cfg.Instances.InstancesDir
	if dir == "" {
		// Nothing configured, so there is nowhere to look for legacy files.
		return nil
	}

	// Only a "does not exist" result short-circuits here; any other stat
	// outcome falls through to the checks below.
	_, statErr := os.Stat(dir)
	if os.IsNotExist(statErr) {
		return nil
	}

	// A non-empty store is treated as "already migrated".
	stored, loadErr := db.LoadAll()
	switch {
	case loadErr != nil:
		return fmt.Errorf("failed to check existing instances: %w", loadErr)
	case len(stored) != 0:
		return nil
	}

	// Collect the candidate JSON files.
	pattern := filepath.Join(dir, "*.json")
	jsonPaths, globErr := filepath.Glob(pattern)
	if globErr != nil {
		return fmt.Errorf("failed to list instance files: %w", globErr)
	}

	total := len(jsonPaths)
	if total == 0 {
		return nil
	}

	log.Printf("Migrating %d instances from JSON to SQLite...", total)

	// Import file by file, counting successes and logging failures.
	succeeded := 0
	for i := 0; i < total; i++ {
		path := jsonPaths[i]
		migrateErr := migrateJSONFile(path, db)
		if migrateErr == nil {
			succeeded++
			continue
		}
		log.Printf("Failed to migrate %s: %v", path, migrateErr)
	}

	log.Printf("Successfully migrated %d/%d instances to SQLite", succeeded, total)
	return nil
}
// migrateJSONFile reads one JSON file, decodes it into an instance.Instance
// and saves that instance through db.
//
// It returns a wrapped error if the file cannot be read, the contents cannot
// be unmarshalled, or the save fails. On success it logs the name of the
// migrated instance and returns nil.
func migrateJSONFile(filename string, db database.InstanceStore) error {
	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return fmt.Errorf("failed to read file: %w", readErr)
	}

	// Decode into a freshly allocated instance and hand the same pointer to the store.
	inst := new(instance.Instance)
	if decodeErr := json.Unmarshal(raw, inst); decodeErr != nil {
		return fmt.Errorf("failed to unmarshal instance: %w", decodeErr)
	}

	saveErr := db.Save(inst)
	if saveErr != nil {
		return fmt.Errorf("failed to save instance to database: %w", saveErr)
	}

	log.Printf("Migrated instance %s from JSON to SQLite", inst.Name)
	return nil
}

View File

@@ -49,21 +49,28 @@ backends:
environment: {} # Environment variables for the backend process
response_headers: {} # Additional response headers to send with responses
data_dir: ~/.local/share/llamactl # Main data directory (database, instances, logs), default varies by OS
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
port_range: [8000, 9000] # Port range for instances
configs_dir: data_dir/instances # Instance configs directory
logs_dir: data_dir/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
database:
path: data_dir/llamactl.db # Database file path
max_open_connections: 25 # Maximum open database connections
max_idle_connections: 5 # Maximum idle database connections
connection_max_lifetime: 5m # Connection max lifetime
auth:
require_inference_auth: true # Require auth for inference endpoints
@@ -193,32 +200,44 @@ backends:
- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
### Data Directory Configuration
```yaml
data_dir: "~/.local/share/llamactl" # Main data directory for database, instances, and logs (default varies by OS)
```
**Environment Variables:**
- `LLAMACTL_DATA_DIRECTORY` - Main data directory path
**Default Data Directory by Platform:**
- **Linux**: `~/.local/share/llamactl`
- **macOS**: `~/Library/Application Support/llamactl`
- **Windows**: `%LOCALAPPDATA%\llamactl` or `%PROGRAMDATA%\llamactl`
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
configs_dir: "instances" # Directory for instance configs, default: data_dir/instances
logs_dir: "logs" # Directory for instance logs, default: data_dir/logs
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
@@ -226,8 +245,24 @@ instances:
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Database Configuration
```yaml
database:
path: "llamactl.db" # Database file path, default: data_dir/llamactl.db
max_open_connections: 25 # Maximum open database connections (default: 25)
max_idle_connections: 5 # Maximum idle database connections (default: 5)
connection_max_lifetime: 5m # Connection max lifetime (default: 5m)
```
**Environment Variables:**
- `LLAMACTL_DATABASE_PATH` - Database file path (relative to data_dir or absolute)
- `LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS` - Maximum open database connections
- `LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS` - Maximum idle database connections
- `LLAMACTL_DATABASE_CONN_MAX_LIFETIME` - Connection max lifetime (e.g., "5m", "1h")
### Authentication Configuration

View File

@@ -19,6 +19,235 @@ const docTemplate = `{
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/api/v1/auth/keys": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all API keys for the system user (excludes key hash and plain-text key)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "List all API keys",
"responses": {
"200": {
"description": "List of API keys",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/server.KeyResponse"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"post": {
"description": "Creates a new API key with the specified permissions and returns the plain-text key (only shown once)",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Create a new API key",
"parameters": [
{
"description": "API key configuration",
"name": "key",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.CreateKeyRequest"
}
}
],
"responses": {
"201": {
"description": "Created API key with plain-text key",
"schema": {
"$ref": "#/definitions/server.CreateKeyResponse"
}
},
"400": {
"description": "Invalid request body or validation error",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/auth/keys/{id}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns details for a specific API key by ID (excludes key hash and plain-text key)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Get details of a specific API key",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "API key details",
"schema": {
"$ref": "#/definitions/server.KeyResponse"
}
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Deletes an API key by ID",
"tags": [
"Keys"
],
"summary": "Delete an API key",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"204": {
"description": "API key deleted successfully"
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/auth/keys/{id}/permissions": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the instance-level permissions for a specific API key (includes instance names)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Get API key permissions",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "List of key permissions",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/server.KeyPermissionResponse"
}
}
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/backends/llama-cpp/devices": {
"get": {
"security": [
@@ -256,6 +485,34 @@ const docTemplate = `{
}
}
},
"/api/v1/config": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the current server configuration (sanitized)",
"tags": [
"System"
],
"summary": "Get server configuration",
"responses": {
"200": {
"description": "Sanitized configuration",
"schema": {
"$ref": "#/definitions/config.AppConfig"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/instances": {
"get": {
"security": [
@@ -1475,11 +1732,289 @@ const docTemplate = `{
}
},
"definitions": {
"auth.PermissionMode": {
"type": "string",
"enum": [
"allow_all",
"per_instance"
],
"x-enum-varnames": [
"PermissionModeAllowAll",
"PermissionModePerInstance"
]
},
"config.AppConfig": {
"type": "object",
"properties": {
"auth": {
"$ref": "#/definitions/config.AuthConfig"
},
"backends": {
"$ref": "#/definitions/config.BackendConfig"
},
"build_time": {
"type": "string"
},
"commit_hash": {
"type": "string"
},
"data_dir": {
"description": "Directory where all llamactl data will be stored (database, instances, logs, etc.)",
"type": "string"
},
"database": {
"$ref": "#/definitions/config.DatabaseConfig"
},
"instances": {
"$ref": "#/definitions/config.InstancesConfig"
},
"local_node": {
"type": "string"
},
"nodes": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/config.NodeConfig"
}
},
"server": {
"$ref": "#/definitions/config.ServerConfig"
},
"version": {
"type": "string"
}
}
},
"config.AuthConfig": {
"type": "object",
"properties": {
"inference_keys": {
"description": "List of keys for OpenAI compatible inference endpoints",
"type": "array",
"items": {
"type": "string"
}
},
"management_keys": {
"description": "List of keys for management endpoints",
"type": "array",
"items": {
"type": "string"
}
},
"require_inference_auth": {
"description": "Require authentication for OpenAI compatible inference endpoints",
"type": "boolean"
},
"require_management_auth": {
"description": "Require authentication for management endpoints",
"type": "boolean"
}
}
},
"config.BackendConfig": {
"type": "object",
"properties": {
"llama-cpp": {
"$ref": "#/definitions/config.BackendSettings"
},
"mlx": {
"$ref": "#/definitions/config.BackendSettings"
},
"vllm": {
"$ref": "#/definitions/config.BackendSettings"
}
}
},
"config.BackendSettings": {
"type": "object",
"properties": {
"args": {
"type": "array",
"items": {
"type": "string"
}
},
"command": {
"type": "string"
},
"docker": {
"$ref": "#/definitions/config.DockerSettings"
},
"environment": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"response_headers": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
"config.DatabaseConfig": {
"type": "object",
"properties": {
"connection_max_lifetime": {
"type": "string",
"example": "1h"
},
"max_idle_connections": {
"type": "integer"
},
"max_open_connections": {
"description": "Maximum number of open database connections",
"type": "integer"
},
"path": {
"description": "Database file path (relative to the top-level data_dir or absolute)",
"type": "string"
}
}
},
"config.DockerSettings": {
"type": "object",
"properties": {
"args": {
"type": "array",
"items": {
"type": "string"
}
},
"enabled": {
"type": "boolean"
},
"environment": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"image": {
"type": "string"
}
}
},
"config.InstancesConfig": {
"type": "object",
"properties": {
"auto_create_dirs": {
"description": "Automatically create the data directory if it doesn't exist",
"type": "boolean"
},
"configs_dir": {
"description": "Instance config directory override (relative to data_dir if not absolute)",
"type": "string"
},
"default_auto_restart": {
"description": "Default auto-restart setting for new instances",
"type": "boolean"
},
"default_max_restarts": {
"description": "Default max restarts for new instances",
"type": "integer"
},
"default_on_demand_start": {
"description": "Default on-demand start setting for new instances",
"type": "boolean"
},
"default_restart_delay": {
"description": "Default restart delay for new instances (in seconds)",
"type": "integer"
},
"enable_lru_eviction": {
"description": "Enable LRU eviction for idle instances",
"type": "boolean"
},
"logs_dir": {
"description": "Logs directory override (relative to data_dir if not absolute)",
"type": "string"
},
"max_instances": {
"description": "Maximum number of instances that can be created",
"type": "integer"
},
"max_running_instances": {
"description": "Maximum number of instances that can be running at the same time",
"type": "integer"
},
"on_demand_start_timeout": {
"description": "How long to wait for an instance to start on demand (in seconds)",
"type": "integer"
},
"port_range": {
"description": "Port range for instances (e.g., 8000,9000)",
"type": "array",
"items": {
"type": "integer"
}
},
"timeout_check_interval": {
"description": "Interval for checking instance timeouts (in minutes)",
"type": "integer"
}
}
},
"config.NodeConfig": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"api_key": {
"type": "string"
}
}
},
"config.ServerConfig": {
"type": "object",
"properties": {
"allowed_headers": {
"description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")",
"type": "array",
"items": {
"type": "string"
}
},
"allowed_origins": {
"description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")",
"type": "array",
"items": {
"type": "string"
}
},
"enable_swagger": {
"description": "Enable Swagger UI for API documentation",
"type": "boolean"
},
"host": {
"description": "Server host to bind to",
"type": "string"
},
"port": {
"description": "Server port to bind to",
"type": "integer"
},
"response_headers": {
"description": "Response headers to send with responses",
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
"instance.Instance": {
"type": "object",
"properties": {
"created": {
"description": "Unix timestamp when the instance was created",
"description": "Unix timestamp when instance was created",
"type": "integer"
},
"id": {
"type": "integer"
},
"name": {
@@ -1494,6 +2029,13 @@ const docTemplate = `{
"description": "Auto restart",
"type": "boolean"
},
"command_override": {
"type": "string"
},
"docker_enabled": {
"description": "Execution context overrides",
"type": "boolean"
},
"environment": {
"description": "Environment variables",
"type": "object",
@@ -1518,6 +2060,125 @@ const docTemplate = `{
}
}
},
"server.CreateKeyRequest": {
"type": "object",
"properties": {
"expiresAt": {
"type": "integer",
"format": "int64"
},
"instancePermissions": {
"type": "array",
"items": {
"$ref": "#/definitions/server.InstancePermission"
}
},
"name": {
"type": "string"
},
"permissionMode": {
"$ref": "#/definitions/auth.PermissionMode"
}
}
},
"server.CreateKeyResponse": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
"id": {
"type": "integer"
},
"key": {
"type": "string"
},
"last_used_at": {
"type": "integer"
},
"name": {
"type": "string"
},
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
},
"updated_at": {
"type": "integer"
},
"user_id": {
"type": "string"
}
}
},
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
},
"instance_name": {
"type": "string"
}
}
},
"server.KeyResponse": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
"id": {
"type": "integer"
},
"last_used_at": {
"type": "integer"
},
"name": {
"type": "string"
},
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
},
"updated_at": {
"type": "integer"
},
"user_id": {
"type": "string"
}
}
},
"server.NodeResponse": {
"type": "object",
"properties": {

View File

@@ -42,32 +42,41 @@ Each instance is displayed as a card showing:
![Create Instance Screenshot](images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
4. **Choose Backend Type**:
- **llama.cpp**: For GGUF models using llama-server
- **MLX**: For MLX-optimized models (macOS only)
2. *Optional*: Click **"Import"** to load a previously exported configuration
**Instance Settings:**
3. Enter a unique **Instance Name** (required)
4. **Select Node**: Choose which node to deploy the instance to
5. Configure **Auto Restart** settings:
- Enable automatic restart on failure
- Set max restarts and delay between attempts
6. Configure basic instance options:
- **Idle Timeout**: Minutes before stopping idle instance
- **On Demand Start**: Start instance only when needed
**Backend Configuration:**
7. **Select Backend Type**:
- **Llama Server**: For GGUF models using llama-server
- **MLX LM**: For MLX-optimized models (macOS only)
- **vLLM**: For distributed serving and high-throughput inference
5. Configure model source:
- **For llama.cpp**: GGUF model path or HuggingFace repo
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
6. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
- **Environment Variables**: Set custom environment variables for the instance process
7. Configure backend-specific options:
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
8. *Optional*: Click **"Parse Command"** to import settings from an existing backend command
9. Configure **Execution Context**:
- **Enable Docker**: Run backend in Docker container
- **Command Override**: Custom path to backend executable
- **Environment Variables**: Custom environment variables
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
8. Click **"Create"** to save the instance
10. Configure **Basic Backend Options** (varies by backend):
- **llama.cpp**: Model path, threads, context size, GPU layers, etc.
- **MLX**: Model identifier, temperature, max tokens, etc.
- **vLLM**: Model identifier, tensor parallel size, GPU memory utilization, etc.
11. *Optional*: Expand **Advanced Backend Options** for additional settings
12. *Optional*: Add **Extra Args** as key-value pairs for custom command-line arguments
13. Click **"Create"** to save the instance
**Via API**
@@ -82,11 +91,34 @@ curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096,
"gpu_layers": 32
"gpu_layers": 32,
"flash_attn": "on"
},
"auto_restart": true,
"max_restarts": 3,
"docker_enabled": false,
"command_override": "/opt/llama-server-dev",
"nodes": ["main"]
}'
# Create vLLM instance with environment variables
curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1"
},
"nodes": ["worker1", "worker2"]
}'
# Create MLX instance (macOS only)
curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
-H "Content-Type: application/json" \
@@ -96,74 +128,10 @@ curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
"backend_options": {
"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
"temp": 0.7,
"top_p": 0.9,
"max_tokens": 2048
},
"auto_restart": true,
"max_restarts": 3,
"nodes": ["main"]
}'
# Create vLLM instance
curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "vllm",
"backend_options": {
"model": "microsoft/DialoGPT-medium",
"tensor_parallel_size": 2,
"gpu_memory_utilization": 0.9
},
"auto_restart": true,
"on_demand_start": true,
"environment": {
"CUDA_VISIBLE_DEVICES": "0,1",
"NCCL_DEBUG": "INFO",
"PYTHONPATH": "/custom/path"
},
"nodes": ["main"]
}'
# Create llama.cpp instance with HuggingFace model
curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
},
"nodes": ["main"]
}'
# Create instance on specific remote node
curl -X POST http://localhost:8080/api/v1/instances/remote-llama \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1"]
}'
# Create instance on multiple nodes for high availability
curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <token>" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/models/llama-7b.gguf",
"gpu_layers": 32
},
"nodes": ["worker1", "worker2", "worker3"]
}'
```
## Start Instance
@@ -219,6 +187,12 @@ curl -X PUT http://localhost:8080/api/v1/instances/{name} \
Configuration changes require restarting the instance to take effect.
## Export Instance
**Via Web UI**
1. Click the **"More actions"** button (three dots) on an instance card
2. Click **"Export"** to download the instance configuration as a JSON file
## View Logs
**Via Web UI**

View File

@@ -12,6 +12,235 @@
},
"basePath": "/api/v1",
"paths": {
"/api/v1/auth/keys": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all API keys for the system user (excludes key hash and plain-text key)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "List all API keys",
"responses": {
"200": {
"description": "List of API keys",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/server.KeyResponse"
}
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"post": {
"description": "Creates a new API key with the specified permissions and returns the plain-text key (only shown once)",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Create a new API key",
"parameters": [
{
"description": "API key configuration",
"name": "key",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/server.CreateKeyRequest"
}
}
],
"responses": {
"201": {
"description": "Created API key with plain-text key",
"schema": {
"$ref": "#/definitions/server.CreateKeyResponse"
}
},
"400": {
"description": "Invalid request body or validation error",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/auth/keys/{id}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns details for a specific API key by ID (excludes key hash and plain-text key)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Get details of a specific API key",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "API key details",
"schema": {
"$ref": "#/definitions/server.KeyResponse"
}
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Deletes an API key by ID",
"tags": [
"Keys"
],
"summary": "Delete an API key",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"204": {
"description": "API key deleted successfully"
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/auth/keys/{id}/permissions": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the instance-level permissions for a specific API key (includes instance names)",
"produces": [
"application/json"
],
"tags": [
"Keys"
],
"summary": "Get API key permissions",
"parameters": [
{
"type": "integer",
"description": "Key ID",
"name": "id",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "List of key permissions",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/server.KeyPermissionResponse"
}
}
},
"400": {
"description": "Invalid key ID",
"schema": {
"type": "string"
}
},
"404": {
"description": "API key not found",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/backends/llama-cpp/devices": {
"get": {
"security": [
@@ -249,6 +478,34 @@
}
}
},
"/api/v1/config": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the current server configuration (sanitized)",
"tags": [
"System"
],
"summary": "Get server configuration",
"responses": {
"200": {
"description": "Sanitized configuration",
"schema": {
"$ref": "#/definitions/config.AppConfig"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/instances": {
"get": {
"security": [
@@ -1468,11 +1725,289 @@
}
},
"definitions": {
"auth.PermissionMode": {
"type": "string",
"enum": [
"allow_all",
"per_instance"
],
"x-enum-varnames": [
"PermissionModeAllowAll",
"PermissionModePerInstance"
]
},
"config.AppConfig": {
"type": "object",
"properties": {
"auth": {
"$ref": "#/definitions/config.AuthConfig"
},
"backends": {
"$ref": "#/definitions/config.BackendConfig"
},
"build_time": {
"type": "string"
},
"commit_hash": {
"type": "string"
},
"data_dir": {
"description": "Directory where all llamactl data will be stored (database, instances, logs, etc.)",
"type": "string"
},
"database": {
"$ref": "#/definitions/config.DatabaseConfig"
},
"instances": {
"$ref": "#/definitions/config.InstancesConfig"
},
"local_node": {
"type": "string"
},
"nodes": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/config.NodeConfig"
}
},
"server": {
"$ref": "#/definitions/config.ServerConfig"
},
"version": {
"type": "string"
}
}
},
"config.AuthConfig": {
"type": "object",
"properties": {
"inference_keys": {
"description": "List of keys for OpenAI compatible inference endpoints",
"type": "array",
"items": {
"type": "string"
}
},
"management_keys": {
"description": "List of keys for management endpoints",
"type": "array",
"items": {
"type": "string"
}
},
"require_inference_auth": {
"description": "Require authentication for OpenAI compatible inference endpoints",
"type": "boolean"
},
"require_management_auth": {
"description": "Require authentication for management endpoints",
"type": "boolean"
}
}
},
"config.BackendConfig": {
"type": "object",
"properties": {
"llama-cpp": {
"$ref": "#/definitions/config.BackendSettings"
},
"mlx": {
"$ref": "#/definitions/config.BackendSettings"
},
"vllm": {
"$ref": "#/definitions/config.BackendSettings"
}
}
},
"config.BackendSettings": {
"type": "object",
"properties": {
"args": {
"type": "array",
"items": {
"type": "string"
}
},
"command": {
"type": "string"
},
"docker": {
"$ref": "#/definitions/config.DockerSettings"
},
"environment": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"response_headers": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
"config.DatabaseConfig": {
"type": "object",
"properties": {
"connection_max_lifetime": {
"type": "string",
"example": "1h"
},
"max_idle_connections": {
"type": "integer"
},
"max_open_connections": {
"description": "Connection settings",
"type": "integer"
},
"path": {
"description": "Database file path (relative to the top-level data_dir or absolute)",
"type": "string"
}
}
},
"config.DockerSettings": {
"type": "object",
"properties": {
"args": {
"type": "array",
"items": {
"type": "string"
}
},
"enabled": {
"type": "boolean"
},
"environment": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"image": {
"type": "string"
}
}
},
"config.InstancesConfig": {
"type": "object",
"properties": {
"auto_create_dirs": {
"description": "Automatically create the data directory if it doesn't exist",
"type": "boolean"
},
"configs_dir": {
"description": "Instance config directory override (relative to data_dir if not absolute)",
"type": "string"
},
"default_auto_restart": {
"description": "Default auto-restart setting for new instances",
"type": "boolean"
},
"default_max_restarts": {
"description": "Default max restarts for new instances",
"type": "integer"
},
"default_on_demand_start": {
"description": "Default on-demand start setting for new instances",
"type": "boolean"
},
"default_restart_delay": {
"description": "Default restart delay for new instances (in seconds)",
"type": "integer"
},
"enable_lru_eviction": {
"description": "Enable LRU eviction for instance logs",
"type": "boolean"
},
"logs_dir": {
"description": "Logs directory override (relative to data_dir if not absolute)",
"type": "string"
},
"max_instances": {
"description": "Maximum number of instances that can be created",
"type": "integer"
},
"max_running_instances": {
"description": "Maximum number of instances that can be running at the same time",
"type": "integer"
},
"on_demand_start_timeout": {
"description": "How long to wait for an instance to start on demand (in seconds)",
"type": "integer"
},
"port_range": {
"description": "Port range for instances (e.g., 8000,9000)",
"type": "array",
"items": {
"type": "integer"
}
},
"timeout_check_interval": {
"description": "Interval for checking instance timeouts (in minutes)",
"type": "integer"
}
}
},
"config.NodeConfig": {
"type": "object",
"properties": {
"address": {
"type": "string"
},
"api_key": {
"type": "string"
}
}
},
"config.ServerConfig": {
"type": "object",
"properties": {
"allowed_headers": {
"description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")",
"type": "array",
"items": {
"type": "string"
}
},
"allowed_origins": {
"description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")",
"type": "array",
"items": {
"type": "string"
}
},
"enable_swagger": {
"description": "Enable Swagger UI for API documentation",
"type": "boolean"
},
"host": {
"description": "Server host to bind to",
"type": "string"
},
"port": {
"description": "Server port to bind to",
"type": "integer"
},
"response_headers": {
"description": "Response headers to send with responses",
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
"instance.Instance": {
"type": "object",
"properties": {
"created": {
"description": "Unix timestamp when the instance was created",
"description": "Unix timestamp when instance was created",
"type": "integer"
},
"id": {
"type": "integer"
},
"name": {
@@ -1487,6 +2022,13 @@
"description": "Auto restart",
"type": "boolean"
},
"command_override": {
"type": "string"
},
"docker_enabled": {
"description": "Execution context overrides",
"type": "boolean"
},
"environment": {
"description": "Environment variables",
"type": "object",
@@ -1511,6 +2053,125 @@
}
}
},
"server.CreateKeyRequest": {
"type": "object",
"properties": {
"expiresAt": {
"type": "integer",
"format": "int64"
},
"instancePermissions": {
"type": "array",
"items": {
"$ref": "#/definitions/server.InstancePermission"
}
},
"name": {
"type": "string"
},
"permissionMode": {
"$ref": "#/definitions/auth.PermissionMode"
}
}
},
"server.CreateKeyResponse": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
"id": {
"type": "integer"
},
"key": {
"type": "string"
},
"last_used_at": {
"type": "integer"
},
"name": {
"type": "string"
},
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
},
"updated_at": {
"type": "integer"
},
"user_id": {
"type": "string"
}
}
},
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
},
"instance_name": {
"type": "string"
}
}
},
"server.KeyResponse": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
"id": {
"type": "integer"
},
"last_used_at": {
"type": "integer"
},
"name": {
"type": "string"
},
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
},
"updated_at": {
"type": "integer"
},
"user_id": {
"type": "string"
}
}
},
"server.NodeResponse": {
"type": "object",
"properties": {

View File

@@ -1,9 +1,204 @@
basePath: /api/v1
definitions:
auth.PermissionMode:
enum:
- allow_all
- per_instance
type: string
x-enum-varnames:
- PermissionModeAllowAll
- PermissionModePerInstance
config.AppConfig:
properties:
auth:
$ref: '#/definitions/config.AuthConfig'
backends:
$ref: '#/definitions/config.BackendConfig'
build_time:
type: string
commit_hash:
type: string
data_dir:
description: Directory where all llamactl data will be stored (database, instances,
logs, etc.)
type: string
database:
$ref: '#/definitions/config.DatabaseConfig'
instances:
$ref: '#/definitions/config.InstancesConfig'
local_node:
type: string
nodes:
additionalProperties:
$ref: '#/definitions/config.NodeConfig'
type: object
server:
$ref: '#/definitions/config.ServerConfig'
version:
type: string
type: object
config.AuthConfig:
properties:
inference_keys:
description: List of keys for OpenAI compatible inference endpoints
items:
type: string
type: array
management_keys:
description: List of keys for management endpoints
items:
type: string
type: array
require_inference_auth:
description: Require authentication for OpenAI compatible inference endpoints
type: boolean
require_management_auth:
description: Require authentication for management endpoints
type: boolean
type: object
config.BackendConfig:
properties:
llama-cpp:
$ref: '#/definitions/config.BackendSettings'
mlx:
$ref: '#/definitions/config.BackendSettings'
vllm:
$ref: '#/definitions/config.BackendSettings'
type: object
config.BackendSettings:
properties:
args:
items:
type: string
type: array
command:
type: string
docker:
$ref: '#/definitions/config.DockerSettings'
environment:
additionalProperties:
type: string
type: object
response_headers:
additionalProperties:
type: string
type: object
type: object
config.DatabaseConfig:
properties:
connection_max_lifetime:
example: 1h
type: string
max_idle_connections:
type: integer
max_open_connections:
description: Connection settings
type: integer
path:
description: Database file path (relative to the top-level data_dir or absolute)
type: string
type: object
config.DockerSettings:
properties:
args:
items:
type: string
type: array
enabled:
type: boolean
environment:
additionalProperties:
type: string
type: object
image:
type: string
type: object
config.InstancesConfig:
properties:
auto_create_dirs:
description: Automatically create the data directory if it doesn't exist
type: boolean
configs_dir:
description: Instance config directory override (relative to data_dir if not
absolute)
type: string
default_auto_restart:
description: Default auto-restart setting for new instances
type: boolean
default_max_restarts:
description: Default max restarts for new instances
type: integer
default_on_demand_start:
description: Default on-demand start setting for new instances
type: boolean
default_restart_delay:
description: Default restart delay for new instances (in seconds)
type: integer
enable_lru_eviction:
description: Enable LRU eviction for instance logs
type: boolean
logs_dir:
description: Logs directory override (relative to data_dir if not absolute)
type: string
max_instances:
description: Maximum number of instances that can be created
type: integer
max_running_instances:
description: Maximum number of instances that can be running at the same time
type: integer
on_demand_start_timeout:
description: How long to wait for an instance to start on demand (in seconds)
type: integer
port_range:
description: Port range for instances (e.g., 8000,9000)
items:
type: integer
type: array
timeout_check_interval:
description: Interval for checking instance timeouts (in minutes)
type: integer
type: object
config.NodeConfig:
properties:
address:
type: string
api_key:
type: string
type: object
config.ServerConfig:
properties:
allowed_headers:
description: Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type",
"X-CSRF-Token")
items:
type: string
type: array
allowed_origins:
description: Allowed origins for CORS (e.g., "http://localhost:3000")
items:
type: string
type: array
enable_swagger:
description: Enable Swagger UI for API documentation
type: boolean
host:
description: Server host to bind to
type: string
port:
description: Server port to bind to
type: integer
response_headers:
additionalProperties:
type: string
description: Response headers to send with responses
type: object
type: object
instance.Instance:
properties:
created:
description: Unix timestamp when the instance was created
description: Unix timestamp when instance was created
type: integer
id:
type: integer
name:
type: string
@@ -13,6 +208,11 @@ definitions:
auto_restart:
description: Auto restart
type: boolean
command_override:
type: string
docker_enabled:
description: Execution context overrides
type: boolean
environment:
additionalProperties:
type: string
@@ -30,6 +230,84 @@ definitions:
description: seconds
type: integer
type: object
server.CreateKeyRequest:
properties:
expiresAt:
format: int64
type: integer
instancePermissions:
items:
$ref: '#/definitions/server.InstancePermission'
type: array
name:
type: string
permissionMode:
$ref: '#/definitions/auth.PermissionMode'
type: object
server.CreateKeyResponse:
properties:
created_at:
type: integer
enabled:
type: boolean
expires_at:
type: integer
id:
type: integer
key:
type: string
last_used_at:
type: integer
name:
type: string
permission_mode:
$ref: '#/definitions/auth.PermissionMode'
updated_at:
type: integer
user_id:
type: string
type: object
server.InstancePermission:
properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id:
type: integer
type: object
server.KeyPermissionResponse:
properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id:
type: integer
instance_name:
type: string
type: object
server.KeyResponse:
properties:
created_at:
type: integer
enabled:
type: boolean
expires_at:
type: integer
id:
type: integer
last_used_at:
type: integer
name:
type: string
permission_mode:
$ref: '#/definitions/auth.PermissionMode'
updated_at:
type: integer
user_id:
type: string
type: object
server.NodeResponse:
properties:
address:
@@ -69,6 +347,156 @@ info:
title: llamactl API
version: "1.0"
paths:
/api/v1/auth/keys:
get:
description: Returns a list of all API keys for the system user (excludes key
hash and plain-text key)
produces:
- application/json
responses:
"200":
description: List of API keys
schema:
items:
$ref: '#/definitions/server.KeyResponse'
type: array
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List all API keys
tags:
- Keys
post:
consumes:
- application/json
description: Creates a new API key with the specified permissions and returns
the plain-text key (only shown once)
parameters:
- description: API key configuration
in: body
name: key
required: true
schema:
$ref: '#/definitions/server.CreateKeyRequest'
produces:
- application/json
responses:
"201":
description: Created API key with plain-text key
schema:
$ref: '#/definitions/server.CreateKeyResponse'
"400":
description: Invalid request body or validation error
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
summary: Create a new API key
tags:
- Keys
/api/v1/auth/keys/{id}:
delete:
description: Deletes an API key by ID
parameters:
- description: Key ID
in: path
name: id
required: true
type: integer
responses:
"204":
description: API key deleted successfully
"400":
description: Invalid key ID
schema:
type: string
"404":
description: API key not found
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Delete an API key
tags:
- Keys
get:
description: Returns details for a specific API key by ID (excludes key hash
and plain-text key)
parameters:
- description: Key ID
in: path
name: id
required: true
type: integer
produces:
- application/json
responses:
"200":
description: API key details
schema:
$ref: '#/definitions/server.KeyResponse'
"400":
description: Invalid key ID
schema:
type: string
"404":
description: API key not found
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get details of a specific API key
tags:
- Keys
/api/v1/auth/keys/{id}/permissions:
get:
description: Returns the instance-level permissions for a specific API key (includes
instance names)
parameters:
- description: Key ID
in: path
name: id
required: true
type: integer
produces:
- application/json
responses:
"200":
description: List of key permissions
schema:
items:
$ref: '#/definitions/server.KeyPermissionResponse'
type: array
"400":
description: Invalid key ID
schema:
type: string
"404":
description: API key not found
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get API key permissions
tags:
- Keys
/api/v1/backends/llama-cpp/devices:
get:
description: Returns a list of available devices for the llama server
@@ -216,6 +644,23 @@ paths:
summary: Parse vllm serve command
tags:
- Backends
/api/v1/config:
get:
description: Returns the current server configuration (sanitized)
responses:
"200":
description: Sanitized configuration
schema:
$ref: '#/definitions/config.AppConfig'
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get server configuration
tags:
- System
/api/v1/instances:
get:
description: Returns a list of all instances managed by the server

12
go.mod
View File

@@ -5,8 +5,11 @@ go 1.24.5
require (
github.com/go-chi/chi/v5 v5.2.2
github.com/go-chi/cors v1.2.2
github.com/golang-migrate/migrate/v4 v4.19.1
github.com/mattn/go-sqlite3 v1.14.24
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.5
golang.org/x/crypto v0.45.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -19,8 +22,9 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/swaggo/files v1.0.1 // indirect
golang.org/x/mod v0.26.0 // indirect
golang.org/x/net v0.42.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/tools v0.35.0 // indirect
golang.org/x/mod v0.29.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/tools v0.38.0 // indirect
)

34
go.sum
View File

@@ -1,7 +1,7 @@
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE=
@@ -14,6 +14,8 @@ github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9Z
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU=
github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0=
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@@ -22,10 +24,14 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
@@ -39,25 +45,29 @@ github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=
golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -68,8 +78,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=

73
pkg/auth/hash.go Normal file
View File

@@ -0,0 +1,73 @@
package auth
import (
"crypto/rand"
"crypto/subtle"
"encoding/base64"
"fmt"
"strings"
"golang.org/x/crypto/argon2"
)
// Argon2id cost parameters. HashKey passes them to argon2.IDKey and writes
// memory, time and threads into the encoded hash string ("m=", "t=", "p=").
const (
// Argon2 parameters
time uint32 = 1 // time cost, emitted as "t=" in the encoded hash
memory uint32 = 64 * 1024 // 64 MB
threads uint8 = 4 // parallelism, emitted as "p=" in the encoded hash
keyLen uint32 = 32 // length in bytes of the derived hash
saltLen uint32 = 16 // length in bytes of the random salt generated per hash
)
// HashKey derives an Argon2id hash of plainTextKey under a freshly generated
// random salt and returns it in the encoded form
//
//	$argon2id$v=19$m=<memory>,t=<time>,p=<threads>$<base64-salt>$<base64-hash>
//
// where salt and hash use unpadded standard base64. An error is returned only
// when the random salt cannot be read.
func HashKey(plainTextKey string) (string, error) {
	// A new salt per call means hashing the same key twice yields different strings.
	saltBytes := make([]byte, saltLen)
	_, readErr := rand.Read(saltBytes)
	if readErr != nil {
		return "", fmt.Errorf("failed to generate salt: %w", readErr)
	}

	derived := argon2.IDKey([]byte(plainTextKey), saltBytes, time, memory, threads, keyLen)

	// The cost parameters are stored next to the salt so VerifyKey can
	// recompute the hash without relying on the package constants.
	enc := base64.RawStdEncoding
	encoded := fmt.Sprintf(
		"$argon2id$v=19$m=%d,t=%d,p=%d$%s$%s",
		memory, time, threads,
		enc.EncodeToString(saltBytes),
		enc.EncodeToString(derived),
	)
	return encoded, nil
}
// VerifyKey reports whether plainTextKey matches the given encoded Argon2id hash.
//
// hash is expected in the form produced by HashKey:
//
//	$argon2id$v=19$m=<memory>,t=<time>,p=<threads>$<base64-salt>$<base64-hash>
//
// The cost parameters and the output length are taken from the hash itself, so
// hashes created with different parameters still verify. Any malformed hash,
// unsupported version, or out-of-range parameter yields false; the function
// never panics on bad input.
func VerifyKey(plainTextKey, hash string) bool {
	// Splitting on "$" yields an empty leading element followed by five fields.
	parts := strings.Split(hash, "$")
	if len(parts) != 6 || parts[1] != "argon2id" {
		return false
	}

	// Locals are named so they do not shadow the package-level Argon2 constants.
	var version, timeCost, memoryCost, parallelism int
	if _, err := fmt.Sscanf(parts[2], "v=%d", &version); err != nil || version != 19 {
		return false
	}
	if _, err := fmt.Sscanf(parts[3], "m=%d,t=%d,p=%d", &memoryCost, &timeCost, &parallelism); err != nil {
		return false
	}

	// argon2.IDKey panics when time or threads is below 1, and the narrowing
	// conversions below would silently wrap negative or oversized values, so
	// reject anything that does not fit the target types.
	const maxUint32 = 1<<32 - 1
	if timeCost < 1 || uint64(timeCost) > maxUint32 {
		return false
	}
	if memoryCost < 0 || uint64(memoryCost) > maxUint32 {
		return false
	}
	if parallelism < 1 || parallelism > 255 {
		return false
	}

	salt, err := base64.RawStdEncoding.DecodeString(parts[4])
	if err != nil {
		return false
	}
	expectedHash, err := base64.RawStdEncoding.DecodeString(parts[5])
	if err != nil {
		return false
	}
	// An empty hash segment would request a zero-length key; treat it as malformed.
	if len(expectedHash) == 0 {
		return false
	}

	computedHash := argon2.IDKey(
		[]byte(plainTextKey),
		salt,
		uint32(timeCost),
		uint32(memoryCost),
		uint8(parallelism),
		uint32(len(expectedHash)),
	)

	// Constant-time comparison avoids leaking how many leading bytes matched.
	return subtle.ConstantTimeCompare(computedHash, expectedHash) == 1
}

48
pkg/auth/key.go Normal file
View File

@@ -0,0 +1,48 @@
package auth
import (
"crypto/rand"
"encoding/hex"
"fmt"
)
// PermissionMode is the string-valued permission setting stored on an APIKey.
type PermissionMode string
// Known PermissionMode values.
const (
// NOTE(review): presumably grants the key access to every instance — confirm against the permission checks.
PermissionModeAllowAll PermissionMode = "allow_all"
// NOTE(review): presumably restricts the key to instances listed in its KeyPermission entries — confirm.
PermissionModePerInstance PermissionMode = "per_instance"
)
// APIKey is the record describing one API key. It holds a KeyHash field and
// no field for the plain-text key.
type APIKey struct {
ID int
KeyHash string // presumably the encoded Argon2id string produced by HashKey — confirm
Name string
UserID string
PermissionMode PermissionMode
ExpiresAt *int64 // optional; NOTE(review): nil presumably means the key never expires, unit looks like Unix time — confirm
Enabled bool
CreatedAt int64 // NOTE(review): presumably a Unix timestamp — confirm unit
UpdatedAt int64 // NOTE(review): presumably a Unix timestamp — confirm unit
LastUsedAt *int64 // optional; NOTE(review): nil presumably means the key has not been used yet — confirm
}
// KeyPermission associates a key ID with an instance ID and a CanInfer flag.
type KeyPermission struct {
KeyID int
InstanceID int
CanInfer bool // NOTE(review): presumably whether the key may run inference on the instance — confirm
}
// GenerateKey returns a new API key of the form "<prefix>-<hex>", where <hex>
// is 64 lowercase hexadecimal characters encoding 32 bytes read from
// crypto/rand. The prefix is used verbatim. An error is returned only when
// the random source cannot be read.
func GenerateKey(prefix string) (string, error) {
	var entropy [32]byte
	if _, err := rand.Read(entropy[:]); err != nil {
		return "", fmt.Errorf("failed to generate random bytes: %w", err)
	}

	// 32 random bytes encode to exactly 64 hex characters.
	return prefix + "-" + hex.EncodeToString(entropy[:]), nil
}

View File

@@ -79,26 +79,28 @@ func (o *Options) UnmarshalJSON(data []byte) error {
}
func (o *Options) MarshalJSON() ([]byte, error) {
type Alias Options
aux := &struct {
*Alias
}{
Alias: (*Alias)(o),
}
// Get backend and marshal it
var backendOptions map[string]any
backend := o.getBackend()
if backend != nil {
optionsData, err := json.Marshal(backend)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
if err := json.Unmarshal(optionsData, &aux.BackendOptions); err != nil {
// Create a new map to avoid concurrent map writes
backendOptions = make(map[string]any)
if err := json.Unmarshal(optionsData, &backendOptions); err != nil {
return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err)
}
}
return json.Marshal(aux)
return json.Marshal(&struct {
BackendType BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
}{
BackendType: o.BackendType,
BackendOptions: backendOptions,
})
}
// setBackendOptions stores the backend in the appropriate typed field
@@ -140,32 +142,54 @@ func (o *Options) getBackend() backend {
}
}
func (o *Options) isDockerEnabled(backend *config.BackendSettings) bool {
if backend.Docker != nil && backend.Docker.Enabled && o.BackendType != BackendTypeMlxLm {
return true
// isDockerEnabled checks if Docker is enabled with an optional override
func (o *Options) isDockerEnabled(backend *config.BackendSettings, dockerEnabledOverride *bool) bool {
// Check if backend supports Docker
if backend.Docker == nil {
return false
}
return false
// MLX doesn't support Docker
if o.BackendType == BackendTypeMlxLm {
return false
}
// Check for instance-level override
if dockerEnabledOverride != nil {
return *dockerEnabledOverride
}
// Fall back to config value
return backend.Docker.Enabled
}
func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig) bool {
func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig, dockerEnabled *bool) bool {
backendSettings := o.getBackendSettings(backendConfig)
return o.isDockerEnabled(backendSettings)
return o.isDockerEnabled(backendSettings, dockerEnabled)
}
// GetCommand builds the command to run the backend
func (o *Options) GetCommand(backendConfig *config.BackendConfig) string {
func (o *Options) GetCommand(backendConfig *config.BackendConfig, dockerEnabled *bool, commandOverride string) string {
backendSettings := o.getBackendSettings(backendConfig)
if o.isDockerEnabled(backendSettings) {
// Determine if Docker is enabled
useDocker := o.isDockerEnabled(backendSettings, dockerEnabled)
if useDocker {
return "docker"
}
// Check for command override (only applies when not in Docker mode)
if commandOverride != "" {
return commandOverride
}
// Fall back to config command
return backendSettings.Command
}
// BuildCommandArgs builds command line arguments for the backend
func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string {
func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig, dockerEnabled *bool) []string {
var args []string
@@ -175,7 +199,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string
return args
}
if o.isDockerEnabled(backendSettings) {
if o.isDockerEnabled(backendSettings, dockerEnabled) {
// For Docker, start with Docker args
args = append(args, backendSettings.Docker.Args...)
args = append(args, backendSettings.Docker.Image)
@@ -191,7 +215,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string
}
// BuildEnvironment builds the environment variables for the backend process
func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environment map[string]string) map[string]string {
func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, dockerEnabled *bool, environment map[string]string) map[string]string {
backendSettings := o.getBackendSettings(backendConfig)
env := map[string]string{}
@@ -200,7 +224,7 @@ func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environm
maps.Copy(env, backendSettings.Environment)
}
if o.isDockerEnabled(backendSettings) {
if o.isDockerEnabled(backendSettings, dockerEnabled) {
if backendSettings.Docker.Environment != nil {
maps.Copy(env, backendSettings.Docker.Environment)
}

View File

@@ -93,3 +93,22 @@ func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []st
return "docker", dockerArgs, nil
}
// convertExtraArgsToFlags converts map[string]string to command flags.
//
// Every key is emitted with a "--" prefix. Keys are emitted in ascending
// lexical order so that the same map always yields the same argument list.
//
// Empty values become boolean flags: {"flag": ""} → ["--flag"]
// Non-empty values: {"flag": "value"} → ["--flag", "value"]
//
// A nil or empty map yields a nil slice.
func convertExtraArgsToFlags(extraArgs map[string]string) []string {
	if len(extraArgs) == 0 {
		return nil
	}

	// Go deliberately randomizes map iteration order, so ranging over the map
	// directly would produce a different command line on every call. Collect
	// the keys in sorted order first. The insertion sort is done in place so
	// that this function does not require an additional import; extra-args
	// maps are expected to be tiny.
	keys := make([]string, 0, len(extraArgs))
	for key := range extraArgs {
		keys = append(keys, key)
		for i := len(keys) - 1; i > 0 && keys[i] < keys[i-1]; i-- {
			keys[i], keys[i-1] = keys[i-1], keys[i]
		}
	}

	// At most two entries (flag + value) are produced per key.
	args := make([]string, 0, 2*len(keys))
	for _, key := range keys {
		args = append(args, "--"+key)
		if value := extraArgs[key]; value != "" {
			// Value flag: the value follows the flag as a separate argument.
			args = append(args, value)
		}
	}
	return args
}

View File

@@ -5,7 +5,6 @@ import (
"fmt"
"llamactl/pkg/validation"
"reflect"
"strconv"
)
// llamaMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
@@ -41,7 +40,7 @@ type LlamaServerOptions struct {
BatchSize int `json:"batch_size,omitempty"`
UBatchSize int `json:"ubatch_size,omitempty"`
Keep int `json:"keep,omitempty"`
FlashAttn bool `json:"flash_attn,omitempty"`
FlashAttn string `json:"flash_attn,omitempty"`
NoPerf bool `json:"no_perf,omitempty"`
Escape bool `json:"escape,omitempty"`
NoEscape bool `json:"no_escape,omitempty"`
@@ -187,6 +186,10 @@ type LlamaServerOptions struct {
FIMQwen7BDefault bool `json:"fim_qwen_7b_default,omitempty"`
FIMQwen7BSpec bool `json:"fim_qwen_7b_spec,omitempty"`
FIMQwen14BSpec bool `json:"fim_qwen_14b_spec,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/llama.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
@@ -209,6 +212,15 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// Copy to our struct
*o = LlamaServerOptions(temp)
// Track which fields we've processed
processedFields := make(map[string]bool)
// Get all known canonical field names from struct tags
knownFields := getKnownFieldNames(o)
for field := range knownFields {
processedFields[field] = true
}
// Handle alternative field names
fieldMappings := map[string]string{
// Common params
@@ -220,7 +232,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
"c": "ctx_size", // -c, --ctx-size N
"n": "predict", // -n, --predict N
"n-predict": "predict", // --n-predict N
"n_predict": "predict", // -n-predict N
"b": "batch_size", // -b, --batch-size N
"ub": "ubatch_size", // -ub, --ubatch-size N
"fa": "flash_attn", // -fa, --flash-attn
@@ -234,7 +246,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"dev": "device", // -dev, --device <dev1,dev2,..>
"ot": "override_tensor", // --override-tensor, -ot
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
"n_gpu_layers": "gpu_layers", // --n-gpu-layers N
"sm": "split_mode", // -sm, --split-mode
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
"mg": "main_gpu", // -mg, --main-gpu INDEX
@@ -250,9 +262,9 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
"hft": "hf_token", // -hft, --hf-token TOKEN
"v": "verbose", // -v, --verbose, --log-verbose
"log-verbose": "verbose", // --log-verbose
"log_verbose": "verbose", // --log-verbose
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
"log-verbosity": "verbosity", // --log-verbosity N
"log_verbosity": "verbosity", // --log-verbosity N
// Sampling params
"s": "seed", // -s, --seed SEED
@@ -269,21 +281,23 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
"rerank": "reranking", // --reranking
"to": "timeout", // -to, --timeout N
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
"draft": "draft-max", // -draft, --draft-max N
"draft-n": "draft-max", // --draft-n-max N
"draft-n-min": "draft_min", // --draft-n-min N
"draft": "draft_max", // -draft, --draft-max N
"draft_n": "draft_max", // --draft-n-max N
"draft_n_min": "draft_min", // --draft-n-min N
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
"devd": "device_draft", // -devd, --device-draft
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"n_gpu_layers_draft": "gpu_layers_draft", // --n-gpu-layers-draft N
"md": "model_draft", // -md, --model-draft FNAME
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
}
// Process alternative field names
// Process alternative field names and mark them as processed
for altName, canonicalName := range fieldMappings {
processedFields[altName] = true // Mark alternatives as known
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
@@ -294,36 +308,21 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
setFieldValue(field, value)
}
}
}
// Collect unknown fields into ExtraArgs
if o.ExtraArgs == nil {
o.ExtraArgs = make(map[string]string)
}
for key, value := range raw {
if !processedFields[key] {
o.ExtraArgs[key] = fmt.Sprintf("%v", value)
}
}
return nil
}
@@ -354,6 +353,18 @@ func (o *LlamaServerOptions) Validate() error {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
return nil
}
@@ -361,7 +372,12 @@ func (o *LlamaServerOptions) Validate() error {
func (o *LlamaServerOptions) BuildCommandArgs() []string {
// Llama uses multiple flags for arrays by default (not comma-separated)
// Use package-level llamaMultiValuedFlags variable
return BuildCommandArgs(o, llamaMultiValuedFlags)
args := BuildCommandArgs(o, llamaMultiValuedFlags)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
func (o *LlamaServerOptions) BuildDockerArgs() []string {

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/testutil"
"reflect"
"testing"
@@ -33,12 +34,11 @@ func TestLlamaCppBuildCommandArgs_BooleanFields(t *testing.T) {
{
name: "multiple booleans",
options: backends.LlamaServerOptions{
Verbose: true,
FlashAttn: true,
Mlock: false,
NoMmap: true,
Verbose: true,
Mlock: false,
NoMmap: true,
},
expected: []string{"--verbose", "--flash-attn", "--no-mmap"},
expected: []string{"--verbose", "--no-mmap"},
excluded: []string{"--mlock"},
},
}
@@ -346,7 +346,7 @@ func TestParseLlamaCommand(t *testing.T) {
},
{
name: "multiple value types",
command: "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap",
command: "llama-server --model /test/model.gguf --n-gpu-layers 32 --temp 0.7 --verbose --no-mmap",
expectErr: false,
validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
if opts.Model != "/test/model.gguf" {
@@ -434,3 +434,195 @@ func TestParseLlamaCommandArrays(t *testing.T) {
}
}
}
// TestLlamaCppBuildCommandArgs_ExtraArgs verifies that entries supplied via
// ExtraArgs show up in the argument list produced by BuildCommandArgs, both
// for a value-less (boolean) flag and for a flag that carries a value.
func TestLlamaCppBuildCommandArgs_ExtraArgs(t *testing.T) {
	extra := map[string]string{
		"flash-attn": "",               // boolean flag
		"log-file":   "/logs/test.log", // value flag
	}
	opts := backends.LlamaServerOptions{
		Model:     "/models/test.gguf",
		ExtraArgs: extra,
	}

	built := opts.BuildCommandArgs()

	// The boolean flag must be present on its own.
	if found := testutil.Contains(built, "--flash-attn"); !found {
		t.Error("Expected --flash-attn flag not found")
	}

	// The value flag needs both its name and its value somewhere in the list.
	hasLogFlag := testutil.Contains(built, "--log-file")
	hasLogValue := hasLogFlag && testutil.Contains(built, "/logs/test.log")
	if !hasLogValue {
		t.Error("Expected --log-file flag or value not found")
	}
}
// TestParseLlamaCommand_ExtraArgs checks that ParseCommand keeps recognised
// llama-server flags in their typed fields while flags it does not recognise
// are collected in ExtraArgs (value-less flags are expected as "true").
func TestParseLlamaCommand_ExtraArgs(t *testing.T) {
	type parseCase struct {
		name      string
		command   string
		expectErr bool
		validate  func(*testing.T, *backends.LlamaServerOptions)
	}

	cases := []parseCase{
		{
			name:      "extra args with known fields",
			command:   "llama-server --model /path/to/model.gguf --gpu-layers 32 --unknown-flag value --another-bool-flag",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
				if got := opts.Model; got != "/path/to/model.gguf" {
					t.Errorf("expected model '/path/to/model.gguf', got '%s'", got)
				}
				if got := opts.GPULayers; got != 32 {
					t.Errorf("expected gpu_layers 32, got %d", got)
				}

				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["unknown_flag"]; !found || got != "value" {
					t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", got)
				}
				if got, found := extras["another_bool_flag"]; !found || got != "true" {
					t.Errorf("expected extra_args[another_bool_flag]='true', got '%s'", got)
				}
			},
		},
		{
			name:      "extra args with alternative field names",
			command:   "llama-server -m /model.gguf -ngl 16 --custom-arg test --new-feature",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
				// Short aliases must still populate the typed fields.
				if got := opts.Model; got != "/model.gguf" {
					t.Errorf("expected model '/model.gguf', got '%s'", got)
				}
				if got := opts.GPULayers; got != 16 {
					t.Errorf("expected gpu_layers 16, got %d", got)
				}

				// Everything else belongs in ExtraArgs.
				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["custom_arg"]; !found || got != "test" {
					t.Errorf("expected extra_args[custom_arg]='test', got '%s'", got)
				}
				if got, found := extras["new_feature"]; !found || got != "true" {
					t.Errorf("expected extra_args[new_feature]='true', got '%s'", got)
				}
			},
		},
		{
			name:      "only extra args",
			command:   "llama-server --experimental-feature --beta-mode enabled",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["experimental_feature"]; !found || got != "true" {
					t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", got)
				}
				if got, found := extras["beta_mode"]; !found || got != "enabled" {
					t.Errorf("expected extra_args[beta_mode]='enabled', got '%s'", got)
				}
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var parser backends.LlamaServerOptions
			result, err := parser.ParseCommand(tc.command)

			switch {
			case tc.expectErr && err == nil:
				t.Error("expected error but got none")
				return
			case !tc.expectErr && err != nil:
				t.Errorf("unexpected error: %v", err)
				return
			case tc.expectErr || tc.validate == nil:
				// Nothing further to inspect for this case.
				return
			}

			parsed, ok := result.(*backends.LlamaServerOptions)
			if !ok {
				t.Fatal("result is not *LlamaServerOptions")
			}
			tc.validate(t, parsed)
		})
	}
}
// TestLlamaCppGetCommand_WithOverrides exercises Options.GetCommand for the
// llama.cpp backend across combinations of the Docker setting in the backend
// config, the per-call Docker override and the per-call command override.
func TestLlamaCppGetCommand_WithOverrides(t *testing.T) {
	type overrideCase struct {
		name            string
		dockerInConfig  bool
		dockerEnabled   *bool
		commandOverride string
		expected        string
	}

	const configuredCommand = "/usr/bin/llama-server"

	cases := []overrideCase{
		{
			name:            "no overrides - use config command",
			dockerInConfig:  false,
			dockerEnabled:   nil,
			commandOverride: "",
			expected:        "/usr/bin/llama-server",
		},
		{
			name:            "override to enable docker",
			dockerInConfig:  false,
			dockerEnabled:   boolPtr(true),
			commandOverride: "",
			expected:        "docker",
		},
		{
			name:            "override to disable docker",
			dockerInConfig:  true,
			dockerEnabled:   boolPtr(false),
			commandOverride: "",
			expected:        "/usr/bin/llama-server",
		},
		{
			name:            "command override",
			dockerInConfig:  false,
			dockerEnabled:   nil,
			commandOverride: "/custom/llama-server",
			expected:        "/custom/llama-server",
		},
		{
			name:            "docker takes precedence over command override",
			dockerInConfig:  false,
			dockerEnabled:   boolPtr(true),
			commandOverride: "/custom/llama-server",
			expected:        "docker",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Fresh config per case; only the Docker "enabled" bit varies.
			docker := &config.DockerSettings{
				Enabled: tc.dockerInConfig,
				Image:   "test-image",
			}
			cfg := &config.BackendConfig{
				LlamaCpp: config.BackendSettings{
					Command: configuredCommand,
					Docker:  docker,
				},
			}

			subject := backends.Options{
				BackendType:        backends.BackendTypeLlamaCpp,
				LlamaServerOptions: &backends.LlamaServerOptions{Model: "test-model.gguf"},
			}

			got := subject.GetCommand(cfg, tc.dockerEnabled, tc.commandOverride)
			if got != tc.expected {
				t.Errorf("GetCommand() = %v, want %v", got, tc.expected)
			}
		})
	}
}
// boolPtr returns a pointer to a fresh copy of b, which is handy for filling
// optional *bool parameters in table-driven tests.
func boolPtr(b bool) *bool {
	value := b
	return &value
}

View File

@@ -1,6 +1,7 @@
package backends
import (
"encoding/json"
"fmt"
"llamactl/pkg/validation"
)
@@ -29,6 +30,46 @@ type MlxServerOptions struct {
TopK int `json:"top_k,omitempty"`
MinP float64 `json:"min_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/mlx.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON decodes data into the receiver and stores every top-level key
// that does not match a json struct tag of MlxServerOptions in ExtraArgs,
// formatted with "%v". After a successful call ExtraArgs is never nil.
func (o *MlxServerOptions) UnmarshalJSON(data []byte) error {
	// Decode into a generic map so that every key of the payload is visible,
	// including the ones the struct has no field for.
	var payload map[string]any
	if err := json.Unmarshal(data, &payload); err != nil {
		return err
	}

	// plainOptions has the same fields but none of the methods, so decoding
	// into it uses the default decoder instead of re-entering this method.
	type plainOptions MlxServerOptions
	var decoded plainOptions
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	*o = MlxServerOptions(decoded)

	if o.ExtraArgs == nil {
		o.ExtraArgs = make(map[string]string)
	}

	// Anything not declared through a json tag is treated as an extra argument.
	recognised := getKnownFieldNames(o)
	for name, rawValue := range payload {
		if recognised[name] {
			continue
		}
		o.ExtraArgs[name] = fmt.Sprintf("%v", rawValue)
	}

	return nil
}
func (o *MlxServerOptions) GetPort() int {
@@ -57,13 +98,30 @@ func (o *MlxServerOptions) Validate() error {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
return nil
}
// BuildCommandArgs converts to command line arguments
func (o *MlxServerOptions) BuildCommandArgs() []string {
multipleFlags := map[string]struct{}{} // MLX doesn't currently have []string fields
return BuildCommandArgs(o, multipleFlags)
args := BuildCommandArgs(o, multipleFlags)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
func (o *MlxServerOptions) BuildDockerArgs() []string {

View File

@@ -2,6 +2,7 @@ package backends_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/testutil"
"testing"
)
@@ -202,3 +203,129 @@ func TestMlxBuildCommandArgs_ZeroValues(t *testing.T) {
}
}
}
// TestParseMlxCommand_ExtraArgs checks that ParseCommand fills the typed MLX
// fields for flags it recognises and collects unrecognised flags in ExtraArgs
// (value-less flags are expected as "true").
func TestParseMlxCommand_ExtraArgs(t *testing.T) {
	type parseCase struct {
		name      string
		command   string
		expectErr bool
		validate  func(*testing.T, *backends.MlxServerOptions)
	}

	cases := []parseCase{
		{
			name:      "extra args with known fields",
			command:   "mlx_lm.server --model /path/to/model --port 8080 --unknown-flag value --new-bool-flag",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
				if got := opts.Model; got != "/path/to/model" {
					t.Errorf("expected model '/path/to/model', got '%s'", got)
				}
				if got := opts.Port; got != 8080 {
					t.Errorf("expected port 8080, got %d", got)
				}

				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["unknown_flag"]; !found || got != "value" {
					t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", got)
				}
				if got, found := extras["new_bool_flag"]; !found || got != "true" {
					t.Errorf("expected extra_args[new_bool_flag]='true', got '%s'", got)
				}
			},
		},
		{
			name:      "only extra args",
			command:   "mlx_lm.server --experimental-feature --custom-param test",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["experimental_feature"]; !found || got != "true" {
					t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", got)
				}
				if got, found := extras["custom_param"]; !found || got != "test" {
					t.Errorf("expected extra_args[custom_param]='test', got '%s'", got)
				}
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var parser backends.MlxServerOptions
			result, err := parser.ParseCommand(tc.command)

			switch {
			case tc.expectErr && err == nil:
				t.Error("expected error but got none")
				return
			case !tc.expectErr && err != nil:
				t.Errorf("unexpected error: %v", err)
				return
			case tc.expectErr || tc.validate == nil:
				// Nothing further to inspect for this case.
				return
			}

			parsed, ok := result.(*backends.MlxServerOptions)
			if !ok {
				t.Fatal("result is not *MlxServerOptions")
			}
			tc.validate(t, parsed)
		})
	}
}
// TestMlxGetCommand_NoDocker asserts that Options.GetCommand never answers
// "docker" for the MLX backend: neither a Docker-enabled backend config nor a
// per-call Docker override may win, while a command override is honoured.
func TestMlxGetCommand_NoDocker(t *testing.T) {
	type commandCase struct {
		name            string
		dockerEnabled   *bool
		commandOverride string
		expected        string
	}

	// Docker is switched on in the config on purpose; MLX is expected to
	// disregard it.
	docker := &config.DockerSettings{
		Enabled: true, // Even if enabled in config
		Image:   "test-image",
	}
	cfg := &config.BackendConfig{
		MLX: config.BackendSettings{
			Command: "/usr/bin/mlx-server",
			Docker:  docker,
		},
	}

	subject := backends.Options{
		BackendType:      backends.BackendTypeMlxLm,
		MlxServerOptions: &backends.MlxServerOptions{Model: "test-model"},
	}

	cases := []commandCase{
		{
			name:            "ignores docker in config",
			dockerEnabled:   nil,
			commandOverride: "",
			expected:        "/usr/bin/mlx-server",
		},
		{
			name:            "ignores docker override",
			dockerEnabled:   boolPtr(true),
			commandOverride: "",
			expected:        "/usr/bin/mlx-server",
		},
		{
			name:            "respects command override",
			dockerEnabled:   nil,
			commandOverride: "/custom/mlx-server",
			expected:        "/custom/mlx-server",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := subject.GetCommand(cfg, tc.dockerEnabled, tc.commandOverride)
			if got != tc.expected {
				t.Errorf("GetCommand() = %v, want %v", got, tc.expected)
			}
		})
	}
}

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
@@ -211,3 +212,65 @@ func parseValue(value string) any {
// Return as string
return value
}
// setFieldValue sets a field value using reflection, handling type conversions
// Used by UnmarshalJSON implementations to handle alternative field names
func setFieldValue(field reflect.Value, value any) {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
case reflect.Slice:
// Handle string slices
if field.Type().Elem().Kind() == reflect.String {
if slice, ok := value.([]any); ok {
strSlice := make([]string, 0, len(slice))
for _, v := range slice {
if s, ok := v.(string); ok {
strSlice = append(strSlice, s)
}
}
field.Set(reflect.ValueOf(strSlice))
}
}
}
}
// getKnownFieldNames returns the set of JSON names declared through `json`
// struct tags on the struct that v points to.
// Used by UnmarshalJSON implementations to identify unknown fields for ExtraArgs.
//
// v must be a pointer to a struct. Fields without a json tag and fields tagged
// "-" are left out; for tags of the form "name,omitempty" only the part before
// the first comma is recorded.
func getKnownFieldNames(v any) map[string]bool {
	known := make(map[string]bool)

	structType := reflect.TypeOf(v).Elem()
	for idx, total := 0, structType.NumField(); idx < total; idx++ {
		tag := structType.Field(idx).Tag.Get("json")
		if tag == "" || tag == "-" {
			continue
		}
		// Drop options such as ",omitempty".
		name, _, _ := strings.Cut(tag, ",")
		known[name] = true
	}

	return known
}

View File

@@ -1,6 +1,7 @@
package backends
import (
"encoding/json"
"fmt"
"llamactl/pkg/validation"
)
@@ -142,6 +143,46 @@ type VllmServerOptions struct {
OverridePoolingConfig string `json:"override_pooling_config,omitempty"`
OverrideNeuronConfig string `json:"override_neuron_config,omitempty"`
OverrideKVCacheALIGNSize int `json:"override_kv_cache_align_size,omitempty"`
// ExtraArgs are additional command line arguments.
// Example: {"verbose": "", "log-file": "/logs/vllm.log"}
ExtraArgs map[string]string `json:"extra_args,omitempty"`
}
// UnmarshalJSON decodes data into the receiver and stores every top-level key
// that does not match a json struct tag of VllmServerOptions in ExtraArgs,
// formatted with "%v". After a successful call ExtraArgs is never nil.
func (o *VllmServerOptions) UnmarshalJSON(data []byte) error {
	// Decode into a generic map so that every key of the payload is visible,
	// including the ones the struct has no field for.
	var payload map[string]any
	if err := json.Unmarshal(data, &payload); err != nil {
		return err
	}

	// plainOptions has the same fields but none of the methods, so decoding
	// into it uses the default decoder instead of re-entering this method.
	type plainOptions VllmServerOptions
	var decoded plainOptions
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	*o = VllmServerOptions(decoded)

	if o.ExtraArgs == nil {
		o.ExtraArgs = make(map[string]string)
	}

	// Anything not declared through a json tag is treated as an extra argument.
	recognised := getKnownFieldNames(o)
	for name, rawValue := range payload {
		if recognised[name] {
			continue
		}
		o.ExtraArgs[name] = fmt.Sprintf("%v", rawValue)
	}

	return nil
}
func (o *VllmServerOptions) GetPort() int {
@@ -171,6 +212,18 @@ func (o *VllmServerOptions) Validate() error {
return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
}
// Validate extra_args keys and values
for key, value := range o.ExtraArgs {
if err := validation.ValidateStringForInjection(key); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args key %q: %w", key, err))
}
if value != "" {
if err := validation.ValidateStringForInjection(value); err != nil {
return validation.ValidationError(fmt.Errorf("extra_args value for %q: %w", key, err))
}
}
}
return nil
}
@@ -193,6 +246,9 @@ func (o *VllmServerOptions) BuildCommandArgs() []string {
flagArgs := BuildCommandArgs(&optionsCopy, vllmMultiValuedFlags)
args = append(args, flagArgs...)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}
@@ -203,6 +259,9 @@ func (o *VllmServerOptions) BuildDockerArgs() []string {
flagArgs := BuildCommandArgs(o, vllmMultiValuedFlags)
args = append(args, flagArgs...)
// Append extra args at the end
args = append(args, convertExtraArgsToFlags(o.ExtraArgs)...)
return args
}

View File

@@ -321,3 +321,94 @@ func TestVllmBuildCommandArgs_PositionalModel(t *testing.T) {
t.Errorf("Expected --port 8080 not found in %v", args)
}
}
// TestParseVllmCommand_ExtraArgs checks that ParseCommand fills the typed vLLM
// fields (including the model given positionally or via --model) and collects
// unrecognised flags in ExtraArgs (value-less flags are expected as "true").
func TestParseVllmCommand_ExtraArgs(t *testing.T) {
	type parseCase struct {
		name      string
		command   string
		expectErr bool
		validate  func(*testing.T, *backends.VllmServerOptions)
	}

	cases := []parseCase{
		{
			name:      "extra args with known fields",
			command:   "vllm serve llama-model --tensor-parallel-size 2 --unknown-flag value --new-bool-flag",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if got := opts.Model; got != "llama-model" {
					t.Errorf("expected model 'llama-model', got '%s'", got)
				}
				if got := opts.TensorParallelSize; got != 2 {
					t.Errorf("expected tensor_parallel_size 2, got %d", got)
				}

				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["unknown_flag"]; !found || got != "value" {
					t.Errorf("expected extra_args[unknown_flag]='value', got '%s'", got)
				}
				if got, found := extras["new_bool_flag"]; !found || got != "true" {
					t.Errorf("expected extra_args[new_bool_flag]='true', got '%s'", got)
				}
			},
		},
		{
			name:      "only extra args",
			command:   "vllm serve model --experimental-feature --custom-param test",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["experimental_feature"]; !found || got != "true" {
					t.Errorf("expected extra_args[experimental_feature]='true', got '%s'", got)
				}
				if got, found := extras["custom_param"]; !found || got != "test" {
					t.Errorf("expected extra_args[custom_param]='test', got '%s'", got)
				}
			},
		},
		{
			name:      "extra args without model positional",
			command:   "vllm serve --model my-model --new-feature enabled --beta-flag",
			expectErr: false,
			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
				if got := opts.Model; got != "my-model" {
					t.Errorf("expected model 'my-model', got '%s'", got)
				}

				extras := opts.ExtraArgs
				if extras == nil {
					t.Fatal("expected extra_args to be non-nil")
				}
				if got, found := extras["new_feature"]; !found || got != "enabled" {
					t.Errorf("expected extra_args[new_feature]='enabled', got '%s'", got)
				}
				if got, found := extras["beta_flag"]; !found || got != "true" {
					t.Errorf("expected extra_args[beta_flag]='true', got '%s'", got)
				}
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var parser backends.VllmServerOptions
			result, err := parser.ParseCommand(tc.command)

			switch {
			case tc.expectErr && err == nil:
				t.Error("expected error but got none")
				return
			case !tc.expectErr && err != nil:
				t.Errorf("unexpected error: %v", err)
				return
			case tc.expectErr || tc.validate == nil:
				// Nothing further to inspect for this case.
				return
			}

			parsed, ok := result.(*backends.VllmServerOptions)
			if !ok {
				t.Fatal("result is not *VllmServerOptions")
			}
			tc.validate(t, parsed)
		})
	}
}

View File

@@ -1,138 +1,154 @@
package config
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled"`
Image string `yaml:"image"`
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm"`
MLX BackendSettings `yaml:"mlx"`
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Backends BackendConfig `yaml:"backends"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
LocalNode string `yaml:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port"`
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers"`
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings.
// Every field carries identical yaml and json key names, so the same keys
// apply to both encodings of the configuration.
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
// NOTE(review): presumably the upper bound on simultaneously open
// connections — confirm against the code that opens the database.
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
// NOTE(review): presumably the upper bound on idle pooled connections —
// confirm against the code that opens the database.
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
// NOTE(review): presumably the maximum lifetime of a single connection.
// The swaggertype/example tags make the generated API docs present this
// time.Duration as a string such as "1h".
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
DataDir string `yaml:"data_dir"`
// Instance config directory override (relative to data_dir if not absolute)
InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Instance config directory override
InstancesDir string `yaml:"configs_dir"`
// Logs directory override
LogsDir string `yaml:"logs_dir"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs"`
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address"`
APIKey string `yaml:"api_key,omitempty"`
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
@@ -141,6 +157,8 @@ type NodeConfig struct {
// 3. Environment variables
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
defaultDataDir := getDefaultDataDirectory()
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
@@ -151,6 +169,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
DataDir: defaultDataDir,
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
@@ -161,7 +180,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
"-v", filepath.Join(defaultDataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
@@ -173,7 +192,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
"-v", filepath.Join(defaultDataDir, "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
@@ -186,7 +205,6 @@ func LoadConfig(configPath string) (AppConfig, error) {
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
// should be relative path to DataDir if not explicitly set.
InstancesDir: "",
@@ -202,6 +220,12 @@ func LoadConfig(configPath string) (AppConfig, error) {
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Database: DatabaseConfig{
Path: "", // Will be set to data_dir/llamactl.db if empty
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
@@ -223,12 +247,29 @@ func LoadConfig(configPath string) (AppConfig, error) {
// 3. Override with environment variables
loadEnvVars(&cfg)
// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
// Log warning if deprecated inference keys are present
if len(cfg.Auth.InferenceKeys) > 0 {
log.Println("⚠️ Config-based inference keys are no longer supported and will be ignored.")
log.Println(" Please create inference keys in web UI or via management API.")
}
// Set default directories if not specified
if cfg.Instances.InstancesDir == "" {
cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
cfg.Instances.InstancesDir = filepath.Join(cfg.DataDir, "instances")
} else {
// Log deprecation warning if using custom instances dir
log.Println("⚠️ Instances directory is deprecated and will be removed in future versions. Instances are persisted in the database.")
}
if cfg.Instances.LogsDir == "" {
cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
cfg.Instances.LogsDir = filepath.Join(cfg.DataDir, "logs")
}
if cfg.Database.Path == "" {
cfg.Database.Path = filepath.Join(cfg.DataDir, "llamactl.db")
}
// Validate port range
if cfg.Instances.PortRange[0] <= 0 || cfg.Instances.PortRange[1] <= 0 || cfg.Instances.PortRange[0] >= cfg.Instances.PortRange[1] {
return AppConfig{}, fmt.Errorf("invalid port range: %v", cfg.Instances.PortRange)
}
return cfg, nil
@@ -281,7 +322,7 @@ func loadEnvVars(cfg *AppConfig) {
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.Instances.DataDir = dataDir
cfg.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
@@ -488,6 +529,26 @@ func loadEnvVars(cfg *AppConfig) {
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -604,3 +665,31 @@ func getDefaultConfigLocations() []string {
return locations
}
// SanitizedCopy produces an independent copy of the configuration from which
// secrets have been stripped: both auth key lists are emptied and the API key
// of every node is blanked. Marshal/unmarshal failures are logged and returned.
func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
	// A JSON round trip gives a deep copy, so the maps of the copy are not
	// shared with the live configuration.
	encoded, err := json.Marshal(cfg)
	if err != nil {
		log.Printf("Failed to marshal config for sanitization: %v", err)
		return AppConfig{}, err
	}

	var clean AppConfig
	if err = json.Unmarshal(encoded, &clean); err != nil {
		log.Printf("Failed to unmarshal config for sanitization: %v", err)
		return AppConfig{}, err
	}

	// Drop the authentication secrets
	clean.Auth.InferenceKeys, clean.Auth.ManagementKeys = []string{}, []string{}

	// Map values are not addressable: copy out, blank the key, store back
	for name := range clean.Nodes {
		entry := clean.Nodes[name]
		entry.APIKey = ""
		clean.Nodes[name] = entry
	}

	return clean, nil
}

211
pkg/database/apikeys.go Normal file
View File

@@ -0,0 +1,211 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
"time"
)
// CreateKey stores a new API key and, when the key uses per-instance
// permission mode, its permission rows. Everything runs in one transaction,
// so a failure at any step leaves the database untouched. On success key.ID
// is set to the generated row ID.
func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions []auth.KeyPermission) error {
	const insertKeySQL = `
		INSERT INTO api_keys (key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?)
	`
	const insertPermissionSQL = `
		INSERT INTO key_permissions (key_id, instance_id, can_infer)
		VALUES (?, ?, ?)
	`

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("failed to begin transaction: %w", err)
	}
	// Rolling back after a successful commit is a no-op
	defer tx.Rollback()

	// A nil expiry is stored as SQL NULL
	expiry := sql.NullInt64{}
	if key.ExpiresAt != nil {
		expiry.Int64, expiry.Valid = *key.ExpiresAt, true
	}

	res, err := tx.ExecContext(ctx, insertKeySQL,
		key.KeyHash, key.Name, key.UserID, key.PermissionMode,
		expiry, key.Enabled, key.CreatedAt, key.UpdatedAt,
	)
	if err != nil {
		return fmt.Errorf("failed to insert API key: %w", err)
	}

	newID, err := res.LastInsertId()
	if err != nil {
		return fmt.Errorf("failed to get last insert ID: %w", err)
	}
	key.ID = int(newID)

	// Permission rows are only written for per-instance keys
	if key.PermissionMode != auth.PermissionModePerInstance {
		return tx.Commit()
	}

	for _, p := range permissions {
		if _, execErr := tx.ExecContext(ctx, insertPermissionSQL, key.ID, p.InstanceID, p.CanInfer); execErr != nil {
			return fmt.Errorf("failed to insert permission for instance %d: %w", p.InstanceID, execErr)
		}
	}

	return tx.Commit()
}
// GetKeyByID looks up a single API key by its primary key.
// It returns an "API key not found" error when no row matches.
// NULL expires_at / last_used_at columns leave the matching pointer nil.
func (db *sqliteDB) GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error) {
	const selectSQL = `
		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
		FROM api_keys
		WHERE id = ?
	`

	var (
		found    auth.APIKey
		expiry   sql.NullInt64
		lastUsed sql.NullInt64
	)

	err := db.QueryRowContext(ctx, selectSQL, id).Scan(
		&found.ID, &found.KeyHash, &found.Name, &found.UserID, &found.PermissionMode,
		&expiry, &found.Enabled, &found.CreatedAt, &found.UpdatedAt, &lastUsed,
	)
	switch {
	case err == sql.ErrNoRows:
		return nil, fmt.Errorf("API key not found")
	case err != nil:
		return nil, fmt.Errorf("failed to query API key: %w", err)
	}

	if expiry.Valid {
		found.ExpiresAt = &expiry.Int64
	}
	if lastUsed.Valid {
		found.LastUsedAt = &lastUsed.Int64
	}

	return &found, nil
}
// GetUserKeys retrieves all API keys that belong to userID, newest first
// (ordered by created_at descending).
//
// NULL expires_at / last_used_at columns leave the corresponding pointer nil.
// The returned slice is nil when the user has no keys. An error is returned
// if the query fails, a row cannot be scanned, or iteration stops early.
func (db *sqliteDB) GetUserKeys(ctx context.Context, userID string) ([]*auth.APIKey, error) {
	query := `
		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
		FROM api_keys
		WHERE user_id = ?
		ORDER BY created_at DESC
	`

	rows, err := db.QueryContext(ctx, query, userID)
	if err != nil {
		return nil, fmt.Errorf("failed to query API keys: %w", err)
	}
	defer rows.Close()

	var keys []*auth.APIKey
	for rows.Next() {
		// Declared per iteration so the pointers stored below never alias
		var key auth.APIKey
		var expiresAt sql.NullInt64
		var lastUsedAt sql.NullInt64

		err := rows.Scan(
			&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
			&expiresAt, &key.Enabled, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to scan API key: %w", err)
		}

		if expiresAt.Valid {
			key.ExpiresAt = &expiresAt.Int64
		}
		if lastUsedAt.Valid {
			key.LastUsedAt = &lastUsedAt.Int64
		}

		keys = append(keys, &key)
	}

	// rows.Next() also returns false when iteration fails midway; without this
	// check a truncated result would be reported as success.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating API keys: %w", err)
	}

	return keys, nil
}
// GetActiveKeys retrieves all API keys that are enabled and not expired,
// newest first. A key counts as not expired when expires_at is NULL or lies
// strictly after the current Unix time.
//
// NULL expires_at / last_used_at columns leave the corresponding pointer nil.
// The returned slice is nil when no key qualifies. An error is returned if
// the query fails, a row cannot be scanned, or iteration stops early.
func (db *sqliteDB) GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error) {
	query := `
		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
		FROM api_keys
		WHERE enabled = 1 AND (expires_at IS NULL OR expires_at > ?)
		ORDER BY created_at DESC
	`

	now := time.Now().Unix()
	rows, err := db.QueryContext(ctx, query, now)
	if err != nil {
		return nil, fmt.Errorf("failed to query active API keys: %w", err)
	}
	defer rows.Close()

	var keys []*auth.APIKey
	for rows.Next() {
		// Declared per iteration so the pointers stored below never alias
		var key auth.APIKey
		var expiresAt sql.NullInt64
		var lastUsedAt sql.NullInt64

		err := rows.Scan(
			&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
			&expiresAt, &key.Enabled, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to scan API key: %w", err)
		}

		if expiresAt.Valid {
			key.ExpiresAt = &expiresAt.Int64
		}
		if lastUsedAt.Valid {
			key.LastUsedAt = &lastUsedAt.Int64
		}

		keys = append(keys, &key)
	}

	// rows.Next() also returns false when iteration fails midway; a silently
	// truncated list here would make valid keys look unknown.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating active API keys: %w", err)
	}

	return keys, nil
}
// DeleteKey deletes the API key with the given ID; its permission rows go
// with it via the ON DELETE CASCADE foreign key. Deleting an ID that does not
// exist is reported as an "API key not found" error.
func (db *sqliteDB) DeleteKey(ctx context.Context, id int) error {
	res, err := db.ExecContext(ctx, `DELETE FROM api_keys WHERE id = ?`, id)
	if err != nil {
		return fmt.Errorf("failed to delete API key: %w", err)
	}

	affected, err := res.RowsAffected()
	switch {
	case err != nil:
		return fmt.Errorf("failed to get rows affected: %w", err)
	case affected == 0:
		return fmt.Errorf("API key not found")
	}

	return nil
}
// TouchKey stamps the key with the current Unix time, writing it to both
// last_used_at and updated_at. An ID that matches no row is not an error.
func (db *sqliteDB) TouchKey(ctx context.Context, id int) error {
	stamp := time.Now().Unix()

	if _, err := db.ExecContext(ctx,
		`UPDATE api_keys SET last_used_at = ?, updated_at = ? WHERE id = ?`,
		stamp, stamp, id,
	); err != nil {
		return fmt.Errorf("failed to update last used timestamp: %w", err)
	}

	return nil
}

121
pkg/database/database.go Normal file
View File

@@ -0,0 +1,121 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
"llamactl/pkg/instance"
"log"
"path/filepath"
"time"
_ "github.com/mattn/go-sqlite3"
)
// InstanceStore defines interface for instance persistence operations.
// The methods take no context.
type InstanceStore interface {
// Save persists an instance (insert or update)
Save(inst *instance.Instance) error
// Delete removes the instance with the given name
Delete(name string) error
// LoadAll returns every persisted instance
LoadAll() ([]*instance.Instance, error)
// Close releases the underlying store
Close() error
}
// AuthStore defines the interface for authentication operations
// (API keys and their per-instance permissions).
type AuthStore interface {
// CreateKey stores a new key together with its permissions
CreateKey(ctx context.Context, key *auth.APIKey, permissions []auth.KeyPermission) error
// GetUserKeys lists all keys owned by userID
GetUserKeys(ctx context.Context, userID string) ([]*auth.APIKey, error)
// GetActiveKeys lists all enabled, non-expired keys
GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error)
// GetKeyByID fetches a single key by its ID
GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error)
// DeleteKey removes a key by its ID
DeleteKey(ctx context.Context, id int) error
// TouchKey records that the key was just used
TouchKey(ctx context.Context, id int) error
// GetPermissions lists the permission entries of a key
GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPermission, error)
// HasPermission reports whether the key may run inference on the instance
HasPermission(ctx context.Context, keyID, instanceID int) (bool, error)
}
// Config contains database configuration settings consumed by Open.
type Config struct {
// Database file path (relative to data_dir or absolute).
// Open rejects an empty path.
Path string
// Connection settings.
// Open applies each of these to the connection pool only when it is > 0.
MaxOpenConnections int
MaxIdleConnections int
ConnMaxLifetime time.Duration
}
// sqliteDB wraps database connection with configuration.
// *sql.DB is embedded, so its methods are callable directly on a sqliteDB.
type sqliteDB struct {
*sql.DB
// config is the configuration that was passed to Open
config *Config
}
// Open creates a new database connection with provided configuration.
//
// It returns an error when config is nil, when config.Path is empty, when the
// database handle cannot be opened, or when the initial ping fails (in which
// case the handle is closed again before returning).
func Open(config *Config) (*sqliteDB, error) {
if config == nil {
return nil, fmt.Errorf("database config cannot be nil")
}
if config.Path == "" {
return nil, fmt.Errorf("database path cannot be empty")
}
// NOTE: this function does not create the database directory; the block
// below only logs the target path when it has a directory component.
dbDir := filepath.Dir(config.Path)
if dbDir != "." && dbDir != "/" {
// Directory will be created by manager if auto_create_dirs is enabled
log.Printf("Database will be created at: %s", config.Path)
}
// Open SQLite database with proper options
// - _journal_mode=WAL: Write-Ahead Logging for better concurrency
// - _busy_timeout=5000: Wait up to 5 seconds if database is locked
// - _foreign_keys=1: Enable foreign key constraints
// NOTE(review): the path is interpolated into the DSN unescaped, so a path
// containing '?' or '#' would presumably be misparsed - confirm.
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=5000&_foreign_keys=1", config.Path)
sqlDB, err := sql.Open("sqlite3", dsn)
if err != nil {
return nil, fmt.Errorf("failed to open database: %w", err)
}
// Configure connection pool; zero or negative settings are skipped, which
// leaves the database/sql defaults in place
if config.MaxOpenConnections > 0 {
sqlDB.SetMaxOpenConns(config.MaxOpenConnections)
}
if config.MaxIdleConnections > 0 {
sqlDB.SetMaxIdleConns(config.MaxIdleConnections)
}
if config.ConnMaxLifetime > 0 {
sqlDB.SetConnMaxLifetime(config.ConnMaxLifetime)
}
// Verify database connection (sql.Open alone does not necessarily connect)
if err := sqlDB.Ping(); err != nil {
sqlDB.Close()
return nil, fmt.Errorf("failed to ping database: %w", err)
}
log.Printf("Database connection established: %s", config.Path)
return &sqliteDB{
DB: sqlDB,
config: config,
}, nil
}
// Close shuts down the underlying connection pool.
// It is a no-op when no connection is attached.
func (db *sqliteDB) Close() error {
	if db.DB == nil {
		return nil
	}

	log.Println("Closing database connection")
	return db.DB.Close()
}
// HealthCheck pings the database and returns the ping result,
// or an error when no connection is attached.
func (db *sqliteDB) HealthCheck() error {
	if conn := db.DB; conn != nil {
		return conn.Ping()
	}
	return fmt.Errorf("database connection is nil")
}

319
pkg/database/instances.go Normal file
View File

@@ -0,0 +1,319 @@
package database
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"log"
"time"
)
// instanceRow represents a row in the instances table.
// Fields map one-to-one to the table's columns.
type instanceRow struct {
// Primary key of the row
ID int
// Instance name, used as the lookup key by the queries in this file
Name string
// Status in its string form
Status string
// Unix timestamps
CreatedAt int64
UpdatedAt int64
// Instance options serialized as JSON
OptionsJSON string
// Nullable owner; instanceToRow leaves it unset
OwnerUserID sql.NullString
}
// Create writes a new row for inst into the instances table.
// It rejects a nil instance and an instance without options, and returns the
// driver error (wrapped) when the INSERT fails.
func (db *sqliteDB) Create(ctx context.Context, inst *instance.Instance) error {
	const insertSQL = `
		INSERT INTO instances (
			name, status, created_at, updated_at, options_json, owner_user_id
		) VALUES (?, ?, ?, ?, ?, ?)
	`

	if inst == nil {
		return fmt.Errorf("instance cannot be nil")
	}
	if inst.GetOptions() == nil {
		return fmt.Errorf("instance options cannot be nil")
	}

	// Flatten the instance into its column values
	rec, err := db.instanceToRow(inst)
	if err != nil {
		return fmt.Errorf("failed to convert instance to row: %w", err)
	}

	if _, err = db.DB.ExecContext(ctx, insertSQL,
		rec.Name, rec.Status, rec.CreatedAt, rec.UpdatedAt, rec.OptionsJSON, rec.OwnerUserID,
	); err != nil {
		return fmt.Errorf("failed to insert instance: %w", err)
	}

	return nil
}
// GetByName loads the instance stored under name.
// A missing row yields an "instance not found" error.
func (db *sqliteDB) GetByName(ctx context.Context, name string) (*instance.Instance, error) {
	const selectSQL = `
		SELECT id, name, status, created_at, updated_at, options_json, owner_user_id
		FROM instances
		WHERE name = ?
	`

	var rec instanceRow
	err := db.DB.QueryRowContext(ctx, selectSQL, name).Scan(
		&rec.ID, &rec.Name, &rec.Status, &rec.CreatedAt, &rec.UpdatedAt, &rec.OptionsJSON, &rec.OwnerUserID,
	)

	switch {
	case err == sql.ErrNoRows:
		return nil, fmt.Errorf("instance not found: %s", name)
	case err != nil:
		return nil, fmt.Errorf("failed to query instance: %w", err)
	}

	return db.rowToInstance(&rec)
}
// GetAll loads every stored instance, oldest first (by created_at).
// Rows that cannot be scanned or converted are logged and skipped rather than
// failing the whole load; query and iteration errors are returned.
func (db *sqliteDB) GetAll(ctx context.Context) ([]*instance.Instance, error) {
	const selectSQL = `
		SELECT id, name, status, created_at, updated_at, options_json, owner_user_id
		FROM instances
		ORDER BY created_at ASC
	`

	rows, err := db.DB.QueryContext(ctx, selectSQL)
	if err != nil {
		return nil, fmt.Errorf("failed to query instances: %w", err)
	}
	defer rows.Close()

	var loaded []*instance.Instance
	for rows.Next() {
		var rec instanceRow
		if scanErr := rows.Scan(
			&rec.ID, &rec.Name, &rec.Status, &rec.CreatedAt, &rec.UpdatedAt, &rec.OptionsJSON, &rec.OwnerUserID,
		); scanErr != nil {
			log.Printf("Failed to scan instance row: %v", scanErr)
			continue
		}

		inst, convErr := db.rowToInstance(&rec)
		if convErr != nil {
			log.Printf("Failed to convert row to instance: %v", convErr)
			continue
		}

		loaded = append(loaded, inst)
	}

	if iterErr := rows.Err(); iterErr != nil {
		return nil, fmt.Errorf("error iterating rows: %w", iterErr)
	}

	return loaded, nil
}
// Update overwrites status, updated_at and options_json of the row whose name
// matches inst.Name. It fails when inst or its options are nil, and reports
// "instance not found" when no row was changed.
func (db *sqliteDB) Update(ctx context.Context, inst *instance.Instance) error {
	const updateSQL = `
		UPDATE instances SET
			status = ?, updated_at = ?, options_json = ?
		WHERE name = ?
	`

	if inst == nil {
		return fmt.Errorf("instance cannot be nil")
	}
	if inst.GetOptions() == nil {
		return fmt.Errorf("instance options cannot be nil")
	}

	// Flatten the instance into its column values
	rec, err := db.instanceToRow(inst)
	if err != nil {
		return fmt.Errorf("failed to convert instance to row: %w", err)
	}

	res, err := db.DB.ExecContext(ctx, updateSQL,
		rec.Status, rec.UpdatedAt, rec.OptionsJSON, rec.Name,
	)
	if err != nil {
		return fmt.Errorf("failed to update instance: %w", err)
	}

	affected, err := res.RowsAffected()
	switch {
	case err != nil:
		return fmt.Errorf("failed to get rows affected: %w", err)
	case affected == 0:
		return fmt.Errorf("instance not found: %s", inst.Name)
	}

	return nil
}
// UpdateStatus writes only the status (and a fresh updated_at) of the named
// instance, leaving its options untouched. It reports "instance not found"
// when no row was changed.
func (db *sqliteDB) UpdateStatus(ctx context.Context, name string, status instance.Status) error {
	const updateSQL = `
		UPDATE instances SET
			status = ?,
			updated_at = ?
		WHERE name = ?
	`

	// The status is marshalled to a JSON string and decoded again to obtain
	// the plain text that is stored in the column
	encoded, err := status.MarshalJSON()
	if err != nil {
		return fmt.Errorf("failed to marshal status: %w", err)
	}

	var statusText string
	if err = json.Unmarshal(encoded, &statusText); err != nil {
		return fmt.Errorf("failed to unmarshal status string: %w", err)
	}

	res, err := db.DB.ExecContext(ctx, updateSQL, statusText, time.Now().Unix(), name)
	if err != nil {
		return fmt.Errorf("failed to update instance status: %w", err)
	}

	affected, err := res.RowsAffected()
	switch {
	case err != nil:
		return fmt.Errorf("failed to get rows affected: %w", err)
	case affected == 0:
		return fmt.Errorf("instance not found: %s", name)
	}

	return nil
}
// DeleteInstance deletes the row of the named instance.
// Deleting a name that does not exist is reported as "instance not found".
func (db *sqliteDB) DeleteInstance(ctx context.Context, name string) error {
	res, err := db.DB.ExecContext(ctx, `DELETE FROM instances WHERE name = ?`, name)
	if err != nil {
		return fmt.Errorf("failed to delete instance: %w", err)
	}

	affected, err := res.RowsAffected()
	switch {
	case err != nil:
		return fmt.Errorf("failed to get rows affected: %w", err)
	case affected == 0:
		return fmt.Errorf("instance not found: %s", name)
	}

	return nil
}
// instanceToRow flattens an Instance into the column values of the instances
// table. ID and OwnerUserID are left at their zero values; UpdatedAt is set
// to the current Unix time.
func (db *sqliteDB) instanceToRow(inst *instance.Instance) (*instanceRow, error) {
	options := inst.GetOptions()
	if options == nil {
		return nil, fmt.Errorf("instance options cannot be nil")
	}

	// Options are stored as one JSON document
	encodedOptions, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal options: %w", err)
	}

	// The status is marshalled to a JSON string and decoded again to obtain
	// the plain text that is stored in the column
	encodedStatus, err := inst.GetStatus().MarshalJSON()
	if err != nil {
		return nil, fmt.Errorf("failed to marshal status: %w", err)
	}

	var statusText string
	if err = json.Unmarshal(encodedStatus, &statusText); err != nil {
		return nil, fmt.Errorf("failed to unmarshal status string: %w", err)
	}

	rec := new(instanceRow)
	rec.Name = inst.Name
	rec.Status = statusText
	rec.CreatedAt = inst.Created
	rec.UpdatedAt = time.Now().Unix()
	rec.OptionsJSON = string(encodedOptions)

	return rec, nil
}
// rowToInstance converts a database row to an Instance.
//
// The instance is rebuilt by assembling a JSON document from the row's
// columns and unmarshalling it, after which the options are applied again
// explicitly and the database ID is copied over. An error is returned when
// the stored options or the assembled document cannot be decoded.
func (db *sqliteDB) rowToInstance(row *instanceRow) (*instance.Instance, error) {
	// Unmarshal options from JSON using the existing UnmarshalJSON method
	var opts instance.Options
	if err := json.Unmarshal([]byte(row.OptionsJSON), &opts); err != nil {
		return nil, fmt.Errorf("failed to unmarshal options: %w", err)
	}

	// Build complete instance JSON with all fields
	instanceJSON, err := json.Marshal(map[string]any{
		"name":    row.Name,
		"created": row.CreatedAt,
		"status":  row.Status,
		"options": json.RawMessage(row.OptionsJSON),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to marshal instance: %w", err)
	}

	// Unmarshal into a complete Instance
	var inst instance.Instance
	if err := json.Unmarshal(instanceJSON, &inst); err != nil {
		return nil, fmt.Errorf("failed to unmarshal instance: %w", err)
	}

	// Carry the primary key over. The JSON document above has no "id" entry,
	// so without this every loaded instance would keep ID 0 even though
	// key_permissions.instance_id refers to this value.
	inst.ID = row.ID

	// The UnmarshalJSON doesn't handle BackendOptions and Nodes (they have json:"-" tags)
	// So we need to explicitly set the options again to ensure they're properly set
	inst.SetOptions(&opts)

	return &inst, nil
}
// Database interface implementation
// Save saves an instance to the database (insert or update).
//
// Whether the instance already exists is decided by a dedicated existence
// query on the name column. Only sql.ErrNoRows leads to an insert; any other
// lookup failure is returned to the caller instead of being mistaken for
// "not found". Because the stored options are not decoded for this check, a
// row whose options_json is unreadable is still updated rather than
// triggering a doomed insert against the UNIQUE name constraint.
//
// A nil instance is rejected with an error.
func (db *sqliteDB) Save(inst *instance.Instance) error {
	if inst == nil {
		return fmt.Errorf("instance cannot be nil")
	}

	ctx := context.Background()

	// Cheap existence probe; the selected value itself is irrelevant
	var exists int
	err := db.DB.QueryRowContext(ctx,
		`SELECT 1 FROM instances WHERE name = ? LIMIT 1`, inst.Name,
	).Scan(&exists)

	switch {
	case err == sql.ErrNoRows:
		// Instance doesn't exist, create it
		return db.Create(ctx, inst)
	case err != nil:
		return fmt.Errorf("failed to check for existing instance: %w", err)
	}

	// Instance exists, update it
	return db.Update(ctx, inst)
}
// Delete removes the named instance; it runs DeleteInstance with a
// background context.
func (db *sqliteDB) Delete(name string) error {
	return db.DeleteInstance(context.Background(), name)
}
// LoadAll returns every stored instance; it runs GetAll with a background
// context.
func (db *sqliteDB) LoadAll() ([]*instance.Instance, error) {
	return db.GetAll(context.Background())
}

View File

@@ -0,0 +1,78 @@
package database
import (
"embed"
"fmt"
"log"
"github.com/golang-migrate/migrate/v4"
"github.com/golang-migrate/migrate/v4/database/sqlite3"
"github.com/golang-migrate/migrate/v4/source/iofs"
)
//go:embed migrations/*.sql
var migrationFiles embed.FS
// RunMigrations applies all pending database migrations from the embedded
// migrations directory.
//
// It returns an error when db (or its connection) is nil, when the migration
// source, driver or migrator cannot be created, when the current version
// cannot be read, when the schema is marked dirty, or when applying the
// migrations fails. An already up-to-date schema is not an error.
func RunMigrations(db *sqliteDB) error {
if db == nil || db.DB == nil {
return fmt.Errorf("database connection is nil")
}
// Create migration source from embedded files
sourceDriver, err := iofs.New(migrationFiles, "migrations")
if err != nil {
return fmt.Errorf("failed to create migration source: %w", err)
}
// Create database driver for migrations
dbDriver, err := sqlite3.WithInstance(db.DB, &sqlite3.Config{})
if err != nil {
return fmt.Errorf("failed to create database driver: %w", err)
}
// Create migrator
migrator, err := migrate.NewWithInstance("iofs", sourceDriver, "sqlite3", dbDriver)
if err != nil {
return fmt.Errorf("failed to create migrator: %w", err)
}
// Get current version; ErrNilVersion is tolerated here and is rendered as
// "none" in the log line below
currentVersion, dirty, err := migrator.Version()
if err != nil && err != migrate.ErrNilVersion {
return fmt.Errorf("failed to get current migration version: %w", err)
}
// Refuse to continue on a dirty schema; it has to be repaired by hand
if dirty {
return fmt.Errorf("database is in dirty state at version %d - manual intervention required", currentVersion)
}
// Run migrations
log.Printf("Running database migrations (current version: %v)", currentVersionString(currentVersion, err))
if err := migrator.Up(); err != nil {
// Nothing to apply counts as success
if err == migrate.ErrNoChange {
log.Println("Database schema is up to date")
return nil
}
return fmt.Errorf("failed to run migrations: %w", err)
}
// Get new version; failing to read it back is only logged because the
// migrations themselves have already succeeded
newVersion, _, err := migrator.Version()
if err != nil {
log.Printf("Migrations completed (unable to determine new version: %v)", err)
} else {
log.Printf("Migrations completed successfully (new version: %d)", newVersion)
}
return nil
}
// currentVersionString renders a migration version for log output:
// "none" when err is migrate.ErrNilVersion, the decimal number otherwise.
func currentVersionString(version uint, err error) string {
	if err != migrate.ErrNilVersion {
		return fmt.Sprintf("%d", version)
	}
	return "none"
}

View File

@@ -0,0 +1,11 @@
-- Down migration: removes everything created by the matching up migration.
-- Every statement uses IF EXISTS, so objects that are already gone are skipped.
-- Drop API key related indexes and tables first.
-- key_permissions holds foreign keys to both api_keys and instances, so it is
-- dropped before either of them.
DROP INDEX IF EXISTS idx_key_permissions_instance_id;
DROP INDEX IF EXISTS idx_api_keys_expires_at;
DROP INDEX IF EXISTS idx_api_keys_user_id;
DROP TABLE IF EXISTS key_permissions;
DROP TABLE IF EXISTS api_keys;
-- Drop instance related indexes and tables
DROP INDEX IF EXISTS idx_instances_status;
DROP INDEX IF EXISTS idx_instances_name;
DROP TABLE IF EXISTS instances;

View File

@@ -0,0 +1,62 @@
-- -----------------------------------------------------------------------------
-- Instances Table: Central configuration and state for LLM backends
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS instances (
-- Primary identification (name is the key used by the application's queries)
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
-- Instance state, restricted to the listed values
status TEXT NOT NULL CHECK(status IN ('stopped', 'running', 'failed', 'restarting', 'shutting_down')) DEFAULT 'stopped',
-- Timestamps (created_at stored as Unix timestamp for compatibility with existing JSON format)
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
-- All instance options stored as a single JSON blob
options_json TEXT NOT NULL,
-- Future: OIDC user ID for ownership
owner_user_id TEXT NULL
);
-- -----------------------------------------------------------------------------
-- Indexes for performance
-- NOTE(review): the UNIQUE constraint on instances.name presumably already
-- creates an implicit index, which would make idx_instances_name redundant.
-- -----------------------------------------------------------------------------
CREATE UNIQUE INDEX IF NOT EXISTS idx_instances_name ON instances(name);
CREATE INDEX IF NOT EXISTS idx_instances_status ON instances(status);
-- -----------------------------------------------------------------------------
-- API Keys Table: Database-backed inference API keys
-- expires_at and last_used_at are nullable Unix timestamps; a NULL expires_at
-- is treated as "never expires" by the active-key query.
-- enabled is a 0/1 flag.
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS api_keys (
id INTEGER PRIMARY KEY AUTOINCREMENT,
key_hash TEXT NOT NULL,
name TEXT NOT NULL,
user_id TEXT NOT NULL,
permission_mode TEXT NOT NULL CHECK(permission_mode IN ('allow_all', 'per_instance')) DEFAULT 'per_instance',
expires_at INTEGER NULL,
enabled INTEGER NOT NULL DEFAULT 1,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
last_used_at INTEGER NULL
);
-- -----------------------------------------------------------------------------
-- Key Permissions Table: Per-instance permissions for API keys
-- One row per (key, instance) pair; rows are removed automatically when the
-- referenced key or instance is deleted (requires foreign key enforcement to
-- be enabled on the connection).
-- -----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS key_permissions (
key_id INTEGER NOT NULL,
instance_id INTEGER NOT NULL,
can_infer INTEGER NOT NULL DEFAULT 0,
PRIMARY KEY (key_id, instance_id),
FOREIGN KEY (key_id) REFERENCES api_keys (id) ON DELETE CASCADE,
FOREIGN KEY (instance_id) REFERENCES instances (id) ON DELETE CASCADE
);
-- -----------------------------------------------------------------------------
-- Indexes for API keys and permissions
-- -----------------------------------------------------------------------------
CREATE INDEX IF NOT EXISTS idx_api_keys_user_id ON api_keys(user_id);
CREATE INDEX IF NOT EXISTS idx_api_keys_expires_at ON api_keys(expires_at);
CREATE INDEX IF NOT EXISTS idx_key_permissions_instance_id ON key_permissions(instance_id);

View File

@@ -0,0 +1,57 @@
package database
import (
"context"
"database/sql"
"fmt"
"llamactl/pkg/auth"
)
// GetPermissions retrieves all permissions for a key, ordered by instance ID.
//
// The returned slice is nil when the key has no permission rows. An error is
// returned if the query fails, a row cannot be scanned, or iteration stops
// early.
func (db *sqliteDB) GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPermission, error) {
	query := `
		SELECT key_id, instance_id, can_infer
		FROM key_permissions
		WHERE key_id = ?
		ORDER BY instance_id
	`

	rows, err := db.QueryContext(ctx, query, keyID)
	if err != nil {
		return nil, fmt.Errorf("failed to query key permissions: %w", err)
	}
	defer rows.Close()

	var permissions []auth.KeyPermission
	for rows.Next() {
		var perm auth.KeyPermission
		err := rows.Scan(&perm.KeyID, &perm.InstanceID, &perm.CanInfer)
		if err != nil {
			return nil, fmt.Errorf("failed to scan key permission: %w", err)
		}
		permissions = append(permissions, perm)
	}

	// rows.Next() also returns false when iteration fails midway; without this
	// check an incomplete permission list would be reported as success.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating key permissions: %w", err)
	}

	return permissions, nil
}
// HasPermission reports whether the key is allowed to run inference on the
// given instance. A missing permission row means "denied" and is not an
// error; any other lookup failure is returned.
func (db *sqliteDB) HasPermission(ctx context.Context, keyID, instanceID int) (bool, error) {
	const selectSQL = `
		SELECT can_infer
		FROM key_permissions
		WHERE key_id = ? AND instance_id = ?
	`

	var allowed bool
	err := db.QueryRowContext(ctx, selectSQL, keyID, instanceID).Scan(&allowed)

	switch {
	case err == nil:
		return allowed, nil
	case err == sql.ErrNoRows:
		// No permission record found, deny access
		return false, nil
	default:
		return false, fmt.Errorf("failed to check key permission: %w", err)
	}
}

View File

@@ -5,14 +5,15 @@ import (
"fmt"
"llamactl/pkg/config"
"log"
"net/http/httputil"
"net/http"
"time"
)
// Instance represents a running instance of the llama server
// Instance represents a running instance of llama server
type Instance struct {
ID int `json:"id"`
Name string `json:"name"`
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
Created int64 `json:"created,omitempty"` // Unix timestamp when instance was created
// Global configuration
globalInstanceSettings *config.InstancesConfig
@@ -48,6 +49,7 @@ func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusCha
options := newOptions(opts)
instance := &Instance{
ID: 0, // Will be set by database
Name: name,
options: options,
globalInstanceSettings: globalInstanceSettings,
@@ -182,15 +184,6 @@ func (i *Instance) GetPort() int {
return i.options.GetPort()
}
// GetProxy returns the reverse proxy for this instance
func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
if i.proxy == nil {
return nil, fmt.Errorf("instance %s has no proxy component", i.Name)
}
return i.proxy.get()
}
func (i *Instance) IsRemote() bool {
opts := i.GetOptions()
if opts == nil {
@@ -242,13 +235,29 @@ func (i *Instance) ShouldTimeout() bool {
return i.proxy.shouldTimeout()
}
// GetInflightRequests reports how many proxied requests are currently being
// served for this instance. An instance without a proxy component reports 0.
func (i *Instance) GetInflightRequests() int32 {
	if p := i.proxy; p != nil {
		return p.getInflightRequests()
	}
	return 0
}
// ServeHTTP forwards the request to this instance's proxy component, which
// tracks it as an inflight request while it is being served.
//
// It returns an error when the instance has no proxy component, or whatever
// error the proxy itself reports.
func (i *Instance) ServeHTTP(w http.ResponseWriter, r *http.Request) error {
	if p := i.proxy; p != nil {
		return p.serveHTTP(w, r)
	}
	return fmt.Errorf("instance %s has no proxy component", i.Name)
}
func (i *Instance) getCommand() string {
opts := i.GetOptions()
if opts == nil {
return ""
}
return opts.BackendOptions.GetCommand(i.globalBackendSettings)
return opts.BackendOptions.GetCommand(i.globalBackendSettings, opts.DockerEnabled, opts.CommandOverride)
}
func (i *Instance) buildCommandArgs() []string {
@@ -257,7 +266,7 @@ func (i *Instance) buildCommandArgs() []string {
return nil
}
return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings)
return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings, opts.DockerEnabled)
}
func (i *Instance) buildEnvironment() map[string]string {
@@ -266,29 +275,23 @@ func (i *Instance) buildEnvironment() map[string]string {
return nil
}
return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.Environment)
return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.DockerEnabled, opts.Environment)
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
// Get options
opts := i.GetOptions()
// Determine if docker is enabled for this instance's backend
dockerEnabled := opts.BackendOptions.IsDockerEnabled(i.globalBackendSettings)
return json.Marshal(&struct {
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
DockerEnabled bool `json:"docker_enabled,omitempty"`
ID int `json:"id"`
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
Options *options `json:"options,omitempty"`
}{
Name: i.Name,
Status: i.status,
Created: i.Created,
Options: i.options,
DockerEnabled: dockerEnabled,
ID: i.ID,
Name: i.Name,
Status: i.status,
Created: i.Created,
Options: i.options,
})
}
@@ -296,6 +299,7 @@ func (i *Instance) MarshalJSON() ([]byte, error) {
func (i *Instance) UnmarshalJSON(data []byte) error {
// Explicitly deserialize to match MarshalJSON format
aux := &struct {
ID int `json:"id"`
Name string `json:"name"`
Status *status `json:"status"`
Created int64 `json:"created,omitempty"`
@@ -307,6 +311,7 @@ func (i *Instance) UnmarshalJSON(data []byte) error {
}
// Set the fields
i.ID = aux.ID
i.Name = aux.Name
i.Created = aux.Created
i.status = aux.Status

View File

@@ -171,64 +171,6 @@ func TestSetOptions(t *testing.T) {
}
}
func TestGetProxy(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "llama-server",
Args: []string{},
},
MLX: config.BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
},
VLLM: config.BackendSettings{
Command: "vllm",
Args: []string{"serve"},
},
},
Instances: config.InstancesConfig{
LogsDir: "/tmp/test",
},
Nodes: map[string]config.NodeConfig{},
LocalNode: "main",
}
options := &instance.Options{
Nodes: map[string]struct{}{"main": {}},
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
inst := instance.New("test-instance", globalConfig, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalConfig := &config.AppConfig{
Backends: config.BackendConfig{
@@ -613,11 +555,6 @@ func TestRemoteInstanceOperations(t *testing.T) {
t.Error("Expected error when restarting remote instance")
}
// GetProxy should fail for remote instance
if _, err := inst.GetProxy(); err != nil {
t.Error("Expected no error when getting proxy for remote instance")
}
// GetLogs should fail for remote instance
if _, err := inst.GetLogs(10); err == nil {
t.Error("Expected error when getting logs for remote instance")

View File

@@ -7,13 +7,14 @@ import (
"os"
"strings"
"sync"
"sync/atomic"
"time"
)
type logger struct {
name string
logDir string
logFile *os.File
logFile atomic.Pointer[os.File]
logFilePath string
mu sync.RWMutex
}
@@ -47,11 +48,11 @@ func (i *logger) create() error {
return fmt.Errorf("failed to create stdout log file: %w", err)
}
i.logFile = logFile
i.logFile.Store(logFile)
// Write a startup marker to both files
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
return nil
}
@@ -102,11 +103,12 @@ func (i *logger) close() {
i.mu.Lock()
defer i.mu.Unlock()
if i.logFile != nil {
logFile := i.logFile.Swap(nil)
if logFile != nil {
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
i.logFile.Close()
i.logFile = nil
fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
logFile.Sync() // Ensure all buffered data is written to disk
logFile.Close()
}
}
@@ -117,9 +119,9 @@ func (i *logger) readOutput(reader io.ReadCloser) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if i.logFile != nil {
fmt.Fprintln(i.logFile, line)
i.logFile.Sync() // Ensure data is written to disk
// Use atomic load to avoid lock contention on every line
if logFile := i.logFile.Load(); logFile != nil {
fmt.Fprintln(logFile, line)
}
}
}

View File

@@ -5,7 +5,9 @@ import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/validation"
"log"
"maps"
"slices"
"sync"
)
@@ -22,6 +24,11 @@ type Options struct {
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
// Environment variables
Environment map[string]string `json:"environment,omitempty"`
// Execution context overrides
DockerEnabled *bool `json:"docker_enabled,omitempty"`
CommandOverride string `json:"command_override,omitempty"`
// Assigned nodes
Nodes map[string]struct{} `json:"-"`
// Backend options
@@ -138,15 +145,25 @@ func (c *Options) UnmarshalJSON(data []byte) error {
// MarshalJSON implements custom JSON marshaling for Options
func (c *Options) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias Options
aux := struct {
// Make a copy of the struct
temp := *c
// Copy environment map to avoid concurrent access issues
if temp.Environment != nil {
envCopy := make(map[string]string, len(temp.Environment))
maps.Copy(envCopy, temp.Environment)
temp.Environment = envCopy
}
aux := &struct {
Nodes []string `json:"nodes,omitempty"` // Output as JSON array
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
*Alias
}{
Alias: (*Alias)(c),
Alias: (*Alias)(&temp),
}
// Convert nodes map to array (sorted for consistency)
@@ -163,13 +180,12 @@ func (c *Options) MarshalJSON() ([]byte, error) {
aux.BackendType = c.BackendOptions.BackendType
// Marshal the backends.Options struct to get the properly formatted backend options
// Marshal a pointer to trigger the pointer receiver MarshalJSON method
backendData, err := json.Marshal(&c.BackendOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal backend options: %w", err)
}
// Unmarshal into a temporary struct to extract the backend_options map
// Unmarshal into a new temporary map to extract the backend_options
var tempBackend struct {
BackendOptions map[string]any `json:"backend_options,omitempty"`
}
@@ -200,6 +216,28 @@ func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.I
*c.IdleTimeout = 0
}
// Validate docker_enabled and command_override relationship
if c.DockerEnabled != nil && *c.DockerEnabled && c.CommandOverride != "" {
log.Printf("Instance %s: command_override cannot be set when docker_enabled is true, ignoring command_override", name)
c.CommandOverride = "" // Clear invalid configuration
}
// Validate command_override if set
if c.CommandOverride != "" {
if err := validation.ValidateStringForInjection(c.CommandOverride); err != nil {
log.Printf("Instance %s: invalid command_override: %v, clearing value", name, err)
c.CommandOverride = "" // Clear invalid value
}
}
// Validate docker_enabled for MLX backend
if c.BackendOptions.BackendType == backends.BackendTypeMlxLm {
if c.DockerEnabled != nil && *c.DockerEnabled {
log.Printf("Instance %s: docker_enabled is not supported for MLX backend, ignoring", name)
c.DockerEnabled = nil // Clear invalid configuration
}
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {

View File

@@ -132,14 +132,28 @@ func (p *process) stop() error {
p.restartCancel = nil
}
// Set status to stopped first to signal intentional stop
p.instance.SetStatus(Stopped)
// Set status to ShuttingDown first to reject new requests
p.instance.SetStatus(ShuttingDown)
// Get the monitor done channel before releasing the lock
monitorDone := p.monitorDone
p.mu.Unlock()
// Wait for inflight requests to complete (max 30 seconds)
log.Printf("Instance %s shutting down, waiting for inflight requests to complete...", p.instance.Name)
deadline := time.Now().Add(30 * time.Second)
for time.Now().Before(deadline) {
inflight := p.instance.GetInflightRequests()
if inflight == 0 {
break
}
time.Sleep(100 * time.Millisecond)
}
// Now set status to stopped to signal intentional stop
p.instance.SetStatus(Stopped)
// Stop the process with SIGINT if cmd exists
if p.cmd != nil && p.cmd.Process != nil {
if err := p.cmd.Process.Signal(syscall.SIGINT); err != nil {
@@ -156,6 +170,7 @@ func (p *process) stop() error {
select {
case <-monitorDone:
// Process exited normally
log.Printf("Instance %s shut down gracefully", p.instance.Name)
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if p.cmd != nil && p.cmd.Process != nil {

View File

@@ -37,8 +37,9 @@ type proxy struct {
proxyOnce sync.Once
proxyErr error
lastRequestTime atomic.Int64
timeProvider TimeProvider
lastRequestTime atomic.Int64
inflightRequests atomic.Int32
timeProvider TimeProvider
}
// newProxy creates a new Proxy for the given instance
@@ -153,6 +154,23 @@ func (p *proxy) build() (*httputil.ReverseProxy, error) {
return proxy, nil
}
// serveHTTP forwards a single request through the reverse proxy while
// counting it as inflight for the duration of the call.
//
// If the reverse proxy cannot be obtained, that error is returned and the
// inflight counter is left untouched.
func (p *proxy) serveHTTP(w http.ResponseWriter, r *http.Request) error {
	rp, err := p.get()
	if err != nil {
		return err
	}

	// Count the request only once we know it will actually be forwarded,
	// and release it again when the handler returns.
	p.inflightRequests.Add(1)
	defer p.inflightRequests.Add(-1)

	rp.ServeHTTP(w, r)
	return nil
}
// clear resets the proxy, allowing it to be recreated when options change.
func (p *proxy) clear() {
p.mu.Lock()
@@ -160,7 +178,7 @@ func (p *proxy) clear() {
p.proxy = nil
p.proxyErr = nil
p.proxyOnce = sync.Once{} // Reset Once for next GetProxy call
p.proxyOnce = sync.Once{}
}
// updateLastRequestTime updates the last request access time for the instance
@@ -199,3 +217,18 @@ func (p *proxy) shouldTimeout() bool {
func (p *proxy) setTimeProvider(tp TimeProvider) {
p.timeProvider = tp
}
// incInflightRequests atomically adds one to the inflight request counter.
// serveHTTP calls it just before forwarding a request.
func (p *proxy) incInflightRequests() {
p.inflightRequests.Add(1)
}
// decInflightRequests atomically subtracts one from the inflight request
// counter. serveHTTP defers it so the count drops when the request finishes.
func (p *proxy) decInflightRequests() {
p.inflightRequests.Add(-1)
}
// getInflightRequests returns an atomic snapshot of the number of requests
// currently being served through this proxy.
func (p *proxy) getInflightRequests() int32 {
return p.inflightRequests.Load()
}

View File

@@ -14,20 +14,23 @@ const (
Running
Failed
Restarting
ShuttingDown
)
var nameToStatus = map[string]Status{
"stopped": Stopped,
"running": Running,
"failed": Failed,
"restarting": Restarting,
"stopped": Stopped,
"running": Running,
"failed": Failed,
"restarting": Restarting,
"shutting_down": ShuttingDown,
}
var statusToName = map[Status]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
Restarting: "restarting",
Stopped: "stopped",
Running: "running",
Failed: "failed",
Restarting: "restarting",
ShuttingDown: "shutting_down",
}
// Status enum JSON marshaling methods

View File

@@ -10,7 +10,7 @@ import (
)
func TestInstanceTimeoutLogic(t *testing.T) {
testManager := createTestManager()
testManager := createTestManager(t)
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
@@ -42,7 +42,7 @@ func TestInstanceTimeoutLogic(t *testing.T) {
}
func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
testManager := createTestManager()
testManager := createTestManager(t)
defer testManager.Shutdown()
noTimeoutInst := createInstanceWithTimeout(t, testManager, "no-timeout-test", "/path/to/model.gguf", nil)
@@ -64,7 +64,7 @@ func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
@@ -121,7 +121,7 @@ func TestEvictLRUInstance_Success(t *testing.T) {
}
func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
err := manager.EvictLRUInstance()
@@ -134,7 +134,7 @@ func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
}
func TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"log"
"sync"
@@ -28,11 +29,11 @@ type InstanceManager interface {
type instanceManager struct {
// Components (each with own synchronization)
registry *instanceRegistry
ports *portAllocator
persistence *instancePersister
remote *remoteManager
lifecycle *lifecycleManager
registry *instanceRegistry
ports *portAllocator
db database.InstanceStore
remote *remoteManager
lifecycle *lifecycleManager
// Configuration
globalConfig *config.AppConfig
@@ -42,8 +43,8 @@ type instanceManager struct {
shutdownOnce sync.Once
}
// New creates a new instance of InstanceManager.
func New(globalConfig *config.AppConfig) InstanceManager {
// New creates a new instance of InstanceManager with dependency injection.
func New(globalConfig *config.AppConfig, db database.InstanceStore) InstanceManager {
if globalConfig.Instances.TimeoutCheckInterval <= 0 {
globalConfig.Instances.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
@@ -54,16 +55,7 @@ func New(globalConfig *config.AppConfig) InstanceManager {
// Initialize port allocator
portRange := globalConfig.Instances.PortRange
ports, err := newPortAllocator(portRange[0], portRange[1])
if err != nil {
log.Fatalf("Failed to create port allocator: %v", err)
}
// Initialize persistence
persistence, err := newInstancePersister(globalConfig.Instances.InstancesDir)
if err != nil {
log.Fatalf("Failed to create instance persister: %v", err)
}
ports := newPortAllocator(portRange[0], portRange[1])
// Initialize remote manager
remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)
@@ -72,7 +64,7 @@ func New(globalConfig *config.AppConfig) InstanceManager {
im := &instanceManager{
registry: registry,
ports: ports,
persistence: persistence,
db: db,
remote: remote,
globalConfig: globalConfig,
}
@@ -92,9 +84,9 @@ func New(globalConfig *config.AppConfig) InstanceManager {
return im
}
// persistInstance saves an instance using the persistence component
// persistInstance saves an instance using the persistence layer
func (im *instanceManager) persistInstance(inst *instance.Instance) error {
return im.persistence.save(inst)
return im.db.Save(inst)
}
func (im *instanceManager) Shutdown() {
@@ -116,19 +108,24 @@ func (im *instanceManager) Shutdown() {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", inst.Name)
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", inst.Name, err)
log.Printf("Error stopping instance %s: %v\n", inst.Name, err)
}
}(inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
// 4. Close database connection
if err := im.db.Close(); err != nil {
log.Printf("Error closing database: %v\n", err)
}
})
}
// loadInstances restores all instances from disk using the persistence component
// loadInstances restores all instances from the persistence layer
func (im *instanceManager) loadInstances() error {
// Load all instances from persistence
instances, err := im.persistence.loadAll()
instances, err := im.db.LoadAll()
if err != nil {
return fmt.Errorf("failed to load instances: %w", err)
}
@@ -262,7 +259,7 @@ func (im *instanceManager) autoStartInstances() {
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.Status) {
func (im *instanceManager) onStatusChange(name string, _, newStatus instance.Status) {
if newStatus == instance.Running {
im.registry.markRunning(name)
} else {

View File

@@ -4,20 +4,34 @@ import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"sync"
"testing"
"time"
)
func TestManager_PersistsAndLoadsInstances(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
// Use file-based database for this test since we need to persist across connections
appConfig.Database.Path = tempDir + "/test.db"
// Create instance and check file was created
manager1 := manager.New(appConfig)
// Create instance and check database was created
db1, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db1); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
manager1 := manager.New(appConfig, db1)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
@@ -28,18 +42,28 @@ func TestManager_PersistsAndLoadsInstances(t *testing.T) {
},
}
_, err := manager1.CreateInstance("test-instance", options)
_, err = manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Shutdown first manager to close database connection
manager1.Shutdown()
// Load instances from disk
manager2 := manager.New(appConfig)
// Load instances from database
db2, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db2); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
manager2 := manager.New(appConfig, db2)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
@@ -50,13 +74,29 @@ func TestManager_PersistsAndLoadsInstances(t *testing.T) {
if instances[0].Name != "test-instance" {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
manager2.Shutdown()
}
func TestDeleteInstance_RemovesPersistenceFile(t *testing.T) {
func TestDeleteInstance_RemovesFromDatabase(t *testing.T) {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
mgr := manager.New(appConfig)
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
mgr := manager.New(appConfig, db)
defer mgr.Shutdown()
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
@@ -67,25 +107,38 @@ func TestDeleteInstance_RemovesPersistenceFile(t *testing.T) {
},
}
_, err := mgr.CreateInstance("test-instance", options)
_, err = mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
expectedPath := filepath.Join(tempDir, "test-instance.json")
// Verify instance exists
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Fatalf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = mgr.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
// Verify instance was deleted from database
instances, err = mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances after deletion, got %d", len(instances))
}
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
// Test concurrent operations
@@ -113,7 +166,7 @@ func TestConcurrentAccess(t *testing.T) {
}
// Concurrent list operations
for i := 0; i < 3; i++ {
for range 3 {
wg.Add(1)
go func() {
defer wg.Done()
@@ -134,16 +187,17 @@ func TestConcurrentAccess(t *testing.T) {
// Helper functions for test configuration
func createTestAppConfig(instancesDir string) *config.AppConfig {
// Use 'sleep' as a test command instead of 'llama-server'
// This allows tests to run in CI environments without requiring actual LLM binaries
// The sleep command will be invoked with model paths and other args, which it ignores
// Use 'sh -c "sleep 999999"' as a test command instead of 'llama-server'
// The shell ignores all additional arguments passed after the command
return &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sleep",
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
MLX: config.BackendSettings{
Command: "sleep",
Command: "sh",
Args: []string{"-c", "sleep 999999"},
},
},
Instances: config.InstancesConfig{
@@ -157,33 +211,31 @@ func createTestAppConfig(instancesDir string) *config.AppConfig {
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
},
Database: config.DatabaseConfig{
Path: ":memory:",
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
}
func createTestManager() manager.InstanceManager {
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
Command: "sleep",
},
MLX: config.BackendSettings{
Command: "sleep",
},
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
MaxRunningInstances: 10,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
func createTestManager(t *testing.T) manager.InstanceManager {
tempDir := t.TempDir()
appConfig := createTestAppConfig(tempDir)
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
return manager.New(appConfig)
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
return manager.New(appConfig, db)
}

View File

@@ -317,9 +317,9 @@ func (im *instanceManager) DeleteInstance(name string) error {
im.remote.removeInstance(name)
im.registry.remove(name)
// Delete the instance's persistence file
if err := im.persistence.delete(name); err != nil {
return fmt.Errorf("failed to delete config file for remote instance %s: %w", name, err)
// Delete the instance's persistence
if err := im.db.Delete(name); err != nil {
return fmt.Errorf("failed to delete remote instance %s: %w", name, err)
}
return nil
@@ -330,7 +330,8 @@ func (im *instanceManager) DeleteInstance(name string) error {
lock.Lock()
defer im.unlockAndCleanup(name)
if inst.IsRunning() {
status := inst.GetStatus()
if status == instance.Running || status == instance.Restarting {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
@@ -342,9 +343,9 @@ func (im *instanceManager) DeleteInstance(name string) error {
return fmt.Errorf("failed to remove instance from registry: %w", err)
}
// Delete persistence file
if err := im.persistence.delete(name); err != nil {
return fmt.Errorf("failed to delete config file for instance %s: %w", name, err)
// Delete from persistence
if err := im.db.Delete(name); err != nil {
return fmt.Errorf("failed to delete instance from persistence %s: %w", name, err)
}
return nil

View File

@@ -3,14 +3,16 @@ package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
"time"
)
func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
mngr := createTestManager()
mngr := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
@@ -36,6 +38,7 @@ func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
}
func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
tempDir := t.TempDir()
appConfig := &config.AppConfig{
Backends: config.BackendConfig{
LlamaCpp: config.BackendSettings{
@@ -44,13 +47,32 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
},
Instances: config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
},
Database: config.DatabaseConfig{
Path: ":memory:",
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
LocalNode: "main",
Nodes: map[string]config.NodeConfig{},
}
limitedManager := manager.New(appConfig)
db, err := database.Open(&database.Config{
Path: appConfig.Database.Path,
MaxOpenConnections: appConfig.Database.MaxOpenConnections,
MaxIdleConnections: appConfig.Database.MaxIdleConnections,
ConnMaxLifetime: appConfig.Database.ConnMaxLifetime,
})
if err != nil {
t.Fatalf("Failed to open database: %v", err)
}
if err := database.RunMigrations(db); err != nil {
t.Fatalf("Failed to run migrations: %v", err)
}
limitedManager := manager.New(appConfig, db)
options := &instance.Options{
BackendOptions: backends.Options{
@@ -61,7 +83,7 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
},
}
_, err := limitedManager.CreateInstance("instance1", options)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
@@ -77,7 +99,7 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
}
func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
options1 := &instance.Options{
BackendOptions: backends.Options{
@@ -115,7 +137,7 @@ func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
}
func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
manager := createTestManager()
manager := createTestManager(t)
options := &instance.Options{
BackendOptions: backends.Options{
@@ -143,7 +165,7 @@ func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
}
func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
@@ -155,15 +177,13 @@ func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
},
}
_, err := mgr.CreateInstance("test-instance", options)
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
_, err = mgr.StartInstance("test-instance")
if err != nil {
t.Fatalf("StartInstance failed: %v", err)
}
// Simulate starting the instance
inst.SetStatus(instance.Running)
// Should fail to delete running instance
err = mgr.DeleteInstance("test-instance")
@@ -173,7 +193,7 @@ func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
}
func TestUpdateInstance(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{
@@ -186,14 +206,14 @@ func TestUpdateInstance(t *testing.T) {
},
}
_, err := mgr.CreateInstance("test-instance", options)
inst, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
_, err = mgr.StartInstance("test-instance")
if err != nil {
t.Fatalf("StartInstance failed: %v", err)
// Start the instance (runs the placeholder 'sh -c "sleep 999999"' command from the test config)
if err := inst.Start(); err != nil {
t.Fatalf("Failed to start instance: %v", err)
}
// Update running instance with new model
@@ -212,9 +232,9 @@ func TestUpdateInstance(t *testing.T) {
t.Fatalf("UpdateInstance failed: %v", err)
}
// Should still be running after update
// Should be running after update (was running before, should be restarted)
if !updated.IsRunning() {
t.Error("Instance should be running after update")
t.Errorf("Instance should be running after update, got: %v", updated.GetStatus())
}
if updated.GetOptions().BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
@@ -223,7 +243,7 @@ func TestUpdateInstance(t *testing.T) {
}
func TestUpdateInstance_ReleasesOldPort(t *testing.T) {
mgr := createTestManager()
mgr := createTestManager(t)
defer mgr.Shutdown()
options := &instance.Options{

View File

@@ -1,223 +0,0 @@
package manager
import (
"encoding/json"
"fmt"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
)
// instancePersister provides atomic file-based persistence with durability guarantees.
// Each instance is stored as "<name>.json" inside instancesDir; writes go to a
// temporary file that is synced and then renamed over the final path.
type instancePersister struct {
// mu serializes file operations performed by the persister.
mu sync.Mutex
// instancesDir is the directory holding the per-instance JSON files.
instancesDir string
// enabled is false when the persister was created with an empty directory;
// save then returns nil without writing anything.
enabled bool
}
// newInstancePersister builds the persister used to store instances on disk.
//
// An empty instancesDir yields a disabled persister. Otherwise the directory
// is created (including parents) if it does not exist yet, and an error is
// returned when that creation fails.
func newInstancePersister(instancesDir string) (*instancePersister, error) {
	persister := &instancePersister{}

	// No directory configured: hand back the zero-valued, disabled persister.
	if instancesDir == "" {
		return persister, nil
	}

	// Ensure the instances directory exists
	if mkErr := os.MkdirAll(instancesDir, 0755); mkErr != nil {
		return nil, fmt.Errorf("failed to create instances directory: %w", mkErr)
	}

	persister.instancesDir = instancesDir
	persister.enabled = true
	return persister, nil
}
// save writes inst to "<instancesDir>/<name>.json".
//
// The JSON is first written to "<name>.json.tmp", synced, closed and then
// renamed over the final path so the final file is replaced in one step.
// Any failure after the temp file was created removes the temp file again.
// Returns nil without doing anything when persistence is disabled; returns an
// error for a nil instance, an invalid instance name, or any I/O failure.
func (p *instancePersister) save(inst *instance.Instance) error {
	if !p.enabled {
		return nil
	}
	if inst == nil {
		return fmt.Errorf("cannot save nil instance")
	}

	// Reject names that could escape the instances directory.
	safeName, err := p.validateInstanceName(inst.Name)
	if err != nil {
		return err
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	finalPath := filepath.Join(p.instancesDir, safeName+".json")
	scratchPath := finalPath + ".tmp"

	payload, err := json.MarshalIndent(inst, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal instance %s: %w", inst.Name, err)
	}

	scratch, err := os.OpenFile(scratchPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return fmt.Errorf("failed to create temp file for instance %s: %w", inst.Name, err)
	}

	// abort discards the temp file; pass the open handle when it still
	// needs closing, nil once it has been closed (or closing failed).
	abort := func(open *os.File) {
		if open != nil {
			open.Close()
		}
		os.Remove(scratchPath)
	}

	if _, err = scratch.Write(payload); err != nil {
		abort(scratch)
		return fmt.Errorf("failed to write temp file for instance %s: %w", inst.Name, err)
	}

	// Flush file contents to disk before the rename makes them visible.
	if err = scratch.Sync(); err != nil {
		abort(scratch)
		return fmt.Errorf("failed to sync temp file for instance %s: %w", inst.Name, err)
	}

	if err = scratch.Close(); err != nil {
		abort(nil)
		return fmt.Errorf("failed to close temp file for instance %s: %w", inst.Name, err)
	}

	// Swap the finished temp file into place.
	if err = os.Rename(scratchPath, finalPath); err != nil {
		abort(nil)
		return fmt.Errorf("failed to rename temp file for instance %s: %w", inst.Name, err)
	}

	return nil
}
// delete removes "<instancesDir>/<name>.json".
//
// A missing file counts as success. Returns nil without doing anything when
// persistence is disabled; returns an error when the name fails validation
// or the file exists but cannot be removed.
func (p *instancePersister) delete(name string) error {
	if !p.enabled {
		return nil
	}

	safeName, err := p.validateInstanceName(name)
	if err != nil {
		return err
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	target := filepath.Join(p.instancesDir, safeName+".json")
	rmErr := os.Remove(target)
	if rmErr == nil || os.IsNotExist(rmErr) {
		// Removed, or there was nothing to remove.
		return nil
	}
	return fmt.Errorf("failed to delete instance file for %s: %w", name, rmErr)
}
// loadAll reads every "*.json" file in the instances directory and returns
// the instances that could be decoded.
//
// Files that fail to load are logged and skipped; they do not make loadAll
// fail. The returned error is non-nil only when the directory itself cannot
// be read. Returns (nil, nil) when persistence is disabled or the directory
// does not exist.
func (p *instancePersister) loadAll() ([]*instance.Instance, error) {
	if !p.enabled {
		return nil, nil
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	// A missing directory simply means nothing has been persisted yet.
	if _, statErr := os.Stat(p.instancesDir); os.IsNotExist(statErr) {
		return nil, nil
	}

	entries, err := os.ReadDir(p.instancesDir)
	if err != nil {
		return nil, fmt.Errorf("failed to read instances directory: %w", err)
	}

	loaded := []*instance.Instance{}
	failures := 0

	for _, entry := range entries {
		fileName := entry.Name()
		if entry.IsDir() || !strings.HasSuffix(fileName, ".json") {
			continue
		}

		// The file name (minus extension) is the expected instance name.
		expectedName := strings.TrimSuffix(fileName, ".json")
		inst, loadErr := p.loadInstanceFile(expectedName, filepath.Join(p.instancesDir, fileName))
		if loadErr != nil {
			log.Printf("Failed to load instance %s: %v", expectedName, loadErr)
			failures++
			continue
		}
		loaded = append(loaded, inst)
	}

	switch {
	case failures > 0:
		log.Printf("Loaded %d instances with %d errors", len(loaded), failures)
	case len(loaded) > 0:
		log.Printf("Loaded %d instances from persistence", len(loaded))
	}

	return loaded, nil
}
// loadInstanceFile reads the JSON file at path and decodes it into an
// instance. The decoded instance's Name must equal name (the file name
// without its extension); a mismatch is reported as an error.
// It takes no lock itself; loadAll calls it while holding p.mu.
func (p *instancePersister) loadInstanceFile(name, path string) (*instance.Instance, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read instance file: %w", readErr)
	}

	decoded := new(instance.Instance)
	if decodeErr := json.Unmarshal(raw, decoded); decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal instance: %w", decodeErr)
	}

	// Guard against files that were renamed or copied by hand.
	if decoded.Name != name {
		return nil, fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, decoded.Name)
	}

	return decoded, nil
}
// validateInstanceName checks that name can safely be used as a file name
// inside the instances directory and returns it unchanged when it can.
//
// Rejected: the empty string, names containing '/', '\\' or "..", and names
// beginning with '.'.
func (p *instancePersister) validateInstanceName(name string) (string, error) {
	switch {
	case name == "":
		return "", fmt.Errorf("instance name cannot be empty")

	// Separators and ".." would allow escaping the instances directory.
	case strings.ContainsAny(name, "/\\") || strings.Contains(name, ".."):
		return "", fmt.Errorf("invalid instance name: %s (cannot contain path separators or '..')", name)

	// A leading dot would create a hidden file.
	case strings.HasPrefix(name, "."):
		return "", fmt.Errorf("invalid instance name: %s (cannot start with '.')", name)
	}

	return name, nil
}

View File

@@ -24,15 +24,7 @@ type portAllocator struct {
}
// newPortAllocator creates a new port allocator for the given port range.
// Returns an error if the port range is invalid.
func newPortAllocator(minPort, maxPort int) (*portAllocator, error) {
if minPort <= 0 || maxPort <= 0 {
return nil, fmt.Errorf("invalid port range: min=%d, max=%d (must be > 0)", minPort, maxPort)
}
if minPort > maxPort {
return nil, fmt.Errorf("invalid port range: min=%d > max=%d", minPort, maxPort)
}
func newPortAllocator(minPort, maxPort int) *portAllocator {
rangeSize := maxPort - minPort + 1
bitmapSize := (rangeSize + 63) / 64 // Round up to nearest uint64
@@ -42,7 +34,7 @@ func newPortAllocator(minPort, maxPort int) (*portAllocator, error) {
minPort: minPort,
maxPort: maxPort,
rangeSize: rangeSize,
}, nil
}
}
// allocate finds and allocates the first available port for the given instance.

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/database"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"llamactl/pkg/validation"
@@ -52,20 +53,25 @@ type Handler struct {
InstanceManager manager.InstanceManager
cfg config.AppConfig
httpClient *http.Client
authStore database.AuthStore
authMiddleware *APIAuthMiddleware
}
// NewHandler creates a new Handler instance with the provided instance manager and configuration
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
return &Handler{
func NewHandler(im manager.InstanceManager, cfg config.AppConfig, authStore database.AuthStore) *Handler {
handler := &Handler{
InstanceManager: im,
cfg: cfg,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
authStore: authStore,
}
handler.authMiddleware = NewAPIAuthMiddleware(cfg.Auth, authStore)
return handler
}
// getInstance retrieves an instance by name from the request query parameters
// getInstance retrieves an instance by name from request query parameters
func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
name := chi.URLParam(r, "name")
validatedName, err := validation.ValidateInstanceName(name)
@@ -81,7 +87,7 @@ func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
return inst, nil
}
// ensureInstanceRunning ensures the instance is running by starting it if on-demand start is enabled
// ensureInstanceRunning ensures that an instance is running by starting it if on-demand start is enabled
// It handles LRU eviction when the maximum number of running instances is reached
func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
options := inst.GetOptions()

369
pkg/server/handlers_auth.go Normal file
View File

@@ -0,0 +1,369 @@
package server
import (
"encoding/json"
"fmt"
"llamactl/pkg/auth"
"net/http"
"strconv"
"time"
"github.com/go-chi/chi/v5"
)
// InstancePermission defines the permissions for an API key on a specific instance.
// It is the per-instance entry of CreateKeyRequest.InstancePermissions and is
// converted to an auth.KeyPermission when the key is created.
type InstancePermission struct {
// InstanceID identifies the instance; CreateKey checks it against the IDs
// returned by the instance manager when the permission mode is per-instance.
InstanceID int `json:"instance_id"`
// CanInfer is copied to the stored key permission's CanInfer field.
CanInfer bool `json:"can_infer"`
}
// CreateKeyRequest represents the request body for creating a new API key.
//
// NOTE(review): unlike the other structs in this file, the fields carry no
// json tags. encoding/json therefore matches body keys against the Go field
// names (case-insensitively), so snake_case keys such as "permission_mode"
// or "expires_at" would not populate these fields. Confirm which key
// spelling the clients actually send before adding tags.
type CreateKeyRequest struct {
// Name is required; CreateKey rejects an empty name or one longer than 100.
Name string
// PermissionMode must be auth.PermissionModeAllowAll or
// auth.PermissionModePerInstance.
PermissionMode auth.PermissionMode
// ExpiresAt is optional; when set it is compared against
// time.Now().Unix() and must lie in the future.
ExpiresAt *int64
// InstancePermissions must be non-empty when PermissionMode is
// per-instance.
InstancePermissions []InstancePermission
}
// CreateKeyResponse represents the response returned when creating a new API key.
// It carries the stored key's metadata plus the plain-text key, which
// CreateKey returns only in this response.
type CreateKeyResponse struct {
ID int `json:"id"`
Name string `json:"name"`
// UserID is set to "system" by CreateKey.
UserID string `json:"user_id"`
PermissionMode auth.PermissionMode `json:"permission_mode"`
// ExpiresAt echoes the requested expiry; null when none was requested.
ExpiresAt *int64 `json:"expires_at"`
Enabled bool `json:"enabled"`
// CreatedAt and UpdatedAt are Unix timestamps in seconds (time.Now().Unix()).
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
LastUsedAt *int64 `json:"last_used_at"`
// Key is the generated plain-text API key.
Key string `json:"key"`
}
// KeyResponse represents an API key in responses for list and get operations.
// It copies the stored key's metadata field by field and deliberately has no
// field for the key hash or the plain-text key.
type KeyResponse struct {
ID int `json:"id"`
Name string `json:"name"`
UserID string `json:"user_id"`
PermissionMode auth.PermissionMode `json:"permission_mode"`
// ExpiresAt is serialized as null when the key has no expiry set.
ExpiresAt *int64 `json:"expires_at"`
Enabled bool `json:"enabled"`
CreatedAt int64 `json:"created_at"`
UpdatedAt int64 `json:"updated_at"`
// LastUsedAt is serialized as null when unset — presumably until the key
// is first used; confirm against the auth store.
LastUsedAt *int64 `json:"last_used_at"`
}
// KeyPermissionResponse represents the permissions for an API key on a specific instance.
type KeyPermissionResponse struct {
InstanceID int `json:"instance_id"`
// InstanceName is looked up from the instance manager's current instance
// list; it is the empty string when no listed instance has this ID.
InstanceName string `json:"instance_name"`
CanInfer bool `json:"can_infer"`
}
// CreateKey godoc
// @Summary Create a new API key
// @Description Creates a new API key with the specified permissions and returns the plain-text key (only shown once)
// @Tags Keys
// @Accept json
// @Produce json
// @Param key body CreateKeyRequest true "API key configuration"
// @Success 201 {object} CreateKeyResponse "Created API key with plain-text key"
// @Failure 400 {string} string "Invalid request body or validation error"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys [post]
func (h *Handler) CreateKey() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var req CreateKeyRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			writeError(w, http.StatusBadRequest, "invalid_json", "Invalid JSON in request body")
			return
		}

		// Validate request
		if req.Name == "" {
			writeError(w, http.StatusBadRequest, "invalid_name", "Name is required")
			return
		}
		// Count runes rather than bytes so the limit matches the
		// "characters" wording of the error; len(req.Name) would reject
		// names made of multi-byte characters well before 100 characters.
		if len([]rune(req.Name)) > 100 {
			writeError(w, http.StatusBadRequest, "invalid_name", "Name must be 100 characters or less")
			return
		}
		if req.PermissionMode != auth.PermissionModeAllowAll && req.PermissionMode != auth.PermissionModePerInstance {
			writeError(w, http.StatusBadRequest, "invalid_permission_mode", "Permission mode must be 'allow_all' or 'per_instance'")
			return
		}
		if req.PermissionMode == auth.PermissionModePerInstance && len(req.InstancePermissions) == 0 {
			writeError(w, http.StatusBadRequest, "missing_permissions", "Instance permissions required when permission mode is 'per_instance'")
			return
		}
		if req.ExpiresAt != nil && *req.ExpiresAt <= time.Now().Unix() {
			writeError(w, http.StatusBadRequest, "invalid_expires_at", "Expiration time must be in future")
			return
		}

		// Validate instance IDs exist
		if req.PermissionMode == auth.PermissionModePerInstance {
			instances, err := h.InstanceManager.ListInstances()
			if err != nil {
				writeError(w, http.StatusInternalServerError, "fetch_instances_failed", fmt.Sprintf("Failed to fetch instances: %v", err))
				return
			}
			instanceIDMap := make(map[int]bool)
			for _, inst := range instances {
				instanceIDMap[inst.ID] = true
			}
			for _, perm := range req.InstancePermissions {
				if !instanceIDMap[perm.InstanceID] {
					writeError(w, http.StatusBadRequest, "invalid_instance_id", fmt.Sprintf("Instance ID %d does not exist", perm.InstanceID))
					return
				}
			}
		}

		// Generate plain-text key
		plainTextKey, err := auth.GenerateKey("llamactl")
		if err != nil {
			writeError(w, http.StatusInternalServerError, "key_generation_failed", "Failed to generate API key")
			return
		}

		// Only the hash is stored; the plain-text key is returned once below.
		keyHash, err := auth.HashKey(plainTextKey)
		if err != nil {
			writeError(w, http.StatusInternalServerError, "key_hashing_failed", "Failed to hash API key")
			return
		}

		// Create APIKey struct
		now := time.Now().Unix()
		apiKey := &auth.APIKey{
			KeyHash:        keyHash,
			Name:           req.Name,
			UserID:         "system",
			PermissionMode: req.PermissionMode,
			ExpiresAt:      req.ExpiresAt,
			Enabled:        true,
			CreatedAt:      now,
			UpdatedAt:      now,
		}

		// Convert InstancePermissions to KeyPermissions
		var keyPermissions []auth.KeyPermission
		for _, perm := range req.InstancePermissions {
			keyPermissions = append(keyPermissions, auth.KeyPermission{
				KeyID:      0, // Will be set by database after key creation
				InstanceID: perm.InstanceID,
				CanInfer:   perm.CanInfer,
			})
		}

		// Create in database
		err = h.authStore.CreateKey(r.Context(), apiKey, keyPermissions)
		if err != nil {
			writeError(w, http.StatusInternalServerError, "creation_failed", fmt.Sprintf("Failed to create API key: %v", err))
			return
		}

		// Return response with plain-text key (only shown once)
		response := CreateKeyResponse{
			ID:             apiKey.ID,
			Name:           apiKey.Name,
			UserID:         apiKey.UserID,
			PermissionMode: apiKey.PermissionMode,
			ExpiresAt:      apiKey.ExpiresAt,
			Enabled:        apiKey.Enabled,
			CreatedAt:      apiKey.CreatedAt,
			UpdatedAt:      apiKey.UpdatedAt,
			LastUsedAt:     apiKey.LastUsedAt,
			Key:            plainTextKey,
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusCreated)
		json.NewEncoder(w).Encode(response)
	}
}
// ListKeys godoc
// @Summary List all API keys
// @Description Returns a list of all API keys for the system user (excludes key hash and plain-text key)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Success 200 {array} KeyResponse "List of API keys"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys [get]
func (h *Handler) ListKeys() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		stored, fetchErr := h.authStore.GetUserKeys(r.Context(), "system")
		if fetchErr != nil {
			writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API keys: %v", fetchErr))
			return
		}

		// Project each stored key onto KeyResponse, which has no field for
		// the key hash, so the hash never reaches the client.
		listing := make([]KeyResponse, len(stored))
		for i, k := range stored {
			listing[i] = KeyResponse{
				ID:             k.ID,
				Name:           k.Name,
				UserID:         k.UserID,
				PermissionMode: k.PermissionMode,
				ExpiresAt:      k.ExpiresAt,
				Enabled:        k.Enabled,
				CreatedAt:      k.CreatedAt,
				UpdatedAt:      k.UpdatedAt,
				LastUsedAt:     k.LastUsedAt,
			}
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(listing)
	}
}
// GetKey godoc
// @Summary Get details of a specific API key
// @Description Returns details for a specific API key by ID (excludes key hash and plain-text key)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Param id path int true "Key ID"
// @Success 200 {object} KeyResponse "API key details"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id} [get]
func (h *Handler) GetKey() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		keyID, convErr := strconv.Atoi(chi.URLParam(r, "id"))
		if convErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
			return
		}

		stored, lookupErr := h.authStore.GetKeyByID(r.Context(), keyID)
		switch {
		case lookupErr == nil:
			// Found; fall through to build the response.
		case lookupErr.Error() == "API key not found":
			// The store's not-found condition is recognised by its exact message text.
			writeError(w, http.StatusNotFound, "not_found", "API key not found")
			return
		default:
			writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API key: %v", lookupErr))
			return
		}

		// KeyResponse has no hash field, so the hash is left out by construction.
		payload := KeyResponse{
			ID:             stored.ID,
			Name:           stored.Name,
			UserID:         stored.UserID,
			PermissionMode: stored.PermissionMode,
			ExpiresAt:      stored.ExpiresAt,
			Enabled:        stored.Enabled,
			CreatedAt:      stored.CreatedAt,
			UpdatedAt:      stored.UpdatedAt,
			LastUsedAt:     stored.LastUsedAt,
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(payload)
	}
}
// DeleteKey godoc
// @Summary Delete an API key
// @Description Deletes an API key by ID
// @Tags Keys
// @Security ApiKeyAuth
// @Param id path int true "Key ID"
// @Success 204 "API key deleted successfully"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id} [delete]
func (h *Handler) DeleteKey() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		keyID, convErr := strconv.Atoi(chi.URLParam(r, "id"))
		if convErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
			return
		}

		if delErr := h.authStore.DeleteKey(r.Context(), keyID); delErr != nil {
			// The store's not-found condition is recognised by its exact message text.
			if delErr.Error() == "API key not found" {
				writeError(w, http.StatusNotFound, "not_found", "API key not found")
			} else {
				writeError(w, http.StatusInternalServerError, "deletion_failed", fmt.Sprintf("Failed to delete API key: %v", delErr))
			}
			return
		}

		// Success carries no body.
		w.WriteHeader(http.StatusNoContent)
	}
}
// GetKeyPermissions godoc
// @Summary Get API key permissions
// @Description Returns the instance-level permissions for a specific API key (includes instance names)
// @Tags Keys
// @Security ApiKeyAuth
// @Produce json
// @Param id path int true "Key ID"
// @Success 200 {array} KeyPermissionResponse "List of key permissions"
// @Failure 400 {string} string "Invalid key ID"
// @Failure 404 {string} string "API key not found"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/auth/keys/{id}/permissions [get]
func (h *Handler) GetKeyPermissions() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()

		keyID, convErr := strconv.Atoi(chi.URLParam(r, "id"))
		if convErr != nil {
			writeError(w, http.StatusBadRequest, "invalid_id", "Invalid key ID")
			return
		}

		// Look the key up first so an unknown ID yields 404 rather than an
		// empty permission list.
		if _, lookupErr := h.authStore.GetKeyByID(ctx, keyID); lookupErr != nil {
			if lookupErr.Error() == "API key not found" {
				writeError(w, http.StatusNotFound, "not_found", "API key not found")
			} else {
				writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch API key: %v", lookupErr))
			}
			return
		}

		perms, permErr := h.authStore.GetPermissions(ctx, keyID)
		if permErr != nil {
			writeError(w, http.StatusInternalServerError, "fetch_failed", fmt.Sprintf("Failed to fetch permissions: %v", permErr))
			return
		}

		instances, listErr := h.InstanceManager.ListInstances()
		if listErr != nil {
			writeError(w, http.StatusInternalServerError, "fetch_instances_failed", fmt.Sprintf("Failed to fetch instances: %v", listErr))
			return
		}

		// Index instance names by ID so each permission can be labelled.
		nameByID := make(map[int]string)
		for _, inst := range instances {
			nameByID[inst.ID] = inst.Name
		}

		out := make([]KeyPermissionResponse, len(perms))
		for i, perm := range perms {
			out[i] = KeyPermissionResponse{
				InstanceID: perm.InstanceID,
				// Empty when the instance is not in the current listing.
				InstanceName: nameByID[perm.InstanceID],
				CanInfer:     perm.CanInfer,
			}
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(out)
	}
}

View File

@@ -66,17 +66,16 @@ func (h *Handler) LlamaCppUIProxy() http.HandlerFunc {
return
}
proxy, err := inst.GetProxy()
if err != nil {
writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error())
return
}
if !inst.IsRemote() {
h.stripLlamaCppPrefix(r, inst.Name)
}
proxy.ServeHTTP(w, r)
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
@@ -110,6 +109,18 @@ func (h *Handler) LlamaCppProxy() http.HandlerFunc {
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
@@ -118,17 +129,16 @@ func (h *Handler) LlamaCppProxy() http.HandlerFunc {
}
}
proxy, err := inst.GetProxy()
if err != nil {
writeError(w, http.StatusInternalServerError, "failed to get proxy", err.Error())
return
}
if !inst.IsRemote() {
h.stripLlamaCppPrefix(r, inst.Name)
}
proxy.ServeHTTP(w, r)
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}

View File

@@ -327,14 +327,14 @@ func (h *Handler) InstanceProxy() http.HandlerFunc {
return
}
if !inst.IsRunning() {
writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
proxy, err := inst.GetProxy()
if err != nil {
writeError(w, http.StatusInternalServerError, "proxy_failed", "Failed to get proxy: "+err.Error())
if !inst.IsRunning() {
writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
return
}
@@ -348,6 +348,11 @@ func (h *Handler) InstanceProxy() http.HandlerFunc {
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
proxy.ServeHTTP(w, r)
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/json"
"io"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"net/http"
)
@@ -106,6 +107,18 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
@@ -114,16 +127,15 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
}
}
proxy, err := inst.GetProxy()
if err != nil {
writeError(w, http.StatusInternalServerError, "proxy_failed", err.Error())
return
}
// Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes))
proxy.ServeHTTP(w, r)
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}

View File

@@ -20,3 +20,23 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
writeText(w, http.StatusOK, versionInfo)
}
}
// ConfigHandler godoc
// @Summary Get server configuration
// @Description Returns the current server configuration (sanitized)
// @Tags System
// @Security ApiKeyAuth
// @Produce json
// @Success 200 {object} config.AppConfig "Sanitized configuration"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/config [get]
func (h *Handler) ConfigHandler() http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Respond with the copy produced by SanitizedCopy rather than h.cfg itself.
		sanitizedConfig, err := h.cfg.SanitizedCopy()
		if err != nil {
			writeError(w, http.StatusInternalServerError, "sanitized_copy_error", "Failed to get sanitized config")
			return
		}
		writeJSON(w, http.StatusOK, sanitizedConfig)
	}
}

View File

@@ -1,107 +1,76 @@
package server
import (
"crypto/rand"
"context"
"crypto/subtle"
"encoding/hex"
"fmt"
"llamactl/pkg/auth"
"llamactl/pkg/config"
"llamactl/pkg/database"
"log"
"net/http"
"os"
"strings"
"time"
)
type KeyType int
// contextKey is a custom type for context keys to avoid collisions
type contextKey string
const (
KeyTypeInference KeyType = iota
KeyTypeManagement
apiKeyContextKey contextKey = "apiKey"
)
type APIAuthMiddleware struct {
authStore database.AuthStore
requireInferenceAuth bool
inferenceKeys map[string]bool
requireManagementAuth bool
managementKeys map[string]bool
managementKeys map[string]bool // Config-based management keys
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(authCfg config.AuthConfig) *APIAuthMiddleware {
func NewAPIAuthMiddleware(authCfg config.AuthConfig, authStore database.AuthStore) *APIAuthMiddleware {
// Load management keys from config into managementKeys map
managementKeys := make(map[string]bool)
for _, key := range authCfg.ManagementKeys {
managementKeys[key] = true
}
// Handle legacy auto-generation for management keys if none provided and auth is required
var generated bool = false
inferenceAPIKeys := make(map[string]bool)
managementAPIKeys := make(map[string]bool)
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
key := generateAPIKey(KeyTypeManagement)
managementAPIKeys[key] = true
key, err := auth.GenerateKey("llamactl-mgmt")
if err != nil {
log.Printf("Warning: Failed to generate management key: %v", err)
// Fallback to PID-based key for safety
key = fmt.Sprintf("sk-management-fallback-%d", os.Getpid())
}
managementKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
for _, key := range authCfg.ManagementKeys {
managementAPIKeys[key] = true
}
if authCfg.RequireInferenceAuth && len(authCfg.InferenceKeys) == 0 {
key := generateAPIKey(KeyTypeInference)
inferenceAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
}
for _, key := range authCfg.InferenceKeys {
inferenceAPIKeys[key] = true
}
if generated {
fmt.Printf("%s\n⚠ IMPORTANT\n%s\n", banner, banner)
fmt.Println("• These keys are auto-generated and will change on restart")
fmt.Println("• This key is auto-generated and will change on restart")
fmt.Println("• For production, add explicit keys to your configuration")
fmt.Println("• Copy these keys before they disappear from the terminal")
fmt.Println("• Copy this key before it disappears from the terminal")
fmt.Println(banner)
}
return &APIAuthMiddleware{
authStore: authStore,
requireInferenceAuth: authCfg.RequireInferenceAuth,
inferenceKeys: inferenceAPIKeys,
requireManagementAuth: authCfg.RequireManagementAuth,
managementKeys: managementAPIKeys,
managementKeys: managementKeys,
}
}
// generateAPIKey creates a cryptographically secure API key
func generateAPIKey(keyType KeyType) string {
// Generate 32 random bytes (256 bits)
randomBytes := make([]byte, 32)
var prefix string
switch keyType {
case KeyTypeInference:
prefix = "sk-inference"
case KeyTypeManagement:
prefix = "sk-management"
default:
prefix = "sk-unknown"
}
if _, err := rand.Read(randomBytes); err != nil {
log.Printf("Warning: Failed to generate secure random key, using fallback")
// Fallback to a less secure method if crypto/rand fails
return fmt.Sprintf("%s-fallback-%d", prefix, os.Getpid())
}
// Convert to hex and add prefix
return fmt.Sprintf("%s-%s", prefix, hex.EncodeToString(randomBytes))
}
// AuthMiddleware returns a middleware that checks API keys for the given key type
func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) http.Handler {
// InferenceAuthMiddleware returns middleware for inference endpoints
func (a *APIAuthMiddleware) InferenceAuthMiddleware() func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
@@ -109,24 +78,74 @@ func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) h
return
}
// Extract API key from request
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
var isValid bool
switch keyType {
case KeyTypeInference:
// Management keys also work for OpenAI endpoints (higher privilege)
isValid = a.isValidKey(apiKey, KeyTypeInference) || a.isValidKey(apiKey, KeyTypeManagement)
case KeyTypeManagement:
isValid = a.isValidKey(apiKey, KeyTypeManagement)
default:
isValid = false
// Try database authentication first
var foundKey *auth.APIKey
if a.requireInferenceAuth && a.authStore != nil {
activeKeys, err := a.authStore.GetActiveKeys(r.Context())
if err != nil {
log.Printf("Failed to get active inference keys: %v", err)
// Continue to management key fallback
} else {
for _, key := range activeKeys {
if auth.VerifyKey(apiKey, key.KeyHash) {
foundKey = key
// Async update last_used_at
go func(keyID int) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := a.authStore.TouchKey(ctx, keyID); err != nil {
log.Printf("Failed to update last used timestamp for key %d: %v", keyID, err)
}
}(key.ID)
break
}
}
}
}
if !isValid {
// If no database key found, try management key authentication (config-based)
if foundKey == nil {
if !a.isValidManagementKey(apiKey) {
a.unauthorized(w, "Invalid API key")
return
}
// Management key was used, continue without adding APIKey to context
} else {
// Add APIKey to context for permission checking
ctx := context.WithValue(r.Context(), apiKeyContextKey, foundKey)
r = r.WithContext(ctx)
}
next.ServeHTTP(w, r)
})
}
}
// ManagementAuthMiddleware returns middleware for management endpoints
func (a *APIAuthMiddleware) ManagementAuthMiddleware() func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
next.ServeHTTP(w, r)
return
}
// Extract API key from request
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
// Check if key exists in managementKeys map using constant-time comparison
if !a.isValidManagementKey(apiKey) {
a.unauthorized(w, "Invalid API key")
return
}
@@ -136,6 +155,33 @@ func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) h
}
}
// CheckInstancePermission decides whether the request described by ctx may
// use the instance with the given ID.
//
// Access is granted (nil is returned) when the context carries no
// *auth.APIKey, which is the case when a config-based management key
// authenticated the request, or when the key's permission mode is allow_all.
// Otherwise the auth store is asked for a per-instance permission; an error
// is returned when that lookup fails or reports no access.
func (a *APIAuthMiddleware) CheckInstancePermission(ctx context.Context, instanceID int) error {
	key, found := ctx.Value(apiKeyContextKey).(*auth.APIKey)

	// No key in context, or a key that is allowed everywhere: nothing to check.
	if !found || key.PermissionMode == auth.PermissionModeAllowAll {
		return nil
	}

	allowed, lookupErr := a.authStore.HasPermission(ctx, key.ID, instanceID)
	switch {
	case lookupErr != nil:
		return fmt.Errorf("failed to check permission: %w", lookupErr)
	case !allowed:
		return fmt.Errorf("permission denied: key does not have access to this instance")
	default:
		return nil
	}
}
// extractAPIKey extracts the API key from the request
func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
// Check Authorization header: "Bearer sk-..."
@@ -158,20 +204,9 @@ func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
return ""
}
// isValidKey checks if the provided API key is valid for the given key type
func (a *APIAuthMiddleware) isValidKey(providedKey string, keyType KeyType) bool {
var validKeys map[string]bool
switch keyType {
case KeyTypeInference:
validKeys = a.inferenceKeys
case KeyTypeManagement:
validKeys = a.managementKeys
default:
return false
}
for validKey := range validKeys {
// isValidManagementKey checks if the provided API key is a valid management key
func (a *APIAuthMiddleware) isValidManagementKey(providedKey string) bool {
for validKey := range a.managementKeys {
if len(providedKey) == len(validKey) &&
subtle.ConstantTimeCompare([]byte(providedKey), []byte(validKey)) == 1 {
return true
@@ -187,3 +222,11 @@ func (a *APIAuthMiddleware) unauthorized(w http.ResponseWriter, message string)
response := fmt.Sprintf(`{"error": {"message": "%s", "type": "authentication_error"}}`, message)
w.Write([]byte(response))
}
// forbidden writes an HTTP 403 response with a JSON error body of type
// "permission_denied". The message is interpolated into the body verbatim
// (no JSON escaping is applied), so callers should pass plain text.
func (a *APIAuthMiddleware) forbidden(w http.ResponseWriter, message string) {
	const bodyFormat = `{"error": {"message": "%s", "type": "permission_denied"}}`

	// Headers must be set before the status line is written.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusForbidden)
	fmt.Fprintf(w, bodyFormat, message)
}

View File

@@ -9,107 +9,44 @@ import (
"testing"
)
func TestAuthMiddleware(t *testing.T) {
func TestInferenceAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
// Valid key tests
{
name: "valid inference key for inference",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: server.KeyTypeInference,
name: "valid management key for inference",
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for management",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
// Invalid key tests
{
name: "inference key for management should fail",
keyType: server.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid management key",
keyType: server.KeyTypeManagement,
name: "invalid key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing management key",
keyType: server.KeyTypeManagement,
name: "missing key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
{
name: "OPTIONS request bypasses management auth",
keyType: server.KeyTypeManagement,
name: "OPTIONS request bypasses auth",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: server.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
method: "POST",
@@ -120,10 +57,10 @@ func TestAuthMiddleware(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
RequireInferenceAuth: true,
ManagementKeys: tt.managementKeys,
}
middleware := server.NewAPIAuthMiddleware(cfg)
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
@@ -131,24 +68,17 @@ func TestAuthMiddleware(t *testing.T) {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
// Create test handler
handler := middleware.InferenceAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("AuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
t.Errorf("InferenceAuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
@@ -167,178 +97,171 @@ func TestAuthMiddleware(t *testing.T) {
}
}
func TestGenerateAPIKey(t *testing.T) {
func TestManagementAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
}{
{"inference key generation", server.KeyTypeInference},
{"management key generation", server.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config config.AuthConfig
if tt.keyType == server.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
config.RequireManagementAuth = true
config.ManagementKeys = []string{} // Empty to trigger generation
}
// Create middleware - this should trigger key generation
middleware := server.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
handler.ServeHTTP(recorder, req)
// Should be unauthorized without a key (proving that a key was generated and auth is working)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
}
// Test uniqueness by creating another middleware instance
middleware2 := server.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == server.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
}
// Both should require auth (proving keys were generated for both instances)
if recorder2.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
}
})
}
}
func TestAutoGeneration(t *testing.T) {
tests := []struct {
name string
requireInference bool
requireManagement bool
providedInference []string
providedManagement []string
shouldGenerateInf bool // Whether inference key should be generated
shouldGenerateMgmt bool // Whether management key should be generated
name string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
{
name: "inference auth required, keys provided - no generation",
requireInference: true,
requireManagement: false,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
name: "valid management key",
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "inference auth required, no keys - should auto-generate",
requireInference: true,
requireManagement: false,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: false,
name: "invalid management key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "management auth required, keys provided - no generation",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
name: "missing management key",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "management auth required, no keys - should auto-generate",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: true,
},
{
name: "both required, both provided - no generation",
requireInference: true,
requireManagement: true,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "both required, none provided - should auto-generate both",
requireInference: true,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: true,
name: "OPTIONS request bypasses management auth",
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
RequireManagementAuth: true,
ManagementKeys: tt.managementKeys,
}
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
if tt.requestKey != "" {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler
handler := middleware.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("ManagementAuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
if recorder.Code == http.StatusUnauthorized {
contentType := recorder.Header().Get("Content-Type")
if contentType != "application/json" {
t.Errorf("Unauthorized response Content-Type = %v, expected application/json", contentType)
}
body := recorder.Body.String()
if !strings.Contains(body, `"type": "authentication_error"`) {
t.Errorf("Unauthorized response missing proper error type: %v", body)
}
}
})
}
}
// TestManagementKeyAutoGeneration builds two middleware instances from a config
// that requires management auth but supplies no keys, and verifies that each
// one rejects a key-less request with 401.
func TestManagementKeyAutoGeneration(t *testing.T) {
	// Management auth required with an empty key list.
	cfg := config.AuthConfig{
		RequireManagementAuth: true,
		ManagementKeys:        []string{}, // Empty to trigger generation
	}

	// Terminal handler that is only reached when the middleware lets a request through.
	okHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	// statusWithoutKey creates a fresh middleware from cfg and returns the status
	// code produced for a GET request that carries no Authorization header.
	statusWithoutKey := func() int {
		authMw := server.NewAPIAuthMiddleware(cfg, nil)
		request := httptest.NewRequest("GET", "/", nil)
		rec := httptest.NewRecorder()
		authMw.ManagementAuthMiddleware()(okHandler).ServeHTTP(rec, request)
		return rec.Code
	}

	// First instance: should be unauthorized without a key.
	if code := statusWithoutKey(); code != http.StatusUnauthorized {
		t.Errorf("Expected unauthorized without key, got status %v", code)
	}

	// Second, independently constructed instance: should also require auth.
	if code := statusWithoutKey(); code != http.StatusUnauthorized {
		t.Errorf("Expected unauthorized for second middleware without key, got status %v", code)
	}
}
func TestAutoGenerationScenarios(t *testing.T) {
tests := []struct {
name string
requireManagement bool
providedManagement []string
shouldGenerate bool
}{
{
name: "management auth required, keys provided - no generation",
requireManagement: true,
providedManagement: []string{"sk-management-provided"},
shouldGenerate: false,
},
{
name: "management auth required, no keys - should auto-generate",
requireManagement: true,
providedManagement: []string{},
shouldGenerate: true,
},
{
name: "management auth not required - no generation",
requireManagement: false,
providedManagement: []string{},
shouldGenerate: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := server.NewAPIAuthMiddleware(cfg)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for inference without key, got status %v", recorder.Code)
}
}
middleware := server.NewAPIAuthMiddleware(cfg, nil)
// Test management behavior if management auth is required
if tt.requireManagement {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
handler := middleware.ManagementAuthMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
@@ -352,3 +275,16 @@ func TestAutoGeneration(t *testing.T) {
})
}
}
// TestConfigBasedInferenceKeysDeprecationWarning is a smoke test: constructing
// the middleware from a config that still lists inference keys must complete
// without panicking. Nothing is asserted about log output; the deprecation
// warning is presumably emitted by NewAPIAuthMiddleware (not visible here).
func TestConfigBasedInferenceKeysDeprecationWarning(t *testing.T) {
	// Legacy-style config: only config-based inference keys are set.
	legacyCfg := config.AuthConfig{InferenceKeys: []string{"sk-inference-old"}}

	// The return value is deliberately discarded; reaching the end of the
	// function without a panic is the pass condition.
	_ = server.NewAPIAuthMiddleware(legacyCfg, nil)
}

View File

@@ -1,7 +1,7 @@
package server
import (
"fmt"
"log"
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
@@ -27,7 +27,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
}))
// Add API authentication middleware
authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth)
authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth, handler.authStore)
if handler.cfg.Server.EnableSwagger {
r.Get("/swagger/*", httpSwagger.Handler(
@@ -39,10 +39,23 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Route("/api/v1", func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
r.Use(authMiddleware.ManagementAuthMiddleware())
}
r.Get("/version", handler.VersionHandler()) // Get server version
r.Get("/version", handler.VersionHandler())
r.Get("/config", handler.ConfigHandler())
// API key management endpoints
r.Route("/auth", func(r chi.Router) {
r.Route("/keys", func(r chi.Router) {
r.Post("/", handler.CreateKey()) // Create API key
r.Get("/", handler.ListKeys()) // List API keys
r.Get("/{id}", handler.GetKey()) // Get API key details
r.Delete("/{id}", handler.DeleteKey()) // Delete API key
r.Get("/{id}/permissions", handler.GetKeyPermissions()) // Get key permissions
})
})
// Backend-specific endpoints
r.Route("/backends", func(r chi.Router) {
@@ -65,7 +78,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Get("/", handler.ListNodes()) // List all nodes
r.Route("/{name}", func(r chi.Router) {
r.Get("/", handler.GetNode())
r.Get("/", handler.GetNode()) // Get node details
})
})
@@ -92,13 +105,13 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
})
r.Route(("/v1"), func(r chi.Router) {
r.Route("/v1", func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
r.Use(authMiddleware.InferenceAuthMiddleware())
}
r.Get(("/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
r.Get("/models", handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
@@ -123,10 +136,10 @@ func SetupRouter(handler *Handler) *chi.Mux {
r.Group(func(r chi.Router) {
if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
r.Use(authMiddleware.InferenceAuthMiddleware())
}
// This handler auto start the server if it's not running
// This handler auto starts the server if it's not running
llamaCppHandler := handler.LlamaCppProxy()
// llama.cpp server specific proxy endpoints
@@ -159,7 +172,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {
fmt.Printf("Failed to set up WebUI: %v\n", err)
log.Printf("Failed to set up WebUI: %v\n", err)
}
return r

View File

@@ -239,25 +239,3 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
})
}
}
func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
// Test that non-string fields don't interfere with validation
options := backends.Options{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &backends.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,
Temperature: 0.7,
TopK: 40,
TopP: 0.9,
Verbose: true,
FlashAttn: false,
},
}
err := options.ValidateInstanceOptions()
if err != nil {
t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
}
}

1518
webui/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -21,38 +21,40 @@
"@radix-ui/react-checkbox": "^1.3.2",
"@radix-ui/react-dialog": "^1.1.14",
"@radix-ui/react-label": "^2.1.7",
"@radix-ui/react-radio-group": "^1.3.8",
"@radix-ui/react-slot": "^1.2.3",
"@tailwindcss/vite": "^4.1.11",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.525.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"date-fns": "^4.1.0",
"lucide-react": "^0.555.0",
"react": "^19.2.0",
"react-dom": "^19.2.0",
"sonner": "^2.0.7",
"tailwind-merge": "^3.3.1",
"tailwind-merge": "^3.4.0",
"tailwindcss": "^4.1.11",
"zod": "^4.0.5"
"zod": "^4.1.12"
},
"devDependencies": {
"@eslint/js": "^9.32.0",
"@testing-library/jest-dom": "^6.6.3",
"@eslint/js": "^9.39.1",
"@testing-library/jest-dom": "^6.9.1",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/eslint__js": "^8.42.3",
"@types/node": "^24.0.15",
"@types/react": "^19.1.8",
"@types/react-dom": "^19.1.6",
"@vitejs/plugin-react": "^4.7.0",
"@vitest/ui": "^3.2.4",
"eslint": "^9.32.0",
"@types/eslint__js": "^9.14.0",
"@types/node": "^24.10.1",
"@types/react": "^19.2.4",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^5.1.1",
"@vitest/ui": "^4.0.8",
"eslint": "^9.39.1",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.4.20",
"jsdom": "^26.1.0",
"tw-animate-css": "^1.3.5",
"typescript": "^5.8.3",
"typescript-eslint": "^8.38.0",
"vite": "^7.1.11",
"vitest": "^3.2.4"
"jsdom": "^27.2.0",
"tw-animate-css": "^1.4.0",
"typescript": "^5.9.3",
"typescript-eslint": "^8.48.0",
"vite": "^7.2.2",
"vitest": "^4.0.8"
}
}

View File

@@ -4,6 +4,7 @@ import InstanceList from "@/components/InstanceList";
import InstanceDialog from "@/components/InstanceDialog";
import LoginDialog from "@/components/LoginDialog";
import SystemInfoDialog from "./components/SystemInfoDialog";
import SettingsDialog from "./components/settings/SettingsDialog";
import { type CreateInstanceOptions, type Instance } from "@/types/instance";
import { useInstances } from "@/contexts/InstancesContext";
import { useAuth } from "@/contexts/AuthContext";
@@ -14,6 +15,7 @@ function App() {
const { isAuthenticated, isLoading: authLoading } = useAuth();
const [isInstanceModalOpen, setIsInstanceModalOpen] = useState(false);
const [isSystemInfoModalOpen, setIsSystemInfoModalOpen] = useState(false);
const [isSettingsModalOpen, setIsSettingsModalOpen] = useState(false);
const [editingInstance, setEditingInstance] = useState<Instance | undefined>(
undefined
);
@@ -41,6 +43,10 @@ function App() {
setIsSystemInfoModalOpen(true);
};
const handleShowSettings = () => {
setIsSettingsModalOpen(true);
};
// Show loading spinner while checking auth
if (authLoading) {
return (
@@ -70,7 +76,11 @@ function App() {
return (
<ThemeProvider>
<div className="min-h-screen bg-background">
<Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
<Header
onCreateInstance={handleCreateInstance}
onShowSystemInfo={handleShowSystemInfo}
onShowSettings={handleShowSettings}
/>
<main className="container mx-auto max-w-4xl px-4 py-8">
<InstanceList editInstance={handleEditInstance} />
</main>
@@ -86,7 +96,12 @@ function App() {
open={isSystemInfoModalOpen}
onOpenChange={setIsSystemInfoModalOpen}
/>
<SettingsDialog
open={isSettingsModalOpen}
onOpenChange={setIsSettingsModalOpen}
/>
<Toaster />
</div>
</ThemeProvider>

View File

@@ -4,8 +4,7 @@ import userEvent from '@testing-library/user-event'
import App from '@/App'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import {BackendType, type Instance } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -49,6 +48,21 @@ vi.mock('@/lib/healthService', () => ({
})),
}))
// Mock the ConfigContext helper hooks
vi.mock('@/hooks/useConfig', () => ({
useInstanceDefaults: () => ({
autoRestart: true,
maxRestarts: 3,
restartDelay: 5,
onDemandStart: false,
}),
useBackendSettings: () => ({
command: '/usr/bin/llama-server',
dockerEnabled: false,
dockerImage: '',
}),
}))
function renderApp() {
return render(
<AuthProvider>
@@ -61,8 +75,8 @@ function renderApp() {
describe('App Component - Critical Business Logic Only', () => {
const mockInstances: Instance[] = [
{ name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
{ id: 1, name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ id: 2, name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
]
beforeEach(() => {
@@ -95,6 +109,7 @@ describe('App Component - Critical Business Logic Only', () => {
it('creates new instance with correct API call and updates UI', async () => {
const user = userEvent.setup()
const newInstance: Instance = {
id: 3,
name: 'new-test-instance',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
@@ -119,8 +134,12 @@ describe('App Component - Critical Business Logic Only', () => {
// Verify correct API call
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
auto_restart: true, // Default value from config
backend_type: BackendType.LLAMA_CPP,
docker_enabled: false,
max_restarts: 3,
on_demand_start: false,
restart_delay: 5
})
})
@@ -133,6 +152,7 @@ describe('App Component - Critical Business Logic Only', () => {
it('updates existing instance with correct API call', async () => {
const user = userEvent.setup()
const updatedInstance: Instance = {
id: 1,
name: 'test-instance-1',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }

View File

@@ -3,22 +3,36 @@ import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
import ExtraArgsInput from '@/components/form/ExtraArgsInput'
interface BackendFormFieldProps {
fieldKey: string
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
value: string | number | boolean | string[] | Record<string, string> | undefined
onChange: (key: string, value: string | number | boolean | string[] | Record<string, string> | undefined) => void
}
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Special handling for extra_args
if (fieldKey === 'extra_args') {
return (
<ExtraArgsInput
id={fieldKey}
label="Extra Arguments"
value={value as Record<string, string> | undefined}
onChange={(newValue) => onChange(fieldKey, newValue)}
description="Additional command line arguments to pass to the backend"
/>
)
}
// Get configuration for basic fields, or use field name for advanced fields
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
const config = basicBackendFieldsConfig[fieldKey] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
onChange(fieldKey, newValue)
}
const renderField = () => {

View File

@@ -1,14 +1,15 @@
import { Button } from "@/components/ui/button";
import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
import { HelpCircle, LogOut, Moon, Settings, Sun } from "lucide-react";
import { useAuth } from "@/contexts/AuthContext";
import { useTheme } from "@/contexts/ThemeContext";
interface HeaderProps {
onCreateInstance: () => void;
onShowSystemInfo: () => void;
onShowSettings: () => void;
}
function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
function Header({ onCreateInstance, onShowSystemInfo, onShowSettings }: HeaderProps) {
const { logout } = useAuth();
const { theme, toggleTheme } = useTheme();
@@ -41,6 +42,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
{theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
</Button>
<Button
variant="outline"
size="icon"
onClick={onShowSettings}
data-testid="settings-button"
title="Settings"
>
<Settings className="h-4 w-4" />
</Button>
<Button
variant="outline"
size="icon"

View File

@@ -21,6 +21,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return <Loader2 className="h-3 w-3 animate-spin" />;
case "restarting":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "shutting_down":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "stopped":
return <Clock className="h-3 w-3" />;
case "failed":
@@ -36,6 +38,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "outline";
case "restarting":
return "outline";
case "shutting_down":
return "outline";
case "stopped":
return "secondary";
case "failed":
@@ -51,6 +55,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "Starting";
case "restarting":
return "Restarting";
case "shutting_down":
return "Shutting Down";
case "stopped":
return "Stopped";
case "failed":

View File

@@ -2,12 +2,13 @@
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import type { Instance } from "@/types/instance";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal } from "lucide-react";
import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download } from "lucide-react";
import LogsDialog from "@/components/LogDialog";
import HealthBadge from "@/components/HealthBadge";
import BackendBadge from "@/components/BackendBadge";
import { useState } from "react";
import { useInstanceHealth } from "@/hooks/useInstanceHealth";
import { instancesApi } from "@/lib/api";
interface InstanceCardProps {
instance: Instance;
@@ -52,6 +53,36 @@ function InstanceCard({
setIsLogsOpen(true);
};
/**
 * Exports the instance as a pretty-printed JSON file download.
 *
 * Fetches the current instance data via `instancesApi.get`, serializes it with
 * two-space indentation, and triggers a browser download named
 * `<instance name>.json` through a temporary anchor element. Failures are
 * logged to the console and reported to the user with an alert. The temporary
 * anchor and the blob object URL are always released, even when the download
 * could not be triggered.
 */
const handleExport = () => {
  // Fire-and-forget: the click handler itself stays synchronous.
  void (async () => {
    let url: string | undefined;
    let link: HTMLAnchorElement | undefined;
    try {
      // Fetch the most up-to-date instance data from the backend
      const instanceData = await instancesApi.get(instance.name);

      // Convert to JSON string with pretty formatting
      const jsonString = JSON.stringify(instanceData, null, 2);

      // Create a blob and download link
      const blob = new Blob([jsonString], { type: "application/json" });
      url = URL.createObjectURL(blob);
      link = document.createElement("a");
      link.href = url;
      link.download = `${instance.name}.json`;

      // Trigger download
      document.body.appendChild(link);
      link.click();
    } catch (error) {
      console.error("Failed to export instance:", error);
      alert(`Failed to export instance: ${error instanceof Error ? error.message : "Unknown error"}`);
    } finally {
      // Cleanup runs on both success and failure so the anchor never lingers
      // in the DOM and the object URL is not leaked.
      link?.remove();
      if (url !== undefined) {
        URL.revokeObjectURL(url);
      }
    }
  })();
};
const running = instance.status === "running";
return (
@@ -66,7 +97,7 @@ function InstanceCard({
{/* Badges row */}
<div className="flex items-center gap-2 flex-wrap">
<BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
<BackendBadge backend={instance.options?.backend_type} docker={instance.options?.docker_enabled} />
{running && <HealthBadge health={health} />}
</div>
</div>
@@ -131,6 +162,18 @@ function InstanceCard({
Logs
</Button>
<Button
size="sm"
variant="outline"
onClick={handleExport}
title="Export instance"
data-testid="export-instance-button"
className="flex-1"
>
<Download className="h-4 w-4 mr-1" />
Export
</Button>
<Button
size="sm"
variant="destructive"

View File

@@ -1,4 +1,4 @@
import React, { useState, useEffect } from "react";
import React, { useState, useEffect, useRef } from "react";
import { Button } from "@/components/ui/button";
import {
Dialog,
@@ -9,9 +9,12 @@ import {
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import type { BackendOptions } from "@/schemas/instanceOptions";
import ParseCommandDialog from "@/components/ParseCommandDialog";
import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard";
import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard";
import { Upload } from "lucide-react";
import { useInstanceDefaults, useBackendSettings } from "@/hooks/useConfig";
interface InstanceDialogProps {
open: boolean;
@@ -27,12 +30,18 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
instance,
}) => {
const isEditing = !!instance;
const instanceDefaults = useInstanceDefaults();
const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState<CreateInstanceOptions>({});
const [nameError, setNameError] = useState("");
const [showParseDialog, setShowParseDialog] = useState(false);
const fileInputRef = useRef<HTMLInputElement>(null);
// Get backend settings for all backends (we'll use this to update docker_enabled on backend type change)
const llamaCppSettings = useBackendSettings(BackendType.LLAMA_CPP);
const vllmSettings = useBackendSettings(BackendType.VLLM);
const mlxSettings = useBackendSettings(BackendType.MLX_LM);
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -42,43 +51,58 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setInstanceName(instance.name);
setFormData(instance.options || {});
} else {
// Reset form for new instance
// Reset form for new instance with defaults from config
setInstanceName("");
setFormData({
auto_restart: true, // Default value
auto_restart: instanceDefaults?.autoRestart ?? true,
max_restarts: instanceDefaults?.maxRestarts,
restart_delay: instanceDefaults?.restartDelay,
on_demand_start: instanceDefaults?.onDemandStart,
backend_type: BackendType.LLAMA_CPP, // Default backend type
docker_enabled: llamaCppSettings?.dockerEnabled ?? false,
backend_options: {},
});
}
setNameError(""); // Reset any name errors
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [open, instance]);
const handleFieldChange = (key: keyof CreateInstanceOptions, value: any) => {
const handleFieldChange = (key: keyof CreateInstanceOptions, value: unknown) => {
setFormData((prev) => {
// If backend_type is changing, clear backend_options
// If backend_type is changing, update docker_enabled default and clear backend_options
if (key === 'backend_type' && prev.backend_type !== value) {
let dockerEnabled = false;
if (value === BackendType.LLAMA_CPP) {
dockerEnabled = llamaCppSettings?.dockerEnabled ?? false;
} else if (value === BackendType.VLLM) {
dockerEnabled = vllmSettings?.dockerEnabled ?? false;
} else if (value === BackendType.MLX_LM) {
dockerEnabled = mlxSettings?.dockerEnabled ?? false;
}
return {
...prev,
[key]: value,
backend_type: value as CreateInstanceOptions['backend_type'],
docker_enabled: dockerEnabled,
backend_options: {}, // Clear backend options when backend type changes
};
}
return {
...prev,
[key]: value,
};
} as CreateInstanceOptions;
});
};
const handleBackendFieldChange = (key: string, value: any) => {
const handleBackendFieldChange = (key: string, value: unknown) => {
setFormData((prev) => ({
...prev,
backend_options: {
...prev.backend_options,
[key]: value,
} as any,
} as BackendOptions,
}));
};
@@ -103,12 +127,22 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
return;
}
// Validate docker_enabled and command_override relationship
if (formData.backend_type !== BackendType.MLX_LM) {
if (formData.docker_enabled === true && formData.command_override) {
setNameError("Command override cannot be set when Docker is enabled");
return;
}
}
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
const cleanOptions: CreateInstanceOptions = {} as CreateInstanceOptions;
Object.entries(formData).forEach(([key, value]) => {
const typedKey = key as keyof CreateInstanceOptions;
if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
const cleanBackendOptions: Record<string, unknown> = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
@@ -121,7 +155,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
(cleanOptions as Record<string, unknown>)[typedKey] = cleanBackendOptions as BackendOptions;
}
} else if (value !== undefined && value !== null) {
// Skip empty strings
@@ -132,7 +166,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
if (Array.isArray(value) && value.length === 0) {
return;
}
(cleanOptions as any)[key] = value;
(cleanOptions as Record<string, unknown>)[typedKey] = value;
}
});
@@ -153,6 +187,49 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setShowParseDialog(false);
};
/** Opens the native file picker by clicking the hidden file input, if it is mounted. */
const handleImportFile = () => {
  const input = fileInputRef.current;
  if (input) {
    input.click();
  }
};
/**
 * Handles a file chosen through the hidden import input.
 *
 * Reads the file as text, parses it as JSON and, when it looks like an
 * instance export (`name` string plus `options` object), copies the name
 * (create mode only) and merges the options into the form state.
 *
 * The input is cleared after every attempt - successful or not - so that
 * picking the same file again still fires a change event.
 */
const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
  // Capture the element now; the reader callbacks run asynchronously
  const input = event.target;
  const file = input.files?.[0];
  if (!file) return;

  const resetInput = () => {
    input.value = '';
  };

  const reader = new FileReader();

  reader.onload = () => {
    try {
      const content = reader.result;
      if (typeof content !== 'string') {
        throw new Error('File could not be read as text');
      }

      const parsed: unknown = JSON.parse(content);
      const isObject = (candidate: unknown): candidate is Record<string, unknown> =>
        typeof candidate === 'object' && candidate !== null && !Array.isArray(candidate);

      // Validate that it's an instance export
      if (
        !isObject(parsed) ||
        typeof parsed.name !== 'string' ||
        parsed.name === '' ||
        !isObject(parsed.options)
      ) {
        alert('Invalid instance file: Missing required fields (name, options)');
        return;
      }

      // Set the instance name (only for new instances, not editing)
      if (!isEditing) {
        handleNameChange(parsed.name);
      }

      // Populate all the options from the imported file
      const importedOptions = parsed.options as CreateInstanceOptions;
      setFormData(prev => ({
        ...prev,
        ...importedOptions,
      }));
    } catch (error) {
      console.error('Failed to parse instance file:', error);
      alert(`Failed to parse instance file: ${error instanceof Error ? error.message : 'Invalid JSON'}`);
    } finally {
      resetInput();
    }
  };

  // Without this a read failure would be silent and leave the input stuck on the file
  reader.onerror = () => {
    console.error('Failed to read instance file:', reader.error);
    alert(`Failed to read instance file: ${reader.error?.message ?? 'Unknown error'}`);
    resetInput();
  };

  reader.readAsText(file);
};
// Save button label logic
let saveButtonLabel = "Create Instance";
@@ -168,14 +245,38 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col">
<DialogHeader>
<DialogTitle>
{isEditing ? "Edit Instance" : "Create New Instance"}
</DialogTitle>
<DialogDescription>
{isEditing
? "Modify the instance configuration below."
: "Configure your new llama-server instance below."}
</DialogDescription>
<div className="flex items-center justify-between">
<div className="flex-1">
<DialogTitle>
{isEditing ? "Edit Instance" : "Create New Instance"}
</DialogTitle>
<DialogDescription>
{isEditing
? "Modify the instance configuration below."
: "Configure your new llama-server instance below."}
</DialogDescription>
</div>
{!isEditing && (
<Button
type="button"
variant="ghost"
size="sm"
onClick={handleImportFile}
title="Import instance configuration from JSON file"
className="ml-2"
>
<Upload className="h-4 w-4 mr-2" />
Import
</Button>
)}
</div>
<input
ref={fileInputRef}
type="file"
accept=".json"
onChange={handleFileChange}
className="hidden"
/>
</DialogHeader>
<div className="flex-1 overflow-y-auto">

View File

@@ -56,9 +56,9 @@ function InstanceList({ editInstance }: InstanceListProps) {
<MemoizedInstanceCard
key={instance.name}
instance={instance}
startInstance={startInstance}
stopInstance={stopInstance}
deleteInstance={deleteInstance}
startInstance={() => { void startInstance(instance.name) }}
stopInstance={() => { void stopInstance(instance.name) }}
deleteInstance={() => { void deleteInstance(instance.name) }}
editInstance={editInstance}
/>
))}

View File

@@ -54,7 +54,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
options = await backendsApi.vllm.parseCommand(command);
break;
default:
throw new Error(`Unsupported backend type: ${backendType}`);
throw new Error(`Unsupported backend type: ${String(backendType)}`);
}
onParsed(options);

View File

@@ -21,12 +21,14 @@ describe('InstanceCard - Instance Actions and State', () => {
const mockEditInstance = vi.fn()
const stoppedInstance: Instance = {
id: 1,
name: 'test-instance',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
}
const runningInstance: Instance = {
id: 2,
name: 'running-instance',
status: 'running',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
@@ -342,6 +344,7 @@ afterEach(() => {
describe('Error Edge Cases', () => {
it('handles instance with minimal data', () => {
const minimalInstance: Instance = {
id: 3,
name: 'minimal',
status: 'stopped',
options: {}
@@ -364,6 +367,7 @@ afterEach(() => {
it('handles instance with undefined options', () => {
const instanceWithoutOptions: Instance = {
id: 4,
name: 'no-options',
status: 'running',
options: undefined

View File

@@ -59,9 +59,9 @@ describe('InstanceList - State Management and UI Logic', () => {
const mockEditInstance = vi.fn()
const mockInstances: Instance[] = [
{ name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
{ name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
// Ids are distinct so that id-keyed consumers (React keys, permission maps) can tell the instances apart
{ id: 1, name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ id: 2, name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
{ id: 3, name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
]
const DUMMY_API_KEY = 'test-api-key-123'

View File

@@ -2,8 +2,22 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { BackendType, type Instance } from '@/types/instance'
// Mock the ConfigContext helper hooks
vi.mock('@/hooks/useConfig', () => ({
useInstanceDefaults: () => ({
autoRestart: true,
maxRestarts: 3,
restartDelay: 5,
onDemandStart: false,
}),
useBackendSettings: () => ({
command: '/usr/bin/llama-server',
dockerEnabled: false,
dockerImage: '',
}),
}))
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -75,7 +89,7 @@ afterEach(() => {
it('submits form with correct data structure', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
@@ -86,13 +100,17 @@ afterEach(() => {
// Fill required name
await user.type(screen.getByLabelText(/Instance Name/), 'my-instance')
// Submit form
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
auto_restart: true, // Default value from config
backend_type: BackendType.LLAMA_CPP,
docker_enabled: false,
max_restarts: 3,
on_demand_start: false,
restart_delay: 5
})
})
@@ -135,6 +153,7 @@ afterEach(() => {
describe('Edit Mode', () => {
const mockInstance: Instance = {
id: 1,
name: 'existing-instance',
status: 'stopped',
options: {
@@ -253,7 +272,7 @@ afterEach(() => {
it('includes restart options in form submission when enabled', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
@@ -264,17 +283,23 @@ afterEach(() => {
// Fill form
await user.type(screen.getByLabelText(/Instance Name/), 'test-instance')
// Set restart options
await user.type(screen.getByLabelText(/Max Restarts/), '5')
await user.type(screen.getByLabelText(/Restart Delay/), '10')
// Clear default values and set new restart options
const maxRestartsInput = screen.getByLabelText(/Max Restarts/)
const restartDelayInput = screen.getByLabelText(/Restart Delay/)
await user.clear(maxRestartsInput)
await user.type(maxRestartsInput, '5')
await user.clear(restartDelayInput)
await user.type(restartDelayInput, '10')
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
docker_enabled: false,
max_restarts: 5,
on_demand_start: false,
restart_delay: 10
})
})
@@ -284,7 +309,7 @@ afterEach(() => {
describe('Form Data Handling', () => {
it('cleans up undefined values before submission', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
@@ -298,16 +323,20 @@ afterEach(() => {
await user.click(screen.getByTestId('dialog-save-button'))
// Should only include non-empty values
// Should include default values from config
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
auto_restart: true, // Only this default value should be included
backend_type: BackendType.LLAMA_CPP
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
docker_enabled: false,
max_restarts: 3,
on_demand_start: false,
restart_delay: 5
})
})
it('handles numeric fields correctly', async () => {
const user = userEvent.setup()
render(
<InstanceDialog
open={true}
@@ -317,7 +346,7 @@ afterEach(() => {
)
await user.type(screen.getByLabelText(/Instance Name/), 'numeric-test')
// Test GPU layers field (numeric)
const gpuLayersInput = screen.getByLabelText(/GPU Layers/)
await user.type(gpuLayersInput, '15')
@@ -328,6 +357,10 @@ afterEach(() => {
auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
backend_options: { gpu_layers: 15 }, // Should be number, not string
docker_enabled: false,
max_restarts: 3,
on_demand_start: false,
restart_delay: 5
})
})
})

View File

@@ -0,0 +1,236 @@
import { useState } from "react";
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter } from "@/components/ui/dialog";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
import { Checkbox } from "@/components/ui/checkbox";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Loader2 } from "lucide-react";
import { apiKeysApi } from "@/lib/api";
import { CreateKeyRequest, PermissionMode, InstancePermission } from "@/types/apiKey";
import { useInstances } from "@/contexts/InstancesContext";
import { format, addDays } from "date-fns";
interface CreateApiKeyDialogProps {
open: boolean;
onOpenChange: (open: boolean) => void;
onKeyCreated: (plainTextKey: string) => void;
}
/**
 * Modal dialog with a form for creating a new API key.
 *
 * The form collects a name, a permission mode (all instances or a selection
 * of instances) and an optional expiration date. On success the plain-text
 * key returned by the API is handed to `onKeyCreated` and the form is reset.
 *
 * @param open - Whether the dialog is visible.
 * @param onOpenChange - Called when the dialog requests to open or close.
 * @param onKeyCreated - Receives the plain-text key of the newly created API key.
 */
function CreateApiKeyDialog({ open, onOpenChange, onKeyCreated }: CreateApiKeyDialogProps) {
  const { instances } = useInstances();
  const [name, setName] = useState("");
  const [permissionMode, setPermissionMode] = useState<PermissionMode>(PermissionMode.AllowAll);
  const [expiresAt, setExpiresAt] = useState<string>("");
  const [instancePermissions, setInstancePermissions] = useState<Record<number, boolean>>({});
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /**
   * Parses a `YYYY-MM-DD` value from the date input as midnight in the
   * user's local time zone. `new Date("YYYY-MM-DD")` would be interpreted as
   * UTC midnight instead, which shifts the calendar day for users west of UTC.
   * Returns null when the string is not a valid date.
   */
  const parseDateInput = (dateString: string): Date | null => {
    const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(dateString);
    if (!match) return null;
    const date = new Date(Number(match[1]), Number(match[2]) - 1, Number(match[3]));
    return Number.isNaN(date.getTime()) ? null : date;
  };

  /** Human-readable form of the selected expiration date, or null if unset/invalid. */
  const formatDisplayDate = (dateString: string) => {
    if (!dateString) return null;
    const date = parseDateInput(dateString);
    return date ? format(date, "d MMMM yyyy") : null;
  };

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError(null);

    // Validation
    const trimmedName = name.trim();
    if (!trimmedName) {
      setError("Name is required");
      return;
    }
    if (trimmedName.length > 100) {
      setError("Name must be 100 characters or less");
      return;
    }
    if (permissionMode === PermissionMode.PerInstance) {
      const hasAnyPermission = Object.values(instancePermissions).some(v => v);
      if (!hasAnyPermission) {
        setError("At least one instance permission is required for per-instance mode");
        return;
      }
    }

    // Build request
    const permissions: InstancePermission[] = [];
    if (permissionMode === PermissionMode.PerInstance) {
      Object.entries(instancePermissions).forEach(([instanceId, canInfer]) => {
        if (canInfer) {
          permissions.push({
            // Object keys are always strings; convert back to the numeric id
            InstanceID: Number(instanceId),
            CanInfer: true,
          });
        }
      });
    }
    const request: CreateKeyRequest = {
      Name: trimmedName,
      PermissionMode: permissionMode,
      InstancePermissions: permissions,
    };

    // Add expiration if provided
    if (expiresAt) {
      const expirationDate = parseDateInput(expiresAt);
      if (!expirationDate) {
        setError("Invalid expiration date");
        return;
      }
      if (expirationDate <= new Date()) {
        setError("Expiration date must be in the future");
        return;
      }
      // The API takes the expiration as Unix seconds
      request.ExpiresAt = Math.floor(expirationDate.getTime() / 1000);
    }

    setLoading(true);
    try {
      const response = await apiKeysApi.create(request);
      onKeyCreated(response.key);
      // Reset form
      setName("");
      setPermissionMode(PermissionMode.AllowAll);
      setExpiresAt("");
      setInstancePermissions({});
    } catch (err) {
      setError(err instanceof Error ? err.message : "Failed to create API key");
    } finally {
      setLoading(false);
    }
  };

  const handleInstancePermissionChange = (instanceId: number, checked: boolean) => {
    // Functional update so rapid toggles never build on a stale snapshot
    setInstancePermissions((prev) => ({
      ...prev,
      [instanceId]: checked,
    }));
  };

  const expiresLabel = formatDisplayDate(expiresAt);

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-2xl">
        <DialogHeader>
          <DialogTitle>Create API Key</DialogTitle>
        </DialogHeader>
        {/* handleSubmit is async; `void` marks the promise as intentionally not awaited */}
        <form onSubmit={(e) => { void handleSubmit(e); }} className="space-y-4">
          {error && (
            <Alert variant="destructive">
              <AlertDescription>{error}</AlertDescription>
            </Alert>
          )}
          <div className="space-y-2">
            <Label htmlFor="name">Name</Label>
            <Input
              id="name"
              value={name}
              onChange={(e) => setName(e.target.value)}
              placeholder="My API Key"
              maxLength={100}
              disabled={loading}
            />
          </div>
          <div className="space-y-3">
            <Label>Permission Mode</Label>
            <RadioGroup
              value={permissionMode}
              onValueChange={(value) => setPermissionMode(value as PermissionMode)}
              disabled={loading}
            >
              <div className="flex items-center space-x-2">
                <RadioGroupItem value={PermissionMode.AllowAll} id="allow-all" />
                <Label htmlFor="allow-all" className="font-normal cursor-pointer">
                  Full Access
                </Label>
              </div>
              <div className="flex items-center space-x-2">
                <RadioGroupItem value={PermissionMode.PerInstance} id="per-instance" />
                <Label htmlFor="per-instance" className="font-normal cursor-pointer">
                  Per-Instance Access
                </Label>
              </div>
            </RadioGroup>
            {permissionMode === PermissionMode.AllowAll && (
              <p className="text-sm text-muted-foreground">
                This key will have access to all instances
              </p>
            )}
            {permissionMode === PermissionMode.PerInstance && (
              <div className="space-y-2 border rounded-lg p-4">
                <Label className="text-sm font-semibold">Instance Permissions</Label>
                {instances.length === 0 ? (
                  <p className="text-sm text-muted-foreground">No instances available</p>
                ) : (
                  <div className="space-y-2">
                    {instances.map((instance) => (
                      <div key={instance.id} className="flex items-center space-x-2">
                        <Checkbox
                          id={`instance-${instance.id}`}
                          checked={instancePermissions[instance.id] || false}
                          onCheckedChange={(checked) =>
                            // The callback may also report "indeterminate"; only a real check grants access
                            handleInstancePermissionChange(instance.id, checked === true)
                          }
                          disabled={loading}
                        />
                        <Label
                          htmlFor={`instance-${instance.id}`}
                          className="font-normal cursor-pointer flex-1"
                        >
                          {instance.name}
                        </Label>
                        <span className="text-sm text-muted-foreground">Can Infer</span>
                      </div>
                    ))}
                  </div>
                )}
              </div>
            )}
          </div>
          <div className="space-y-2">
            <Label htmlFor="expires-at">Expiration Date (Optional)</Label>
            <Input
              id="expires-at"
              type="date"
              value={expiresAt}
              onChange={(e) => setExpiresAt(e.target.value)}
              disabled={loading}
            />
            {expiresLabel && (
              <p className="text-sm text-muted-foreground">
                Expires on {expiresLabel}
              </p>
            )}
          </div>
          <DialogFooter>
            <Button
              type="button"
              variant="outline"
              onClick={() => onOpenChange(false)}
              disabled={loading}
            >
              Cancel
            </Button>
            <Button type="submit" disabled={loading}>
              {loading && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
              Create
            </Button>
          </DialogFooter>
        </form>
      </DialogContent>
    </Dialog>
  );
}
export default CreateApiKeyDialog;

View File

@@ -0,0 +1,26 @@
import React from 'react'
import KeyValueInput from './KeyValueInput'
interface EnvVarsInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
/**
 * Key/value editor preconfigured for environment variables.
 *
 * Forwards all props to KeyValueInput and fixes the placeholders, the add
 * button text, and `allowEmptyValues={false}`.
 */
const EnvVarsInput: React.FC<EnvVarsInputProps> = (props) => (
  <KeyValueInput
    {...props}
    keyPlaceholder="Variable name"
    valuePlaceholder="Variable value"
    addButtonText="Add Variable"
    allowEmptyValues={false}
  />
)
export default EnvVarsInput

View File

@@ -1,144 +0,0 @@
import React, { useState } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface EnvironmentVariablesInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
interface EnvVar {
key: string
value: string
}
/**
 * Editable list of environment variables rendered as name/value rows.
 *
 * Rows are kept in local state (seeded once from `value`); every edit reports
 * the complete, trimmed set of rows that have both a name and a value to
 * `onChange`, or `undefined` when no such row exists.
 */
const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
}) => {
  // Convert the value object to an array of key-value pairs for editing
  const envVarsFromValue = value
    ? Object.entries(value).map(([key, val]) => ({ key, value: val }))
    : []

  // Always show at least one (empty) row so there is something to type into
  const [envVars, setEnvVars] = useState<EnvVar[]>(
    envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
  )

  // Update parent component when env vars change
  const updateParent = (newEnvVars: EnvVar[]) => {
    // Filter out empty entries
    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')

    if (validVars.length === 0) {
      onChange(undefined)
    } else {
      const envObject = validVars.reduce((acc, env) => {
        acc[env.key.trim()] = env.value.trim()
        return acc
      }, {} as Record<string, string>)
      onChange(envObject)
    }
  }

  // Store the new rows locally and report them to the parent
  const commit = (newEnvVars: EnvVar[]) => {
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }

  const handleKeyChange = (index: number, newKey: string) => {
    // Build a new row object instead of mutating the one held in state
    commit(envVars.map((env, i) => (i === index ? { ...env, key: newKey } : env)))
  }

  const handleValueChange = (index: number, newValue: string) => {
    commit(envVars.map((env, i) => (i === index ? { ...env, value: newValue } : env)))
  }

  const addEnvVar = () => {
    // A blank row is not valid yet, so the parent does not need to hear about it
    setEnvVars([...envVars, { key: '', value: '' }])
  }

  const removeEnvVar = (index: number) => {
    if (envVars.length === 1) {
      // Reset to empty if it's the last one
      commit([{ key: '', value: '' }])
    } else {
      commit(envVars.filter((_, i) => i !== index))
    }
  }

  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {envVars.map((envVar, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              placeholder="Variable name"
              value={envVar.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder="Variable value"
              value={envVar.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removeEnvVar(index)}
              disabled={disabled}
              className="shrink-0"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addEnvVar}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          Add Variable
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">
        Environment variables that will be passed to the backend process
      </p>
    </div>
  )
}
export default EnvironmentVariablesInput

View File

@@ -0,0 +1,27 @@
import React from 'react'
import KeyValueInput from './KeyValueInput'
interface ExtraArgsInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
}
/**
 * Key/value editor preconfigured for extra command line arguments.
 *
 * Forwards all props to KeyValueInput and fixes the placeholders, button
 * text and helper text. `allowEmptyValues` is enabled so a flag can be
 * entered without a value.
 */
const ExtraArgsInput: React.FC<ExtraArgsInputProps> = (props) => (
  <KeyValueInput
    {...props}
    keyPlaceholder="Flag name (without --)"
    valuePlaceholder="Value (empty for boolean flags)"
    addButtonText="Add Argument"
    helperText="Additional command line arguments to pass to the backend. Leave value empty for boolean flags."
    allowEmptyValues={true}
  />
)
export default ExtraArgsInput

View File

@@ -0,0 +1,171 @@
import React, { useState, useEffect } from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Button } from '@/components/ui/button'
import { X, Plus } from 'lucide-react'
interface KeyValueInputProps {
id: string
label: string
value: Record<string, string> | undefined
onChange: (value: Record<string, string> | undefined) => void
description?: string
disabled?: boolean
className?: string
keyPlaceholder?: string
valuePlaceholder?: string
addButtonText?: string
helperText?: string
allowEmptyValues?: boolean // If true, entries with empty values are considered valid
}
interface KeyValuePair {
key: string
value: string
}
/** Creates a fresh blank row. */
const createEmptyPair = (): KeyValuePair => ({ key: '', value: '' })

/** Expands a record into editable rows (empty array for undefined/empty records). */
const recordToPairs = (record: Record<string, string> | undefined): KeyValuePair[] =>
  record ? Object.entries(record).map(([key, val]) => ({ key, value: val })) : []

/**
 * Collapses rows into the record reported to the parent: keys and values are
 * trimmed, rows without a key are dropped, and rows without a value are
 * dropped unless `allowEmptyValues` is set. Returns undefined when no row
 * qualifies.
 */
const pairsToRecord = (
  pairs: KeyValuePair[],
  allowEmptyValues: boolean
): Record<string, string> | undefined => {
  const validPairs = pairs.filter(
    pair => pair.key.trim() !== '' && (allowEmptyValues || pair.value.trim() !== '')
  )
  if (validPairs.length === 0) {
    return undefined
  }
  return validPairs.reduce((acc, pair) => {
    acc[pair.key.trim()] = pair.value.trim()
    return acc
  }, {} as Record<string, string>)
}

/** Shallow equality of two records; undefined is treated like an empty record. */
const recordsEqual = (
  a: Record<string, string> | undefined,
  b: Record<string, string> | undefined
): boolean => {
  const left = a ?? {}
  const right = b ?? {}
  const leftKeys = Object.keys(left)
  if (leftKeys.length !== Object.keys(right).length) {
    return false
  }
  return leftKeys.every(
    key => Object.prototype.hasOwnProperty.call(right, key) && left[key] === right[key]
  )
}

/**
 * Generic editor for a string-to-string record, rendered as key/value rows.
 *
 * Rows live in local state so that incomplete rows (for example a key without
 * a value yet) can exist while typing. Every edit reports the trimmed, valid
 * subset of rows to `onChange`, or `undefined` when there is none. When the
 * `value` prop changes to something other than what the local rows already
 * represent, the rows are replaced from the prop.
 */
const KeyValueInput: React.FC<KeyValueInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className,
  keyPlaceholder = 'Key',
  valuePlaceholder = 'Value',
  addButtonText = 'Add Entry',
  helperText,
  allowEmptyValues = false
}) => {
  // Seed the rows from the initial value; always show at least one blank row
  const [pairs, setPairs] = useState<KeyValuePair[]>(() => {
    const initialPairs = recordToPairs(value)
    return initialPairs.length > 0 ? initialPairs : [createEmptyPair()]
  })

  // Sync internal state when the value prop changes from the outside
  useEffect(() => {
    setPairs((current) => {
      // The parent echoing back what these rows already produce must not
      // overwrite them: that would drop in-progress rows and strip trailing
      // spaces while the user is still typing.
      if (recordsEqual(pairsToRecord(current, allowEmptyValues), value)) {
        return current
      }
      const incoming = recordToPairs(value)
      if (incoming.length > 0) {
        return incoming
      }
      // Reset to a single empty row only if value is explicitly undefined/null
      return value ? current : [createEmptyPair()]
    })
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [value])

  // Store the new rows locally and report the valid ones to the parent
  const commit = (newPairs: KeyValuePair[]) => {
    setPairs(newPairs)
    onChange(pairsToRecord(newPairs, allowEmptyValues))
  }

  const handleKeyChange = (index: number, newKey: string) => {
    // Build a new row object instead of mutating the one held in state
    commit(pairs.map((pair, i) => (i === index ? { ...pair, key: newKey } : pair)))
  }

  const handleValueChange = (index: number, newValue: string) => {
    commit(pairs.map((pair, i) => (i === index ? { ...pair, value: newValue } : pair)))
  }

  const addPair = () => {
    // A blank row is not valid yet, so the parent does not need to hear about it
    setPairs([...pairs, createEmptyPair()])
  }

  const removePair = (index: number) => {
    if (pairs.length === 1) {
      // Reset to empty if it's the last one
      commit([createEmptyPair()])
    } else {
      commit(pairs.filter((_, i) => i !== index))
    }
  }

  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {pairs.map((pair, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              placeholder={keyPlaceholder}
              value={pair.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder={valuePlaceholder}
              value={pair.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removePair(index)}
              disabled={disabled}
              className="shrink-0"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addPair}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          {addButtonText}
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      {helperText && (
        <p className="text-xs text-muted-foreground">{helperText}</p>
      )}
    </div>
  )
}
export default KeyValueInput

View File

@@ -47,8 +47,18 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
))}
</div>
)}
{/* Extra Args - Always visible as a separate section */}
<div className="space-y-4">
<BackendFormField
key="extra_args"
fieldKey="extra_args"
value={(formData.backend_options as any)?.extra_args}
onChange={onBackendFieldChange}
/>
</div>
</div>
)
}
export default BackendConfiguration
export default BackendConfiguration

View File

@@ -6,6 +6,7 @@ import { Terminal, ChevronDown, ChevronRight } from 'lucide-react'
import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
import BackendFormField from '@/components/BackendFormField'
import SelectInput from '@/components/form/SelectInput'
import ExecutionContextSection from '@/components/instance/ExecutionContextSection'
interface BackendConfigurationCardProps {
formData: CreateInstanceOptions
@@ -59,6 +60,12 @@ const BackendConfigurationCard: React.FC<BackendConfigurationCardProps> = ({
</p>
</div>
{/* Execution Context Section */}
<ExecutionContextSection
formData={formData}
onChange={onChange}
/>
{/* Basic Backend Options */}
{basicBackendFields.length > 0 && (
<div className="space-y-4">
@@ -109,6 +116,16 @@ const BackendConfigurationCard: React.FC<BackendConfigurationCardProps> = ({
)}
</div>
)}
{/* Extra Arguments - Always visible */}
<div className="space-y-4">
<BackendFormField
key="extra_args"
fieldKey="extra_args"
value={(formData.backend_options as Record<string, unknown>)?.extra_args as Record<string, string> | undefined}
onChange={onBackendFieldChange}
/>
</div>
</CardContent>
</Card>
)

View File

@@ -0,0 +1,76 @@
import React from 'react'
import { BackendType, type CreateInstanceOptions } from '@/types/instance'
import CheckboxInput from '@/components/form/CheckboxInput'
import TextInput from '@/components/form/TextInput'
import EnvVarsInput from '@/components/form/EnvVarsInput'
import { useBackendSettings } from '@/hooks/useConfig'
interface ExecutionContextSectionProps {
formData: CreateInstanceOptions
onChange: (key: keyof CreateInstanceOptions, value: unknown) => void
}
/**
 * Form section describing how the backend process is launched: an optional
 * Docker toggle, an optional command override and environment variables.
 *
 * The Docker toggle is hidden for the MLX backend; the command override is
 * shown for MLX, or for any backend while Docker is not enabled.
 */
const ExecutionContextSection: React.FC<ExecutionContextSectionProps> = ({
  formData,
  onChange
}) => {
  const backendSettings = useBackendSettings(formData.backend_type)

  const backendType = formData.backend_type
  const isMlx = backendType === BackendType.MLX_LM
  const showCommandOverride = isMlx || formData.docker_enabled !== true

  // Placeholder for the command override: the configured command when known,
  // otherwise a per-backend fallback for when the config is not loaded
  const commandPlaceholder = (() => {
    if (backendSettings?.command) {
      return backendSettings.command
    }
    if (backendType === BackendType.LLAMA_CPP) {
      return "llama-server"
    }
    if (backendType === BackendType.VLLM) {
      return "vllm"
    }
    if (isMlx) {
      return "mlx_lm.server"
    }
    return ""
  })()

  return (
    <div className="space-y-4">
      <h3 className="text-md font-medium">Execution Context</h3>

      {/* Docker toggle is not offered for the MLX backend */}
      {!isMlx && (
        <CheckboxInput
          id="docker_enabled"
          label="Enable Docker"
          value={formData.docker_enabled}
          onChange={(value) => onChange('docker_enabled', value)}
          description="Run backend in Docker container"
        />
      )}

      {/* Command override is hidden while Docker is enabled (except for MLX) */}
      {showCommandOverride && (
        <TextInput
          id="command_override"
          label="Command Override"
          value={formData.command_override || ''}
          onChange={(value) => onChange('command_override', value)}
          placeholder={commandPlaceholder}
          description="Custom path to backend executable (leave empty to use config default)"
        />
      )}

      <EnvVarsInput
        id="environment"
        label="Environment Variables"
        value={formData.environment}
        onChange={(value) => onChange('environment', value)}
        description="Custom environment variables for the instance"
      />
    </div>
  )
}
export default ExecutionContextSection

View File

@@ -1,12 +1,11 @@
import React, { useState, useEffect } from 'react'
import type { CreateInstanceOptions } from '@/types/instance'
import { type CreateInstanceOptions } from '@/types/instance'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Label } from '@/components/ui/label'
import { Input } from '@/components/ui/input'
import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
import NumberInput from '@/components/form/NumberInput'
import CheckboxInput from '@/components/form/CheckboxInput'
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
import SelectInput from '@/components/form/SelectInput'
import { nodesApi, type NodesMap } from '@/lib/api'
@@ -131,14 +130,6 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
onChange={(value) => onChange('on_demand_start', value)}
description="Start instance only when needed"
/>
<EnvironmentVariablesInput
id="environment"
label="Environment Variables"
value={formData.environment}
onChange={(value) => onChange('environment', value)}
description="Custom environment variables for the instance"
/>
</div>
</CardContent>
</Card>

View File

@@ -0,0 +1,285 @@
import { useEffect, useState } from "react";
import { Button } from "@/components/ui/button";
import { Badge } from "@/components/ui/badge";
import { Alert, AlertDescription } from "@/components/ui/alert";
import { Trash2, Copy, Check, X, ChevronDown, ChevronRight } from "lucide-react";
import { apiKeysApi } from "@/lib/api";
import { ApiKey, KeyPermissionResponse, PermissionMode } from "@/types/apiKey";
import CreateApiKeyDialog from "@/components/apikeys/CreateApiKeyDialog";
import { format, formatDistanceToNow } from "date-fns";
/**
 * Settings panel that lists API keys and lets the user create, inspect and
 * delete them.
 *
 * - The plain-text value of a newly created key is shown once in a dismissible
 *   banner with a copy-to-clipboard button.
 * - Clicking a row expands it; for per-instance keys the instance permissions
 *   are fetched on first expansion and cached by key id.
 */
function ApiKeysSection() {
  const [keys, setKeys] = useState<ApiKey[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [expandedRowId, setExpandedRowId] = useState<number | null>(null);
  const [newKeyPlainText, setNewKeyPlainText] = useState<string | null>(null);
  const [createDialogOpen, setCreateDialogOpen] = useState(false);
  const [copiedKey, setCopiedKey] = useState(false);
  // Permissions and their loading flags, both indexed by API key id
  const [permissions, setPermissions] = useState<Record<number, KeyPermissionResponse[]>>({});
  const [loadingPermissions, setLoadingPermissions] = useState<Record<number, boolean>>({});

  // Load the key list once on mount
  useEffect(() => {
    void fetchKeys();
  }, []);

  // Reloads the key list; failures are surfaced through the error alert
  const fetchKeys = async () => {
    setLoading(true);
    setError(null);
    try {
      const data = await apiKeysApi.list();
      setKeys(data);
    } catch (err) {
      setError(err instanceof Error ? err.message : "Failed to load API keys");
    } finally {
      setLoading(false);
    }
  };

  // Loads the per-instance permissions of a key unless they are already cached.
  // Functional state updates are used so that overlapping requests for
  // different keys cannot overwrite each other's results or loading flags.
  const fetchPermissions = async (keyId: number) => {
    if (permissions[keyId]) return;

    setLoadingPermissions((prev) => ({ ...prev, [keyId]: true }));
    try {
      const data = await apiKeysApi.getPermissions(keyId);
      setPermissions((prev) => ({ ...prev, [keyId]: data }));
    } catch (err) {
      console.error("Failed to load permissions:", err);
    } finally {
      setLoadingPermissions((prev) => ({ ...prev, [keyId]: false }));
    }
  };

  // Shows the one-time plain-text key, refreshes the list and closes the dialog
  const handleKeyCreated = (plainTextKey: string) => {
    setNewKeyPlainText(plainTextKey);
    void fetchKeys();
    setCreateDialogOpen(false);
  };

  const dismissSuccessBanner = () => {
    setNewKeyPlainText(null);
  };

  // Copies the new key to the clipboard. The Clipboard API is missing outside
  // secure contexts and may reject, so failures are reported instead of
  // escaping as an unhandled promise rejection.
  const handleCopyKey = async () => {
    if (!newKeyPlainText) return;

    try {
      await navigator.clipboard.writeText(newKeyPlainText);
      setCopiedKey(true);
      setTimeout(() => setCopiedKey(false), 2000);
    } catch (err) {
      console.error("Failed to copy API key:", err);
      setError("Failed to copy the key to the clipboard. Please copy it manually.");
    }
  };

  // Asks for confirmation, deletes the key and reloads the list
  const handleDeleteKey = async (id: number, name: string) => {
    if (!confirm(`Are you sure you want to delete the key '${name}'?\n\nThis action cannot be undone.`)) {
      return;
    }

    try {
      await apiKeysApi.delete(id);
      void fetchKeys();
    } catch (err) {
      alert(err instanceof Error ? err.message : "Failed to delete API key");
    }
  };

  // Toggles the expanded row; per-instance keys load their permissions lazily
  const handleRowClick = (key: ApiKey) => {
    if (expandedRowId === key.id) {
      setExpandedRowId(null);
    } else {
      setExpandedRowId(key.id);
      if (key.permission_mode === PermissionMode.PerInstance) {
        void fetchPermissions(key.id);
      }
    }
  };

  // Timestamps are multiplied by 1000 before being handed to Date
  const formatDate = (timestamp: number) => {
    return format(new Date(timestamp * 1000), "MMM d, yyyy");
  };

  const formatLastUsed = (timestamp: number | null) => {
    if (!timestamp) return "Never";
    return formatDistanceToNow(new Date(timestamp * 1000), { addSuffix: true });
  };

  const isExpired = (expiresAt: number | null) => {
    if (!expiresAt) return false;
    return expiresAt * 1000 < Date.now();
  };

  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <h3 className="text-lg font-semibold">API Keys</h3>
        <Button onClick={() => setCreateDialogOpen(true)}>Create API Key</Button>
      </div>

      {newKeyPlainText && (
        <Alert className="bg-green-50 dark:bg-green-950 border-green-200 dark:border-green-900">
          <AlertDescription className="space-y-3">
            <div className="flex items-start justify-between">
              <div>
                <p className="font-semibold text-green-900 dark:text-green-100">API key created successfully</p>
                <p className="text-sm text-green-800 dark:text-green-200 mt-1">
                  Make sure to copy this key now. You won't be able to see it again!
                </p>
              </div>
              <Button
                variant="ghost"
                size="icon"
                onClick={dismissSuccessBanner}
                className="h-6 w-6"
              >
                <X className="h-4 w-4" />
              </Button>
            </div>
            <div className="flex items-center gap-2">
              <code className="flex-1 p-3 bg-white dark:bg-gray-900 border border-green-300 dark:border-green-800 rounded font-mono text-sm break-all">
                {newKeyPlainText}
              </code>
              <Button onClick={handleCopyKey} variant="outline" size="sm">
                {copiedKey ? <Check className="h-4 w-4" /> : <Copy className="h-4 w-4" />}
              </Button>
            </div>
          </AlertDescription>
        </Alert>
      )}

      {error && (
        <Alert variant="destructive">
          <AlertDescription>{error}</AlertDescription>
        </Alert>
      )}

      {loading ? (
        <div className="space-y-2">
          {[1, 2, 3].map((i) => (
            <div key={i} className="h-16 bg-muted animate-pulse rounded" />
          ))}
        </div>
      ) : keys.length === 0 ? (
        <div className="text-center py-12 text-muted-foreground">
          No API keys yet. Create your first key to get started.
        </div>
      ) : (
        <div className="border rounded-lg overflow-hidden">
          <table className="w-full">
            <thead className="bg-muted">
              <tr>
                <th className="text-left p-3 font-semibold text-sm">Name</th>
                <th className="text-left p-3 font-semibold text-sm">Permissions</th>
                <th className="text-left p-3 font-semibold text-sm">Created</th>
                <th className="text-left p-3 font-semibold text-sm">Expires</th>
                <th className="text-left p-3 font-semibold text-sm">Last Accessed</th>
                <th className="text-left p-3 font-semibold text-sm">Actions</th>
              </tr>
            </thead>
            <tbody>
              {/* flatMap yields one flat list of keyed rows, so every direct
                  child of <tbody> carries its own key */}
              {keys.flatMap((key) => {
                const isExpanded = expandedRowId === key.id;

                const rows = [
                  <tr
                    key={key.id}
                    className="border-t hover:bg-muted/50 cursor-pointer"
                    onClick={() => handleRowClick(key)}
                  >
                    <td className="p-3">
                      <div className="flex items-center gap-2">
                        {isExpanded ? (
                          <ChevronDown className="h-4 w-4 text-muted-foreground" />
                        ) : (
                          <ChevronRight className="h-4 w-4 text-muted-foreground" />
                        )}
                        {key.name}
                      </div>
                    </td>
                    <td className="p-3">
                      {key.permission_mode === PermissionMode.AllowAll ? (
                        <Badge variant="default">Full Access</Badge>
                      ) : (
                        <Badge variant="secondary">Limited Access</Badge>
                      )}
                    </td>
                    <td className="p-3 text-sm text-muted-foreground">{formatDate(key.created_at)}</td>
                    <td className="p-3">
                      {key.expires_at ? (
                        isExpired(key.expires_at) ? (
                          <Badge variant="destructive">Expired</Badge>
                        ) : (
                          <span className="text-sm text-muted-foreground">{formatDate(key.expires_at)}</span>
                        )
                      ) : (
                        <span className="text-sm text-muted-foreground">Never</span>
                      )}
                    </td>
                    <td className="p-3 text-sm text-muted-foreground">{formatLastUsed(key.last_used_at)}</td>
                    <td className="p-3">
                      <Button
                        variant="ghost"
                        size="icon"
                        onClick={(e) => {
                          // Keep the click from also toggling the row
                          e.stopPropagation();
                          void handleDeleteKey(key.id, key.name);
                        }}
                        title="Delete key"
                      >
                        <Trash2 className="h-4 w-4 text-destructive" />
                      </Button>
                    </td>
                  </tr>,
                ];

                if (isExpanded) {
                  rows.push(
                    <tr key={`${key.id}-expanded`} className="border-t bg-muted/30">
                      <td colSpan={6} className="p-4">
                        {key.permission_mode === PermissionMode.AllowAll ? (
                          <p className="text-sm text-muted-foreground">
                            This key has full access to all instances
                          </p>
                        ) : loadingPermissions[key.id] ? (
                          <p className="text-sm text-muted-foreground">Loading permissions...</p>
                        ) : permissions[key.id] ? (
                          <div className="space-y-2">
                            <p className="text-sm font-semibold">Instance Permissions:</p>
                            <table className="w-full text-sm">
                              <thead>
                                <tr className="border-b">
                                  <th className="text-left py-2">Instance Name</th>
                                  <th className="text-left py-2">Can Infer</th>
                                </tr>
                              </thead>
                              <tbody>
                                {permissions[key.id].map((perm) => (
                                  <tr key={perm.instance_id} className="border-b">
                                    <td className="py-2">{perm.instance_name}</td>
                                    <td className="py-2">
                                      {perm.can_infer ? (
                                        <Check className="h-4 w-4 text-green-600" />
                                      ) : (
                                        <X className="h-4 w-4 text-red-600" />
                                      )}
                                    </td>
                                  </tr>
                                ))}
                              </tbody>
                            </table>
                          </div>
                        ) : (
                          <p className="text-sm text-muted-foreground">No permissions data</p>
                        )}
                      </td>
                    </tr>
                  );
                }

                return rows;
              })}
            </tbody>
          </table>
        </div>
      )}

      <CreateApiKeyDialog
        open={createDialogOpen}
        onOpenChange={setCreateDialogOpen}
        onKeyCreated={handleKeyCreated}
      />
    </div>
  );
}
export default ApiKeysSection;

View File

@@ -0,0 +1,22 @@
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
import ApiKeysSection from "./ApiKeysSection";
/** Props for SettingsDialog; both are forwarded unchanged to the underlying Dialog. */
interface SettingsDialogProps {
// Whether the dialog is currently shown.
open: boolean;
// Called when the dialog asks to change its open state.
onOpenChange: (open: boolean) => void;
}
/**
 * Modal "Settings" dialog. Its only content is the API keys section; the
 * open state is controlled entirely by the caller.
 */
function SettingsDialog(props: SettingsDialogProps) {
  const { open, onOpenChange } = props;

  // Wide, scrollable content area sized for the API keys table
  const contentClasses = "sm:max-w-5xl max-h-[90vh] overflow-y-auto";

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className={contentClasses}>
        <DialogHeader>
          <DialogTitle>Settings</DialogTitle>
        </DialogHeader>

        <ApiKeysSection />
      </DialogContent>
    </Dialog>
  );
}
export default SettingsDialog;

View File

@@ -0,0 +1,59 @@
import * as React from "react"
import { cva, type VariantProps } from "class-variance-authority"
import { cn } from "@/lib/utils"
// Class-name builder for Alert: a shared base style plus one of two variants
// ("default" or "destructive"); "default" is used when no variant is given.
const alertVariants = cva(
"relative w-full rounded-lg border p-4",
{
variants: {
variant: {
default: "bg-background text-foreground",
destructive:
"border-destructive/50 text-destructive dark:border-destructive [&>svg]:text-destructive",
},
},
defaultVariants: {
variant: "default",
},
}
)
/**
 * Bordered container rendered as a <div role="alert">. `variant` selects the
 * color scheme, `className` is merged after the variant classes, and all
 * remaining props are passed through to the <div>.
 */
const Alert = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement> & VariantProps<typeof alertVariants>
>((allProps, ref) => {
  const { className, variant, ...divProps } = allProps
  const mergedClasses = cn(alertVariants({ variant }), className)

  return <div ref={ref} role="alert" className={mergedClasses} {...divProps} />
})
Alert.displayName = "Alert"
/**
 * Heading of an Alert, rendered as an <h5>. `className` is merged after the
 * default title classes and all other props are passed through.
 *
 * The forwarded ref is typed as HTMLHeadingElement so that it matches the
 * element that is actually rendered.
 */
const AlertTitle = React.forwardRef<
  HTMLHeadingElement,
  React.HTMLAttributes<HTMLHeadingElement>
>(({ className, ...props }, ref) => (
  <h5
    ref={ref}
    className={cn("mb-1 font-medium leading-none tracking-tight", className)}
    {...props}
  />
))
AlertTitle.displayName = "AlertTitle"
/**
 * Body text of an Alert, rendered as a <div> so it can contain block-level
 * children such as paragraphs. `className` is merged after the default
 * classes and all other props are passed through.
 *
 * The ref and props are typed for HTMLDivElement so that they match the
 * element that is actually rendered.
 */
const AlertDescription = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...props }, ref) => (
  <div
    ref={ref}
    className={cn("text-sm [&_p]:leading-relaxed", className)}
    {...props}
  />
))
AlertDescription.displayName = "AlertDescription"
export { Alert, AlertTitle, AlertDescription }

View File

@@ -0,0 +1,42 @@
import * as React from "react"
import * as RadioGroupPrimitive from "@radix-ui/react-radio-group"
import { Circle } from "lucide-react"
import { cn } from "@/lib/utils"
/**
 * Wrapper around the Radix radio group root that lays its items out in a
 * grid; `className` is merged after the layout classes.
 */
const RadioGroup = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Root>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
>((allProps, ref) => {
  const { className, ...rootProps } = allProps
  const rootClasses = cn("grid gap-2", className)

  return <RadioGroupPrimitive.Root className={rootClasses} {...rootProps} ref={ref} />
})
RadioGroup.displayName = RadioGroupPrimitive.Root.displayName
/**
 * A single radio button built on the Radix item primitive. The indicator
 * child draws a filled circle; `className` is merged after the default
 * item classes.
 */
const RadioGroupItem = React.forwardRef<
  React.ElementRef<typeof RadioGroupPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
>((allProps, ref) => {
  const { className, ...itemProps } = allProps
  const itemClasses = cn(
    "aspect-square h-4 w-4 rounded-full border border-primary text-primary ring-offset-background focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
    className
  )

  return (
    <RadioGroupPrimitive.Item ref={ref} className={itemClasses} {...itemProps}>
      <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
        <Circle className="h-2.5 w-2.5 fill-current text-current" />
      </RadioGroupPrimitive.Indicator>
    </RadioGroupPrimitive.Item>
  )
})
RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName
export { RadioGroup, RadioGroupItem }

View File

@@ -147,16 +147,3 @@ export const useAuth = (): AuthContextType => {
}
return context
}
// Helper hook for getting auth headers
export const useAuthHeaders = (): HeadersInit => {
const { apiKey, isAuthenticated } = useAuth()
if (!isAuthenticated || !apiKey) {
return {}
}
return {
'Authorization': `Bearer ${apiKey}`
}
}

View File

@@ -0,0 +1,62 @@
import { type ReactNode, createContext, useContext, useEffect, useState, useRef } from 'react'
import { serverApi } from '@/lib/api'
import type { AppConfig } from '@/types/config'
import { useAuth } from './AuthContext'
/** Value exposed by the config context to its consumers. */
interface ConfigContextType {
// Loaded application config; null until a load has succeeded.
config: AppConfig | null
// Loading flag maintained by ConfigProvider.
isLoading: boolean
// Message of the last failed load, or null.
error: string | null
}
// The default value is undefined so that useConfig can detect (and throw on)
// use outside of a ConfigProvider.
const ConfigContext = createContext<ConfigContextType | undefined>(undefined)
/** Props for ConfigProvider. */
interface ConfigProviderProps {
// Subtree that gains access to the config context.
children: ReactNode
}
/**
 * Loads the server configuration once the user is authenticated and exposes
 * it, together with loading and error state, through ConfigContext.
 *
 * The config is requested at most once per successful load. A failed load
 * clears the guard so that the next change of the authentication state can
 * try again.
 */
export const ConfigProvider = ({ children }: ConfigProviderProps) => {
  const { isAuthenticated } = useAuth()
  const [config, setConfig] = useState<AppConfig | null>(null)
  const [isLoading, setIsLoading] = useState(true)
  const [error, setError] = useState<string | null>(null)
  // True while a load is in flight or after one has succeeded
  const loadedRef = useRef(false)

  useEffect(() => {
    if (!isAuthenticated) {
      // Nothing can be loaded without authentication
      setIsLoading(false)
      return
    }

    if (loadedRef.current) {
      // A load is already running or done. Do not touch isLoading here:
      // when the effect runs twice (e.g. React StrictMode), clearing it would
      // report "not loading" while the first request is still in flight.
      return
    }

    loadedRef.current = true
    // The flag may have been cleared while unauthenticated; raise it again
    // for the duration of the request.
    setIsLoading(true)
    setError(null)

    const loadConfig = async () => {
      try {
        const data = await serverApi.getConfig()
        setConfig(data)
      } catch (err) {
        // Allow a retry the next time the authentication state changes
        loadedRef.current = false
        const errorMessage = err instanceof Error ? err.message : 'Failed to load configuration'
        setError(errorMessage)
        console.error('Error loading config:', err)
      } finally {
        setIsLoading(false)
      }
    }

    void loadConfig()
  }, [isAuthenticated])

  return (
    <ConfigContext.Provider value={{ config, isLoading, error }}>
      {children}
    </ConfigContext.Provider>
  )
}
/**
 * Returns the current config context value.
 *
 * @throws Error when called outside of a ConfigProvider.
 */
export const useConfig = (): ConfigContextType => {
  const value = useContext(ConfigContext)
  if (value !== undefined) {
    return value
  }
  throw new Error('useConfig must be used within a ConfigProvider')
}

View File

@@ -123,8 +123,8 @@ function renderWithProvider(children: ReactNode) {
describe("InstancesContext", () => {
const mockInstances: Instance[] = [
{ name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
{ name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
{ id: 1, name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
{ id: 2, name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
];
beforeEach(() => {
@@ -181,6 +181,7 @@ describe("InstancesContext", () => {
describe("Create Instance", () => {
it("creates instance and adds it to state", async () => {
const newInstance: Instance = {
id: 3,
name: "new-instance",
status: "stopped",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
@@ -238,6 +239,7 @@ describe("InstancesContext", () => {
describe("Update Instance", () => {
it("updates instance and maintains it in state", async () => {
const updatedInstance: Instance = {
id: 1,
name: "instance1",
status: "running",
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
@@ -408,6 +410,7 @@ describe("InstancesContext", () => {
it("maintains consistent state during multiple operations", async () => {
// Test that operations don't interfere with each other
const newInstance: Instance = {
id: 3,
name: "new-instance",
status: "stopped",
options: {},

View File

@@ -0,0 +1,51 @@
import { useConfig } from '@/contexts/ConfigContext'
/**
 * Hook returning the instance default values from the loaded config, or null
 * while the config (or its `instances` section) is not available.
 */
export const useInstanceDefaults = () => {
  const { config } = useConfig()

  const instances = config?.instances
  if (!instances) {
    return null
  }

  const {
    default_auto_restart: autoRestart,
    default_max_restarts: maxRestarts,
    default_restart_delay: restartDelay,
    default_on_demand_start: onDemandStart,
  } = instances

  return { autoRestart, maxRestarts, restartDelay, onDemandStart }
}
/**
 * Hook returning the configured command and Docker settings for a backend.
 *
 * @param backendType Backend type identifier ('llama_cpp', 'mlx_lm' or 'vllm').
 * @returns The backend's settings with missing values replaced by defaults, or
 *   null when the config is not loaded, the type is missing or unknown, or the
 *   backend has no config entry.
 */
export const useBackendSettings = (backendType: string | undefined) => {
  const { config } = useConfig()

  const backends = config?.backends
  if (!backends || !backendType) {
    return null
  }

  // Backend type identifiers differ from the keys used in the config
  let settings: (typeof backends)[keyof typeof backends] | undefined
  switch (backendType) {
    case 'llama_cpp':
      settings = backends['llama-cpp']
      break
    case 'mlx_lm':
      settings = backends['mlx']
      break
    case 'vllm':
      settings = backends['vllm']
      break
    default:
      return null
  }

  if (!settings) {
    return null
  }

  const { command, docker } = settings
  return {
    command: command || '',
    dockerEnabled: docker?.enabled ?? false,
    dockerImage: docker?.image || '',
  }
}

View File

@@ -18,7 +18,7 @@ export function useInstanceHealth(instanceName: string, instanceStatus: Instance
// Trigger health check when instance status changes to active states
useEffect(() => {
if (instanceStatus === 'running' || instanceStatus === 'restarting') {
if (instanceStatus === 'running' || instanceStatus === 'restarting' || instanceStatus === 'shutting_down') {
healthService.refreshHealth(instanceName).catch(error => {
console.error(`Failed to refresh health for ${instanceName}:`, error)
})

View File

@@ -1,4 +1,6 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import type { AppConfig } from "@/types/config";
import type { ApiKey, CreateKeyRequest, CreateKeyResponse, KeyPermissionResponse } from "@/types/apiKey";
import { handleApiError } from "./errorUtils";
// Adding baseURI as a prefix to support being served behind a subpath
@@ -73,6 +75,9 @@ export const serverApi = {
// GET /backends/llama-cpp/devices
getDevices: () => apiCall<string>("/backends/llama-cpp/devices", {}, "text"),
// GET /config
getConfig: () => apiCall<AppConfig>("/config"),
};
// Backend API functions
@@ -174,3 +179,29 @@ export const instancesApi = {
// GET /instances/{name}/proxy/health
getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${encodeURIComponent(name)}/proxy/health`),
};
// Client functions for the API key endpoints under /auth/keys
export const apiKeysApi = {
  // GET /auth/keys - all API keys
  list: () => {
    return apiCall<ApiKey[]>("/auth/keys");
  },

  // GET /auth/keys/{id} - a single API key
  get: (id: number) => {
    return apiCall<ApiKey>("/auth/keys/" + id);
  },

  // POST /auth/keys - create a key; the request is sent as a JSON body
  create: (request: CreateKeyRequest) => {
    const options = { method: "POST", body: JSON.stringify(request) };
    return apiCall<CreateKeyResponse>("/auth/keys", options);
  },

  // DELETE /auth/keys/{id} - remove a key
  delete: (id: number) => {
    const options = { method: "DELETE" };
    return apiCall<void>("/auth/keys/" + id, options);
  },

  // GET /auth/keys/{id}/permissions - per-instance permissions of a key
  getPermissions: (id: number) => {
    return apiCall<KeyPermissionResponse[]>("/auth/keys/" + id + "/permissions");
  },
};

View File

@@ -5,11 +5,12 @@ type HealthCallback = (health: HealthStatus) => void
// Polling intervals based on health state (in milliseconds)
const POLLING_INTERVALS: Record<HealthState, number> = {
'starting': 5000, // 5 seconds - frequent during startup
'restarting': 5000, // 5 seconds - restart in progress
'ready': 60000, // 60 seconds - stable state
'stopped': 0, // No polling
'failed': 0, // No polling
'starting': 5000, // 5 seconds - frequent during startup
'restarting': 5000, // 5 seconds - restart in progress
'shutting_down': 3000, // 3 seconds - monitor shutdown progress
'ready': 60000, // 60 seconds - stable state
'stopped': 0, // No polling
'failed': 0, // No polling
}
class HealthService {
@@ -96,6 +97,7 @@ class HealthService {
case 'running': return 'starting' // Should not happen as we check HTTP for running
case 'failed': return 'failed'
case 'restarting': return 'restarting'
case 'shutting_down': return 'shutting_down'
}
}

View File

@@ -126,7 +126,7 @@ export function getAdvancedBackendFields(backendType?: string): string[] {
const fieldGetter = backendFieldGetters[normalizedType] || getAllLlamaCppFieldKeys
const basicConfig = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
return fieldGetter().filter(key => !(key in basicConfig))
return fieldGetter().filter(key => !(key in basicConfig) && key !== 'extra_args')
}
// Combined backend fields config for use in BackendFormField

View File

@@ -4,13 +4,16 @@ import App from './App'
import { InstancesProvider } from './contexts/InstancesContext'
import './index.css'
import { AuthProvider } from './contexts/AuthContext'
import { ConfigProvider } from './contexts/ConfigContext'
ReactDOM.createRoot(document.getElementById('root')!).render(
<React.StrictMode>
<AuthProvider>
<InstancesProvider>
<App />
</InstancesProvider>
<ConfigProvider>
<InstancesProvider>
<App />
</InstancesProvider>
</ConfigProvider>
</AuthProvider>
</React.StrictMode>,
)

View File

@@ -167,6 +167,9 @@ export const LlamaCppBackendOptionsSchema = z.object({
fim_qwen_7b_default: z.boolean().optional(),
fim_qwen_7b_spec: z.boolean().optional(),
fim_qwen_14b_spec: z.boolean().optional(),
// Extra args
extra_args: z.record(z.string(), z.string()).optional(),
})
// Infer the TypeScript type from the schema

View File

@@ -25,6 +25,9 @@ export const MlxBackendOptionsSchema = z.object({
top_k: z.number().optional(),
min_p: z.number().optional(),
max_tokens: z.number().optional(),
// Extra args
extra_args: z.record(z.string(), z.string()).optional(),
})
// Infer the TypeScript type from the schema

View File

@@ -125,6 +125,9 @@ export const VllmBackendOptionsSchema = z.object({
override_pooling_config: z.string().optional(),
override_neuron_config: z.string().optional(),
override_kv_cache_align_size: z.number().optional(),
// Extra args
extra_args: z.record(z.string(), z.string()).optional(),
})
// Infer the TypeScript type from the schema

View File

@@ -36,6 +36,10 @@ export const CreateInstanceOptionsSchema = z.object({
// Environment variables
environment: z.record(z.string(), z.string()).optional(),
// Execution context overrides
docker_enabled: z.boolean().optional(),
command_override: z.string().optional(),
// Backend configuration
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
backend_options: BackendOptionsSchema.optional(),

38
webui/src/types/apiKey.ts Normal file
View File

@@ -0,0 +1,38 @@
/** Access scope of an API key, using the string values exchanged with the API. */
export enum PermissionMode {
// Key may access every instance (displayed as "Full Access").
AllowAll = "allow_all",
// Key access is defined per instance; see KeyPermissionResponse.
PerInstance = "per_instance"
}
/**
 * API key metadata as returned by the /auth/keys endpoints.
 * The UI multiplies created_at, expires_at and last_used_at by 1000 before
 * building a Date from them.
 */
export interface ApiKey {
id: number
name: string
user_id: string
permission_mode: PermissionMode
// null is displayed as "Never" (the key does not expire).
expires_at: number | null
enabled: boolean
created_at: number
updated_at: number
// null is displayed as "Never" (the key has not been used yet).
last_used_at: number | null
}
/**
 * Payload serialized as the JSON body of POST /auth/keys.
 * NOTE(review): the fields are PascalCase, unlike the snake_case response
 * types — presumably this matches what the server decodes; confirm.
 */
export interface CreateKeyRequest {
Name: string
PermissionMode: PermissionMode
// Optional expiry — presumably in the same unit as ApiKey.expires_at; TODO confirm.
ExpiresAt?: number
InstancePermissions: InstancePermission[]
}
/** One per-instance permission entry of a CreateKeyRequest. */
export interface InstancePermission {
// Numeric id of the instance the permission applies to.
InstanceID: number
// Whether the key is allowed to run inference on that instance.
CanInfer: boolean
}
/** Result type of the create-key call: the key metadata plus the key string itself. */
export interface CreateKeyResponse extends ApiKey {
key: string
}
/** One row of the GET /auth/keys/{id}/permissions response. */
export interface KeyPermissionResponse {
instance_id: number
instance_name: string
// Rendered in the UI as a check mark (true) or a cross (false).
can_infer: boolean
}

78
webui/src/types/config.ts Normal file
View File

@@ -0,0 +1,78 @@
/** Settings of a single backend inside AppConfig.backends. */
export interface BackendSettings {
// Backend command; the UI shows it as the placeholder of the command override field.
command: string
args: string[]
environment?: Record<string, string>
// Docker-specific settings; optional.
docker?: DockerSettings
response_headers?: Record<string, string>
}
/** Docker-related part of a backend's settings. */
export interface DockerSettings {
enabled: boolean
image: string
args: string[]
environment?: Record<string, string>
}
/**
 * Per-backend settings keyed by config name. These keys differ from the
 * backend type identifiers used elsewhere in the UI ('llama_cpp', 'mlx_lm').
 */
export interface BackendConfig {
'llama-cpp': BackendSettings
vllm: BackendSettings
mlx: BackendSettings
}
/** The `server` section of AppConfig. */
export interface ServerConfig {
host: string
port: number
allowed_origins: string[]
allowed_headers: string[]
enable_swagger: boolean
response_headers?: Record<string, string>
}
/**
 * The `instances` section of AppConfig. The four default_* fields are what
 * the useInstanceDefaults hook exposes to the UI.
 */
export interface InstancesConfig {
port_range: [number, number]
configs_dir: string
logs_dir: string
auto_create_dirs: boolean
max_instances: number
max_running_instances: number
enable_lru_eviction: boolean
default_auto_restart: boolean
default_max_restarts: number
// NOTE(review): the time unit is not visible here — confirm against the server.
default_restart_delay: number
default_on_demand_start: boolean
// NOTE(review): the time units of the two fields below are not visible here — confirm.
on_demand_start_timeout: number
timeout_check_interval: number
}
/** The `database` section of AppConfig. */
export interface DatabaseConfig {
path: string
max_open_connections: number
max_idle_connections: number
// NOTE(review): the time unit is not visible here — confirm against the server.
connection_max_lifetime: number
}
/** The `auth` section of AppConfig. */
export interface AuthConfig {
require_inference_auth: boolean
inference_keys: string[] // Will be empty in sanitized response
require_management_auth: boolean
management_keys: string[] // Will be empty in sanitized response
}
/** One entry of AppConfig.nodes. */
export interface NodeConfig {
address: string
api_key: string // Will be empty in sanitized response
}
/** Shape of the application configuration returned by GET /config. */
export interface AppConfig {
server: ServerConfig
backends: BackendConfig
instances: InstancesConfig
database: DatabaseConfig
auth: AuthConfig
// presumably the name of this node's entry in `nodes` — TODO confirm
local_node: string
nodes: Record<string, NodeConfig>
data_dir: string
// Optional build metadata
version?: string
commit_hash?: string
build_time?: string
}

View File

@@ -11,9 +11,9 @@ export const BackendType = {
export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
export type InstanceStatus = 'running' | 'stopped' | 'failed' | 'restarting'
export type InstanceStatus = 'running' | 'stopped' | 'failed' | 'restarting' | 'shutting_down'
export type HealthState = 'stopped' | 'starting' | 'ready' | 'failed' | 'restarting'
export type HealthState = 'stopped' | 'starting' | 'ready' | 'failed' | 'restarting' | 'shutting_down'
export interface HealthStatus {
state: HealthState
@@ -24,8 +24,8 @@ export interface HealthStatus {
}
export interface Instance {
id: number;
name: string;
status: InstanceStatus;
options?: CreateInstanceOptions;
docker_enabled?: boolean; // indicates backend is running via Docker
}

View File

@@ -19,7 +19,7 @@
"paths": {
"@/*": ["./src/*"]
},
"types": ["vite/client"]
"types": ["vite/client", "@types/node"]
},
"include": ["src", "src/vite-env.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }]