52 Commits

Author SHA1 Message Date
70dc635e4c Merge pull request #119 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-6a651e08cd
chore: bump lucide-react from 0.561.0 to 0.562.0 in /webui in the npm-production group
2025-12-23 01:25:38 +01:00
dependabot[bot]
e0d342f31f chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.561.0 to 0.562.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.562.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.562.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-22 21:33:33 +00:00
9cea295305 Merge pull request #118 from lordmathis/chore/remove-deprecated
chore: Remove deprecated code
2025-12-22 21:53:19 +01:00
1f78d3f780 Remove deprecated code 2025-12-22 21:49:37 +01:00
e7baeb9ece Merge pull request #117 from lordmathis/docs/llama-router
docs: Document llama.cpp router mode
2025-12-22 21:23:58 +01:00
3cec850e74 Document llama.cpp router mode 2025-12-22 21:20:42 +01:00
67098d7801 Merge pull request #113 from lordmathis/feat/llama-cpp-router
feat: Integrate native llama.cpp router
2025-12-22 20:55:46 +01:00
3c95e76137 Poll models during loading 2025-12-22 20:38:58 +01:00
761cdfe7d8 Improve InstanceCard to display models for llama.cpp instances 2025-12-22 20:38:58 +01:00
99eba3daa9 Update test client 2025-12-22 20:38:58 +01:00
d9d7b6d814 Allow empty backend options 2025-12-22 20:38:58 +01:00
5062c882de Update dependencies 2025-12-22 20:38:58 +01:00
ee122d669c Support llama.cpp router mode for openai endpoints 2025-12-22 20:38:58 +01:00
41d904475c Remove model registry 2025-12-22 20:38:58 +01:00
7f5292412c Implement model management for llama.cpp instances 2025-12-22 20:38:58 +01:00
ec84a7d331 Merge pull request #112 from lordmathis/fix/auth-middleware
fix: Remove duplicate auth middleware init
2025-12-17 19:09:12 +01:00
b45219a01e Reuse handler auth middleware 2025-12-17 19:06:04 +01:00
463bb561e1 Merge pull request #111 from lordmathis/fix/cgo-enabled-build
fix: Add multiplatform CGO_ENABLED=1 build
2025-12-17 14:38:27 +01:00
ebdb9143c0 Remove separate windows build step 2025-12-17 14:32:10 +01:00
4269d04381 Update release.yaml 2025-12-17 14:25:50 +01:00
c734329a62 Merge pull request #109 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-62cd70be13
chore: bump the npm-production group in /webui with 2 updates
2025-12-15 23:50:27 +01:00
15fcf7c377 Merge pull request #110 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-8fdc2c429d
chore: bump the npm-development group in /webui with 3 updates
2025-12-15 23:48:42 +01:00
dependabot[bot]
795f530956 chore: bump the npm-development group in /webui with 3 updates
Bumps the npm-development group in /webui with 3 updates: [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node), [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) and [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `@types/node` from 24.10.1 to 25.0.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `typescript-eslint` from 8.49.0 to 8.50.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.50.0/packages/typescript-eslint)

Updates `vite` from 7.2.2 to 7.3.0
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v7.3.0/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.3.0/packages/vite)

---
updated-dependencies:
- dependency-name: "@types/node"
  dependency-version: 25.0.2
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.50.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vite
  dependency-version: 7.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:41 +00:00
dependabot[bot]
4507358310 chore: bump the npm-production group in /webui with 2 updates
Bumps the npm-production group in /webui with 2 updates: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) and [zod](https://github.com/colinhacks/zod).


Updates `lucide-react` from 0.560.0 to 0.561.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.561.0/packages/lucide-react)

Updates `zod` from 4.1.12 to 4.2.0
- [Release notes](https://github.com/colinhacks/zod/releases)
- [Commits](https://github.com/colinhacks/zod/compare/v4.1.12...v4.2.0)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.561.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: zod
  dependency-version: 4.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:01 +00:00
f3c02b4939 Merge pull request #108 from lordmathis/refactor/config
refactor: Split large config file
2025-12-13 14:27:25 +01:00
0a85409deb Split large config file 2025-12-13 13:50:59 +01:00
22fd295250 Merge pull request #107 from lordmathis/feat/logrotate
feat: Add log rotation for instance logs
2025-12-13 13:30:20 +01:00
c0cecdd377 Clean up logger 2025-12-13 13:18:30 +01:00
4d57b37a5d Remove verbose _mb suffix 2025-12-13 13:06:22 +01:00
c13b71d07f Document new log rotation config options 2025-12-13 13:02:22 +01:00
406a711682 Move LogRotationConfig to logger package 2025-12-13 12:48:50 +01:00
0b3d654945 Simplify logging config 2025-12-13 12:48:50 +01:00
e2a49402d6 Implement instance log rotation 2025-12-13 12:48:50 +01:00
48836c9c12 Merge pull request #105 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-3fe24f4500
chore: bump lucide-react from 0.555.0 to 0.556.0 in /webui in the npm-production group
2025-12-12 10:43:12 +01:00
4200b8eed9 Merge pull request #104 from lordmathis/dependabot/go_modules/go-dependencies-f180a085e8
chore: bump golang.org/x/crypto from 0.45.0 to 0.46.0 in the go-dependencies group
2025-12-11 18:51:40 +01:00
dependabot[bot]
9a7ae87df8 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.555.0 to 0.556.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.556.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.556.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-11 17:17:18 +00:00
e54c495528 Merge pull request #106 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-808d3127cd
chore: bump the npm-development group in /webui with 2 updates
2025-12-11 18:12:15 +01:00
dependabot[bot]
83006968ca chore: bump the npm-development group in /webui with 2 updates
Bumps the npm-development group in /webui with 2 updates: [jsdom](https://github.com/jsdom/jsdom) and [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `jsdom` from 27.2.0 to 27.3.0
- [Release notes](https://github.com/jsdom/jsdom/releases)
- [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md)
- [Commits](https://github.com/jsdom/jsdom/compare/27.2.0...27.3.0)

Updates `typescript-eslint` from 8.48.0 to 8.49.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.49.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: jsdom
  dependency-version: 27.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.49.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:31:36 +00:00
dependabot[bot]
c8d9c6907c chore: bump golang.org/x/crypto in the go-dependencies group
Bumps the go-dependencies group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0
- [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.46.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: go-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:30:38 +00:00
c776785f30 Merge pull request #103 from lordmathis/docs/api-keys
docs: Improve API key management documentation
2025-12-08 19:23:39 +01:00
1cfbd42eda Update swagger docs 2025-12-08 19:16:02 +01:00
8fee27054d Update docs for API key management 2025-12-08 19:15:42 +01:00
fd33837026 Merge pull request #101 from lordmathis/feat/api-key-mgmt
feat: Add inference api key management
2025-12-08 18:49:49 +01:00
3c4ebf7403 Add simple python LLM test client 2025-12-08 18:44:28 +01:00
b7a0f7e3d8 Unhide migrated directory 2025-12-08 18:08:22 +01:00
d5b68a900f Add .migrated directory for migrated json files 2025-12-08 18:06:15 +01:00
00cd8c8877 Update shadcn components 2025-12-07 18:50:52 +01:00
4b1b12a7a8 Fix lint errors 2025-12-07 18:28:01 +01:00
0ce9016488 Fix some lint issues 2025-12-07 17:40:09 +01:00
1acbcafe1c Add DialogDescription to SettingsDialog 2025-12-07 17:26:38 +01:00
00a502a268 Implement LocalStorageMock for testing 2025-12-07 17:16:40 +01:00
54fe0f7421 Fix eslint issues 2025-12-07 16:16:13 +01:00
59 changed files with 2884 additions and 1417 deletions

View File

@@ -45,15 +45,23 @@ jobs:
  build:
    name: Build Binaries
    needs: build-webui
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
-        goos: [linux, windows, darwin]
-        goarch: [amd64, arm64]
-        exclude:
-          # Windows ARM64 support is limited
-          - goos: windows
-            goarch: arm64
+        include:
+          - goos: linux
+            goarch: amd64
+            runner: ubuntu-latest
+          - goos: linux
+            goarch: arm64
+            runner: ubuntu-latest
+            cc: aarch64-linux-gnu-gcc
+          - goos: darwin
+            goarch: arm64
+            runner: macos-latest
+          - goos: windows
+            goarch: amd64
+            runner: windows-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -70,11 +78,19 @@ jobs:
          name: webui-dist
          path: webui/dist/
+      - name: Install cross-compilation tools (Linux ARM64 only)
+        if: matrix.cc != ''
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc-aarch64-linux-gnu
      - name: Build binary
        env:
          GOOS: ${{ matrix.goos }}
          GOARCH: ${{ matrix.goarch }}
-          CGO_ENABLED: 0
+          CGO_ENABLED: 1
+          CC: ${{ matrix.cc }}
+        shell: bash
        run: |
          # Set binary extension for Windows
          BINARY_NAME="llamactl"
@@ -91,8 +107,10 @@ jobs:
            ARCHIVE_OS="macos"
          fi
          ARCHIVE_NAME="llamactl-${{ github.ref_name }}-${ARCHIVE_OS}-${{ matrix.goarch }}"
          if [ "${{ matrix.goos }}" = "windows" ]; then
-            zip "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
+            # Use 7z on Windows (pre-installed)
+            7z a "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
            echo "ASSET_PATH=${ARCHIVE_NAME}.zip" >> $GITHUB_ENV
          else
            tar -czf "${ARCHIVE_NAME}.tar.gz" "${BINARY_NAME}"
@@ -179,4 +197,4 @@ jobs:
        with:
          files: assets/checksums.txt
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -12,6 +12,7 @@
**🚀 Easy Model Management**
- **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality)
+- **Dynamic Multi-Model Instances**: llama.cpp router mode - serve multiple models from a single instance with on-demand loading
- **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits
- **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs
@@ -183,7 +184,6 @@ data_dir: ~/.local/share/llamactl # Main data directory (database, instances, l
instances:
  port_range: [8000, 9000]                        # Port range for instances
-  configs_dir: ~/.local/share/llamactl/instances  # Instance configs directory (platform dependent)
  logs_dir: ~/.local/share/llamactl/logs          # Logs directory (platform dependent)
  auto_create_dirs: true                          # Auto-create data/config/logs dirs if missing
  max_instances: -1                               # Max instances (-1 = unlimited)
@@ -195,6 +195,9 @@ instances:
  default_on_demand_start: true                   # Default on-demand start setting
  on_demand_start_timeout: 120                    # Default on-demand start timeout in seconds
  timeout_check_interval: 5                       # Idle instance timeout check in minutes
+  log_rotation_enabled: true                      # Enable log rotation (default: true)
+  log_rotation_max_size: 100                      # Max log file size in MB before rotation (default: 100)
+  log_rotation_compress: false                    # Compress rotated log files (default: false)

database:
  path: ~/.local/share/llamactl/llamactl.db       # Database file path (platform dependent)
@@ -203,8 +206,7 @@ database:
  connection_max_lifetime: 5m     # Connection max lifetime

auth:
-  require_inference_auth: true    # Require auth for inference endpoints
-  inference_keys: []              # Keys for inference endpoints
+  require_inference_auth: true    # Require auth for inference endpoints, API keys are created in web UI
  require_management_auth: true   # Require auth for management endpoints
  management_keys: []             # Keys for management endpoints
```

View File

@@ -57,11 +57,6 @@ func main() {
		log.Printf("Error creating data directory %s: %v\nData persistence may not be available.", cfg.DataDir, err)
	}

-	// Create instances directory
-	if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
-		log.Printf("Error creating instances directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
-	}
-
	// Create logs directory
	if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
		log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
@@ -84,11 +79,6 @@ func main() {
		log.Fatalf("Failed to run database migrations: %v", err)
	}

-	// Migrate from JSON files if needed (one-time migration)
-	if err := migrateFromJSON(&cfg, db); err != nil {
-		log.Printf("Warning: Failed to migrate from JSON: %v", err)
-	}
-
	// Initialize the instance manager with dependency injection
	instanceManager := manager.New(&cfg, db)

View File

@@ -1,82 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"llamactl/pkg/config"
-	"llamactl/pkg/database"
-	"llamactl/pkg/instance"
-	"log"
-	"os"
-	"path/filepath"
-)
-
-// migrateFromJSON migrates instances from JSON files to SQLite database
-// This is a one-time migration that runs on first startup with existing JSON files.
-func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
-	instancesDir := cfg.Instances.InstancesDir
-	if instancesDir == "" {
-		return nil // No instances directory configured
-	}
-
-	// Check if instances directory exists
-	if _, err := os.Stat(instancesDir); os.IsNotExist(err) {
-		return nil // No instances directory, nothing to migrate
-	}
-
-	// Check if database is empty (no instances)
-	existing, err := db.LoadAll()
-	if err != nil {
-		return fmt.Errorf("failed to check existing instances: %w", err)
-	}
-	if len(existing) > 0 {
-		return nil // Database already has instances, skip migration
-	}
-
-	// Find all JSON files
-	files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
-	if err != nil {
-		return fmt.Errorf("failed to list instance files: %w", err)
-	}
-	if len(files) == 0 {
-		return nil // No JSON files to migrate
-	}
-
-	log.Printf("Migrating %d instances from JSON to SQLite...", len(files))
-
-	// Migrate each JSON file
-	var migrated int
-	for _, file := range files {
-		if err := migrateJSONFile(file, db); err != nil {
-			log.Printf("Failed to migrate %s: %v", file, err)
-			continue
-		}
-		migrated++
-	}
-
-	log.Printf("Successfully migrated %d/%d instances to SQLite", migrated, len(files))
-	return nil
-}
-
-// migrateJSONFile migrates a single JSON file to the database
-func migrateJSONFile(filename string, db database.InstanceStore) error {
-	data, err := os.ReadFile(filename)
-	if err != nil {
-		return fmt.Errorf("failed to read file: %w", err)
-	}
-
-	var inst instance.Instance
-	if err := json.Unmarshal(data, &inst); err != nil {
-		return fmt.Errorf("failed to unmarshal instance: %w", err)
-	}
-
-	if err := db.Save(&inst); err != nil {
-		return fmt.Errorf("failed to save instance to database: %w", err)
-	}
-
-	log.Printf("Migrated instance %s from JSON to SQLite", inst.Name)
-	return nil
-}

View File

@@ -74,7 +74,6 @@ database:
auth:
  require_inference_auth: true    # Require auth for inference endpoints
-  inference_keys: []              # Keys for inference endpoints
  require_management_auth: true   # Require auth for management endpoints
  management_keys: []             # Keys for management endpoints
@@ -231,6 +230,9 @@ instances:
  default_on_demand_start: true   # Default on-demand start setting
  on_demand_start_timeout: 120    # Default on-demand start timeout in seconds
  timeout_check_interval: 5       # Default instance timeout check interval in minutes
+  log_rotation_enabled: true      # Enable log rotation (default: true)
+  log_rotation_max_size: 100      # Max log file size in MB before rotation (default: 100)
+  log_rotation_compress: false    # Compress rotated log files (default: false)
```

**Environment Variables:**
@@ -247,6 +249,9 @@ instances:
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
+- `LLAMACTL_LOG_ROTATION_ENABLED` - Enable log rotation (true/false)
+- `LLAMACTL_LOG_ROTATION_MAX_SIZE` - Max log file size in MB
+- `LLAMACTL_LOG_ROTATION_COMPRESS` - Compress rotated logs (true/false)

### Database Configuration
@@ -266,17 +271,33 @@ database:
### Authentication Configuration

+llamactl supports two types of authentication:
+
+- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
+- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.
+
```yaml
auth:
  require_inference_auth: true    # Require API key for OpenAI endpoints (default: true)
-  inference_keys: []              # List of valid inference API keys
  require_management_auth: true   # Require API key for management endpoints (default: true)
  management_keys: []             # List of valid management API keys
```

+**Managing Inference API Keys:**
+
+Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:
+
+1. Open the web UI and log in with a management API key
+2. Navigate to **Settings → API Keys**
+3. Click **Create API Key**
+4. Configure the key:
+   - **Name**: A descriptive name for the key
+   - **Expiration**: Optional expiration date
+   - **Permissions**: Grant access to all instances or specific instances only
+5. Copy the generated key - it won't be shown again
+
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
-- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys

View File

@@ -999,6 +999,156 @@ const docTemplate = `{
}
}
},
"/api/v1/llama-cpp/{name}/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of models available in the specified llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "List models in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Models list response",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"400": {
"description": "Invalid instance",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/llama-cpp/{name}/models/{model}/load": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Loads the specified model in the given llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "Load a model in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Model Name",
"name": "model",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"400": {
"description": "Invalid request",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/llama-cpp/{name}/models/{model}/unload": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Unloads the specified model in the given llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "Unload a model in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Model Name",
"name": "model",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"400": {
"description": "Invalid request",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/nodes": { "/api/v1/nodes": {
"get": { "get": {
"security": [ "security": [
@@ -1788,13 +1938,6 @@ const docTemplate = `{
    "config.AuthConfig": {
      "type": "object",
      "properties": {
-        "inference_keys": {
-          "description": "List of keys for OpenAI compatible inference endpoints",
-          "type": "array",
-          "items": {
-            "type": "string"
-          }
-        },
        "management_keys": {
          "description": "List of keys for management endpoints",
          "type": "array",
@@ -1905,10 +2048,6 @@ const docTemplate = `{
          "description": "Automatically create the data directory if it doesn't exist",
          "type": "boolean"
        },
-        "configs_dir": {
-          "description": "Instance config directory override (relative to data_dir if not absolute)",
-          "type": "string"
-        },
        "default_auto_restart": {
          "description": "Default auto-restart setting for new instances",
          "type": "boolean"
@@ -1929,6 +2068,21 @@ const docTemplate = `{
          "description": "Enable LRU eviction for instance logs",
          "type": "boolean"
        },
+        "logRotationCompress": {
+          "description": "Whether to compress rotated log files",
+          "type": "boolean",
+          "default": false
+        },
+        "logRotationEnabled": {
+          "description": "Log rotation enabled",
+          "type": "boolean",
+          "default": true
+        },
+        "logRotationMaxSize": {
+          "description": "Maximum log file size in MB before rotation",
+          "type": "integer",
+          "default": 100
+        },
        "logs_dir": {
          "description": "Logs directory override (relative to data_dir if not absolute)",
          "type": "string"
@@ -2063,20 +2217,19 @@ const docTemplate = `{
    "server.CreateKeyRequest": {
      "type": "object",
      "properties": {
-        "expiresAt": {
-          "type": "integer",
-          "format": "int64"
+        "expires_at": {
+          "type": "integer"
        },
-        "instancePermissions": {
+        "instance_ids": {
          "type": "array",
          "items": {
-            "$ref": "#/definitions/server.InstancePermission"
+            "type": "integer"
          }
        },
        "name": {
          "type": "string"
        },
-        "permissionMode": {
+        "permission_mode": {
          "$ref": "#/definitions/auth.PermissionMode"
        }
      }
@@ -2087,9 +2240,6 @@ const docTemplate = `{
        "created_at": {
          "type": "integer"
        },
-        "enabled": {
-          "type": "boolean"
-        },
        "expires_at": {
          "type": "integer"
        },
@@ -2116,29 +2266,9 @@ const docTemplate = `{
        }
      }
    },
-    "server.InstancePermission": {
-      "type": "object",
-      "properties": {
-        "can_infer": {
-          "type": "boolean"
-        },
-        "can_view_logs": {
-          "type": "boolean"
-        },
-        "instance_id": {
-          "type": "integer"
-        }
-      }
-    },
    "server.KeyPermissionResponse": {
      "type": "object",
      "properties": {
-        "can_infer": {
-          "type": "boolean"
-        },
-        "can_view_logs": {
-          "type": "boolean"
-        },
        "instance_id": {
          "type": "integer"
        },
@@ -2153,9 +2283,6 @@ const docTemplate = `{
        "created_at": {
          "type": "integer"
        },
-        "enabled": {
-          "type": "boolean"
-        },
        "expires_at": {
          "type": "integer"
        },
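The schema rework above replaces the camelCase fields and per-instance permission objects with snake_case fields and a plain list of instance IDs. A minimal Go sketch of the new request shape, inferred from the swagger definition (modeling `PermissionMode` as a string is an assumption — the real type is `auth.PermissionMode`, whose values are not shown in this diff):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// CreateKeyRequest mirrors the reworked server.CreateKeyRequest definition above.
type CreateKeyRequest struct {
	Name           string `json:"name"`
	ExpiresAt      int64  `json:"expires_at,omitempty"`      // optional unix timestamp
	InstanceIDs    []int  `json:"instance_ids,omitempty"`    // replaces instancePermissions
	PermissionMode string `json:"permission_mode,omitempty"` // assumption: string-backed enum
}

func main() {
	body, _ := json.Marshal(CreateKeyRequest{
		Name:        "dev-key",
		InstanceIDs: []int{1, 2},
	})
	fmt.Println(string(body)) // {"name":"dev-key","instance_ids":[1,2]}
}
```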

View File

@@ -222,6 +222,100 @@ curl -X DELETE http://localhost:8080/api/v1/instances/{name} \
  -H "Authorization: Bearer <token>"
```
+
+## Multi-Model llama.cpp Instances
+
+!!! info "llama.cpp Router Mode"
+    llama.cpp instances support [**router mode**](https://huggingface.co/blog/ggml-org/model-management-in-llamacpp), allowing a single instance to serve multiple models dynamically. Models are loaded on-demand from the llama.cpp cache without restarting the instance.
+
+### Creating a Multi-Model Instance
+
+**Via Web UI**
+
+1. Click **"Create Instance"**
+2. Select **Backend Type**: "Llama Server"
+3. Leave **Backend Options** empty `{}` or omit the model field
+4. Create the instance
+
+**Via API**
+
+```bash
+# Create instance without specifying a model (router mode)
+curl -X POST http://localhost:8080/api/v1/instances/my-router \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {},
+    "nodes": ["main"]
+  }'
+```
+
+### Managing Models
+
+**Via Web UI**
+
+1. Start the router mode instance
+2. Instance card displays a badge showing loaded/total models (e.g., "2/5 models")
+3. Click the **"Models"** button on the instance card
+4. Models dialog opens showing:
+   - All available models from the llama.cpp instance
+   - Status indicator (loaded, loading, or unloaded)
+   - Load/Unload buttons for each model
+5. Click **"Load"** to load a model into memory
+6. Click **"Unload"** to free up memory
+
+**Via API**
+
+```bash
+# List available models
+curl http://localhost:8080/api/v1/llama-cpp/my-router/models \
+  -H "Authorization: Bearer <token>"
+
+# Load a model
+curl -X POST http://localhost:8080/api/v1/llama-cpp/my-router/models/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf/load \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{"model": "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"}'
+
+# Unload a model
+curl -X POST http://localhost:8080/api/v1/llama-cpp/my-router/models/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf/unload \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{"model": "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"}'
+```
+
+### Using Multi-Model Instances
+
+When making inference requests to a multi-model instance, specify the model using the format `instance_name/model_name`:
+
+```bash
+# OpenAI-compatible chat completion with specific model
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <inference-key>" \
+  -d '{
+    "model": "my-router/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
+
+# List all available models (includes multi-model instances)
+curl http://localhost:8080/v1/models \
+  -H "Authorization: Bearer <inference-key>"
+```
+
+The response from `/v1/models` will include each model from multi-model instances as separate entries in the format `instance_name/model_name`.
+
+### Model Discovery
+
+Models are automatically discovered from the llama.cpp cache directory. The default cache locations are:
+
+- **Linux/macOS**: `~/.cache/llama.cpp/`
+- **Windows**: `%LOCALAPPDATA%\llama.cpp\`
+
+Place your GGUF model files in the cache directory, and they will appear in the models list when you start a router mode instance.
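A minimal Go sketch of the router-mode chat completion shown above (assumptions: llamactl on its default localhost:8080, an instance named my-router, and a real inference key substituted for the placeholder):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// The model field uses the "instance_name/model_name" format for
	// router-mode instances.
	payload, err := json.Marshal(map[string]any{
		"model": "my-router/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
		"messages": []map[string]string{
			{"role": "user", "content": "Hello!"},
		},
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <inference-key>") // placeholder key

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // raw OpenAI-compatible JSON response
}
```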
## Instance Proxy

Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).

View File

@@ -17,10 +17,10 @@ Before you start, let's clarify a few key terms:

Llamactl uses two types of API keys:

-- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
+- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
-- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
+- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.

-By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
+By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.

## Start Llamactl
@@ -38,24 +38,17 @@ llamactl
   sk-management-...

-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-⚠️  INFERENCE AUTHENTICATION REQUIRED
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-🔑 Generated Inference API Key:
-
-   sk-inference-...
-
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️  IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-• These keys are auto-generated and will change on restart
+• This key is auto-generated and will change on restart
-• For production, add explicit keys to your configuration
+• For production, add explicit management_keys to your configuration
-• Copy these keys before they disappear from the terminal
+• Copy this key before it disappears from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Llamactl server listening on 0.0.0.0:8080
```

-Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
+Copy the **Management API Key** from the terminal - you'll need it to access the web UI.

By default, Llamactl will start on `http://localhost:8080`.
@@ -82,7 +75,7 @@ You should see the Llamactl web interface.
- **Additional Options**: Backend-specific parameters

!!! tip "Auto-Assignment"
-    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
+    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.

!!! note "Remote Node Deployment"
    If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
@@ -98,6 +91,24 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed

+## Create an Inference API Key
+
+To make inference requests to your instances, you'll need an inference API key:
+
+1. In the web UI, click the **Settings** icon (gear icon in the top-right)
+2. Navigate to the **API Keys** tab
+3. Click **Create API Key**
+4. Configure your key:
+   - **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
+   - **Expiration**: Optionally set an expiration date for the key
+   - **Permissions**: Choose whether the key can access all instances or only specific ones
+5. Click **Create**
+6. **Copy the generated key** - it will only be shown once!
+
+The key will look like: `llamactl-...`
+
+You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
+
## Example Configurations

Here are basic example configurations for each backend:
@@ -246,7 +257,7 @@ print(response.choices[0].message.content)
```

!!! note "API Key"
-    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
+    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).
### List Available Models
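As a companion to the Python client the quick start references, a minimal Go sketch that lists models using an inference key created in the web UI (assumes the default localhost:8080 address; substitute a real `llamactl-...` key for the placeholder):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// List models via the OpenAI-compatible endpoint.
	req, err := http.NewRequest("GET", "http://localhost:8080/v1/models", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer llamactl-...") // placeholder key

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON list of available models
}
```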

View File

@@ -992,6 +992,156 @@
}
}
},
"/api/v1/llama-cpp/{name}/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of models available in the specified llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "List models in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Models list response",
"schema": {
"type": "object",
"additionalProperties": true
}
},
"400": {
"description": "Invalid instance",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/llama-cpp/{name}/models/{model}/load": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Loads the specified model in the given llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "Load a model in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Model Name",
"name": "model",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"400": {
"description": "Invalid request",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/llama-cpp/{name}/models/{model}/unload": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Unloads the specified model in the given llama.cpp instance",
"tags": [
"Llama.cpp"
],
"summary": "Unload a model in a llama.cpp instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Model Name",
"name": "model",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Success message",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
},
"400": {
"description": "Invalid request",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/api/v1/nodes": { "/api/v1/nodes": {
"get": { "get": {
"security": [ "security": [
@@ -1781,13 +1931,6 @@
    "config.AuthConfig": {
      "type": "object",
      "properties": {
-        "inference_keys": {
-          "description": "List of keys for OpenAI compatible inference endpoints",
-          "type": "array",
-          "items": {
-            "type": "string"
-          }
-        },
        "management_keys": {
          "description": "List of keys for management endpoints",
          "type": "array",
@@ -1898,10 +2041,6 @@
          "description": "Automatically create the data directory if it doesn't exist",
          "type": "boolean"
        },
-        "configs_dir": {
-          "description": "Instance config directory override (relative to data_dir if not absolute)",
-          "type": "string"
-        },
        "default_auto_restart": {
          "description": "Default auto-restart setting for new instances",
          "type": "boolean"
@@ -1922,6 +2061,21 @@
          "description": "Enable LRU eviction for instance logs",
          "type": "boolean"
        },
+        "logRotationCompress": {
+          "description": "Whether to compress rotated log files",
+          "type": "boolean",
+          "default": false
+        },
+        "logRotationEnabled": {
+          "description": "Log rotation enabled",
+          "type": "boolean",
+          "default": true
+        },
+        "logRotationMaxSize": {
+          "description": "Maximum log file size in MB before rotation",
+          "type": "integer",
+          "default": 100
+        },
        "logs_dir": {
          "description": "Logs directory override (relative to data_dir if not absolute)",
          "type": "string"
@@ -2056,20 +2210,19 @@
    "server.CreateKeyRequest": {
      "type": "object",
      "properties": {
-        "expiresAt": {
-          "type": "integer",
-          "format": "int64"
+        "expires_at": {
+          "type": "integer"
        },
-        "instancePermissions": {
+        "instance_ids": {
          "type": "array",
          "items": {
-            "$ref": "#/definitions/server.InstancePermission"
+            "type": "integer"
          }
        },
        "name": {
          "type": "string"
        },
-        "permissionMode": {
+        "permission_mode": {
          "$ref": "#/definitions/auth.PermissionMode"
        }
      }
@@ -2080,9 +2233,6 @@
        "created_at": {
          "type": "integer"
        },
-        "enabled": {
-          "type": "boolean"
-        },
        "expires_at": {
          "type": "integer"
        },
@@ -2109,29 +2259,9 @@
        }
      }
    },
-    "server.InstancePermission": {
-      "type": "object",
-      "properties": {
-        "can_infer": {
-          "type": "boolean"
-        },
-        "can_view_logs": {
-          "type": "boolean"
-        },
-        "instance_id": {
-          "type": "integer"
-        }
-      }
-    },
    "server.KeyPermissionResponse": {
      "type": "object",
      "properties": {
-        "can_infer": {
-          "type": "boolean"
-        },
-        "can_view_logs": {
-          "type": "boolean"
-        },
        "instance_id": {
          "type": "integer"
        },
@@ -2146,9 +2276,6 @@
        "created_at": {
          "type": "integer"
        },
-        "enabled": {
-          "type": "boolean"
-        },
        "expires_at": {
          "type": "integer"
        },

View File

@@ -39,11 +39,6 @@ definitions:
    type: object
  config.AuthConfig:
    properties:
-      inference_keys:
-        description: List of keys for OpenAI compatible inference endpoints
-        items:
-          type: string
-        type: array
      management_keys:
        description: List of keys for management endpoints
        items:
@@ -118,10 +113,6 @@ definitions:
      auto_create_dirs:
        description: Automatically create the data directory if it doesn't exist
        type: boolean
-      configs_dir:
-        description: Instance config directory override (relative to data_dir if not
-          absolute)
-        type: string
      default_auto_restart:
        description: Default auto-restart setting for new instances
        type: boolean
@@ -137,6 +128,18 @@ definitions:
      enable_lru_eviction:
        description: Enable LRU eviction for instance logs
        type: boolean
+      logRotationCompress:
+        default: false
+        description: Whether to compress rotated log files
+        type: boolean
+      logRotationEnabled:
+        default: true
+        description: Log rotation enabled
+        type: boolean
+      logRotationMaxSize:
+        default: 100
+        description: Maximum log file size in MB before rotation
+        type: integer
      logs_dir:
        description: Logs directory override (relative to data_dir if not absolute)
        type: string
@@ -232,24 +235,21 @@ definitions:
    type: object
  server.CreateKeyRequest:
    properties:
-      expiresAt:
-        format: int64
+      expires_at:
        type: integer
-      instancePermissions:
+      instance_ids:
        items:
-          $ref: '#/definitions/server.InstancePermission'
+          type: integer
        type: array
      name:
        type: string
-      permissionMode:
+      permission_mode:
        $ref: '#/definitions/auth.PermissionMode'
    type: object
  server.CreateKeyResponse:
    properties:
      created_at:
        type: integer
-      enabled:
-        type: boolean
      expires_at:
        type: integer
      id:
@@ -267,21 +267,8 @@ definitions:
      user_id:
        type: string
    type: object
-  server.InstancePermission:
-    properties:
-      can_infer:
-        type: boolean
-      can_view_logs:
-        type: boolean
-      instance_id:
-        type: integer
-    type: object
  server.KeyPermissionResponse:
    properties:
-      can_infer:
-        type: boolean
-      can_view_logs:
-        type: boolean
      instance_id:
        type: integer
      instance_name:
@@ -291,8 +278,6 @@ definitions:
    properties:
      created_at:
        type: integer
-      enabled:
-        type: boolean
      expires_at:
        type: integer
      id:
@@ -973,6 +958,102 @@ paths:
    summary: Stop a running instance
    tags:
    - Instances
/api/v1/llama-cpp/{name}/models:
get:
description: Returns a list of models available in the specified llama.cpp instance
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Models list response
schema:
additionalProperties: true
type: object
"400":
description: Invalid instance
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List models in a llama.cpp instance
tags:
- Llama.cpp
/api/v1/llama-cpp/{name}/models/{model}/load:
post:
description: Loads the specified model in the given llama.cpp instance
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: Model Name
in: path
name: model
required: true
type: string
responses:
"200":
description: Success message
schema:
additionalProperties:
type: string
type: object
"400":
description: Invalid request
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Load a model in a llama.cpp instance
tags:
- Llama.cpp
/api/v1/llama-cpp/{name}/models/{model}/unload:
post:
description: Unloads the specified model in the given llama.cpp instance
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: Model Name
in: path
name: model
required: true
type: string
responses:
"200":
description: Success message
schema:
additionalProperties:
type: string
type: object
"400":
description: Invalid request
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Unload a model in a llama.cpp instance
tags:
- Llama.cpp
  /api/v1/nodes:
    get:
      description: Returns a map of all nodes configured in the server (node name

View File

@@ -115,15 +115,15 @@ vllm serve microsoft/DialoGPT-medium --port 8081
     require_inference_auth: false
   ```

-2. **Configure API keys:**
+2. **Configure management API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
-     inference_keys:
-       - "your-inference-key"
   ```

+   For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key.
+
3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \

go.mod
View File

@@ -3,13 +3,14 @@ module llamactl

go 1.24.5

require (
+	github.com/DeRuina/timberjack v1.3.9
	github.com/go-chi/chi/v5 v5.2.2
	github.com/go-chi/cors v1.2.2
	github.com/golang-migrate/migrate/v4 v4.19.1
	github.com/mattn/go-sqlite3 v1.14.24
	github.com/swaggo/http-swagger v1.3.4
	github.com/swaggo/swag v1.16.5
-	golang.org/x/crypto v0.45.0
+	golang.org/x/crypto v0.46.0
	gopkg.in/yaml.v3 v3.0.1
)
@@ -20,11 +21,12 @@ require (
	github.com/go-openapi/spec v0.21.0 // indirect
	github.com/go-openapi/swag v0.23.1 // indirect
	github.com/josharian/intern v1.0.0 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
	github.com/mailru/easyjson v0.9.0 // indirect
	github.com/swaggo/files v1.0.1 // indirect
	golang.org/x/mod v0.29.0 // indirect
	golang.org/x/net v0.47.0 // indirect
	golang.org/x/sync v0.18.0 // indirect
-	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/sys v0.39.0 // indirect
	golang.org/x/tools v0.38.0 // indirect
)
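The new `github.com/DeRuina/timberjack` dependency backs the instance log rotation added in #107. Timberjack descends from lumberjack, so a rotating-writer sketch like the following should be close — but the field names here are assumed from that lineage rather than confirmed by this diff, so verify against the timberjack docs:

```go
package main

import (
	"log"

	"github.com/DeRuina/timberjack"
)

func main() {
	// Rotating writer mirroring the new log_rotation_* config options.
	// Field names are assumptions based on timberjack's lumberjack heritage.
	w := &timberjack.Logger{
		Filename: "instance.log",
		MaxSize:  100,   // MB, cf. log_rotation_max_size
		Compress: false, // cf. log_rotation_compress
	}
	defer w.Close()

	log.SetOutput(w)
	log.Println("instance log output now rotates at 100 MB")
}
```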

go.sum
View File

@@ -1,7 +1,11 @@
+github.com/DeRuina/timberjack v1.3.9 h1:6UXZ1I7ExPGTX/1UNYawR58LlOJUHKBPiYC7WQ91eBo=
+github.com/DeRuina/timberjack v1.3.9/go.mod h1:RLoeQrwrCGIEF8gO5nV5b/gMD0QIy7bzQhBUgpp1EqE=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
+github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE=
@@ -20,6 +24,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -45,8 +51,8 @@ github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
-golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
@@ -66,8 +72,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
-golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=

View File

@@ -14,6 +14,7 @@ const (
     BackendTypeLlamaCpp BackendType = "llama_cpp"
     BackendTypeMlxLm    BackendType = "mlx_lm"
     BackendTypeVllm     BackendType = "vllm"
+    BackendTypeUnknown  BackendType = "unknown"
 )

 type backend interface {
@@ -55,13 +56,15 @@ func (o *Options) UnmarshalJSON(data []byte) error {
     }

     // Create backend from constructor map
-    if o.BackendOptions != nil {
-        constructor, exists := backendConstructors[o.BackendType]
-        if !exists {
-            return fmt.Errorf("unsupported backend type: %s", o.BackendType)
-        }
+    constructor, exists := backendConstructors[o.BackendType]
+    if !exists {
+        return fmt.Errorf("unsupported backend type: %s", o.BackendType)
+    }

     backend := constructor()

+    // If backend_options is provided, unmarshal into the backend
+    if o.BackendOptions != nil {
         optionsData, err := json.Marshal(o.BackendOptions)
         if err != nil {
             return fmt.Errorf("failed to marshal backend options: %w", err)
@@ -70,10 +73,11 @@ func (o *Options) UnmarshalJSON(data []byte) error {
         if err := json.Unmarshal(optionsData, backend); err != nil {
             return fmt.Errorf("failed to unmarshal backend options: %w", err)
         }
-
-        // Store in the appropriate typed field for backward compatibility
-        o.setBackendOptions(backend)
     }
+    // If backend_options is nil or empty, backend remains as empty struct (for router mode)
+
+    // Store in the appropriate typed field
+    o.setBackendOptions(backend)

     return nil
 }

View File

@@ -327,20 +327,30 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 }

 func (o *LlamaServerOptions) GetPort() int {
+    if o == nil {
+        return 0
+    }
     return o.Port
 }

 func (o *LlamaServerOptions) SetPort(port int) {
+    if o == nil {
+        return
+    }
     o.Port = port
 }

 func (o *LlamaServerOptions) GetHost() string {
+    if o == nil {
+        return "localhost"
+    }
     return o.Host
 }

 func (o *LlamaServerOptions) Validate() error {
+    // Allow nil options for router mode where llama.cpp manages models dynamically
     if o == nil {
-        return validation.ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
+        return nil
     }

     // Use reflection to check all string fields for injection patterns
@@ -370,6 +380,9 @@ func (o *LlamaServerOptions) Validate() error {

 // BuildCommandArgs converts InstanceOptions to command line arguments
 func (o *LlamaServerOptions) BuildCommandArgs() []string {
+    if o == nil {
+        return []string{}
+    }
     // Llama uses multiple flags for arrays by default (not comma-separated)
     // Use package-level llamaMultiValuedFlags variable
     args := BuildCommandArgs(o, llamaMultiValuedFlags)
@@ -381,6 +394,9 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
 }

 func (o *LlamaServerOptions) BuildDockerArgs() []string {
+    if o == nil {
+        return []string{}
+    }
     // For llama, Docker args are the same as normal args
     return o.BuildCommandArgs()
 }
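These guards lean on the fact that Go happily invokes methods on a nil pointer receiver as long as the body never dereferences it. A self-contained sketch of the pattern (stand-in type, not the real LlamaServerOptions):

package main

import "fmt"

// options stands in for LlamaServerOptions: each method checks the
// receiver before touching fields, so a nil *options is safe to use.
type options struct {
	Port int
	Host string
}

func (o *options) GetPort() int {
	if o == nil {
		return 0 // same default the real GetPort returns
	}
	return o.Port
}

func (o *options) GetHost() string {
	if o == nil {
		return "localhost" // same fallback as the real GetHost
	}
	return o.Host
}

func main() {
	var o *options // nil, as for a router-mode instance without options
	fmt.Println(o.GetHost(), o.GetPort()) // prints: localhost 0
}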

View File

@@ -6,233 +6,18 @@ import (
     "log"
     "os"
     "path/filepath"
-    "runtime"
-    "strconv"
-    "strings"
-    "time"

     "gopkg.in/yaml.v3"
 )
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Instance config directory override (relative to data_dir if not absolute)
InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
 // 3. Environment variables
 func LoadConfig(configPath string) (AppConfig, error) {
     // 1. Start with defaults
-    defaultDataDir := getDefaultDataDirectory()
+    defaultDataDir := getDefaultDataDir()
+    cfg := getDefaultConfig(defaultDataDir)
-
-    cfg := AppConfig{
-        Server: ServerConfig{
-            Host:           "0.0.0.0",
-            Port:           8080,
-            AllowedOrigins: []string{"*"}, // Default to allow all origins
-            AllowedHeaders: []string{"*"}, // Default to allow all headers
-            EnableSwagger:  false,
-        },
-        LocalNode: "main",
-        Nodes:     map[string]NodeConfig{},
-        DataDir:   defaultDataDir,
-        Backends: BackendConfig{
-            LlamaCpp: BackendSettings{
-                Command:     "llama-server",
-                Args:        []string{},
-                Environment: map[string]string{},
-                Docker: &DockerSettings{
-                    Enabled: false,
-                    Image:   "ghcr.io/ggml-org/llama.cpp:server",
-                    Args: []string{
-                        "run", "--rm", "--network", "host", "--gpus", "all",
-                        "-v", filepath.Join(defaultDataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
-                    Environment: map[string]string{},
-                },
-            },
-            VLLM: BackendSettings{
-                Command: "vllm",
-                Args:    []string{"serve"},
-                Docker: &DockerSettings{
-                    Enabled: false,
-                    Image:   "vllm/vllm-openai:latest",
-                    Args: []string{
-                        "run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
-                        "-v", filepath.Join(defaultDataDir, "huggingface") + ":/root/.cache/huggingface",
-                    },
-                    Environment: map[string]string{},
-                },
-            },
-            MLX: BackendSettings{
-                Command: "mlx_lm.server",
-                Args:    []string{},
-                // No Docker section for MLX - not supported
-            },
-        },
-        Instances: InstancesConfig{
-            PortRange: [2]int{8000, 9000},
-            // NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
-            // should be relative path to DataDir if not explicitly set.
-            InstancesDir:         "",
-            LogsDir:              "",
-            AutoCreateDirs:       true,
-            MaxInstances:         -1, // -1 means unlimited
-            MaxRunningInstances:  -1, // -1 means unlimited
-            EnableLRUEviction:    true,
-            DefaultAutoRestart:   true,
-            DefaultMaxRestarts:   3,
-            DefaultRestartDelay:  5,
-            DefaultOnDemandStart: true,
-            OnDemandStartTimeout: 120, // 2 minutes
-            TimeoutCheckInterval: 5,   // Check timeouts every 5 minutes
-        },
-        Database: DatabaseConfig{
-            Path:               "", // Will be set to data_dir/llamactl.db if empty
-            MaxOpenConnections: 25,
-            MaxIdleConnections: 5,
-            ConnMaxLifetime:    5 * time.Minute,
-        },
-        Auth: AuthConfig{
-            RequireInferenceAuth:  true,
-            InferenceKeys:         []string{},
-            RequireManagementAuth: true,
-            ManagementKeys:        []string{},
-        },
-    }
     // 2. Load from config file
     if err := loadConfigFile(&cfg, configPath); err != nil {
@@ -247,19 +32,7 @@ func LoadConfig(configPath string) (AppConfig, error) {
     // 3. Override with environment variables
     loadEnvVars(&cfg)

-    // Log warning if deprecated inference keys are present
-    if len(cfg.Auth.InferenceKeys) > 0 {
-        log.Println("⚠️  Config-based inference keys are no longer supported and will be ignored.")
-        log.Println("    Please create inference keys in web UI or via management API.")
-    }
-
     // Set default directories if not specified
-    if cfg.Instances.InstancesDir == "" {
-        cfg.Instances.InstancesDir = filepath.Join(cfg.DataDir, "instances")
-    } else {
-        // Log deprecation warning if using custom instances dir
-        log.Println("⚠️  Instances directory is deprecated and will be removed in future versions. Instances are persisted in the database.")
-    }
     if cfg.Instances.LogsDir == "" {
         cfg.Instances.LogsDir = filepath.Join(cfg.DataDir, "logs")
     }
@@ -300,372 +73,6 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
     return nil
 }
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
 // SanitizedCopy returns a copy of the AppConfig with sensitive information removed
 func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
     // Deep copy via JSON marshal/unmarshal to avoid concurrent map access
@@ -682,7 +89,6 @@ func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
     }

     // Clear sensitive information
-    sanitized.Auth.InferenceKeys = []string{}
     sanitized.Auth.ManagementKeys = []string{}

     // Clear API keys from nodes
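The precedence chain itself is unchanged (defaults, then file, then environment variables). A small sketch against the exported API, assuming the repo's module path and using an env name that appears in env.go below:

package main

import (
	"fmt"
	"os"

	"llamactl/pkg/config" // assumed import path
)

func main() {
	// Defaults set Server.Port to 8080; the env var overrides it even
	// when no config file exists.
	os.Setenv("LLAMACTL_PORT", "9090")
	defer os.Unsetenv("LLAMACTL_PORT")

	cfg, err := config.LoadConfig("nonexistent-file.yaml")
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Server.Port) // 9090
}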

View File

@@ -41,9 +41,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
     t.Fatalf("Failed to get user home directory: %v", err)
 }

-if cfg.Instances.InstancesDir != filepath.Join(homedir, ".local", "share", "llamactl", "instances") {
-    t.Errorf("Expected default instances directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "instances"), cfg.Instances.InstancesDir)
-}
 if cfg.Instances.LogsDir != filepath.Join(homedir, ".local", "share", "llamactl", "logs") {
     t.Errorf("Expected default logs directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "logs"), cfg.Instances.LogsDir)
 }
@@ -78,8 +75,8 @@ server:
   port: 9090
 instances:
   port_range: [7000, 8000]
-  logs_dir: "/custom/logs"
   max_instances: 5
+  logs_dir: "/custom/logs"
   llama_executable: "/usr/bin/llama-server"
   default_auto_restart: false
   default_max_restarts: 10
@@ -219,7 +216,6 @@ instances:
     }
 }

-
 func TestParsePortRange(t *testing.T) {
     tests := []struct {
         name string
@@ -248,7 +244,6 @@ func TestParsePortRange(t *testing.T) {
     }
 }

-
 func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
     bc := &config.BackendConfig{
         LlamaCpp: config.BackendSettings{
@@ -305,7 +300,6 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
     }
 }

-
 func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
     // Test that backend environment variables work correctly
     envVars := map[string]string{
@@ -375,7 +369,6 @@ func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
     }
 }

-
 func TestLoadConfig_LocalNode(t *testing.T) {
     t.Run("default local node", func(t *testing.T) {
         cfg, err := config.LoadConfig("nonexistent-file.yaml")

pkg/config/defaults.go (new file)
View File

@@ -0,0 +1,150 @@
package config
import (
"os"
"path/filepath"
"runtime"
"time"
)
func getDefaultConfig(dataDir string) AppConfig {
return AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
DataDir: dataDir,
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(dataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(dataDir, "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
LogsDir: "", // Will be set to data_dir/logs if empty
LogRotationEnabled: true,
LogRotationMaxSize: 100,
LogRotationCompress: false,
},
Database: DatabaseConfig{
Path: "", // Will be set to data_dir/llamactl.db if empty
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
}
// getDefaultDataDir returns platform-specific default data directory
func getDefaultDataDir() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
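The empty LogsDir and Database.Path above are deliberate placeholders; LoadConfig resolves them against DataDir after file and env overrides are applied. An illustrative sketch of that resolution (it mirrors the post-load fixups, with the llamactl.db name taken from the comment above; it is not a call into the package):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	dataDir := "/home/user/.local/share/llamactl" // whatever getDefaultDataDir picked
	logsDir := ""                                 // placeholder from getDefaultConfig
	dbPath := ""                                  // placeholder from getDefaultConfig

	// Same shape as the fixups LoadConfig performs after loading.
	if logsDir == "" {
		logsDir = filepath.Join(dataDir, "logs")
	}
	if dbPath == "" {
		dbPath = filepath.Join(dataDir, "llamactl.db")
	}
	fmt.Println(logsDir, dbPath)
}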

pkg/config/env.go (new file)
View File

@@ -0,0 +1,319 @@
package config
import (
"os"
"strconv"
"strings"
"time"
)
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.DataDir = dataDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
// Log rotation config
if logRotationEnabled := os.Getenv("LLAMACTL_LOG_ROTATION_ENABLED"); logRotationEnabled != "" {
if b, err := strconv.ParseBool(logRotationEnabled); err == nil {
cfg.Instances.LogRotationEnabled = b
}
}
if logRotationMaxSize := os.Getenv("LLAMACTL_LOG_ROTATION_MAX_SIZE"); logRotationMaxSize != "" {
if m, err := strconv.Atoi(logRotationMaxSize); err == nil {
cfg.Instances.LogRotationMaxSize = m
}
}
if logRotationCompress := os.Getenv("LLAMACTL_LOG_ROTATION_COMPRESS"); logRotationCompress != "" {
if b, err := strconv.ParseBool(logRotationCompress); err == nil {
cfg.Instances.LogRotationCompress = b
}
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
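ParsePortRange is the one exported helper here; it accepts either separator and signals bad input with the zero-value sentinel rather than an error, which is why loadEnvVars compares against [2]int{0, 0}. A quick usage sketch, assuming the repo's module path:

package main

import (
	"fmt"

	"llamactl/pkg/config" // assumed import path
)

func main() {
	fmt.Println(config.ParsePortRange("8000-9000")) // [8000 9000]
	fmt.Println(config.ParsePortRange("8000,9000")) // [8000 9000]
	fmt.Println(config.ParsePortRange("8000"))      // [0 0] (invalid; caller must check)
}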

pkg/config/types.go (new file)
View File

@@ -0,0 +1,143 @@
package config
import "time"
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Log rotation enabled
LogRotationEnabled bool `yaml:"log_rotation_enabled" default:"true"`
// Maximum log file size in MB before rotation
LogRotationMaxSize int `yaml:"log_rotation_max_size" default:"100"`
// Whether to compress rotated log files
LogRotationCompress bool `yaml:"log_rotation_compress" default:"false"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
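The yaml tags above define the on-disk key names, including the three new log rotation keys. A sketch of a config fragment unmarshaling into the exported InstancesConfig (module path assumed; yaml.v3 is already a project dependency):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"

	"llamactl/pkg/config" // assumed import path
)

func main() {
	raw := []byte(`
port_range: [7000, 8000]
logs_dir: /custom/logs
log_rotation_enabled: true
log_rotation_max_size: 250
`)
	var ic config.InstancesConfig
	if err := yaml.Unmarshal(raw, &ic); err != nil {
		panic(err)
	}
	fmt.Println(ic.PortRange, ic.LogsDir, ic.LogRotationEnabled, ic.LogRotationMaxSize)
}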

View File

@@ -3,10 +3,12 @@ package instance
 import (
     "encoding/json"
     "fmt"
-    "llamactl/pkg/config"
     "log"
     "net/http"
     "time"
+
+    "llamactl/pkg/backends"
+    "llamactl/pkg/config"
 )

 // Instance represents a running instance of llama server
@@ -68,7 +70,16 @@ func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusCha
     // Only create logger, proxy, and process for local instances
     if !instance.IsRemote() {
-        instance.logger = newLogger(name, globalInstanceSettings.LogsDir)
+        logRotationConfig := &LogRotationConfig{
+            Enabled:  globalInstanceSettings.LogRotationEnabled,
+            MaxSize:  globalInstanceSettings.LogRotationMaxSize,
+            Compress: globalInstanceSettings.LogRotationCompress,
+        }
+        instance.logger = newLogger(
+            name,
+            globalInstanceSettings.LogsDir,
+            logRotationConfig,
+        )
         instance.process = newProcess(instance)
     }
@@ -107,6 +118,14 @@ func (i *Instance) WaitForHealthy(timeout int) error {
     return i.process.waitForHealthy(timeout)
 }

+func (i *Instance) GetBackendType() backends.BackendType {
+    opts := i.GetOptions()
+    if opts == nil {
+        return backends.BackendTypeUnknown
+    }
+    return opts.BackendOptions.BackendType
+}
+
 // GetOptions returns the current options
 func (i *Instance) GetOptions() *Options {
     if i.options == nil {
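GetBackendType gives callers a branchable value without nil checks of their own; the Unknown sentinel stands in for "options not set yet". A stand-alone sketch of the pattern using stand-in types (not the real Instance):

package main

import "fmt"

type backendType string

const (
	backendTypeLlamaCpp backendType = "llama_cpp"
	backendTypeUnknown  backendType = "unknown" // mirrors backends.BackendTypeUnknown
)

type options struct{ backendType backendType }

type inst struct{ options *options }

// Mirrors Instance.GetBackendType: nil options map to the sentinel,
// never to an empty string.
func (i *inst) getBackendType() backendType {
	if i.options == nil {
		return backendTypeUnknown
	}
	return i.options.backendType
}

func main() {
	fresh := &inst{}
	fmt.Println(fresh.getBackendType()) // unknown
	configured := &inst{options: &options{backendType: backendTypeLlamaCpp}}
	fmt.Println(configured.getBackendType()) // llama_cpp
}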

View File

@@ -27,8 +27,8 @@ func TestNewInstance(t *testing.T) {
     },
 },
 Instances: config.InstancesConfig{
-    LogsDir:             "/tmp/test",
     DefaultAutoRestart:  true,
+    LogsDir:             "/tmp/test",
     DefaultMaxRestarts:  3,
     DefaultRestartDelay: 5,
 },
@@ -120,8 +120,8 @@ func TestSetOptions(t *testing.T) {
     },
 },
 Instances: config.InstancesConfig{
-    LogsDir:             "/tmp/test",
     DefaultAutoRestart:  true,
+    LogsDir:             "/tmp/test",
     DefaultMaxRestarts:  3,
     DefaultRestartDelay: 5,
 },

View File

@@ -7,66 +7,117 @@ import (
 	"os"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
+	timber "github.com/DeRuina/timberjack"
 )

+// LogRotationConfig contains log rotation settings for instances
+type LogRotationConfig struct {
+	Enabled  bool
+	MaxSize  int
+	Compress bool
+}
+
 type logger struct {
 	name        string
 	logDir      string
-	logFile     atomic.Pointer[os.File]
+	logFile     *timber.Logger
 	logFilePath string
 	mu          sync.RWMutex
+	cfg         *LogRotationConfig
 }

-func newLogger(name string, logDir string) *logger {
+func newLogger(name, logDir string, cfg *LogRotationConfig) *logger {
 	return &logger{
 		name:   name,
 		logDir: logDir,
+		cfg:    cfg,
 	}
 }

-// create creates and opens the log files for stdout and stderr
-func (i *logger) create() error {
-	i.mu.Lock()
-	defer i.mu.Unlock()
+func (l *logger) create() error {
+	l.mu.Lock()
+	defer l.mu.Unlock()

-	if i.logDir == "" {
-		return fmt.Errorf("logDir is empty for instance %s", i.name)
+	if l.logDir == "" {
+		return fmt.Errorf("logDir empty for instance %s", l.name)
 	}

-	// Set up instance logs
-	logPath := i.logDir + "/" + i.name + ".log"
-	i.logFilePath = logPath
-	if err := os.MkdirAll(i.logDir, 0755); err != nil {
+	if err := os.MkdirAll(l.logDir, 0755); err != nil {
 		return fmt.Errorf("failed to create log directory: %w", err)
 	}

-	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
-	if err != nil {
-		return fmt.Errorf("failed to create stdout log file: %w", err)
-	}
+	logPath := fmt.Sprintf("%s/%s.log", l.logDir, l.name)
+	l.logFilePath = logPath
+
+	// Build the timber logger
+	t := &timber.Logger{
+		Filename:   logPath,
+		MaxSize:    l.cfg.MaxSize,
+		MaxBackups: 0, // No limit on backups
+		// Compression: "gzip" if Compress is true, else "none"
+		Compression: func() string {
+			if l.cfg.Compress {
+				return "gzip"
+			}
+			return "none"
+		}(),
+		FileMode:  0644,
+		LocalTime: true,
+	}

-	i.logFile.Store(logFile)
+	// If rotation is disabled, set MaxSize to 0 so no rotation occurs
+	if !l.cfg.Enabled {
+		t.MaxSize = 0
+	}
+
+	l.logFile = t

-	// Write a startup marker to both files
-	timestamp := time.Now().Format("2006-01-02 15:04:05")
-	fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
+	// Write a startup marker
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(t, "\n=== Instance %s started at %s ===\n", l.name, ts)

 	return nil
 }

-// getLogs retrieves the last n lines of logs from the instance
-func (i *logger) getLogs(num_lines int) (string, error) {
-	i.mu.RLock()
-	defer i.mu.RUnlock()
+func (l *logger) readOutput(rc io.ReadCloser) {
+	defer rc.Close()
+	scanner := bufio.NewScanner(rc)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if lg := l.logFile; lg != nil {
+			fmt.Fprintln(lg, line)
+		}
+	}
+}

-	if i.logFilePath == "" {
-		return "", fmt.Errorf("log file not created for instance %s", i.name)
+func (l *logger) close() {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	lg := l.logFile
+	if lg == nil {
+		return
 	}

-	file, err := os.Open(i.logFilePath)
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(lg, "=== Instance %s stopped at %s ===\n\n", l.name, ts)
+	_ = lg.Close()
+	l.logFile = nil
+}
+
+// getLogs retrieves the last n lines of logs from the instance
+func (l *logger) getLogs(num_lines int) (string, error) {
+	l.mu.RLock()
+	defer l.mu.RUnlock()
+
+	if l.logFilePath == "" {
+		return "", fmt.Errorf("log file not created for instance %s", l.name)
+	}
+
+	file, err := os.Open(l.logFilePath)
 	if err != nil {
 		return "", fmt.Errorf("failed to open log file: %w", err)
 	}
@@ -97,31 +148,3 @@ func (i *logger) getLogs(num_lines int) (string, error) {
 	return strings.Join(lines[start:], "\n"), nil
 }
-
-// close closes the log files
-func (i *logger) close() {
-	i.mu.Lock()
-	defer i.mu.Unlock()
-
-	logFile := i.logFile.Swap(nil)
-	if logFile != nil {
-		timestamp := time.Now().Format("2006-01-02 15:04:05")
-		fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
-		logFile.Sync() // Ensure all buffered data is written to disk
-		logFile.Close()
-	}
-}
-
-// readOutput reads from the given reader and writes lines to the log file
-func (i *logger) readOutput(reader io.ReadCloser) {
-	defer reader.Close()
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		line := scanner.Text()
-		// Use atomic load to avoid lock contention on every line
-		if logFile := i.logFile.Load(); logFile != nil {
-			fmt.Fprintln(logFile, line)
-		}
-	}
-}
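
The rewrite swaps the hand-rolled atomic.Pointer[os.File] for a DeRuina/timberjack writer, so rotation and compression happen inside the logger itself while readOutput keeps streaming subprocess output line by line. A minimal sketch of how the new API might be exercised from inside the same package (instance name, directory, and rotation values are illustrative; MaxSize is assumed to be in megabytes, as in lumberjack-style loggers):

// Illustrative same-package usage of the logger above (extra import: "os/exec").
func exampleLoggerUsage() error {
	cfg := &LogRotationConfig{Enabled: true, MaxSize: 10, Compress: true}
	l := newLogger("my-instance", "/tmp/llamactl-logs", cfg)
	if err := l.create(); err != nil {
		return err
	}
	defer l.close()

	cmd := exec.Command("echo", "hello") // stand-in for a backend process
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		return err
	}
	l.readOutput(stdout) // copies each stdout line into the rotating log file
	_ = cmd.Wait()

	tail, err := l.getLogs(50) // last 50 lines, as served by the logs API
	if err != nil {
		return err
	}
	fmt.Println(tail)
	return nil
}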


@@ -19,7 +19,7 @@ type InstanceManager interface {
 	UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
 	DeleteInstance(name string) error
 	StartInstance(name string) (*instance.Instance, error)
-	IsMaxRunningInstancesReached() bool
+	AtMaxRunning() bool
 	StopInstance(name string) (*instance.Instance, error)
 	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Instance, error)


@@ -202,12 +202,11 @@ func createTestAppConfig(instancesDir string) *config.AppConfig {
 		},
 	},
 	Instances: config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
-		InstancesDir:         instancesDir,
-		LogsDir:              instancesDir,
 		MaxInstances:         10,
 		MaxRunningInstances:  10,
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
+		LogsDir:              instancesDir,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	},


@@ -383,7 +383,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
 	}

 	// Check max running instances limit for local instances only
-	if im.IsMaxRunningInstancesReached() {
+	if im.AtMaxRunning() {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
 	}
@@ -399,7 +399,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
 	return inst, nil
 }

-func (im *instanceManager) IsMaxRunningInstancesReached() bool {
+func (im *instanceManager) AtMaxRunning() bool {
 	if im.globalConfig.Instances.MaxRunningInstances == -1 {
 		return false
 	}


@@ -38,7 +38,6 @@ func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
 }

 func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
-	tempDir := t.TempDir()
 	appConfig := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{
@@ -47,7 +46,6 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
-			InstancesDir:         tempDir,
 			MaxInstances:         1, // Very low limit for testing
 			TimeoutCheckInterval: 5,
 		},


@@ -96,7 +96,7 @@ func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
 		return fmt.Errorf("instance is not running and on-demand start is not enabled")
 	}

-	if h.InstanceManager.IsMaxRunningInstancesReached() {
+	if h.InstanceManager.AtMaxRunning() {
 		if h.cfg.Instances.EnableLRUEviction {
 			err := h.InstanceManager.EvictLRUInstance()
 			if err != nil {


@@ -306,3 +306,158 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
	return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
}
// LlamaCppListModels godoc
// @Summary List models in a llama.cpp instance
// @Description Returns a list of models available in the specified llama.cpp instance
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} map[string]any "Models list response"
// @Failure 400 {string} string "Invalid instance"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/llama-cpp/{name}/models [get]
func (h *Handler) LlamaCppListModels() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
return
}
}
// Modify request path to /models for proxying
r.URL.Path = "/models"
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
// LlamaCppLoadModel godoc
// @Summary Load a model in a llama.cpp instance
// @Description Loads the specified model in the given llama.cpp instance
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Param model path string true "Model Name"
// @Success 200 {object} map[string]string "Success message"
// @Failure 400 {string} string "Invalid request"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/llama-cpp/{name}/models/{model}/load [post]
func (h *Handler) LlamaCppLoadModel() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
return
}
}
// Modify request path to /models/load for proxying
r.URL.Path = "/models/load"
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
// LlamaCppUnloadModel godoc
// @Summary Unload a model in a llama.cpp instance
// @Description Unloads the specified model in the given llama.cpp instance
// @Tags Llama.cpp
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Param model path string true "Model Name"
// @Success 200 {object} map[string]string "Success message"
// @Failure 400 {string} string "Invalid request"
// @Failure 500 {string} string "Internal Server Error"
// @Router /api/v1/llama-cpp/{name}/models/{model}/unload [post]
func (h *Handler) LlamaCppUnloadModel() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
inst, err := h.validateLlamaCppInstance(r)
if err != nil {
writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
return
}
// Check instance permissions
if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
writeError(w, http.StatusForbidden, "permission_denied", err.Error())
return
}
// Check if instance is shutting down before autostart logic
if inst.GetStatus() == instance.ShuttingDown {
writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
return
}
if !inst.IsRemote() && !inst.IsRunning() {
err := h.ensureInstanceRunning(inst)
if err != nil {
writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
return
}
}
// Modify request path to /models/unload for proxying
r.URL.Path = "/models/unload"
// Use instance's ServeHTTP which tracks inflight requests and handles shutting down state
err = inst.ServeHTTP(w, r)
if err != nil {
// Error is already handled in ServeHTTP (response written)
return
}
}
}
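
Taken together with the routes added in the router (GET /api/v1/llama-cpp/{name}/models plus the load/unload POSTs), these handlers expose the native llama.cpp router's model management through llamactl. A rough sketch of a client driving them; the base URL, instance name, model id, and API key are placeholders, not values from this repository:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Hypothetical deployment values.
	base := "http://localhost:8080/api/v1/llama-cpp/my-instance"
	key := "sk-management-example"
	client := &http.Client{}

	// List the models the instance's router knows about.
	req, _ := http.NewRequest(http.MethodGet, base+"/models", nil)
	req.Header.Set("Authorization", "Bearer "+key)
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	body, _ := io.ReadAll(resp.Body)
	resp.Body.Close()
	fmt.Println(string(body))

	// Ask the router to load one of them; unload works the same way.
	req, _ = http.NewRequest(http.MethodPost, base+"/models/qwen3-8b/load", nil)
	req.Header.Set("Authorization", "Bearer "+key)
	resp, err = client.Do(req)
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
	fmt.Println("load:", resp.Status)
}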


@@ -3,10 +3,13 @@ package server

 import (
 	"bytes"
 	"encoding/json"
+	"fmt"
 	"io"
+	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/validation"
 	"net/http"
+	"strings"
 )

 // OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format
@@ -23,6 +26,53 @@ type OpenAIInstance struct {
 	OwnedBy string `json:"owned_by"`
 }

+// LlamaCppModel represents a model available in a llama.cpp instance
+type LlamaCppModel struct {
+	ID      string              `json:"id"`
+	Object  string              `json:"object"`
+	OwnedBy string              `json:"owned_by"`
+	Created int64               `json:"created"`
+	InCache bool                `json:"in_cache"`
+	Path    string              `json:"path"`
+	Status  LlamaCppModelStatus `json:"status"`
+}
+
+// LlamaCppModelStatus represents the status of a model in a llama.cpp instance
+type LlamaCppModelStatus struct {
+	Value string   `json:"value"` // "loaded" | "loading" | "unloaded"
+	Args  []string `json:"args"`
+}
+
+// fetchLlamaCppModels fetches models from a llama.cpp instance using the proxy
+func fetchLlamaCppModels(inst *instance.Instance) ([]LlamaCppModel, error) {
+	// Create a request to the instance's /models endpoint
+	req, err := http.NewRequest("GET", fmt.Sprintf("http://%s:%d/models", inst.GetHost(), inst.GetPort()), nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		bodyBytes, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var result struct {
+		Data []LlamaCppModel `json:"data"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	return result.Data, nil
+}
+
 // OpenAIListInstances godoc
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
@@ -40,14 +90,41 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 			return
 		}

-		openaiInstances := make([]OpenAIInstance, len(instances))
-		for i, inst := range instances {
-			openaiInstances[i] = OpenAIInstance{
+		var openaiInstances []OpenAIInstance
+
+		// For each llama.cpp instance, try to fetch models and add them as separate entries
+		for _, inst := range instances {
+			if inst.GetBackendType() == backends.BackendTypeLlamaCpp && inst.IsRunning() {
+				// Try to fetch models from the instance
+				models, err := fetchLlamaCppModels(inst)
+				if err != nil {
+					fmt.Printf("Failed to fetch models from instance %s: %v", inst.Name, err)
+					continue
+				}
+				for _, model := range models {
+					openaiInstances = append(openaiInstances, OpenAIInstance{
+						ID:      inst.Name + "/" + model.ID,
+						Object:  "model",
+						Created: inst.Created,
+						OwnedBy: inst.Name,
+					})
+				}
+				if len(models) > 1 {
+					// Skip adding the instance name if multiple models are present
+					continue
+				}
+			}
+
+			// Add instance name as single entry (for non-llama.cpp or if model fetch failed)
+			openaiInstances = append(openaiInstances, OpenAIInstance{
 				ID:      inst.Name,
 				Object:  "model",
 				Created: inst.Created,
 				OwnedBy: "llamactl",
-			}
+			})
 		}

 		openaiResponse := OpenAIListInstancesResponse{
@@ -87,14 +164,28 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

-		modelName, ok := requestBody["model"].(string)
-		if !ok || modelName == "" {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
+		reqModelName, ok := requestBody["model"].(string)
+		if !ok || reqModelName == "" {
+			writeError(w, http.StatusBadRequest, "invalid_request", "Model name is required")
 			return
 		}

+		// Parse instance name and model name from <instance_name>/<model_name> format
+		var instanceName string
+		var modelName string
+
+		// Check if model name contains "/"
+		if idx := strings.Index(reqModelName, "/"); idx != -1 {
+			// Split into instance and model parts
+			instanceName = reqModelName[:idx]
+			modelName = reqModelName[idx+1:]
+		} else {
+			instanceName = reqModelName
+			modelName = reqModelName
+		}
+
 		// Validate instance name at the entry point
-		validatedName, err := validation.ValidateInstanceName(modelName)
+		validatedName, err := validation.ValidateInstanceName(instanceName)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
 			return
@@ -119,6 +210,11 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

+		if inst.IsRemote() {
+			// Don't replace model name for remote instances
+			modelName = reqModelName
+		}
+
 		if !inst.IsRemote() && !inst.IsRunning() {
 			err := h.ensureInstanceRunning(inst)
 			if err != nil {
@@ -127,6 +223,16 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			}
 		}

+		// Update the request body with just the model name
+		requestBody["model"] = modelName
+
+		// Re-marshal the updated body
+		bodyBytes, err = json.Marshal(requestBody)
+		if err != nil {
+			writeError(w, http.StatusInternalServerError, "marshal_error", "Failed to update request body")
+			return
+		}
+
 		// Recreate the request body from the bytes we read
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
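
With this change, the OpenAI-compatible endpoint accepts <instance_name>/<model_name> in the model field: the prefix picks the llamactl instance, and only the bare model name is forwarded to the backend (remote instances keep the full name). A minimal sketch of a matching request; the URL, instance name, and model name are illustrative:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// "my-instance/qwen3-8b" routes to model qwen3-8b on instance my-instance;
	// a bare "my-instance" preserves the old one-model-per-instance behavior.
	payload, _ := json.Marshal(map[string]any{
		"model": "my-instance/qwen3-8b", // hypothetical names
		"messages": []map[string]string{
			{"role": "user", "content": "Hello"},
		},
	})
	resp, err := http.Post("http://localhost:8080/v1/chat/completions",
		"application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}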


@@ -275,16 +275,3 @@ func TestAutoGenerationScenarios(t *testing.T) {
 	})
 }
-
-func TestConfigBasedInferenceKeysDeprecationWarning(t *testing.T) {
-	// Test that config-based inference keys trigger a warning (captured in logs)
-	cfg := config.AuthConfig{
-		InferenceKeys: []string{"sk-inference-old"},
-	}
-
-	// Creating middleware should log a warning, but shouldn't fail
-	_ = server.NewAPIAuthMiddleware(cfg, nil)
-
-	// If we get here without panic, the test passes
-	// The warning is logged but not returned as an error
-}


@@ -26,9 +26,6 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		MaxAge: 300,
 	}))

-	// Add API authentication middleware
-	authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth, handler.authStore)
-
 	if handler.cfg.Server.EnableSwagger {
 		r.Get("/swagger/*", httpSwagger.Handler(
 			httpSwagger.URL("/swagger/doc.json"),
@@ -38,8 +35,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Define routes
 	r.Route("/api/v1", func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
-			r.Use(authMiddleware.ManagementAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
+			r.Use(handler.authMiddleware.ManagementAuthMiddleware())
 		}

 		r.Get("/version", handler.VersionHandler())
@@ -73,6 +70,13 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			})
 		})

+		// Llama.cpp instance-specific endpoints
+		r.Route("/llama-cpp/{name}", func(r chi.Router) {
+			r.Get("/models", handler.LlamaCppListModels())
+			r.Post("/models/{model}/load", handler.LlamaCppLoadModel())
+			r.Post("/models/{model}/unload", handler.LlamaCppUnloadModel())
+		})
+
 		// Node management endpoints
 		r.Route("/nodes", func(r chi.Router) {
 			r.Get("/", handler.ListNodes()) // List all nodes
@@ -107,8 +111,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	r.Route("/v1", func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-			r.Use(authMiddleware.InferenceAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 		}

 		r.Get("/models", handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
@@ -135,8 +139,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Private Routes
 	r.Group(func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-			r.Use(authMiddleware.InferenceAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 		}

 		// This handler auto starts the server if it's not running

test_client.py (new file, 135 lines)

@@ -0,0 +1,135 @@
#!/usr/bin/env python3
"""
Simple Python script to interact with local LLM server's OpenAI-compatible API
"""

import requests
import sys

# Local LLM server configuration
BASE_URL = "http://localhost:8080"
API_KEY = None
MODEL_NAME = None


def get_models():
    """Fetch available models from /v1/models endpoint"""
    headers = {}
    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"
    try:
        response = requests.get(f"{BASE_URL}/v1/models", headers=headers, timeout=10)
        response.raise_for_status()
        return response.json()["data"]
    except Exception as e:
        print(f"Error fetching models: {e}")
        return []


def send_message(message):
    """
    Send a message to local LLM server API

    Args:
        message (str): The message to send

    Returns:
        str: The AI response or error message
    """
    headers = {
        "Content-Type": "application/json",
    }
    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    data = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": message
            }
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
        "stream": False,
    }

    response = requests.post(f"{BASE_URL}/v1/chat/completions", headers=headers, json=data, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


def interactive_mode():
    """Run in interactive mode for continuous conversation"""
    global BASE_URL, API_KEY, MODEL_NAME

    # Get base URL
    url_input = input(f"Base URL [{BASE_URL}]: ").strip()
    if url_input:
        BASE_URL = url_input

    # Get API key (optional)
    key_input = input("API key (optional): ").strip()
    if key_input:
        API_KEY = key_input

    # Fetch and select model
    models = get_models()
    if not models:
        print("No models available. Exiting.")
        return

    print("\nAvailable models:")
    for i, m in enumerate(models, 1):
        print(f"{i}. {m['id']}")

    while True:
        try:
            selection = int(input("\nSelect model: "))
            if 1 <= selection <= len(models):
                MODEL_NAME = models[selection - 1]["id"]
                break
            print(f"Please enter a number between 1 and {len(models)}")
        except ValueError:
            print("Please enter a valid number")

    print(f"\nUsing model: {MODEL_NAME}")
    print("Type 'quit' or 'exit' to stop")
    print("-" * 40)

    while True:
        try:
            user_input = input("\nYou: ").strip()
            if user_input.lower() in ['quit', 'exit', 'q']:
                print("Goodbye!")
                break
            if not user_input:
                continue
            print("AI: ", end="", flush=True)
            response = send_message(user_input)
            print(response)
        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except EOFError:
            print("\nGoodbye!")
            break


def main():
    """Main function"""
    if len(sys.argv) > 1:
        # Single message mode
        message = " ".join(sys.argv[1:])
        response = send_message(message)
        print(response)
    else:
        # Interactive mode
        interactive_mode()


if __name__ == "__main__":
    main()
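
As written, the script has two modes: passing arguments (python test_client.py "your prompt") sends a single message with the module-level defaults, while running it with no arguments enters interactive mode, which prompts for the base URL and an optional API key and then picks a model from /v1/models. Since MODEL_NAME starts as None, single-message mode only makes sense after editing the defaults; interactive mode is the practical entry point against a fresh server.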

webui/package-lock.json (generated): file diff suppressed because it is too large.


@@ -18,30 +18,29 @@
 		"lint:fix": "eslint . --ext .ts,.tsx --fix"
 	},
 	"dependencies": {
-		"@radix-ui/react-checkbox": "^1.3.2",
-		"@radix-ui/react-dialog": "^1.1.14",
-		"@radix-ui/react-label": "^2.1.7",
+		"@radix-ui/react-checkbox": "^1.3.3",
+		"@radix-ui/react-dialog": "^1.1.15",
+		"@radix-ui/react-label": "^2.1.8",
 		"@radix-ui/react-radio-group": "^1.3.8",
-		"@radix-ui/react-slot": "^1.2.3",
+		"@radix-ui/react-slot": "^1.2.4",
 		"@tailwindcss/vite": "^4.1.11",
 		"class-variance-authority": "^0.7.1",
 		"clsx": "^2.1.1",
 		"date-fns": "^4.1.0",
-		"lucide-react": "^0.555.0",
+		"lucide-react": "^0.562.0",
 		"react": "^19.2.0",
 		"react-dom": "^19.2.0",
 		"sonner": "^2.0.7",
 		"tailwind-merge": "^3.4.0",
 		"tailwindcss": "^4.1.11",
-		"zod": "^4.1.12"
+		"zod": "^4.2.0"
 	},
 	"devDependencies": {
 		"@eslint/js": "^9.39.1",
 		"@testing-library/jest-dom": "^6.9.1",
 		"@testing-library/react": "^16.3.0",
 		"@testing-library/user-event": "^14.6.1",
-		"@types/eslint__js": "^9.14.0",
-		"@types/node": "^24.10.1",
+		"@types/node": "^25.0.2",
 		"@types/react": "^19.2.4",
 		"@types/react-dom": "^19.2.3",
 		"@vitejs/plugin-react": "^5.1.1",
@@ -50,11 +49,11 @@
 		"eslint-plugin-react": "^7.37.5",
 		"eslint-plugin-react-hooks": "^7.0.1",
 		"eslint-plugin-react-refresh": "^0.4.20",
-		"jsdom": "^27.2.0",
+		"jsdom": "^27.3.0",
 		"tw-animate-css": "^1.4.0",
 		"typescript": "^5.9.3",
-		"typescript-eslint": "^8.48.0",
-		"vite": "^7.2.2",
+		"typescript-eslint": "^8.50.0",
+		"vite": "^7.3.0",
 		"vitest": "^4.0.8"
 	}
 }


@@ -1,14 +1,16 @@
 // ui/src/components/InstanceCard.tsx
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Badge } from "@/components/ui/badge";
 import type { Instance } from "@/types/instance";
-import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download } from "lucide-react";
+import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download, Boxes } from "lucide-react";
 import LogsDialog from "@/components/LogDialog";
+import ModelsDialog from "@/components/ModelsDialog";
 import HealthBadge from "@/components/HealthBadge";
 import BackendBadge from "@/components/BackendBadge";
-import { useState } from "react";
+import { useState, useEffect } from "react";
 import { useInstanceHealth } from "@/hooks/useInstanceHealth";
-import { instancesApi } from "@/lib/api";
+import { instancesApi, llamaCppApi, type Model } from "@/lib/api";

 interface InstanceCardProps {
   instance: Instance;
@@ -26,9 +28,35 @@ function InstanceCard({
   editInstance,
 }: InstanceCardProps) {
   const [isLogsOpen, setIsLogsOpen] = useState(false);
+  const [isModelsOpen, setIsModelsOpen] = useState(false);
   const [showAllActions, setShowAllActions] = useState(false);
+  const [models, setModels] = useState<Model[]>([]);
   const health = useInstanceHealth(instance.name, instance.status);

+  const running = instance.status === "running";
+  const isLlamaCpp = instance.options?.backend_type === "llama_cpp";
+
+  // Fetch models for llama.cpp instances
+  useEffect(() => {
+    if (!isLlamaCpp || !running) {
+      setModels([]);
+      return;
+    }
+    void (async () => {
+      try {
+        const fetchedModels = await llamaCppApi.getModels(instance.name);
+        setModels(fetchedModels);
+      } catch {
+        setModels([]);
+      }
+    })();
+  }, [instance.name, isLlamaCpp, running]);
+
+  // Calculate model counts
+  const totalModels = models.length;
+  const loadedModels = models.filter(m => m.status.value === "loaded").length;
+
   const handleStart = () => {
     startInstance(instance.name);
   };
@@ -53,6 +81,10 @@ function InstanceCard({
     setIsLogsOpen(true);
   };

+  const handleModels = () => {
+    setIsModelsOpen(true);
+  };
+
   const handleExport = () => {
     void (async () => {
       try {
@@ -83,8 +115,6 @@ function InstanceCard({
     })();
   };

-  const running = instance.status === "running";
-
   return (
     <>
       <Card className="hover:shadow-md transition-shadow">
@@ -99,6 +129,12 @@ function InstanceCard({
           <div className="flex items-center gap-2 flex-wrap">
             <BackendBadge backend={instance.options?.backend_type} docker={instance.options?.docker_enabled} />
             {running && <HealthBadge health={health} />}
+            {isLlamaCpp && running && totalModels > 0 && (
+              <Badge variant="secondary" className="text-xs">
+                <Boxes className="h-3 w-3 mr-1" />
+                {loadedModels}/{totalModels} models
+              </Badge>
+            )}
           </div>
         </div>
       </CardHeader>
@@ -149,26 +185,37 @@ function InstanceCard({
       {/* Secondary actions - collapsible */}
       {showAllActions && (
-        <div className="flex items-center gap-2 pt-2 border-t border-border">
+        <div className="flex items-center gap-2 pt-2 border-t border-border flex-wrap">
           <Button
             size="sm"
             variant="outline"
             onClick={handleLogs}
             title="View logs"
             data-testid="view-logs-button"
-            className="flex-1"
           >
             <FileText className="h-4 w-4 mr-1" />
             Logs
           </Button>
+          {isLlamaCpp && totalModels > 1 && (
+            <Button
+              size="sm"
+              variant="outline"
+              onClick={handleModels}
+              title="Manage models"
+              data-testid="manage-models-button"
+            >
+              <Boxes className="h-4 w-4 mr-1" />
+              Models
+            </Button>
+          )}
           <Button
             size="sm"
             variant="outline"
             onClick={handleExport}
             title="Export instance"
             data-testid="export-instance-button"
-            className="flex-1"
           >
             <Download className="h-4 w-4 mr-1" />
             Export
@@ -195,6 +242,13 @@ function InstanceCard({
         instanceName={instance.name}
         isRunning={running}
       />
+      <ModelsDialog
+        open={isModelsOpen}
+        onOpenChange={setIsModelsOpen}
+        instanceName={instance.name}
+        isRunning={running}
+      />
     </>
   );
 }


@@ -0,0 +1,303 @@
import React, { useState, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import {
Dialog,
DialogContent,
DialogDescription,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
} from '@/components/ui/table'
import { Badge } from '@/components/ui/badge'
import { llamaCppApi } from '@/lib/api'
import { RefreshCw, Loader2, AlertCircle } from 'lucide-react'
interface ModelsDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
instanceName: string
isRunning: boolean
}
interface Model {
id: string
object: string
owned_by: string
created: number
in_cache: boolean
path: string
status: {
value: string // "loaded" | "loading" | "unloaded"
args: string[]
}
}
const StatusIcon: React.FC<{ status: string }> = ({ status }) => {
switch (status) {
case 'loaded':
return (
<div className="h-2 w-2 rounded-full bg-green-500" />
)
case 'loading':
return (
<Loader2
className="h-3 w-3 animate-spin text-yellow-500"
/>
)
case 'unloaded':
return (
<div className="h-2 w-2 rounded-full bg-gray-400" />
)
default:
return null
}
}
const ModelsDialog: React.FC<ModelsDialogProps> = ({
open,
onOpenChange,
instanceName,
isRunning,
}) => {
const [models, setModels] = useState<Model[]>([])
const [loading, setLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const [loadingModels, setLoadingModels] = useState<Set<string>>(new Set())
// Fetch models function
const fetchModels = React.useCallback(async () => {
if (!instanceName || !isRunning) return
setLoading(true)
setError(null)
try {
const response = await llamaCppApi.getModels(instanceName)
setModels(response)
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to fetch models')
} finally {
setLoading(false)
}
}, [instanceName, isRunning])
// Fetch models when dialog opens
useEffect(() => {
if (!open || !isRunning) return
// Initial fetch
void fetchModels()
}, [open, isRunning, fetchModels])
// Auto-refresh only when models are loading
useEffect(() => {
if (!open || !isRunning) return
// Check if any model is in loading state
const hasLoadingModel = models.some(m => m.status.value === 'loading')
if (!hasLoadingModel) return
// Poll every 2 seconds when there's a loading model
const interval = setInterval(() => {
void fetchModels()
}, 2000)
return () => clearInterval(interval)
}, [open, isRunning, models, fetchModels])
// Load model
const loadModel = async (modelName: string) => {
setLoadingModels((prev) => new Set(prev).add(modelName))
setError(null)
try {
await llamaCppApi.loadModel(instanceName, modelName)
// Wait a bit for the backend to process the load
await new Promise(resolve => setTimeout(resolve, 500))
// Refresh models list after loading
await fetchModels()
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to load model')
} finally {
setLoadingModels((prev) => {
const newSet = new Set(prev)
newSet.delete(modelName)
return newSet
})
}
}
// Unload model
const unloadModel = async (modelName: string) => {
setLoadingModels((prev) => new Set(prev).add(modelName))
setError(null)
try {
await llamaCppApi.unloadModel(instanceName, modelName)
// Wait a bit for the backend to process the unload
await new Promise(resolve => setTimeout(resolve, 500))
// Refresh models list after unloading
await fetchModels()
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to unload model')
} finally {
setLoadingModels((prev) => {
const newSet = new Set(prev)
newSet.delete(modelName)
return newSet
})
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-4xl max-w-[calc(100%-2rem)] max-h-[80vh] flex flex-col">
<DialogHeader>
<div className="flex items-center justify-between">
<div>
<DialogTitle className="flex items-center gap-2">
Models: {instanceName}
<Badge variant={isRunning ? 'default' : 'secondary'}>
{isRunning ? 'Running' : 'Stopped'}
</Badge>
</DialogTitle>
<DialogDescription>
Manage models in this llama.cpp instance
</DialogDescription>
</div>
<Button
variant="outline"
size="sm"
onClick={() => void fetchModels()}
disabled={loading || !isRunning}
>
{loading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<RefreshCw className="h-4 w-4" />
)}
</Button>
</div>
</DialogHeader>
{/* Error Display */}
{error && (
<div className="flex items-center gap-2 p-3 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive" />
<span className="text-sm text-destructive">{error}</span>
</div>
)}
{/* Models Table */}
<div className="flex-1 flex flex-col min-h-0 overflow-auto">
{!isRunning ? (
<div className="flex items-center justify-center h-full text-muted-foreground">
Instance is not running
</div>
) : loading && models.length === 0 ? (
<div className="flex items-center justify-center h-full">
<Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
<span className="ml-2 text-muted-foreground">
Loading models...
</span>
</div>
) : models.length === 0 ? (
<div className="flex items-center justify-center h-full text-muted-foreground">
No models found
</div>
) : (
<Table>
<TableHeader>
<TableRow>
<TableHead>Model</TableHead>
<TableHead>Status</TableHead>
<TableHead className="text-right">Actions</TableHead>
</TableRow>
</TableHeader>
<TableBody>
{models.map((model) => {
const isLoading = loadingModels.has(model.id)
const isModelLoading = model.status.value === 'loading'
return (
<TableRow key={model.id}>
<TableCell className="font-mono text-sm">
{model.id}
</TableCell>
<TableCell>
<div className="flex items-center gap-2">
<StatusIcon status={model.status.value} />
<span className="text-sm capitalize">
{model.status.value}
</span>
</div>
</TableCell>
<TableCell className="text-right">
{model.status.value === 'loaded' ? (
<Button
size="sm"
variant="outline"
onClick={() => { void unloadModel(model.id) }}
disabled={!isRunning || isLoading || isModelLoading}
>
{isLoading ? (
<>
<Loader2 className="h-3 w-3 animate-spin mr-1" />
Unloading...
</>
) : (
'Unload'
)}
</Button>
) : model.status.value === 'unloaded' ? (
<Button
size="sm"
variant="default"
onClick={() => { void loadModel(model.id) }}
disabled={!isRunning || isLoading || isModelLoading}
>
{isLoading ? (
<>
<Loader2 className="h-3 w-3 animate-spin mr-1" />
Loading...
</>
) : (
'Load'
)}
</Button>
) : (
<Button size="sm" variant="ghost" disabled>
Loading...
</Button>
)}
</TableCell>
</TableRow>
)
})}
</TableBody>
</Table>
)}
</div>
{/* Auto-refresh indicator - only shown when models are loading */}
{isRunning && models.some(m => m.status.value === 'loading') && (
<div className="flex items-center gap-2 text-sm text-muted-foreground">
<div className="w-2 h-2 bg-yellow-500 rounded-full animate-pulse"></div>
Auto-refreshing while models are loading
</div>
)}
</DialogContent>
</Dialog>
)
}
export default ModelsDialog


@@ -4,8 +4,7 @@ import userEvent from '@testing-library/user-event'
 import InstanceList from '@/components/InstanceList'
 import { InstancesProvider } from '@/contexts/InstancesContext'
 import { instancesApi } from '@/lib/api'
-import type { Instance } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import { BackendType, type Instance } from '@/types/instance'
 import { AuthProvider } from '@/contexts/AuthContext'

 // Mock the API


@@ -116,7 +116,7 @@ function CreateApiKeyDialog({ open, onOpenChange, onKeyCreated }: CreateApiKeyDi
 	<DialogHeader>
 		<DialogTitle>Create API Key</DialogTitle>
 	</DialogHeader>
-	<form onSubmit={handleSubmit} className="space-y-4">
+	<form onSubmit={(e) => void handleSubmit(e)} className="space-y-4">
 		{error && (
 			<Alert variant="destructive">
 				<AlertDescription>{error}</AlertDescription>


@@ -59,7 +59,7 @@ const KeyValueInput: React.FC<KeyValueInputProps> = ({
 		// Reset to single empty row if value is explicitly undefined/null
 		setPairs([{ key: '', value: '' }])
 	}
+	// eslint-disable-next-line react-hooks/exhaustive-deps
 }, [value])

 // Update parent component when pairs change


@@ -5,7 +5,7 @@ import NumberInput from '@/components/form/NumberInput'

 interface AutoRestartConfigurationProps {
 	formData: CreateInstanceOptions
-	onChange: (key: keyof CreateInstanceOptions, value: any) => void
+	onChange: <K extends keyof CreateInstanceOptions>(key: K, value: CreateInstanceOptions[K]) => void
 }

 const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({


@@ -3,9 +3,11 @@ import type { CreateInstanceOptions } from '@/types/instance'
 import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
 import BackendFormField from '@/components/BackendFormField'

+type BackendFieldValue = string | number | boolean | string[] | Record<string, string> | undefined
+
 interface BackendConfigurationProps {
 	formData: CreateInstanceOptions
-	onBackendFieldChange: (key: string, value: any) => void
+	onBackendFieldChange: (key: string, value: BackendFieldValue) => void
 	showAdvanced?: boolean
 }

@@ -26,7 +28,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
 	<BackendFormField
 		key={fieldKey}
 		fieldKey={fieldKey}
-		value={(formData.backend_options as any)?.[fieldKey]}
+		value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.[fieldKey]}
 		onChange={onBackendFieldChange}
 	/>
 ))}
@@ -41,7 +43,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
 	<BackendFormField
 		key={fieldKey}
 		fieldKey={fieldKey}
-		value={(formData.backend_options as any)?.[fieldKey]}
+		value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.[fieldKey]}
 		onChange={onBackendFieldChange}
 	/>
 ))}
@@ -53,7 +55,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
 	<BackendFormField
 		key="extra_args"
 		fieldKey="extra_args"
-		value={(formData.backend_options as any)?.extra_args}
+		value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.extra_args}
 		onChange={onBackendFieldChange}
 	/>
 </div>


@@ -4,7 +4,7 @@ import { Badge } from "@/components/ui/badge";
 import { Alert, AlertDescription } from "@/components/ui/alert";
 import { Trash2, Copy, Check, X, ChevronDown, ChevronRight } from "lucide-react";
 import { apiKeysApi } from "@/lib/api";
-import { ApiKey, KeyPermissionResponse, PermissionMode } from "@/types/apiKey";
+import { type ApiKey, type KeyPermissionResponse, PermissionMode } from "@/types/apiKey";
 import CreateApiKeyDialog from "@/components/apikeys/CreateApiKeyDialog";
 import { format, formatDistanceToNow } from "date-fns";

@@ -20,7 +20,7 @@ function ApiKeysSection() {
 	const [loadingPermissions, setLoadingPermissions] = useState<Record<number, boolean>>({});

 	useEffect(() => {
-		fetchKeys();
+		void fetchKeys();
 	}, []);

 	const fetchKeys = async () => {
@@ -52,7 +52,7 @@ function ApiKeysSection() {
 	const handleKeyCreated = (plainTextKey: string) => {
 		setNewKeyPlainText(plainTextKey);
-		fetchKeys();
+		void fetchKeys();
 		setCreateDialogOpen(false);
 	};
@@ -75,7 +75,7 @@ function ApiKeysSection() {
 	try {
 		await apiKeysApi.delete(id);
-		fetchKeys();
+		void fetchKeys();
 	} catch (err) {
 		alert(err instanceof Error ? err.message : "Failed to delete API key");
 	}
@@ -87,7 +87,7 @@ function ApiKeysSection() {
 	} else {
 		setExpandedRowId(key.id);
 		if (key.permission_mode === PermissionMode.PerInstance) {
-			fetchPermissions(key.id);
+			void fetchPermissions(key.id);
 		}
 	}
 };
@@ -136,7 +136,7 @@ function ApiKeysSection() {
 	<code className="flex-1 p-3 bg-white dark:bg-gray-900 border border-green-300 dark:border-green-800 rounded font-mono text-sm break-all">
 		{newKeyPlainText}
 	</code>
-	<Button onClick={handleCopyKey} variant="outline" size="sm">
+	<Button onClick={() => void handleCopyKey()} variant="outline" size="sm">
 		{copiedKey ? <Check className="h-4 w-4" /> : <Copy className="h-4 w-4" />}
 	</Button>
 </div>
@@ -216,7 +216,7 @@ function ApiKeysSection() {
 	size="icon"
 	onClick={(e) => {
 		e.stopPropagation();
-		handleDeleteKey(key.id, key.name);
+		void handleDeleteKey(key.id, key.name);
 	}}
 	title="Delete key"
 >


@@ -1,4 +1,4 @@
-import { Dialog, DialogContent, DialogHeader, DialogTitle } from "@/components/ui/dialog";
+import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog";
 import ApiKeysSection from "./ApiKeysSection";

 interface SettingsDialogProps {
@@ -12,6 +12,9 @@ function SettingsDialog({ open, onOpenChange }: SettingsDialogProps) {
 	<DialogContent className="sm:max-w-5xl max-h-[90vh] overflow-y-auto">
 		<DialogHeader>
 			<DialogTitle>Settings</DialogTitle>
+			<DialogDescription>
+				Manage your application settings and API keys.
+			</DialogDescription>
 		</DialogHeader>
 		<ApiKeysSection />
 	</DialogContent>


@@ -4,13 +4,13 @@ import { cva, type VariantProps } from "class-variance-authority"
 import { cn } from "@/lib/utils"

 const alertVariants = cva(
-  "relative w-full rounded-lg border p-4",
+  "relative w-full rounded-lg border px-4 py-3 text-sm grid has-[>svg]:grid-cols-[calc(var(--spacing)*4)_1fr] grid-cols-[0_1fr] has-[>svg]:gap-x-3 gap-y-0.5 items-start [&>svg]:size-4 [&>svg]:translate-y-0.5 [&>svg]:text-current",
   {
     variants: {
       variant: {
-        default: "bg-background text-foreground",
+        default: "bg-card text-card-foreground",
         destructive:
-          "border-destructive/50 text-destructive dark:border-destructive [&>svg]:text-destructive",
+          "text-destructive bg-card [&>svg]:text-current *:data-[slot=alert-description]:text-destructive/90",
       },
     },
     defaultVariants: {
@@ -19,41 +19,48 @@ const alertVariants = cva(
   }
 )

-const Alert = React.forwardRef<
-  HTMLDivElement,
-  React.HTMLAttributes<HTMLDivElement> & VariantProps<typeof alertVariants>
->(({ className, variant, ...props }, ref) => (
-  <div
-    ref={ref}
-    role="alert"
-    className={cn(alertVariants({ variant }), className)}
-    {...props}
-  />
-))
-Alert.displayName = "Alert"
+function Alert({
+  className,
+  variant,
+  ...props
+}: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>) {
+  return (
+    <div
+      data-slot="alert"
+      role="alert"
+      className={cn(alertVariants({ variant }), className)}
+      {...props}
+    />
+  )
+}

-const AlertTitle = React.forwardRef<
-  HTMLParagraphElement,
-  React.HTMLAttributes<HTMLHeadingElement>
->(({ className, ...props }, ref) => (
-  <h5
-    ref={ref}
-    className={cn("mb-1 font-medium leading-none tracking-tight", className)}
-    {...props}
-  />
-))
-AlertTitle.displayName = "AlertTitle"
+function AlertTitle({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="alert-title"
+      className={cn(
+        "col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",
+        className
+      )}
+      {...props}
+    />
+  )
+}

-const AlertDescription = React.forwardRef<
-  HTMLParagraphElement,
-  React.HTMLAttributes<HTMLParagraphElement>
->(({ className, ...props }, ref) => (
-  <div
-    ref={ref}
-    className={cn("text-sm [&_p]:leading-relaxed", className)}
-    {...props}
-  />
-))
-AlertDescription.displayName = "AlertDescription"
+function AlertDescription({
+  className,
+  ...props
+}: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="alert-description"
+      className={cn(
+        "text-muted-foreground col-start-2 grid justify-items-start gap-1 text-sm [&_p]:leading-relaxed",
+        className
+      )}
+      {...props}
+    />
+  )
+}

 export { Alert, AlertTitle, AlertDescription }


@@ -5,7 +5,7 @@ import { cva, type VariantProps } from "class-variance-authority"
 import { cn } from "@/lib/utils"

 const badgeVariants = cva(
-  "inline-flex items-center justify-center rounded-md border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden",
+  "inline-flex items-center justify-center rounded-full border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden",
   {
     variants: {
       variant: {


@@ -9,14 +9,13 @@ const buttonVariants = cva(
   {
     variants: {
       variant: {
-        default:
-          "bg-primary text-primary-foreground shadow-xs hover:bg-primary/90",
+        default: "bg-primary text-primary-foreground hover:bg-primary/90",
         destructive:
-          "bg-destructive text-white shadow-xs hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
+          "bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
         outline:
           "border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
         secondary:
-          "bg-secondary text-secondary-foreground shadow-xs hover:bg-secondary/80",
+          "bg-secondary text-secondary-foreground hover:bg-secondary/80",
         ghost:
           "hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
         link: "text-primary underline-offset-4 hover:underline",
@@ -26,6 +25,8 @@ const buttonVariants = cva(
         sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
         lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
         icon: "size-9",
+        "icon-sm": "size-8",
+        "icon-lg": "size-10",
       },
     },
     defaultVariants: {


@@ -20,7 +20,7 @@ function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
 	<div
 		data-slot="card-header"
 		className={cn(
-			"@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-1.5 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
+			"@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-2 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
 			className
 		)}
 		{...props}


@@ -19,7 +19,7 @@ function Checkbox({
 	>
 		<CheckboxPrimitive.Indicator
 			data-slot="checkbox-indicator"
-			className="flex items-center justify-center text-current transition-none"
+			className="grid place-content-center text-current transition-none"
 		>
 			<CheckIcon className="size-3.5" />
 		</CheckboxPrimitive.Indicator>


@@ -8,7 +8,7 @@ function Input({ className, type, ...props }: React.ComponentProps<"input">) {
 	type={type}
 	data-slot="input"
 	className={cn(
-		"file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
+		"file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
 		"focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]",
 		"aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
 		className


@@ -1,42 +1,43 @@
 import * as React from "react"
 import * as RadioGroupPrimitive from "@radix-ui/react-radio-group"
-import { Circle } from "lucide-react"
+import { CircleIcon } from "lucide-react"

 import { cn } from "@/lib/utils"

-const RadioGroup = React.forwardRef<
-  React.ElementRef<typeof RadioGroupPrimitive.Root>,
-  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Root>
->(({ className, ...props }, ref) => {
-  return (
-    <RadioGroupPrimitive.Root
-      className={cn("grid gap-2", className)}
-      {...props}
-      ref={ref}
-    />
-  )
-})
-RadioGroup.displayName = RadioGroupPrimitive.Root.displayName
+function RadioGroup({
+  className,
+  ...props
+}: React.ComponentProps<typeof RadioGroupPrimitive.Root>) {
+  return (
+    <RadioGroupPrimitive.Root
+      data-slot="radio-group"
+      className={cn("grid gap-3", className)}
+      {...props}
+    />
+  )
+}

-const RadioGroupItem = React.forwardRef<
-  React.ElementRef<typeof RadioGroupPrimitive.Item>,
-  React.ComponentPropsWithoutRef<typeof RadioGroupPrimitive.Item>
->(({ className, ...props }, ref) => {
-  return (
-    <RadioGroupPrimitive.Item
-      ref={ref}
-      className={cn(
-        "aspect-square h-4 w-4 rounded-full border border-primary text-primary ring-offset-background focus:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
-        className
-      )}
-      {...props}
-    >
-      <RadioGroupPrimitive.Indicator className="flex items-center justify-center">
-        <Circle className="h-2.5 w-2.5 fill-current text-current" />
-      </RadioGroupPrimitive.Indicator>
-    </RadioGroupPrimitive.Item>
-  )
-})
-RadioGroupItem.displayName = RadioGroupPrimitive.Item.displayName
+function RadioGroupItem({
+  className,
+  ...props
+}: React.ComponentProps<typeof RadioGroupPrimitive.Item>) {
+  return (
+    <RadioGroupPrimitive.Item
+      data-slot="radio-group-item"
+      className={cn(
+        "border-input text-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 aspect-square size-4 shrink-0 rounded-full border shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
+        className
+      )}
+      {...props}
+    >
+      <RadioGroupPrimitive.Indicator
+        data-slot="radio-group-indicator"
+        className="relative flex items-center justify-center"
+      >
+        <CircleIcon className="fill-primary absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2" />
+      </RadioGroupPrimitive.Indicator>
+    </RadioGroupPrimitive.Item>
+  )
+}

 export { RadioGroup, RadioGroupItem }


@@ -0,0 +1,117 @@
import * as React from "react"
import { cn } from "@/lib/utils"
const Table = React.forwardRef<
HTMLTableElement,
React.HTMLAttributes<HTMLTableElement>
>(({ className, ...props }, ref) => (
<div className="relative w-full overflow-auto">
<table
ref={ref}
className={cn("w-full caption-bottom text-sm", className)}
{...props}
/>
</div>
))
Table.displayName = "Table"
const TableHeader = React.forwardRef<
HTMLTableSectionElement,
React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
<thead ref={ref} className={cn("[&_tr]:border-b", className)} {...props} />
))
TableHeader.displayName = "TableHeader"
const TableBody = React.forwardRef<
HTMLTableSectionElement,
React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
<tbody
ref={ref}
className={cn("[&_tr:last-child]:border-0", className)}
{...props}
/>
))
TableBody.displayName = "TableBody"
const TableFooter = React.forwardRef<
HTMLTableSectionElement,
React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
<tfoot
ref={ref}
className={cn(
"border-t bg-muted/50 font-medium [&>tr]:last:border-b-0",
className
)}
{...props}
/>
))
TableFooter.displayName = "TableFooter"
const TableRow = React.forwardRef<
HTMLTableRowElement,
React.HTMLAttributes<HTMLTableRowElement>
>(({ className, ...props }, ref) => (
<tr
ref={ref}
className={cn(
"border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",
className
)}
{...props}
/>
))
TableRow.displayName = "TableRow"
const TableHead = React.forwardRef<
HTMLTableCellElement,
React.ThHTMLAttributes<HTMLTableCellElement>
>(({ className, ...props }, ref) => (
<th
ref={ref}
className={cn(
"h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0",
className
)}
{...props}
/>
))
TableHead.displayName = "TableHead"
const TableCell = React.forwardRef<
HTMLTableCellElement,
React.TdHTMLAttributes<HTMLTableCellElement>
>(({ className, ...props }, ref) => (
<td
ref={ref}
className={cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)}
{...props}
/>
))
TableCell.displayName = "TableCell"
const TableCaption = React.forwardRef<
HTMLTableCaptionElement,
React.HTMLAttributes<HTMLTableCaptionElement>
>(({ className, ...props }, ref) => (
<caption
ref={ref}
className={cn("mt-4 text-sm text-muted-foreground", className)}
{...props}
/>
))
TableCaption.displayName = "TableCaption"
export {
Table,
TableHeader,
TableBody,
TableFooter,
TableHead,
TableRow,
TableCell,
TableCaption,
}
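A short composition sketch for the new table primitives; the column names and row data below are made up for illustration, and a real caller would map over API results instead.

import {
  Table,
  TableBody,
  TableCell,
  TableHead,
  TableHeader,
  TableRow,
} from "@/components/ui/table"

// Hypothetical rows for illustration only.
const models = [
  { id: "model-a.gguf", status: "loaded" },
  { id: "model-b.gguf", status: "unloaded" },
]

export function ModelsTable() {
  return (
    <Table>
      <TableHeader>
        <TableRow>
          <TableHead>Model</TableHead>
          <TableHead>Status</TableHead>
        </TableRow>
      </TableHeader>
      <TableBody>
        {models.map((m) => (
          <TableRow key={m.id}>
            <TableCell>{m.id}</TableCell>
            <TableCell>{m.status}</TableCell>
          </TableRow>
        ))}
      </TableBody>
    </Table>
  )
}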

View File

@@ -3,8 +3,7 @@ import { render, screen, waitFor } from "@testing-library/react";
 import type { ReactNode } from "react";
 import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
 import { instancesApi } from "@/lib/api";
-import type { Instance } from "@/types/instance";
-import { BackendType } from "@/types/instance";
+import { BackendType, type Instance } from "@/types/instance";
 import { AuthProvider } from "../AuthContext";

 // Mock the API module
@@ -71,37 +70,37 @@ function TestComponent() {
       {/* Action buttons for testing with specific instances */}
       <button
-        onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
+        onClick={() => void createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
         data-testid="create-instance"
       >
         Create Instance
       </button>
       <button
-        onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
+        onClick={() => void updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
         data-testid="update-instance"
       >
         Update Instance
       </button>
       <button
-        onClick={() => startInstance("instance2")}
+        onClick={() => void startInstance("instance2")}
         data-testid="start-instance"
       >
         Start Instance2
       </button>
       <button
-        onClick={() => stopInstance("instance1")}
+        onClick={() => void stopInstance("instance1")}
         data-testid="stop-instance"
       >
         Stop Instance1
       </button>
       <button
-        onClick={() => restartInstance("instance1")}
+        onClick={() => void restartInstance("instance1")}
         data-testid="restart-instance"
       >
         Restart Instance1
       </button>
       <button
-        onClick={() => deleteInstance("instance2")}
+        onClick={() => void deleteInstance("instance2")}
         data-testid="delete-instance"
       >
         Delete Instance2
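The onClick changes above prepend `void` to the async context actions. This marks each returned promise as intentionally unawaited, which satisfies lint rules such as typescript-eslint's no-misused-promises. The pattern in isolation, with a placeholder action:

// Placeholder async action standing in for the context methods.
async function startInstance(name: string): Promise<void> {
  // ... call the API
}

// A synchronous event handler cannot await; `void` discards the
// promise explicitly instead of leaving it floating.
const handleClick = () => void startInstance("instance2")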

View File

@@ -205,3 +205,53 @@ export const apiKeysApi = {
   getPermissions: (id: number) =>
     apiCall<KeyPermissionResponse[]>(`/auth/keys/${id}/permissions`),
 };
+
+// Llama.cpp model management types
+export interface Model {
+  id: string;
+  object: string;
+  owned_by: string;
+  created: number;
+  in_cache: boolean;
+  path: string;
+  status: {
+    value: string; // "loaded" | "loading" | "unloaded"
+    args: string[];
+  };
+}
+
+export interface ModelsListResponse {
+  object: string;
+  data: Model[];
+}
+
+// Llama.cpp model management API functions
+export const llamaCppApi = {
+  // GET /llama-cpp/{name}/models
+  getModels: async (instanceName: string): Promise<Model[]> => {
+    const response = await apiCall<ModelsListResponse>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models`
+    );
+    return response.data;
+  },
+
+  // POST /llama-cpp/{name}/models/{model}/load
+  loadModel: (instanceName: string, modelName: string) =>
+    apiCall<{ success: boolean }>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models/${encodeURIComponent(modelName)}/load`,
+      {
+        method: "POST",
+        body: JSON.stringify({ model: modelName }),
+      }
+    ),
+
+  // POST /llama-cpp/{name}/models/{model}/unload
+  unloadModel: (instanceName: string, modelName: string) =>
+    apiCall<{ success: boolean }>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models/${encodeURIComponent(modelName)}/unload`,
+      {
+        method: "POST",
+        body: JSON.stringify({ model: modelName }),
+      }
+    ),
+};
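A call-site sketch for the new client, using only the functions defined above; the instance and model names are placeholders, and error handling is left to the caller.

import { llamaCppApi } from "@/lib/api"

// Hypothetical helper: load a model only if it is not already loaded.
async function ensureLoaded(instanceName: string, modelId: string): Promise<void> {
  const models = await llamaCppApi.getModels(instanceName)
  const target = models.find((m) => m.id === modelId)
  if (target && target.status.value !== "loaded") {
    await llamaCppApi.loadModel(instanceName, modelId)
  }
}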

View File

@@ -156,11 +156,14 @@ class HealthService {
       this.callbacks.set(instanceName, new Set())
     }
-    this.callbacks.get(instanceName)!.add(callback)
+    const callbacks = this.callbacks.get(instanceName)
+    if (callbacks) {
+      callbacks.add(callback)

-    // Start health checking if this is the first subscriber
-    if (this.callbacks.get(instanceName)!.size === 1) {
-      this.startHealthCheck(instanceName)
+      // Start health checking if this is the first subscriber
+      if (callbacks.size === 1) {
+        this.startHealthCheck(instanceName)
+      }
     }

     // Return unsubscribe function
@@ -214,22 +217,24 @@ class HealthService {
     }

     // Start new interval with appropriate timing
-    const interval = setInterval(async () => {
-      try {
-        const health = await this.performHealthCheck(instanceName)
-        this.notifyCallbacks(instanceName, health)
+    const interval = setInterval(() => {
+      void (async () => {
+        try {
+          const health = await this.performHealthCheck(instanceName)
+          this.notifyCallbacks(instanceName, health)

-        // Check if state changed and adjust interval
-        const previousState = this.lastHealthState.get(instanceName)
-        this.lastHealthState.set(instanceName, health.state)
+          // Check if state changed and adjust interval
+          const previousState = this.lastHealthState.get(instanceName)
+          this.lastHealthState.set(instanceName, health.state)

-        if (previousState !== health.state) {
-          this.adjustPollingInterval(instanceName, health.state)
-        }
-      } catch (error) {
-        console.error(`Health check failed for ${instanceName}:`, error)
-        // Continue polling even on error
-      }
+          if (previousState !== health.state) {
+            this.adjustPollingInterval(instanceName, health.state)
+          }
+        } catch (error) {
+          console.error(`Health check failed for ${instanceName}:`, error)
+          // Continue polling even on error
+        }
+      })()
     }, pollInterval)

     this.intervals.set(instanceName, interval)
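This rewrite avoids handing an async function directly to setInterval, whose return value is ignored, so each tick's rejection would otherwise be a floating promise. Instead, a synchronous callback wraps the async work in an explicitly discarded IIFE. The shape of the pattern, with placeholder work:

// Sketch: a sync setInterval callback that discards the promise from
// an async IIFE; errors are caught inside so polling never stops.
async function doWork(): Promise<void> {
  // placeholder for the real periodic check
}

const interval = setInterval(() => {
  void (async () => {
    try {
      await doWork()
    } catch (error) {
      console.error("periodic task failed:", error)
    }
  })()
}, 5000)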

View File

@@ -6,7 +6,10 @@ import './index.css'
 import { AuthProvider } from './contexts/AuthContext'
 import { ConfigProvider } from './contexts/ConfigContext'

-ReactDOM.createRoot(document.getElementById('root')!).render(
+const rootElement = document.getElementById('root')
+if (!rootElement) throw new Error('Failed to find the root element')
+
+ReactDOM.createRoot(rootElement).render(
   <React.StrictMode>
     <AuthProvider>
       <ConfigProvider>

View File

@@ -1,10 +1,44 @@
 import '@testing-library/jest-dom'
-import { afterEach, vi } from 'vitest'
+import { afterEach, beforeEach } from 'vitest'

-// Mock fetch globally since your app uses fetch
-global.fetch = vi.fn()
+// Create a working localStorage implementation for tests
+// This ensures localStorage works in both CLI and VSCode test runner
+class LocalStorageMock implements Storage {
+  private store: Map<string, string> = new Map()
+
+  get length(): number {
+    return this.store.size
+  }
+
+  clear(): void {
+    this.store.clear()
+  }
+
+  getItem(key: string): string | null {
+    return this.store.get(key) ?? null
+  }
+
+  key(index: number): string | null {
+    return Array.from(this.store.keys())[index] ?? null
+  }
+
+  removeItem(key: string): void {
+    this.store.delete(key)
+  }
+
+  setItem(key: string, value: string): void {
+    this.store.set(key, value)
+  }
+}
+
+// Replace global localStorage
+global.localStorage = new LocalStorageMock()
+
+// Clean up before each test
+beforeEach(() => {
+  localStorage.clear()
+})
+
 // Clean up after each test
 afterEach(() => {
-  vi.clearAllMocks()
+  localStorage.clear()
 })
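With the mock installed, storage-backed code can be exercised directly instead of through vi.fn stubs; a small illustrative test, where the key name is made up:

import { expect, test } from 'vitest'

test('localStorage round-trips a value', () => {
  localStorage.setItem('example_key', 'abc123') // hypothetical key
  expect(localStorage.getItem('example_key')).toBe('abc123')
  localStorage.clear()
  expect(localStorage.getItem('example_key')).toBeNull()
})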

View File

@@ -30,7 +30,6 @@ export interface ServerConfig {
 export interface InstancesConfig {
   port_range: [number, number]
-  configs_dir: string
   logs_dir: string
   auto_create_dirs: boolean
   max_instances: number
@@ -53,7 +52,6 @@ export interface DatabaseConfig {
 export interface AuthConfig {
   require_inference_auth: boolean
-  inference_keys: string[] // Will be empty in sanitized response
   require_management_auth: boolean
   management_keys: string[] // Will be empty in sanitized response
 }

View File

@@ -4,7 +4,8 @@
"skipLibCheck": true, "skipLibCheck": true,
"module": "ESNext", "module": "ESNext",
"moduleResolution": "bundler", "moduleResolution": "bundler",
"allowSyntheticDefaultImports": true "allowSyntheticDefaultImports": true,
"types": ["node"]
}, },
"include": ["vite.config.ts"] "include": ["vite.config.ts"]
} }
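Adding "types": ["node"] brings @types/node into scope for vite.config.ts, so references to Node globals type-check there. A sketch of the kind of config this enables; the alias target is illustrative:

// vite.config.ts -- `path` and `process` type-check only with Node typings.
import path from 'node:path'
import { defineConfig } from 'vite'

export default defineConfig({
  resolve: {
    alias: { '@': path.resolve(process.cwd(), 'src') },
  },
})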