Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-06 17:14:28 +00:00)

Merge pull request #28 from lordmathis/docs/user-guide

docs: Add mkdocs based user documentation

65	.github/workflows/docs.yml	(vendored, new file)
@@ -0,0 +1,65 @@
name: Build and Deploy Documentation

on:
  push:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'
      - '.github/workflows/docs.yml'
  pull_request:
    branches: [ main ]
    paths:
      - 'docs/**'
      - 'mkdocs.yml'
      - 'docs-requirements.txt'

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Needed for git-revision-date-localized plugin

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install -r docs-requirements.txt

      - name: Build documentation
        run: |
          mkdocs build --strict

      - name: Upload documentation artifact
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-pages-artifact@v3
        with:
          path: ./site

  deploy:
    if: github.ref == 'refs/heads/main'
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
@@ -129,6 +129,50 @@ Use this format for pull request titles:
- Use meaningful component and variable names
- Prefer functional components over class components

## Documentation Development

This project uses MkDocs for documentation. When working on documentation:

### Setup Documentation Environment

```bash
# Install documentation dependencies
pip install -r docs-requirements.txt
```

### Development Workflow

```bash
# Serve documentation locally for development
mkdocs serve
```

The documentation will be available at http://localhost:8000

```bash
# Build static documentation site
mkdocs build
```

The built site will be in the `site/` directory.

### Documentation Structure

- `docs/` - Documentation content (Markdown files)
- `mkdocs.yml` - MkDocs configuration
- `docs-requirements.txt` - Python dependencies for documentation

### Adding New Documentation

When adding new documentation:

1. Create Markdown files in the appropriate `docs/` subdirectory
2. Update the navigation in `mkdocs.yml` (see the sketch below)
3. Test locally with `mkdocs serve`
4. Submit a pull request
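For step 2, the nav entry is a plain MkDocs navigation item. A sketch of what registering a hypothetical new page could look like (the `user-guide/new-page.md` path is only an example, not an existing file):

```yaml
# mkdocs.yml (excerpt) - register the new page under the appropriate section
nav:
  - User Guide:
      - Managing Instances: user-guide/managing-instances.md
      - New Page: user-guide/new-page.md  # hypothetical example page
```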
### Documentation Deployment

Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.

## Getting Help

- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
105	README.md
@@ -15,7 +15,7 @@
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts



**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
@@ -123,7 +123,6 @@ instances:
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Idle instance timeout check in minutes

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
@@ -131,107 +130,7 @@ auth:
  management_keys: [] # Keys for management endpoints
```

<details><summary><strong>Full Configuration Guide</strong></summary>
For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).

llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:

```
Defaults < Configuration file < Environment variables
```

### Configuration Files

#### Configuration File Locations

Configuration files are searched in the following locations (in order of precedence):

**Linux/macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`

**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`

You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.

### Configuration Options

#### Server Configuration

```yaml
server:
  host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
  port: 8080 # Server port to bind to (default: 8080)
  allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
  enable_swagger: false # Enable Swagger UI (default: false)
```

**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

#### Instance Configuration

```yaml
instances:
  port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
  data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
  configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
  logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
  auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
  max_instances: -1 # Maximum instances (-1 = unlimited)
  max_running_instances: -1 # Maximum running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: "llama-server" # Path to llama-server executable
  default_auto_restart: true # Default auto-restart setting
  default_max_restarts: 3 # Default maximum restart attempts
  default_restart_delay: 5 # Default restart delay in seconds
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Default instance timeout check interval in minutes
```

**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes

#### Authentication Configuration

```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```

**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys

</details>

## License
@@ -884,6 +884,10 @@ const docTemplate = `{
                "host": {
                    "type": "string"
                },
                "idle_timeout": {
                    "description": "Idle timeout",
                    "type": "integer"
                },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -1018,6 +1022,10 @@ const docTemplate = `{
                "numa": {
                    "type": "string"
                },
                "on_demand_start": {
                    "description": "On demand start",
                    "type": "boolean"
                },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -1078,8 +1086,7 @@ const docTemplate = `{
                "reranking": {
                    "type": "boolean"
                },
                "restart_delay_seconds": {
                "restart_delay": {
                    "description": "RestartDelay duration in seconds",
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1194,6 +1201,19 @@ const docTemplate = `{
                }
            }
        },
        "instance.InstanceStatus": {
            "type": "integer",
            "enum": [
                0,
                1,
                2
            ],
            "x-enum-varnames": [
                "Stopped",
                "Running",
                "Failed"
            ]
        },
        "instance.Process": {
            "type": "object",
            "properties": {
@@ -1204,9 +1224,13 @@ const docTemplate = `{
                "name": {
                    "type": "string"
                },
                "running": {
                "status": {
                    "description": "Status",
                    "type": "boolean"
                    "allOf": [
                        {
                            "$ref": "#/definitions/instance.InstanceStatus"
                        }
                    ]
                }
            }
        },
@@ -877,6 +877,10 @@
                "host": {
                    "type": "string"
                },
                "idle_timeout": {
                    "description": "Idle timeout",
                    "type": "integer"
                },
                "ignore_eos": {
                    "type": "boolean"
                },
@@ -1011,6 +1015,10 @@
                "numa": {
                    "type": "string"
                },
                "on_demand_start": {
                    "description": "On demand start",
                    "type": "boolean"
                },
                "override_kv": {
                    "type": "array",
                    "items": {
@@ -1071,8 +1079,7 @@
                "reranking": {
                    "type": "boolean"
                },
                "restart_delay_seconds": {
                "restart_delay": {
                    "description": "RestartDelay duration in seconds",
                    "type": "integer"
                },
                "rope_freq_base": {
@@ -1187,6 +1194,19 @@
                }
            }
        },
        "instance.InstanceStatus": {
            "type": "integer",
            "enum": [
                0,
                1,
                2
            ],
            "x-enum-varnames": [
                "Stopped",
                "Running",
                "Failed"
            ]
        },
        "instance.Process": {
            "type": "object",
            "properties": {
@@ -1197,9 +1217,13 @@
                "name": {
                    "type": "string"
                },
                "running": {
                "status": {
                    "description": "Status",
                    "type": "boolean"
                    "allOf": [
                        {
                            "$ref": "#/definitions/instance.InstanceStatus"
                        }
                    ]
                }
            }
        },
@@ -136,6 +136,9 @@ definitions:
        type: string
      host:
        type: string
      idle_timeout:
        description: Idle timeout
        type: integer
      ignore_eos:
        type: boolean
      jinja:
@@ -226,6 +229,9 @@ definitions:
        type: boolean
      numa:
        type: string
      on_demand_start:
        description: On demand start
        type: boolean
      override_kv:
        items:
          type: string
@@ -266,8 +272,7 @@ definitions:
        type: number
      reranking:
        type: boolean
      restart_delay_seconds:
      restart_delay:
        description: RestartDelay duration in seconds
        type: integer
      rope_freq_base:
        type: number
@@ -344,6 +349,16 @@ definitions:
      yarn_orig_ctx:
        type: integer
    type: object
  instance.InstanceStatus:
    enum:
      - 0
      - 1
      - 2
    type: integer
    x-enum-varnames:
      - Stopped
      - Running
      - Failed
  instance.Process:
    properties:
      created:
@@ -351,9 +366,10 @@ definitions:
        type: integer
      name:
        type: string
      running:
      status:
        allOf:
          - $ref: '#/definitions/instance.InstanceStatus'
        description: Status
        type: boolean
    type: object
  server.OpenAIInstance:
    properties:
4	docs-requirements.txt	(new file)
@@ -0,0 +1,4 @@
mkdocs-material==9.5.3
mkdocs==1.5.3
pymdown-extensions==10.7
mkdocs-git-revision-date-localized-plugin==1.2.4
150	docs/getting-started/configuration.md	(new file)
@@ -0,0 +1,150 @@
# Configuration

llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:

```
Defaults < Configuration file < Environment variables
```

llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
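As a quick illustration of the precedence order, an environment variable overrides the same setting from a configuration file at startup (the port values here are arbitrary examples):

```bash
# config.yaml may set port: 8080; the environment variable wins for this run
LLAMACTL_PORT=9090 llamactl
```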
## Default Configuration

Here's the default configuration with all available options:

```yaml
server:
  host: "0.0.0.0" # Server host to bind to
  port: 8080 # Server port to bind to
  allowed_origins: ["*"] # Allowed CORS origins (default: all)
  enable_swagger: false # Enable Swagger UI for API docs

instances:
  port_range: [8000, 9000] # Port range for instances
  data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
  logs_dir: ~/.local/share/llamactl/logs # Logs directory
  auto_create_dirs: true # Auto-create data/config/logs dirs if missing
  max_instances: -1 # Max instances (-1 = unlimited)
  max_running_instances: -1 # Max running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: llama-server # Path to llama-server executable
  default_auto_restart: true # Auto-restart new instances by default
  default_max_restarts: 3 # Max restarts for new instances
  default_restart_delay: 5 # Restart delay (seconds) for new instances
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Idle instance timeout check in minutes

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
```

## Configuration Files

### Configuration File Locations

Configuration files are searched in the following locations (in order of precedence):

**Linux:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/.config/llamactl/config.yaml`
- `/etc/llamactl/config.yaml`

**macOS:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `$HOME/Library/Application Support/llamactl/config.yaml`
- `/Library/Application Support/llamactl/config.yaml`

**Windows:**
- `./llamactl.yaml` or `./config.yaml` (current directory)
- `%APPDATA%\llamactl\config.yaml`
- `%USERPROFILE%\llamactl\config.yaml`
- `%PROGRAMDATA%\llamactl\config.yaml`

You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.

## Configuration Options

### Server Configuration

```yaml
server:
  host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
  port: 8080 # Server port to bind to (default: 8080)
  allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
  enable_swagger: false # Enable Swagger UI (default: false)
```

**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

### Instance Configuration

```yaml
instances:
  port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
  data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
  configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
  logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
  auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
  max_instances: -1 # Maximum instances (-1 = unlimited)
  max_running_instances: -1 # Maximum running instances (-1 = unlimited)
  enable_lru_eviction: true # Enable LRU eviction for idle instances
  llama_executable: "llama-server" # Path to llama-server executable
  default_auto_restart: true # Default auto-restart setting
  default_max_restarts: 3 # Default maximum restart attempts
  default_restart_delay: 5 # Default restart delay in seconds
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
  timeout_check_interval: 5 # Default instance timeout check interval in minutes
```

**Environment Variables:**
- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
- `LLAMACTL_DATA_DIRECTORY` - Data directory path
- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
- `LLAMACTL_LOGS_DIR` - Log directory path
- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes

### Authentication Configuration

```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```

**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
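For example, authentication keys can be supplied entirely through environment variables when launching the server; the key values below are placeholders only:

```bash
# Placeholder keys for illustration - use your own secrets in practice
export LLAMACTL_MANAGEMENT_KEYS="sk-mgmt-example"
export LLAMACTL_INFERENCE_KEYS="sk-infer-example-1,sk-infer-example-2"
llamactl
```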
## Command Line Options

View all available command line options:

```bash
llamactl --help
```

You can also override configuration using command line flags when starting llamactl.
70	docs/getting-started/installation.md	(new file)
@@ -0,0 +1,70 @@
# Installation

This guide will walk you through installing Llamactl on your system.

## Prerequisites

You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:

**Quick install methods:**

```bash
# Homebrew (macOS/Linux)
brew install llama.cpp

# Winget (Windows)
winget install llama.cpp
```

Or build from source - see llama.cpp docs
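Before continuing, it can help to confirm that the `llama-server` binary is actually reachable on your PATH; something along these lines should print a version string:

```bash
# Check that llama-server is installed and on PATH
which llama-server
llama-server --version
```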
## Installation Methods

### Option 1: Download Binary (Recommended)

Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):

```bash
# Linux/macOS - Get latest version and download
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
sudo mv llamactl /usr/local/bin/

# Or download manually from:
# https://github.com/lordmathis/llamactl/releases/latest

# Windows - Download from releases page
```

### Option 2: Build from Source

Requirements:
- Go 1.24 or later
- Node.js 22 or later
- Git

If you prefer to build from source:

```bash
# Clone the repository
git clone https://github.com/lordmathis/llamactl.git
cd llamactl

# Build the web UI
cd webui && npm ci && npm run build && cd ..

# Build the application
go build -o llamactl ./cmd/server
```

## Verification

Verify your installation by checking the version:

```bash
llamactl --version
```

## Next Steps

Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
143	docs/getting-started/quick-start.md	(new file)
@@ -0,0 +1,143 @@
# Quick Start

This guide will help you get Llamactl up and running in just a few minutes.

## Step 1: Start Llamactl

Start the Llamactl server:

```bash
llamactl
```

By default, Llamactl will start on `http://localhost:8080`.

## Step 2: Access the Web UI

Open your web browser and navigate to:

```
http://localhost:8080
```

Login with the management API key. By default it is generated during server startup; copy it from the terminal output.

You should see the Llamactl web interface.

## Step 3: Create Your First Instance

1. Click the "Add Instance" button
2. Fill in the instance configuration:
   - **Name**: Give your instance a descriptive name
   - **Model Path**: Path to your Llama.cpp model file
   - **Additional Options**: Any extra Llama.cpp parameters
3. Click "Create Instance"

## Step 4: Start Your Instance

Once created, you can:

- **Start** the instance by clicking the start button
- **Monitor** its status in real-time
- **View logs** by clicking the logs button
- **Stop** the instance when needed

## Example Configuration

Here's a basic example configuration for a Llama 2 model:

```json
{
  "name": "llama2-7b",
  "model_path": "/path/to/llama-2-7b-chat.gguf",
  "options": {
    "threads": 4,
    "context_size": 2048
  }
}
```

## Using the API

You can also manage instances via the REST API:

```bash
# List all instances
curl http://localhost:8080/api/instances

# Create a new instance
curl -X POST http://localhost:8080/api/instances \
  -H "Content-Type: application/json" \
  -d '{
    "name": "my-model",
    "model_path": "/path/to/model.gguf"
  }'

# Start an instance
curl -X POST http://localhost:8080/api/instances/my-model/start
```

## OpenAI Compatible API

Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.

### Chat Completions

Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:

```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-model",
    "messages": [
      {
        "role": "user",
        "content": "Hello! Can you help me write a Python function?"
      }
    ],
    "max_tokens": 150,
    "temperature": 0.7
  }'
```

### Using with Python OpenAI Client

You can also use the official OpenAI Python client:

```python
from openai import OpenAI

# Point the client to your Llamactl server
client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="not-needed"  # Use an inference API key here if authentication is enabled
)

# Create a chat completion
response = client.chat.completions.create(
    model="my-model",  # Use the name of your instance
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ],
    max_tokens=200,
    temperature=0.7
)

print(response.choices[0].message.content)
```

### List Available Models

Get a list of running instances (models) in OpenAI-compatible format:

```bash
curl http://localhost:8080/v1/models
```

## Next Steps

- Manage instances with the [Managing Instances](../user-guide/managing-instances.md) guide
- Explore the [API Reference](../user-guide/api-reference.md)
- Configure advanced settings in the [Configuration](configuration.md) guide
BIN	docs/images/create_instance.png	(new file, 69 KiB, binary file not shown)
BIN	docs/images/dashboard.png	(new file, 44 KiB, binary file not shown)
BIN	(removed image, 47 KiB, binary file not shown)
41	docs/index.md	(new file)
@@ -0,0 +1,41 @@
# Llamactl Documentation

Welcome to the Llamactl documentation! Llamactl is a management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.

![Dashboard Screenshot](images/dashboard.png)

## What is Llamactl?

Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.

## Features

🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
🔐 **API Key Authentication**: Separate keys for management vs inference access
📊 **Instance Monitoring**: Health checks, auto-restart, log management
⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts

## Quick Links

- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation

## Getting Help

If you need help or have questions:

- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings

## License

MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.
412	docs/user-guide/api-reference.md	(new file)
@@ -0,0 +1,412 @@
# API Reference

Complete reference for the Llamactl REST API.

## Base URL

All API endpoints are relative to the base URL:

```
http://localhost:8080/api/v1
```

## Authentication

Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:

```bash
curl -H "Authorization: Bearer <your-api-key>" \
  http://localhost:8080/api/v1/instances
```

The server supports two types of API keys:
- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
- **Inference API Keys**: Required for OpenAI-compatible inference endpoints

## System Endpoints

### Get Llamactl Version

Get the version information of the llamactl server.

```http
GET /api/v1/version
```

**Response:**
```
Version: 1.0.0
Commit: abc123
Build Time: 2024-01-15T10:00:00Z
```

### Get Llama Server Help

Get help text for the llama-server command.

```http
GET /api/v1/server/help
```

**Response:** Plain text help output from `llama-server --help`

### Get Llama Server Version

Get version information of the llama-server binary.

```http
GET /api/v1/server/version
```

**Response:** Plain text version output from `llama-server --version`

### List Available Devices

List available devices for llama-server.

```http
GET /api/v1/server/devices
```

**Response:** Plain text device list from `llama-server --list-devices`

## Instances

### List All Instances

Get a list of all instances.

```http
GET /api/v1/instances
```

**Response:**
```json
[
  {
    "name": "llama2-7b",
    "status": "running",
    "created": 1705312200
  }
]
```

### Get Instance Details

Get detailed information about a specific instance.

```http
GET /api/v1/instances/{name}
```

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
}
```

### Create Instance

Create and start a new instance.

```http
POST /api/v1/instances/{name}
```

**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
}
```

### Update Instance

Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.

```http
PUT /api/v1/instances/{name}
```

**Request Body:** JSON object with configuration fields to update.

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
}
```

### Delete Instance

Stop and remove an instance.

```http
DELETE /api/v1/instances/{name}
```

**Response:** `204 No Content`

## Instance Operations

### Start Instance

Start a stopped instance.

```http
POST /api/v1/instances/{name}/start
```

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "starting",
  "created": 1705312200
}
```

**Error Responses:**
- `409 Conflict`: Maximum number of running instances reached
- `500 Internal Server Error`: Failed to start instance

### Stop Instance

Stop a running instance.

```http
POST /api/v1/instances/{name}/stop
```

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "stopping",
  "created": 1705312200
}
```

### Restart Instance

Restart an instance (stop then start).

```http
POST /api/v1/instances/{name}/restart
```

**Response:**
```json
{
  "name": "llama2-7b",
  "status": "restarting",
  "created": 1705312200
}
```

### Get Instance Logs

Retrieve instance logs.

```http
GET /api/v1/instances/{name}/logs
```

**Query Parameters:**
- `lines`: Number of lines to return (default: all lines, use -1 for all)

**Response:** Plain text log output

**Example:**
```bash
curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
```

### Proxy to Instance

Proxy HTTP requests directly to the llama-server instance.

```http
GET /api/v1/instances/{name}/proxy/*
POST /api/v1/instances/{name}/proxy/*
```

This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.

**Example - Check Instance Health:**
```bash
curl -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model/proxy/health
```

This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.

**Error Responses:**
- `503 Service Unavailable`: Instance is not running

## OpenAI-Compatible API

Llamactl provides OpenAI-compatible endpoints for inference operations.

### List Models

List all instances in OpenAI-compatible format.

```http
GET /v1/models
```

**Response:**
```json
{
  "object": "list",
  "data": [
    {
      "id": "llama2-7b",
      "object": "model",
      "created": 1705312200,
      "owned_by": "llamactl"
    }
  ]
}
```

### Chat Completions, Completions, Embeddings

All OpenAI-compatible inference endpoints are available:

```http
POST /v1/chat/completions
POST /v1/completions
POST /v1/embeddings
POST /v1/rerank
POST /v1/reranking
```

**Request Body:** Standard OpenAI format with `model` field specifying the instance name

**Example:**
```json
{
  "model": "llama2-7b",
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you?"
    }
  ]
}
```

The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).

**Error Responses:**
- `400 Bad Request`: Invalid request body or missing model name
- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
- `409 Conflict`: Cannot start instance due to maximum instances limit

## Instance Status Values

Instances can have the following status values:
- `stopped`: Instance is not running
- `running`: Instance is running and ready to accept requests
- `failed`: Instance failed to start or crashed

## Error Responses

All endpoints may return error responses in the following format:

```json
{
  "error": "Error message description"
}
```

### Common HTTP Status Codes

- `200`: Success
- `201`: Created
- `204`: No Content (successful deletion)
- `400`: Bad Request (invalid parameters or request body)
- `401`: Unauthorized (missing or invalid API key)
- `403`: Forbidden (insufficient permissions)
- `404`: Not Found (instance not found)
- `409`: Conflict (instance already exists, max instances reached)
- `500`: Internal Server Error
- `503`: Service Unavailable (instance not running)

## Examples

### Complete Instance Lifecycle

```bash
# Create and start instance
curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "/models/llama-2-7b.gguf"
  }'

# Check instance status
curl -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model

# Get instance logs
curl -H "Authorization: Bearer your-api-key" \
  "http://localhost:8080/api/v1/instances/my-model/logs?lines=50"

# Use OpenAI-compatible chat completions
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-inference-api-key" \
  -d '{
    "model": "my-model",
    "messages": [
      {"role": "user", "content": "Hello!"}
    ],
    "max_tokens": 100
  }'

# Stop instance
curl -X POST -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model/stop

# Delete instance
curl -X DELETE -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model
```

### Using the Proxy Endpoint

You can also directly proxy requests to the llama-server instance:

```bash
# Direct proxy to instance (bypasses OpenAI compatibility layer)
curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "prompt": "Hello, world!",
    "n_predict": 50
  }'
```

## Swagger Documentation

If Swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:

```
http://localhost:8080/swagger/
```

This provides a complete interactive interface for testing all API endpoints.
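Swagger UI is disabled by default (`enable_swagger: false`); a minimal sketch of the config change that turns it on is shown below. See the [Configuration Guide](../getting-started/configuration.md) for the full `server` block:

```yaml
# llamactl.yaml - enable the built-in Swagger UI
server:
  enable_swagger: true
```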
190	docs/user-guide/managing-instances.md	(new file)
@@ -0,0 +1,190 @@
# Managing Instances

Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.

## Overview

Llamactl provides two ways to manage instances:

- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
- **REST API**: Programmatic access for automation and integration

![Dashboard Screenshot](../images/dashboard.png)

### Authentication

If authentication is enabled:

1. Navigate to the web UI
2. Enter your credentials
3. Bearer token is stored for the session

### Theme Support

- Switch between light and dark themes
- Setting is remembered across sessions

## Instance Cards

Each instance is displayed as a card showing:

- **Instance name**
- **Health status badge** (unknown, ready, error, failed)
- **Action buttons** (start, stop, edit, logs, delete)

## Create Instance

### Via Web UI

![Create Instance Screenshot](../images/create_instance.png)

1. Click the **"Create Instance"** button on the dashboard
2. Enter a unique **Name** for your instance (only required field)
3. Configure model source (choose one):
   - **Model Path**: Full path to your downloaded GGUF model file
   - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
   - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
4. Configure optional instance management settings:
   - **Auto Restart**: Automatically restart instance on failure
   - **Max Restarts**: Maximum number of restart attempts
   - **Restart Delay**: Delay in seconds between restart attempts
   - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
   - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
5. Configure optional llama-server backend options:
   - **Threads**: Number of CPU threads to use
   - **Context Size**: Context window size (ctx_size)
   - **GPU Layers**: Number of layers to offload to GPU
   - **Port**: Network port (auto-assigned by llamactl if not specified)
   - **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md))
6. Click **"Create"** to save the instance

### Via API

```bash
# Create instance with local model file
curl -X POST http://localhost:8080/api/instances/my-instance \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf",
      "threads": 8,
      "ctx_size": 4096
    }
  }'

# Create instance with HuggingFace model
curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
      "hf_file": "gemma-3-27b-it-GGUF.gguf",
      "gpu_layers": 32
    },
    "auto_restart": true,
    "max_restarts": 3
  }'
```

## Start Instance

### Via Web UI

1. Click the **"Start"** button on an instance card
2. Watch the status change to "Unknown"
3. Monitor progress in the logs
4. Instance status changes to "Ready" once the instance is up

### Via API

```bash
curl -X POST http://localhost:8080/api/instances/{name}/start
```

## Stop Instance

### Via Web UI

1. Click the **"Stop"** button on an instance card
2. Instance gracefully shuts down

### Via API

```bash
curl -X POST http://localhost:8080/api/instances/{name}/stop
```

## Edit Instance

### Via Web UI

1. Click the **"Edit"** button on an instance card
2. Modify settings in the configuration dialog
3. Changes require instance restart to take effect
4. Click **"Update & Restart"** to apply changes

### Via API

Modify instance settings:

```bash
curl -X PUT http://localhost:8080/api/instances/{name} \
  -H "Content-Type: application/json" \
  -d '{
    "backend_options": {
      "threads": 8,
      "context_size": 4096
    }
  }'
```

!!! note
    Configuration changes require restarting the instance to take effect.

## View Logs

### Via Web UI

1. Click the **"Logs"** button on any instance card
2. Real-time log viewer opens

### Via API

Retrieve logs for an instance:

```bash
# Get instance logs
curl http://localhost:8080/api/instances/{name}/logs
```

## Delete Instance

### Via Web UI

1. Click the **"Delete"** button on an instance card
2. Only stopped instances can be deleted
3. Confirm deletion in the dialog

### Via API

```bash
curl -X DELETE http://localhost:8080/api/instances/{name}
```

## Instance Proxy

Llamactl proxies all requests to the underlying llama-server instances.

```bash
# Proxy a request to the llama-server instance
curl http://localhost:8080/api/instances/{name}/proxy/
```

Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.

### Instance Health

#### Via Web UI

1. The health status badge is displayed on each instance card

#### Via API

Check the health status of your instances:

```bash
curl http://localhost:8080/api/instances/{name}/proxy/health
```
160	docs/user-guide/troubleshooting.md	(new file)
@@ -0,0 +1,160 @@
# Troubleshooting

Issues specific to Llamactl deployment and operation.

## Configuration Issues

### Invalid Configuration

**Problem:** Invalid configuration preventing startup

**Solutions:**

1. Use minimal configuration:
   ```yaml
   server:
     host: "0.0.0.0"
     port: 8080
   instances:
     port_range: [8000, 9000]
   ```

2. Check data directory permissions:
   ```bash
   # Ensure data directory is writable (default: ~/.local/share/llamactl)
   mkdir -p ~/.local/share/llamactl/{instances,logs}
   ```

## Instance Management Issues

### Model Loading Failures

**Problem:** Instance fails to start with model loading errors

**Common Solutions:**
- **llama-server not found:** Ensure `llama-server` binary is in PATH
- **Wrong model format:** Ensure model is in GGUF format
- **Insufficient memory:** Use smaller model or reduce context size
- **Path issues:** Use absolute paths to model files

### Memory Issues

**Problem:** Out of memory errors or system becomes unresponsive

**Solutions:**

1. **Reduce context size:**
   ```json
   {
     "n_ctx": 1024
   }
   ```

2. **Use quantized models:**
   - Try Q4_K_M instead of higher precision models
   - Use smaller model variants (7B instead of 13B)

### GPU Configuration

**Problem:** GPU not being used effectively

**Solutions:**

1. **Configure GPU layers:**
   ```json
   {
     "n_gpu_layers": 35
   }
   ```

### Advanced Instance Issues

**Problem:** Complex model loading, performance, or compatibility issues

Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:

**Resources:**
- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)

**Testing directly with llama-server:**

```bash
# Test your model and parameters directly with llama-server
llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
```

This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.

## API and Network Issues

### CORS Errors

**Problem:** Web UI shows CORS errors in browser console

**Solutions:**

1. **Configure allowed origins:**
   ```yaml
   server:
     allowed_origins:
       - "http://localhost:3000"
       - "https://yourdomain.com"
   ```

## Authentication Issues

**Problem:** API requests failing with authentication errors

**Solutions:**

1. **Disable authentication temporarily:**
   ```yaml
   auth:
     require_management_auth: false
     require_inference_auth: false
   ```

2. **Configure API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
     inference_keys:
       - "your-inference-key"
   ```

3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
     http://localhost:8080/api/v1/instances
   ```

## Debugging and Logs

### Viewing Instance Logs

```bash
# Get instance logs via API
curl http://localhost:8080/api/v1/instances/{name}/logs

# Or check log files directly
tail -f ~/.local/share/llamactl/logs/{instance-name}.log
```

### Enable Debug Logging

```bash
export LLAMACTL_LOG_LEVEL=debug
llamactl
```

## Getting Help

When reporting issues, include:

1. **System information:**
   ```bash
   llamactl --version
   ```

2. **Configuration file** (remove sensitive keys)

3. **Relevant log output**

4. **Steps to reproduce the issue**
68	mkdocs.yml	(new file)
@@ -0,0 +1,68 @@
site_name: Llamactl Documentation
site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
site_author: Llamactl Team
site_url: https://llamactl.org

repo_name: lordmathis/llamactl
repo_url: https://github.com/lordmathis/llamactl

theme:
  name: material
  palette:
    # Palette toggle for light mode
    - scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
    # Palette toggle for dark mode
    - scheme: slate
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-4
        name: Switch to light mode
  features:
    - navigation.tabs
    - navigation.sections
    - navigation.expand
    - navigation.top
    - search.highlight
    - search.share
    - content.code.copy

markdown_extensions:
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.superfences
  - admonition
  - pymdownx.details
  - pymdownx.tabbed:
      alternate_style: true
  - attr_list
  - md_in_html
  - toc:
      permalink: true

nav:
  - Home: index.md
  - Getting Started:
      - Installation: getting-started/installation.md
      - Quick Start: getting-started/quick-start.md
      - Configuration: getting-started/configuration.md
  - User Guide:
      - Managing Instances: user-guide/managing-instances.md
      - API Reference: user-guide/api-reference.md
      - Troubleshooting: user-guide/troubleshooting.md

plugins:
  - search
  - git-revision-date-localized

extra:
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/lordmathis/llamactl