diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..8df96b6
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,65 @@
+name: Build and Deploy Documentation
+
+on:
+ push:
+ branches: [ main ]
+ paths:
+ - 'docs/**'
+ - 'mkdocs.yml'
+ - 'docs-requirements.txt'
+ - '.github/workflows/docs.yml'
+ pull_request:
+ branches: [ main ]
+ paths:
+ - 'docs/**'
+ - 'mkdocs.yml'
+ - 'docs-requirements.txt'
+
+permissions:
+ contents: read
+ pages: write
+ id-token: write
+
+concurrency:
+ group: "pages"
+ cancel-in-progress: false
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # Needed for git-revision-date-localized plugin
+
+ - name: Setup Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.11'
+
+ - name: Install dependencies
+ run: |
+ pip install -r docs-requirements.txt
+
+ - name: Build documentation
+ run: |
+ mkdocs build --strict
+
+ - name: Upload documentation artifact
+ if: github.ref == 'refs/heads/main'
+ uses: actions/upload-pages-artifact@v3
+ with:
+ path: ./site
+
+ deploy:
+ if: github.ref == 'refs/heads/main'
+ environment:
+ name: github-pages
+ url: ${{ steps.deployment.outputs.page_url }}
+ runs-on: ubuntu-latest
+ needs: build
+ steps:
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v4
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 78c8613..1f4a50e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -129,6 +129,50 @@ Use this format for pull request titles:
- Use meaningful component and variable names
- Prefer functional components over class components
+## Documentation Development
+
+This project uses MkDocs for documentation. When working on documentation:
+
+### Setup Documentation Environment
+
+```bash
+# Install documentation dependencies
+pip install -r docs-requirements.txt
+```
+
+### Development Workflow
+
+```bash
+# Serve documentation locally for development
+mkdocs serve
+```
+The documentation will be available at http://localhost:8000.
+
+```bash
+# Build static documentation site
+mkdocs build
+```
+The built site will be in the `site/` directory.
+
+### Documentation Structure
+
+- `docs/` - Documentation content (Markdown files)
+- `mkdocs.yml` - MkDocs configuration
+- `docs-requirements.txt` - Python dependencies for documentation
+
+### Adding New Documentation
+
+When adding new documentation:
+
+1. Create Markdown files in the appropriate `docs/` subdirectory
+2. Update the navigation in `mkdocs.yml` (see the example after this list)
+3. Test locally with `mkdocs serve`
+4. Submit a pull request
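+
+For example, a hypothetical new page at `docs/user-guide/my-new-page.md` would be registered in the `nav` section of `mkdocs.yml` like this:
+
+```yaml
+nav:
+  - User Guide:
+    - Managing Instances: user-guide/managing-instances.md
+    - My New Page: user-guide/my-new-page.md  # hypothetical new entry
+```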
+
+### Documentation Deployment
+
+Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch.
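+
+Before pushing documentation changes, you can run the same strict build that the CI workflow uses to catch broken links and configuration errors early:
+
+```bash
+mkdocs build --strict
+```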
+
## Getting Help
- Check existing [issues](https://github.com/lordmathis/llamactl/issues)
diff --git a/README.md b/README.md
index d9edfd5..a2a1e48 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
💾 **State Persistence**: Ensure instances remain intact across server restarts
-
+
**Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances
**Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations
@@ -123,7 +123,6 @@ instances:
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
-
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
@@ -131,107 +130,7 @@ auth:
management_keys: [] # Keys for management endpoints
```
-Full Configuration Guide
-
-llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
-
-```
-Defaults < Configuration file < Environment variables
-```
-
-### Configuration Files
-
-#### Configuration File Locations
-
-Configuration files are searched in the following locations (in order of precedence):
-
-**Linux/macOS:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `$HOME/.config/llamactl/config.yaml`
-- `/etc/llamactl/config.yaml`
-
-**Windows:**
-- `./llamactl.yaml` or `./config.yaml` (current directory)
-- `%APPDATA%\llamactl\config.yaml`
-- `%USERPROFILE%\llamactl\config.yaml`
-- `%PROGRAMDATA%\llamactl\config.yaml`
-
-You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable.
-
-### Configuration Options
-
-#### Server Configuration
-
-```yaml
-server:
- host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
- port: 8080 # Server port to bind to (default: 8080)
- allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
- enable_swagger: false # Enable Swagger UI (default: false)
-```
-
-**Environment Variables:**
-- `LLAMACTL_HOST` - Server host
-- `LLAMACTL_PORT` - Server port
-- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
-- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
-
-#### Instance Configuration
-
-```yaml
-instances:
- port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
- data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
- configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
- logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
- auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
- max_instances: -1 # Maximum instances (-1 = unlimited)
- max_running_instances: -1 # Maximum running instances (-1 = unlimited)
- enable_lru_eviction: true # Enable LRU eviction for idle instances
- llama_executable: "llama-server" # Path to llama-server executable
- default_auto_restart: true # Default auto-restart setting
- default_max_restarts: 3 # Default maximum restart attempts
- default_restart_delay: 5 # Default restart delay in seconds
- default_on_demand_start: true # Default on-demand start setting
- on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
- timeout_check_interval: 5 # Default instance timeout check interval in minutes
-```
-
-**Environment Variables:**
-- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
-- `LLAMACTL_DATA_DIRECTORY` - Data directory path
-- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
-- `LLAMACTL_LOGS_DIR` - Log directory path
-- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
-- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
-- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
-- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
-- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
-- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
-- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
-- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
-- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
-- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
-- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
-
-
-#### Authentication Configuration
-
-```yaml
-auth:
- require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
- inference_keys: [] # List of valid inference API keys
- require_management_auth: true # Require API key for management endpoints (default: true)
- management_keys: [] # List of valid management API keys
-```
-
-**Environment Variables:**
-- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
-- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
-- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
-- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
-
-
+For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md).
## License
diff --git a/apidocs/docs.go b/apidocs/docs.go
index 78bd3c0..7ea502e 100644
--- a/apidocs/docs.go
+++ b/apidocs/docs.go
@@ -884,6 +884,10 @@ const docTemplate = `{
"host": {
"type": "string"
},
+ "idle_timeout": {
+ "description": "Idle timeout",
+ "type": "integer"
+ },
"ignore_eos": {
"type": "boolean"
},
@@ -1018,6 +1022,10 @@ const docTemplate = `{
"numa": {
"type": "string"
},
+ "on_demand_start": {
+ "description": "On demand start",
+ "type": "boolean"
+ },
"override_kv": {
"type": "array",
"items": {
@@ -1078,8 +1086,7 @@ const docTemplate = `{
"reranking": {
"type": "boolean"
},
- "restart_delay_seconds": {
- "description": "RestartDelay duration in seconds",
+ "restart_delay": {
"type": "integer"
},
"rope_freq_base": {
@@ -1194,6 +1201,19 @@ const docTemplate = `{
}
}
},
+ "instance.InstanceStatus": {
+ "type": "integer",
+ "enum": [
+ 0,
+ 1,
+ 2
+ ],
+ "x-enum-varnames": [
+ "Stopped",
+ "Running",
+ "Failed"
+ ]
+ },
"instance.Process": {
"type": "object",
"properties": {
@@ -1204,9 +1224,13 @@ const docTemplate = `{
"name": {
"type": "string"
},
- "running": {
+ "status": {
"description": "Status",
- "type": "boolean"
+ "allOf": [
+ {
+ "$ref": "#/definitions/instance.InstanceStatus"
+ }
+ ]
}
}
},
diff --git a/apidocs/swagger.json b/apidocs/swagger.json
index 95493f1..be8d193 100644
--- a/apidocs/swagger.json
+++ b/apidocs/swagger.json
@@ -877,6 +877,10 @@
"host": {
"type": "string"
},
+ "idle_timeout": {
+ "description": "Idle timeout",
+ "type": "integer"
+ },
"ignore_eos": {
"type": "boolean"
},
@@ -1011,6 +1015,10 @@
"numa": {
"type": "string"
},
+ "on_demand_start": {
+ "description": "On demand start",
+ "type": "boolean"
+ },
"override_kv": {
"type": "array",
"items": {
@@ -1071,8 +1079,7 @@
"reranking": {
"type": "boolean"
},
- "restart_delay_seconds": {
- "description": "RestartDelay duration in seconds",
+ "restart_delay": {
"type": "integer"
},
"rope_freq_base": {
@@ -1187,6 +1194,19 @@
}
}
},
+ "instance.InstanceStatus": {
+ "type": "integer",
+ "enum": [
+ 0,
+ 1,
+ 2
+ ],
+ "x-enum-varnames": [
+ "Stopped",
+ "Running",
+ "Failed"
+ ]
+ },
"instance.Process": {
"type": "object",
"properties": {
@@ -1197,9 +1217,13 @@
"name": {
"type": "string"
},
- "running": {
+ "status": {
"description": "Status",
- "type": "boolean"
+ "allOf": [
+ {
+ "$ref": "#/definitions/instance.InstanceStatus"
+ }
+ ]
}
}
},
diff --git a/apidocs/swagger.yaml b/apidocs/swagger.yaml
index c32e7f5..bc6e4ec 100644
--- a/apidocs/swagger.yaml
+++ b/apidocs/swagger.yaml
@@ -136,6 +136,9 @@ definitions:
type: string
host:
type: string
+ idle_timeout:
+ description: Idle timeout
+ type: integer
ignore_eos:
type: boolean
jinja:
@@ -226,6 +229,9 @@ definitions:
type: boolean
numa:
type: string
+ on_demand_start:
+ description: On demand start
+ type: boolean
override_kv:
items:
type: string
@@ -266,8 +272,7 @@ definitions:
type: number
reranking:
type: boolean
- restart_delay_seconds:
- description: RestartDelay duration in seconds
+ restart_delay:
type: integer
rope_freq_base:
type: number
@@ -344,6 +349,16 @@ definitions:
yarn_orig_ctx:
type: integer
type: object
+ instance.InstanceStatus:
+ enum:
+ - 0
+ - 1
+ - 2
+ type: integer
+ x-enum-varnames:
+ - Stopped
+ - Running
+ - Failed
instance.Process:
properties:
created:
@@ -351,9 +366,10 @@ definitions:
type: integer
name:
type: string
- running:
+ status:
+ allOf:
+ - $ref: '#/definitions/instance.InstanceStatus'
description: Status
- type: boolean
type: object
server.OpenAIInstance:
properties:
diff --git a/docs-requirements.txt b/docs-requirements.txt
new file mode 100644
index 0000000..256e652
--- /dev/null
+++ b/docs-requirements.txt
@@ -0,0 +1,4 @@
+mkdocs-material==9.5.3
+mkdocs==1.5.3
+pymdown-extensions==10.7
+mkdocs-git-revision-date-localized-plugin==1.2.4
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
new file mode 100644
index 0000000..64b097a
--- /dev/null
+++ b/docs/getting-started/configuration.md
@@ -0,0 +1,150 @@
+# Configuration
+
+llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
+
+```
+Defaults < Configuration file < Environment variables
+```
+
+llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs.
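+
+For example, a value from the configuration file is overridden by its corresponding environment variable:
+
+```bash
+# llamactl.yaml sets port: 8080, but the environment variable takes precedence
+LLAMACTL_PORT=9090 llamactl
+```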
+
+## Default Configuration
+
+Here's the default configuration with all available options:
+
+```yaml
+server:
+ host: "0.0.0.0" # Server host to bind to
+ port: 8080 # Server port to bind to
+ allowed_origins: ["*"] # Allowed CORS origins (default: all)
+ enable_swagger: false # Enable Swagger UI for API docs
+
+instances:
+ port_range: [8000, 9000] # Port range for instances
+ data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below)
+ configs_dir: ~/.local/share/llamactl/instances # Instance configs directory
+ logs_dir: ~/.local/share/llamactl/logs # Logs directory
+ auto_create_dirs: true # Auto-create data/config/logs dirs if missing
+ max_instances: -1 # Max instances (-1 = unlimited)
+ max_running_instances: -1 # Max running instances (-1 = unlimited)
+ enable_lru_eviction: true # Enable LRU eviction for idle instances
+ llama_executable: llama-server # Path to llama-server executable
+ default_auto_restart: true # Auto-restart new instances by default
+ default_max_restarts: 3 # Max restarts for new instances
+ default_restart_delay: 5 # Restart delay (seconds) for new instances
+ default_on_demand_start: true # Default on-demand start setting
+ on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
+ timeout_check_interval: 5 # Idle instance timeout check in minutes
+
+auth:
+ require_inference_auth: true # Require auth for inference endpoints
+ inference_keys: [] # Keys for inference endpoints
+ require_management_auth: true # Require auth for management endpoints
+ management_keys: [] # Keys for management endpoints
+```
+
+## Configuration Files
+
+### Configuration File Locations
+
+Configuration files are searched in the following locations (in order of precedence):
+
+**Linux:**
+- `./llamactl.yaml` or `./config.yaml` (current directory)
+- `$HOME/.config/llamactl/config.yaml`
+- `/etc/llamactl/config.yaml`
+
+**macOS:**
+- `./llamactl.yaml` or `./config.yaml` (current directory)
+- `$HOME/Library/Application Support/llamactl/config.yaml`
+- `/Library/Application Support/llamactl/config.yaml`
+
+**Windows:**
+- `./llamactl.yaml` or `./config.yaml` (current directory)
+- `%APPDATA%\llamactl\config.yaml`
+- `%USERPROFILE%\llamactl\config.yaml`
+- `%PROGRAMDATA%\llamactl\config.yaml`
+
+You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
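+
+For example:
+
+```bash
+# Use an explicit config file location (example path)
+LLAMACTL_CONFIG_PATH=/path/to/llamactl.yaml llamactl
+```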
+
+## Configuration Options
+
+### Server Configuration
+
+```yaml
+server:
+ host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0")
+ port: 8080 # Server port to bind to (default: 8080)
+ allowed_origins: ["*"] # CORS allowed origins (default: ["*"])
+ enable_swagger: false # Enable Swagger UI (default: false)
+```
+
+**Environment Variables:**
+- `LLAMACTL_HOST` - Server host
+- `LLAMACTL_PORT` - Server port
+- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
+- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
+
+### Instance Configuration
+
+```yaml
+instances:
+ port_range: [8000, 9000] # Port range for instances (default: [8000, 9000])
+ data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS)
+ configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances)
+ logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs)
+ auto_create_dirs: true # Automatically create data/config/logs directories (default: true)
+ max_instances: -1 # Maximum instances (-1 = unlimited)
+ max_running_instances: -1 # Maximum running instances (-1 = unlimited)
+ enable_lru_eviction: true # Enable LRU eviction for idle instances
+ llama_executable: "llama-server" # Path to llama-server executable
+ default_auto_restart: true # Default auto-restart setting
+ default_max_restarts: 3 # Default maximum restart attempts
+ default_restart_delay: 5 # Default restart delay in seconds
+ default_on_demand_start: true # Default on-demand start setting
+ on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
+ timeout_check_interval: 5 # Default instance timeout check interval in minutes
+```
+
+**Environment Variables:**
+- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000")
+- `LLAMACTL_DATA_DIRECTORY` - Data directory path
+- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path
+- `LLAMACTL_LOGS_DIR` - Log directory path
+- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false)
+- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances
+- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances
+- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances
+- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
+- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false)
+- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
+- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
+- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
+- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
+- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
+
+### Authentication Configuration
+
+```yaml
+auth:
+ require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
+ inference_keys: [] # List of valid inference API keys
+ require_management_auth: true # Require API key for management endpoints (default: true)
+ management_keys: [] # List of valid management API keys
+```
+
+**Environment Variables:**
+- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
+- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
+- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
+- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
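+
+For example, API keys can be supplied entirely through the environment (placeholder keys shown):
+
+```bash
+LLAMACTL_MANAGEMENT_KEYS=sk-management-example \
+LLAMACTL_INFERENCE_KEYS=sk-inference-example \
+llamactl
+```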
+
+## Command Line Options
+
+View all available command line options:
+
+```bash
+llamactl --help
+```
+
+You can also override configuration using command line flags when starting llamactl.
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
new file mode 100644
index 0000000..90f78a8
--- /dev/null
+++ b/docs/getting-started/installation.md
@@ -0,0 +1,70 @@
+# Installation
+
+This guide will walk you through installing Llamactl on your system.
+
+## Prerequisites
+
+You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
+
+
+**Quick install methods:**
+
+```bash
+# Homebrew (macOS/Linux)
+brew install llama.cpp
+# Winget (Windows)
+winget install llama.cpp
+```
+
+Or build llama.cpp from source; see the [llama.cpp repository](https://github.com/ggml-org/llama.cpp) for build instructions.
+
+## Installation Methods
+
+### Option 1: Download Binary (Recommended)
+
+Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
+
+```bash
+# Linux/macOS - Get latest version and download
+LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
+curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
+sudo mv llamactl /usr/local/bin/
+
+# Or download manually from:
+# https://github.com/lordmathis/llamactl/releases/latest
+
+# Windows - Download from releases page
+```
+
+### Option 2: Build from Source
+
+Requirements:
+- Go 1.24 or later
+- Node.js 22 or later
+- Git
+
+If you prefer to build from source:
+
+```bash
+# Clone the repository
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+
+# Build the web UI
+cd webui && npm ci && npm run build && cd ..
+
+# Build the application
+go build -o llamactl ./cmd/server
+```
+
+## Verification
+
+Verify your installation by checking the version:
+
+```bash
+llamactl --version
+```
+
+## Next Steps
+
+Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md
new file mode 100644
index 0000000..4de1065
--- /dev/null
+++ b/docs/getting-started/quick-start.md
@@ -0,0 +1,143 @@
+# Quick Start
+
+This guide will help you get Llamactl up and running in just a few minutes.
+
+## Step 1: Start Llamactl
+
+Start the Llamactl server:
+
+```bash
+llamactl
+```
+
+By default, Llamactl will start on `http://localhost:8080`.
+
+## Step 2: Access the Web UI
+
+Open your web browser and navigate to:
+
+```
+http://localhost:8080
+```
+
+Login with the management API key. By default it is generated during server startup. Copy it from the terminal output.
+
+You should see the Llamactl web interface.
+
+## Step 3: Create Your First Instance
+
+1. Click the "Add Instance" button
+2. Fill in the instance configuration:
+ - **Name**: Give your instance a descriptive name
+ - **Model Path**: Path to your Llama.cpp model file
+ - **Additional Options**: Any extra Llama.cpp parameters
+
+3. Click "Create Instance"
+
+## Step 4: Start Your Instance
+
+Once created, you can:
+
+- **Start** the instance by clicking the start button
+- **Monitor** its status in real-time
+- **View logs** by clicking the logs button
+- **Stop** the instance when needed
+
+## Example Configuration
+
+Here's a basic example configuration for a Llama 2 model:
+
+```json
+{
+ "name": "llama2-7b",
+ "model_path": "/path/to/llama-2-7b-chat.gguf",
+ "options": {
+ "threads": 4,
+ "context_size": 2048
+ }
+}
+```
+
+## Using the API
+
+You can also manage instances via the REST API:
+
+```bash
+# List all instances
+curl http://localhost:8080/api/instances
+
+# Create a new instance
+curl -X POST http://localhost:8080/api/instances \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "my-model",
+    "model_path": "/path/to/model.gguf"
+ }'
+
+# Start an instance
+curl -X POST http://localhost:8080/api/instances/my-model/start
+```
+
+## OpenAI Compatible API
+
+Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
+
+### Chat Completions
+
+Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
+
+```bash
+curl -X POST http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "my-model",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello! Can you help me write a Python function?"
+ }
+ ],
+ "max_tokens": 150,
+ "temperature": 0.7
+ }'
+```
+
+### Using with Python OpenAI Client
+
+You can also use the official OpenAI Python client:
+
+```python
+from openai import OpenAI
+
+# Point the client to your Llamactl server
+client = OpenAI(
+ base_url="http://localhost:8080/v1",
+    api_key="your-inference-api-key" # Inference auth is enabled by default; use a configured inference key (or any string if auth is disabled)
+)
+
+# Create a chat completion
+response = client.chat.completions.create(
+ model="my-model", # Use the name of your instance
+ messages=[
+ {"role": "user", "content": "Explain quantum computing in simple terms"}
+ ],
+ max_tokens=200,
+ temperature=0.7
+)
+
+print(response.choices[0].message.content)
+```
+
+### List Available Models
+
+Get a list of running instances (models) in OpenAI-compatible format:
+
+```bash
+curl http://localhost:8080/v1/models
+```
+
+## Next Steps
+
+- Learn how to manage instances in the [Managing Instances](../user-guide/managing-instances.md) guide
+- Explore the [API Reference](../user-guide/api-reference.md)
+- Configure advanced settings in the [Configuration](configuration.md) guide
diff --git a/docs/images/create_instance.png b/docs/images/create_instance.png
new file mode 100644
index 0000000..c1ce856
Binary files /dev/null and b/docs/images/create_instance.png differ
diff --git a/docs/images/dashboard.png b/docs/images/dashboard.png
new file mode 100644
index 0000000..01cea2a
Binary files /dev/null and b/docs/images/dashboard.png differ
diff --git a/docs/images/screenshot.png b/docs/images/screenshot.png
deleted file mode 100644
index 1c77ed2..0000000
Binary files a/docs/images/screenshot.png and /dev/null differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..d3e7bb9
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,41 @@
+# Llamactl Documentation
+
+Welcome to the Llamactl documentation! Llamactl is a **management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
+
+
+
+## What is Llamactl?
+
+Llamactl is designed to simplify the deployment and management of llama-server instances. It provides a modern solution for running multiple large language models with centralized management.
+
+## Features
+
+🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
+🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
+🔐 **API Key Authentication**: Separate keys for management vs inference access
+📊 **Instance Monitoring**: Health checks, auto-restart, log management
+⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
+💾 **State Persistence**: Ensure instances remain intact across server restarts
+
+## Quick Links
+
+- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
+- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
+- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
+- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
+- [API Reference](user-guide/api-reference.md) - Complete API documentation
+
+
+## Getting Help
+
+If you need help or have questions:
+
+- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
+- Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
+- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
+
+## License
+
+MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file.
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
new file mode 100644
index 0000000..3f99e53
--- /dev/null
+++ b/docs/user-guide/api-reference.md
@@ -0,0 +1,412 @@
+# API Reference
+
+Complete reference for the Llamactl REST API.
+
+## Base URL
+
+All API endpoints are relative to the base URL:
+
+```
+http://localhost:8080/api/v1
+```
+
+## Authentication
+
+Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
+
+```bash
+curl -H "Authorization: Bearer " \
+ http://localhost:8080/api/v1/instances
+```
+
+The server supports two types of API keys:
+- **Management API Keys**: Required for instance management operations (CRUD operations on instances)
+- **Inference API Keys**: Required for OpenAI-compatible inference endpoints
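+
+For example, assuming keys have been configured, a management key is used for instance management endpoints and an inference key for the OpenAI-compatible endpoints (placeholder keys shown):
+
+```bash
+# Management endpoint
+curl -H "Authorization: Bearer your-management-key" \
+  http://localhost:8080/api/v1/instances
+
+# OpenAI-compatible inference endpoint
+curl -H "Authorization: Bearer your-inference-key" \
+  http://localhost:8080/v1/models
+```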
+
+## System Endpoints
+
+### Get Llamactl Version
+
+Get the version information of the llamactl server.
+
+```http
+GET /api/v1/version
+```
+
+**Response:**
+```
+Version: 1.0.0
+Commit: abc123
+Build Time: 2024-01-15T10:00:00Z
+```
+
+### Get Llama Server Help
+
+Get help text for the llama-server command.
+
+```http
+GET /api/v1/server/help
+```
+
+**Response:** Plain text help output from `llama-server --help`
+
+### Get Llama Server Version
+
+Get version information of the llama-server binary.
+
+```http
+GET /api/v1/server/version
+```
+
+**Response:** Plain text version output from `llama-server --version`
+
+### List Available Devices
+
+List available devices for llama-server.
+
+```http
+GET /api/v1/server/devices
+```
+
+**Response:** Plain text device list from `llama-server --list-devices`
+
+## Instances
+
+### List All Instances
+
+Get a list of all instances.
+
+```http
+GET /api/v1/instances
+```
+
+**Response:**
+```json
+[
+ {
+ "name": "llama2-7b",
+ "status": "running",
+ "created": 1705312200
+ }
+]
+```
+
+### Get Instance Details
+
+Get detailed information about a specific instance.
+
+```http
+GET /api/v1/instances/{name}
+```
+
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "running",
+ "created": 1705312200
+}
+```
+
+### Create Instance
+
+Create and start a new instance.
+
+```http
+POST /api/v1/instances/{name}
+```
+
+**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
+
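+A minimal request body might look like this (field names follow the examples in [Managing Instances](managing-instances.md)):
+
+```json
+{
+  "backend_type": "llama_cpp",
+  "backend_options": {
+    "model": "/path/to/model.gguf"
+  }
+}
+```
+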
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "running",
+ "created": 1705312200
+}
+```
+
+### Update Instance
+
+Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
+
+```http
+PUT /api/v1/instances/{name}
+```
+
+**Request Body:** JSON object with configuration fields to update.
+
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "running",
+ "created": 1705312200
+}
+```
+
+### Delete Instance
+
+Stop and remove an instance.
+
+```http
+DELETE /api/v1/instances/{name}
+```
+
+**Response:** `204 No Content`
+
+## Instance Operations
+
+### Start Instance
+
+Start a stopped instance.
+
+```http
+POST /api/v1/instances/{name}/start
+```
+
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "starting",
+ "created": 1705312200
+}
+```
+
+**Error Responses:**
+- `409 Conflict`: Maximum number of running instances reached
+- `500 Internal Server Error`: Failed to start instance
+
+### Stop Instance
+
+Stop a running instance.
+
+```http
+POST /api/v1/instances/{name}/stop
+```
+
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "stopping",
+ "created": 1705312200
+}
+```
+
+### Restart Instance
+
+Restart an instance (stop then start).
+
+```http
+POST /api/v1/instances/{name}/restart
+```
+
+**Response:**
+```json
+{
+ "name": "llama2-7b",
+ "status": "restarting",
+ "created": 1705312200
+}
+```
+
+### Get Instance Logs
+
+Retrieve instance logs.
+
+```http
+GET /api/v1/instances/{name}/logs
+```
+
+**Query Parameters:**
+- `lines`: Number of lines to return from the end of the log (omit or use `-1` to return all lines)
+
+**Response:** Plain text log output
+
+**Example:**
+```bash
+curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
+```
+
+### Proxy to Instance
+
+Proxy HTTP requests directly to the llama-server instance.
+
+```http
+GET /api/v1/instances/{name}/proxy/*
+POST /api/v1/instances/{name}/proxy/*
+```
+
+This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
+
+**Example - Check Instance Health:**
+```bash
+curl -H "Authorization: Bearer your-api-key" \
+ http://localhost:8080/api/v1/instances/my-model/proxy/health
+```
+
+This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
+
+**Error Responses:**
+- `503 Service Unavailable`: Instance is not running
+
+## OpenAI-Compatible API
+
+Llamactl provides OpenAI-compatible endpoints for inference operations.
+
+### List Models
+
+List all instances in OpenAI-compatible format.
+
+```http
+GET /v1/models
+```
+
+**Response:**
+```json
+{
+ "object": "list",
+ "data": [
+ {
+ "id": "llama2-7b",
+ "object": "model",
+ "created": 1705312200,
+ "owned_by": "llamactl"
+ }
+ ]
+}
+```
+
+### Chat Completions, Completions, Embeddings
+
+All OpenAI-compatible inference endpoints are available:
+
+```http
+POST /v1/chat/completions
+POST /v1/completions
+POST /v1/embeddings
+POST /v1/rerank
+POST /v1/reranking
+```
+
+**Request Body:** Standard OpenAI format with `model` field specifying the instance name
+
+**Example:**
+```json
+{
+ "model": "llama2-7b",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, how are you?"
+ }
+ ]
+}
+```
+
+The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
+
+**Error Responses:**
+- `400 Bad Request`: Invalid request body or missing model name
+- `503 Service Unavailable`: Instance is not running and on-demand start is disabled
+- `409 Conflict`: Cannot start instance due to maximum instances limit
+
+## Instance Status Values
+
+Instances can have the following status values:
+- `stopped`: Instance is not running
+- `running`: Instance is running and ready to accept requests
+- `failed`: Instance failed to start or crashed
+
+## Error Responses
+
+All endpoints may return error responses in the following format:
+
+```json
+{
+ "error": "Error message description"
+}
+```
+
+### Common HTTP Status Codes
+
+- `200`: Success
+- `201`: Created
+- `204`: No Content (successful deletion)
+- `400`: Bad Request (invalid parameters or request body)
+- `401`: Unauthorized (missing or invalid API key)
+- `403`: Forbidden (insufficient permissions)
+- `404`: Not Found (instance not found)
+- `409`: Conflict (instance already exists, max instances reached)
+- `500`: Internal Server Error
+- `503`: Service Unavailable (instance not running)
+
+## Examples
+
+### Complete Instance Lifecycle
+
+```bash
+# Create and start instance
+curl -X POST http://localhost:8080/api/v1/instances/my-model \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer your-api-key" \
+ -d '{
+ "model": "/models/llama-2-7b.gguf"
+ }'
+
+# Check instance status
+curl -H "Authorization: Bearer your-api-key" \
+ http://localhost:8080/api/v1/instances/my-model
+
+# Get instance logs
+curl -H "Authorization: Bearer your-api-key" \
+ "http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
+
+# Use OpenAI-compatible chat completions
+curl -X POST http://localhost:8080/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer your-inference-api-key" \
+ -d '{
+ "model": "my-model",
+ "messages": [
+ {"role": "user", "content": "Hello!"}
+ ],
+ "max_tokens": 100
+ }'
+
+# Stop instance
+curl -X POST -H "Authorization: Bearer your-api-key" \
+ http://localhost:8080/api/v1/instances/my-model/stop
+
+# Delete instance
+curl -X DELETE -H "Authorization: Bearer your-api-key" \
+ http://localhost:8080/api/v1/instances/my-model
+```
+
+### Using the Proxy Endpoint
+
+You can also directly proxy requests to the llama-server instance:
+
+```bash
+# Direct proxy to instance (bypasses OpenAI compatibility layer)
+curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer your-api-key" \
+ -d '{
+ "prompt": "Hello, world!",
+ "n_predict": 50
+ }'
+```
+
+## Swagger Documentation
+
+If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
+
+```
+http://localhost:8080/swagger/
+```
+
+This provides a complete interactive interface for testing all API endpoints.
diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md
new file mode 100644
index 0000000..90e4552
--- /dev/null
+++ b/docs/user-guide/managing-instances.md
@@ -0,0 +1,190 @@
+# Managing Instances
+
+Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API.
+
+## Overview
+
+Llamactl provides two ways to manage instances:
+
+- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
+- **REST API**: Programmatic access for automation and integration
+
+
+
+### Authentication
+
+If authentication is enabled:
+1. Navigate to the web UI
+2. Enter your credentials
+3. Bearer token is stored for the session
+
+### Theme Support
+
+- Switch between light and dark themes
+- Setting is remembered across sessions
+
+## Instance Cards
+
+Each instance is displayed as a card showing:
+
+- **Instance name**
+- **Health status badge** (unknown, ready, error, failed)
+- **Action buttons** (start, stop, edit, logs, delete)
+
+## Create Instance
+
+### Via Web UI
+
+
+
+1. Click the **"Create Instance"** button on the dashboard
+2. Enter a unique **Name** for your instance (only required field)
+3. Configure model source (choose one):
+ - **Model Path**: Full path to your downloaded GGUF model file
+ - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`)
+ - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified)
+4. Configure optional instance management settings:
+ - **Auto Restart**: Automatically restart instance on failure
+ - **Max Restarts**: Maximum number of restart attempts
+ - **Restart Delay**: Delay in seconds between restart attempts
+ - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
+ - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
+5. Configure optional llama-server backend options:
+ - **Threads**: Number of CPU threads to use
+ - **Context Size**: Context window size (ctx_size)
+ - **GPU Layers**: Number of layers to offload to GPU
+ - **Port**: Network port (auto-assigned by llamactl if not specified)
+   - **Additional Parameters**: Any other llama-server command line options (see the [llama-server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md))
+6. Click **"Create"** to save the instance
+
+### Via API
+
+```bash
+# Create instance with local model file
+curl -X POST http://localhost:8080/api/instances/my-instance \
+ -H "Content-Type: application/json" \
+ -d '{
+ "backend_type": "llama_cpp",
+ "backend_options": {
+ "model": "/path/to/model.gguf",
+ "threads": 8,
+ "ctx_size": 4096
+ }
+ }'
+
+# Create instance with HuggingFace model
+curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
+ -H "Content-Type: application/json" \
+ -d '{
+ "backend_type": "llama_cpp",
+ "backend_options": {
+ "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
+ "hf_file": "gemma-3-27b-it-GGUF.gguf",
+ "gpu_layers": 32
+ },
+ "auto_restart": true,
+ "max_restarts": 3
+ }'
+```
+
+## Start Instance
+
+### Via Web UI
+1. Click the **"Start"** button on an instance card
+2. The health badge shows "Unknown" while the instance starts up
+3. Monitor startup progress in the logs
+4. The health badge changes to "Ready" once the instance is ready to serve requests
+
+### Via API
+```bash
+curl -X POST http://localhost:8080/api/instances/{name}/start
+```
+
+## Stop Instance
+
+### Via Web UI
+1. Click the **"Stop"** button on an instance card
+2. Instance gracefully shuts down
+
+### Via API
+```bash
+curl -X POST http://localhost:8080/api/instances/{name}/stop
+```
+
+## Edit Instance
+
+### Via Web UI
+1. Click the **"Edit"** button on an instance card
+2. Modify settings in the configuration dialog
+3. Changes require instance restart to take effect
+4. Click **"Update & Restart"** to apply changes
+
+### Via API
+Modify instance settings:
+
+```bash
+curl -X PUT http://localhost:8080/api/instances/{name} \
+ -H "Content-Type: application/json" \
+ -d '{
+ "backend_options": {
+ "threads": 8,
+      "ctx_size": 4096
+ }
+ }'
+```
+
+!!! note
+ Configuration changes require restarting the instance to take effect.
+
+
+## View Logs
+
+### Via Web UI
+
+1. Click the **"Logs"** button on any instance card
+2. Real-time log viewer opens
+
+### Via API
+Retrieve the instance logs:
+
+```bash
+# Get instance logs
+curl http://localhost:8080/api/instances/{name}/logs
+```
+
+## Delete Instance
+
+### Via Web UI
+1. Click the **"Delete"** button on an instance card
+2. Only stopped instances can be deleted
+3. Confirm deletion in the dialog
+
+### Via API
+```bash
+curl -X DELETE http://localhost:8080/api/instances/{name}
+```
+
+## Instance Proxy
+
+Llamactl proxies all requests to the underlying llama-server instances.
+
+```bash
+# Proxy a request to the underlying llama-server instance
+curl http://localhost:8080/api/instances/{name}/proxy/
+```
+
+Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
+
+### Instance Health
+
+#### Via Web UI
+
+1. The health status badge is displayed on each instance card
+
+#### Via API
+
+Check the health status of your instances:
+
+```bash
+curl http://localhost:8080/api/instances/{name}/proxy/health
+```
diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md
new file mode 100644
index 0000000..5608139
--- /dev/null
+++ b/docs/user-guide/troubleshooting.md
@@ -0,0 +1,160 @@
+# Troubleshooting
+
+Issues specific to Llamactl deployment and operation.
+
+## Configuration Issues
+
+### Invalid Configuration
+
+**Problem:** Invalid configuration preventing startup
+
+**Solutions:**
+1. Use minimal configuration:
+ ```yaml
+ server:
+ host: "0.0.0.0"
+ port: 8080
+ instances:
+ port_range: [8000, 9000]
+ ```
+
+2. Check data directory permissions:
+ ```bash
+ # Ensure data directory is writable (default: ~/.local/share/llamactl)
+ mkdir -p ~/.local/share/llamactl/{instances,logs}
+ ```
+
+## Instance Management Issues
+
+### Model Loading Failures
+
+**Problem:** Instance fails to start with model loading errors
+
+**Common Solutions:**
+- **llama-server not found:** Ensure `llama-server` binary is in PATH
+- **Wrong model format:** Ensure model is in GGUF format
+- **Insufficient memory:** Use smaller model or reduce context size
+- **Path issues:** Use absolute paths to model files
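+
+To quickly rule out a missing binary or a bad model path, you can check both from the shell (example path shown):
+
+```bash
+# Verify llama-server is on PATH and runnable
+which llama-server
+llama-server --version
+
+# Verify the model file exists and is readable
+ls -lh /path/to/model.gguf
+```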
+
+### Memory Issues
+
+**Problem:** Out of memory errors or system becomes unresponsive
+
+**Solutions:**
+1. **Reduce context size:**
+ ```json
+ {
+ "n_ctx": 1024
+ }
+ ```
+
+2. **Use quantized models:**
+ - Try Q4_K_M instead of higher precision models
+ - Use smaller model variants (7B instead of 13B)
+
+### GPU Configuration
+
+**Problem:** GPU not being used effectively
+
+**Solutions:**
+1. **Configure GPU layers:**
+ ```json
+ {
+ "n_gpu_layers": 35
+ }
+ ```
+
+### Advanced Instance Issues
+
+**Problem:** Complex model loading, performance, or compatibility issues
+
+Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
+
+**Resources:**
+- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
+- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
+- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
+
+**Testing directly with llama-server:**
+```bash
+# Test your model and parameters directly with llama-server
+llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
+```
+
+This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
+
+## API and Network Issues
+
+### CORS Errors
+
+**Problem:** Web UI shows CORS errors in browser console
+
+**Solutions:**
+1. **Configure allowed origins:**
+ ```yaml
+ server:
+ allowed_origins:
+ - "http://localhost:3000"
+ - "https://yourdomain.com"
+ ```
+
+## Authentication Issues
+
+**Problem:** API requests failing with authentication errors
+
+**Solutions:**
+1. **Disable authentication temporarily:**
+ ```yaml
+ auth:
+ require_management_auth: false
+ require_inference_auth: false
+ ```
+
+2. **Configure API keys:**
+ ```yaml
+ auth:
+ management_keys:
+ - "your-management-key"
+ inference_keys:
+ - "your-inference-key"
+ ```
+
+3. **Use correct Authorization header:**
+ ```bash
+ curl -H "Authorization: Bearer your-api-key" \
+ http://localhost:8080/api/v1/instances
+ ```
+
+## Debugging and Logs
+
+### Viewing Instance Logs
+
+```bash
+# Get instance logs via API
+curl http://localhost:8080/api/v1/instances/{name}/logs
+
+# Or check log files directly
+tail -f ~/.local/share/llamactl/logs/{instance-name}.log
+```
+
+### Enable Debug Logging
+
+```bash
+export LLAMACTL_LOG_LEVEL=debug
+llamactl
+```
+
+## Getting Help
+
+When reporting issues, include:
+
+1. **System information:**
+ ```bash
+ llamactl --version
+ ```
+
+2. **Configuration file** (remove sensitive keys)
+
+3. **Relevant log output**
+
+4. **Steps to reproduce the issue**
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..ed4be3a
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,68 @@
+site_name: Llamactl Documentation
+site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
+site_author: Llamactl Team
+site_url: https://llamactl.org
+
+repo_name: lordmathis/llamactl
+repo_url: https://github.com/lordmathis/llamactl
+
+theme:
+ name: material
+ palette:
+ # Palette toggle for light mode
+ - scheme: default
+ primary: indigo
+ accent: indigo
+ toggle:
+ icon: material/brightness-7
+ name: Switch to dark mode
+ # Palette toggle for dark mode
+ - scheme: slate
+ primary: indigo
+ accent: indigo
+ toggle:
+ icon: material/brightness-4
+ name: Switch to light mode
+ features:
+ - navigation.tabs
+ - navigation.sections
+ - navigation.expand
+ - navigation.top
+ - search.highlight
+ - search.share
+ - content.code.copy
+
+markdown_extensions:
+ - pymdownx.highlight:
+ anchor_linenums: true
+ - pymdownx.inlinehilite
+ - pymdownx.snippets
+ - pymdownx.superfences
+ - admonition
+ - pymdownx.details
+ - pymdownx.tabbed:
+ alternate_style: true
+ - attr_list
+ - md_in_html
+ - toc:
+ permalink: true
+
+nav:
+ - Home: index.md
+ - Getting Started:
+ - Installation: getting-started/installation.md
+ - Quick Start: getting-started/quick-start.md
+ - Configuration: getting-started/configuration.md
+ - User Guide:
+ - Managing Instances: user-guide/managing-instances.md
+ - API Reference: user-guide/api-reference.md
+ - Troubleshooting: user-guide/troubleshooting.md
+
+plugins:
+ - search
+ - git-revision-date-localized
+
+extra:
+ social:
+ - icon: fontawesome/brands/github
+ link: https://github.com/lordmathis/llamactl