diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f4a50e..9ffc0a3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ go install github.com/swaggo/swag/cmd/swag@latest # Update Swagger comments in pkg/server/handlers.go # Then regenerate docs -swag init -g cmd/server/main.go -o apidocs +swag init -g cmd/server/main.go ``` ## Pull Request Guidelines diff --git a/README.md b/README.md index d9fea15..9f2039b 100644 --- a/README.md +++ b/README.md @@ -4,133 +4,32 @@ **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.** -## Features - -### 🚀 Easy Model Management -- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) -- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests -- **State Persistence**: Ensure instances remain intact across server restarts - -### 🔗 Universal Compatibility -- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name -- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM -- **Docker Support**: Run backends in containers - -### 🌐 User-Friendly Interface -- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) -- **API Key Authentication**: Separate keys for management vs inference access - -### ⚡ Smart Operations -- **Instance Monitoring**: Health checks, auto-restart, log management -- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits -- **Environment Variables**: Set custom environment variables per instance for advanced configuration - -### 🔗 Remote Instance Deployment -- **Remote Node Support**: Deploy instances on remote hosts -- **Central Management**: Manage remote instances from a single dashboard -- **Seamless Routing**: Automatic request routing to remote instances +📚 **[Full Documentation →](https://llamactl.org)** ![Dashboard Screenshot](docs/images/dashboard.png) +## Features + +**🚀 Easy Model Management** +- **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality) +- **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits +- **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs + +**🔗 Flexible Integration** +- **OpenAI API Compatible**: Drop-in replacement - route requests to different models by instance name +- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM +- **Docker Ready**: Run backends in containers with full GPU support + +**🌐 Distributed Deployment** +- **Remote Instances**: Deploy instances on remote hosts +- **Central Management**: Manage everything from a single dashboard with automatic routing + ## Quick Start -```bash -# 1. Install backend (one-time setup) -# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start -# For MLX on macOS: pip install mlx-lm -# For vLLM: pip install vllm -# Or use Docker - no local installation required - -# 2. Download and run llamactl -LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') -curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz -sudo mv llamactl /usr/local/bin/ - -# 3. 
Start the server -llamactl -# Access dashboard at http://localhost:8080 -``` - -## Usage - -### Create and manage instances via web dashboard: -1. Open http://localhost:8080 -2. Click "Create Instance" -3. Choose backend type (llama.cpp, MLX, or vLLM) -4. Set model path and backend-specific options -5. Configure environment variables if needed (optional) -6. Start or stop the instance - -### Or use the REST API: -```bash -# Create llama.cpp instance -curl -X POST localhost:8080/api/v1/instances/my-7b-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}' - -# Create MLX instance (macOS) -curl -X POST localhost:8080/api/v1/instances/my-mlx-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' - -# Create vLLM instance with environment variables -curl -X POST localhost:8080/api/v1/instances/my-vllm-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}' - -# Use with OpenAI SDK -curl -X POST localhost:8080/v1/chat/completions \ - -H "Authorization: Bearer your-key" \ - -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}' -``` - -## Installation - -### Option 1: Download Binary (Recommended) - -```bash -# Linux/macOS - Get latest version and download -LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') -curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz -sudo mv llamactl /usr/local/bin/ - -# Or download manually from the releases page: -# https://github.com/lordmathis/llamactl/releases/latest - -# Windows - Download from releases page -``` - -### Option 2: Docker (No local backend installation required) - -```bash -# Clone repository and build Docker images -git clone https://github.com/lordmathis/llamactl.git -cd llamactl -mkdir -p data/llamacpp data/vllm models - -# Build and start llamactl with llama.cpp CUDA backend -docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d - -# Build and start llamactl with vLLM CUDA backend -docker-compose -f docker/docker-compose.yml up llamactl-vllm -d - -# Build from source using multi-stage build -docker build -f docker/Dockerfile.source -t llamactl:source . -``` - -**Features:** CUDA support, automatic latest release installation, no backend dependencies. -**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.). - -For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md). - -### Option 3: Build from Source -Requires Go 1.24+ and Node.js 22+ -```bash -git clone https://github.com/lordmathis/llamactl.git -cd llamactl -cd webui && npm ci && npm run build && cd .. -go build -o llamactl ./cmd/server -``` +1. Install a backend (llama.cpp, MLX, or vLLM) - see [Prerequisites](#prerequisites) below +2. [Download llamactl](#installation) for your platform +3. Run `llamactl` and open http://localhost:8080 +4. Create an instance and start inferencing! 
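
Once an instance is running, any OpenAI-compatible client can talk to it through llamactl's `/v1` endpoint, with requests routed by instance name. A minimal sketch based on the usage examples above (the instance name `my-7b-model` and the `your-key` API key are placeholders for your own values):

```bash
# Chat completion routed to the "my-7b-model" instance via the OpenAI-compatible API
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
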
## Prerequisites @@ -175,9 +74,9 @@ pip install vllm # Or use Docker - no local installation required ``` -## Backend Docker Support +### Docker Support -llamactl can run backends in Docker containers: +llamactl can run backends in Docker containers, eliminating the need for local backend installation: ```yaml backends: @@ -189,9 +88,58 @@ backends: enabled: true ``` -**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support. +## Installation -For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md). +### Option 1: Download Binary (Recommended) + +```bash +# Linux/macOS - Get latest version and download +LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') +curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz +sudo mv llamactl /usr/local/bin/ + +# Or download manually from the releases page: +# https://github.com/lordmathis/llamactl/releases/latest + +# Windows - Download from releases page +``` + +### Option 2: Docker (No local backend installation required) + +```bash +# Clone repository and build Docker images +git clone https://github.com/lordmathis/llamactl.git +cd llamactl +mkdir -p data/llamacpp data/vllm models + +# Build and start llamactl with llama.cpp CUDA backend +docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d + +# Build and start llamactl with vLLM CUDA backend +docker-compose -f docker/docker-compose.yml up llamactl-vllm -d + +# Build from source using multi-stage build +docker build -f docker/Dockerfile.source -t llamactl:source . +``` + +**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.). + +### Option 3: Build from Source +Requires Go 1.24+ and Node.js 22+ +```bash +git clone https://github.com/lordmathis/llamactl.git +cd llamactl +cd webui && npm ci && npm run build && cd .. +go build -o llamactl ./cmd/server +``` + +## Usage + +1. Open http://localhost:8080 +2. Click "Create Instance" +3. Choose backend type (llama.cpp, MLX, or vLLM) +4. Configure your model and options (ports and API keys are auto-assigned) +5. 
Start the instance and use it with any OpenAI-compatible client ## Configuration @@ -213,7 +161,7 @@ backends: docker: enabled: false image: "ghcr.io/ggml-org/llama.cpp:server" - args: ["run", "--rm", "--network", "host", "--gpus", "all"] + args: ["run", "--rm", "--network", "host", "--gpus", "all", "-v", "~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp"] environment: {} # Environment variables for the container vllm: @@ -223,7 +171,7 @@ backends: docker: enabled: false image: "vllm/vllm-openai:latest" - args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", "-v", "~/.local/share/llamactl/huggingface:/root/.cache/huggingface"] environment: {} # Environment variables for the container mlx: diff --git a/cmd/server/main.go b/cmd/server/main.go index 2cba231..dee87ae 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -22,6 +22,9 @@ var buildTime string = "unknown" // @license.name MIT License // @license.url https://opensource.org/license/mit/ // @basePath /api/v1 +// @securityDefinitions.apikey ApiKeyAuth +// @in header +// @name X-API-Key func main() { // --version flag to print the version diff --git a/docker/Dockerfile.source b/docker/Dockerfile.source index 2b4482f..9cc610a 100644 --- a/docker/Dockerfile.source +++ b/docker/Dockerfile.source @@ -33,7 +33,7 @@ RUN go mod download # Copy source code COPY cmd/ ./cmd/ COPY pkg/ ./pkg/ -COPY apidocs/ ./apidocs/ +COPY docs/ ./docs/ COPY webui/webui.go ./webui/ # Copy built webui from webui-builder diff --git a/docs-requirements.txt b/docs-requirements.txt index 632c6e1..05b03c0 100644 --- a/docs-requirements.txt +++ b/docs-requirements.txt @@ -1,5 +1,6 @@ -mkdocs-material==9.5.3 -mkdocs==1.5.3 -pymdown-extensions==10.7 -mkdocs-git-revision-date-localized-plugin==1.2.4 -mike==2.0.0 +mkdocs-material==9.6.22 +mkdocs==1.6.1 +pymdown-extensions==10.16.1 +mkdocs-git-revision-date-localized-plugin==1.4.7 +mike==2.1.3 +neoteroi-mkdocs==1.1.3 \ No newline at end of file diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..339c609 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1 @@ +[OAD(swagger.yaml)] \ No newline at end of file diff --git a/docs/getting-started/configuration.md b/docs/configuration.md similarity index 99% rename from docs/getting-started/configuration.md rename to docs/configuration.md index 6f9ee98..c271f29 100644 --- a/docs/getting-started/configuration.md +++ b/docs/configuration.md @@ -80,7 +80,7 @@ nodes: # Node configuration for multi-node deployment ### Configuration File Locations -Configuration files are searched in the following locations (in order of precedence): +Configuration files are searched in the following locations (in order of precedence, first found is used): **Linux:** - `./llamactl.yaml` or `./config.yaml` (current directory) diff --git a/docs/css/css-v1.1.3.css b/docs/css/css-v1.1.3.css new file mode 100644 index 0000000..e9daefe --- /dev/null +++ b/docs/css/css-v1.1.3.css @@ -0,0 +1,1814 @@ +/** + * All CSS for the neoteroi-mkdocs extensions. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --nt-color-0: #CD853F; + --nt-color-1: #B22222; + --nt-color-2: #000080; + --nt-color-3: #4B0082; + --nt-color-4: #3CB371; + --nt-color-5: #D2B48C; + --nt-color-6: #FF00FF; + --nt-color-7: #98FB98; + --nt-color-8: #FFEBCD; + --nt-color-9: #2E8B57; + --nt-color-10: #6A5ACD; + --nt-color-11: #48D1CC; + --nt-color-12: #FFA500; + --nt-color-13: #F4A460; + --nt-color-14: #A52A2A; + --nt-color-15: #FFE4C4; + --nt-color-16: #FF4500; + --nt-color-17: #AFEEEE; + --nt-color-18: #FA8072; + --nt-color-19: #2F4F4F; + --nt-color-20: #FFDAB9; + --nt-color-21: #BC8F8F; + --nt-color-22: #FFC0CB; + --nt-color-23: #00FA9A; + --nt-color-24: #F0FFF0; + --nt-color-25: #FFFACD; + --nt-color-26: #F5F5F5; + --nt-color-27: #FF6347; + --nt-color-28: #FFFFF0; + --nt-color-29: #7FFFD4; + --nt-color-30: #E9967A; + --nt-color-31: #7B68EE; + --nt-color-32: #FFF8DC; + --nt-color-33: #0000CD; + --nt-color-34: #D2691E; + --nt-color-35: #708090; + --nt-color-36: #5F9EA0; + --nt-color-37: #008080; + --nt-color-38: #008000; + --nt-color-39: #FFE4E1; + --nt-color-40: #FFFF00; + --nt-color-41: #FFFAF0; + --nt-color-42: #DCDCDC; + --nt-color-43: #ADFF2F; + --nt-color-44: #ADD8E6; + --nt-color-45: #8B008B; + --nt-color-46: #7FFF00; + --nt-color-47: #800000; + --nt-color-48: #20B2AA; + --nt-color-49: #556B2F; + --nt-color-50: #778899; + --nt-color-51: #E6E6FA; + --nt-color-52: #FFFAFA; + --nt-color-53: #FF7F50; + --nt-color-54: #FF0000; + --nt-color-55: #F5DEB3; + --nt-color-56: #008B8B; + --nt-color-57: #66CDAA; + --nt-color-58: #808000; + --nt-color-59: #FAF0E6; + --nt-color-60: #00BFFF; + --nt-color-61: #C71585; + --nt-color-62: #00FFFF; + --nt-color-63: #8B4513; + --nt-color-64: #F0F8FF; + --nt-color-65: #FAEBD7; + --nt-color-66: #8B0000; + --nt-color-67: #4682B4; + --nt-color-68: #F0E68C; + --nt-color-69: #BDB76B; + --nt-color-70: #A0522D; + --nt-color-71: #FAFAD2; + --nt-color-72: #FFD700; + --nt-color-73: #DEB887; + --nt-color-74: #E0FFFF; + --nt-color-75: #8A2BE2; + --nt-color-76: #32CD32; + --nt-color-77: #87CEFA; + --nt-color-78: #00CED1; + --nt-color-79: #696969; + --nt-color-80: #DDA0DD; + --nt-color-81: #EE82EE; + --nt-color-82: #FFB6C1; + --nt-color-83: #8FBC8F; + --nt-color-84: #D8BFD8; + --nt-color-85: #9400D3; + --nt-color-86: #A9A9A9; + --nt-color-87: #FFFFE0; + --nt-color-88: #FFF5EE; + --nt-color-89: #FFF0F5; + --nt-color-90: #FFDEAD; + --nt-color-91: #800080; + --nt-color-92: #B0E0E6; + --nt-color-93: #9932CC; + --nt-color-94: #DAA520; + --nt-color-95: #F0FFFF; + --nt-color-96: #40E0D0; + --nt-color-97: #00FF7F; + --nt-color-98: #006400; + --nt-color-99: #808080; + --nt-color-100: #87CEEB; + --nt-color-101: #0000FF; + --nt-color-102: #6495ED; + --nt-color-103: #FDF5E6; + --nt-color-104: #B8860B; + --nt-color-105: #BA55D3; + --nt-color-106: #C0C0C0; + --nt-color-107: #000000; + --nt-color-108: #F08080; + --nt-color-109: #B0C4DE; + --nt-color-110: #00008B; + --nt-color-111: #6B8E23; + --nt-color-112: #FFE4B5; + --nt-color-113: #FFA07A; + --nt-color-114: #9ACD32; + --nt-color-115: #FFFFFF; + --nt-color-116: #F5F5DC; + --nt-color-117: #90EE90; + --nt-color-118: #1E90FF; + --nt-color-119: #7CFC00; + --nt-color-120: #FF69B4; + --nt-color-121: #F8F8FF; + --nt-color-122: #F5FFFA; + --nt-color-123: #00FF00; + --nt-color-124: #D3D3D3; + --nt-color-125: #DB7093; + --nt-color-126: #DA70D6; + --nt-color-127: #FF1493; + --nt-color-128: #228B22; + --nt-color-129: #FFEFD5; + --nt-color-130: #4169E1; + --nt-color-131: #191970; + --nt-color-132: #9370DB; + 
--nt-color-133: #483D8B; + --nt-color-134: #FF8C00; + --nt-color-135: #EEE8AA; + --nt-color-136: #CD5C5C; + --nt-color-137: #DC143C; +} + +:root { + --nt-group-0-main: #000000; + --nt-group-0-dark: #FFFFFF; + --nt-group-0-light: #000000; + --nt-group-0-main-bg: #F44336; + --nt-group-0-dark-bg: #BA000D; + --nt-group-0-light-bg: #FF7961; + --nt-group-1-main: #000000; + --nt-group-1-dark: #FFFFFF; + --nt-group-1-light: #000000; + --nt-group-1-main-bg: #E91E63; + --nt-group-1-dark-bg: #B0003A; + --nt-group-1-light-bg: #FF6090; + --nt-group-2-main: #FFFFFF; + --nt-group-2-dark: #FFFFFF; + --nt-group-2-light: #000000; + --nt-group-2-main-bg: #9C27B0; + --nt-group-2-dark-bg: #6A0080; + --nt-group-2-light-bg: #D05CE3; + --nt-group-3-main: #FFFFFF; + --nt-group-3-dark: #FFFFFF; + --nt-group-3-light: #000000; + --nt-group-3-main-bg: #673AB7; + --nt-group-3-dark-bg: #320B86; + --nt-group-3-light-bg: #9A67EA; + --nt-group-4-main: #FFFFFF; + --nt-group-4-dark: #FFFFFF; + --nt-group-4-light: #000000; + --nt-group-4-main-bg: #3F51B5; + --nt-group-4-dark-bg: #002984; + --nt-group-4-light-bg: #757DE8; + --nt-group-5-main: #000000; + --nt-group-5-dark: #FFFFFF; + --nt-group-5-light: #000000; + --nt-group-5-main-bg: #2196F3; + --nt-group-5-dark-bg: #0069C0; + --nt-group-5-light-bg: #6EC6FF; + --nt-group-6-main: #000000; + --nt-group-6-dark: #FFFFFF; + --nt-group-6-light: #000000; + --nt-group-6-main-bg: #03A9F4; + --nt-group-6-dark-bg: #007AC1; + --nt-group-6-light-bg: #67DAFF; + --nt-group-7-main: #000000; + --nt-group-7-dark: #000000; + --nt-group-7-light: #000000; + --nt-group-7-main-bg: #00BCD4; + --nt-group-7-dark-bg: #008BA3; + --nt-group-7-light-bg: #62EFFF; + --nt-group-8-main: #000000; + --nt-group-8-dark: #FFFFFF; + --nt-group-8-light: #000000; + --nt-group-8-main-bg: #009688; + --nt-group-8-dark-bg: #00675B; + --nt-group-8-light-bg: #52C7B8; + --nt-group-9-main: #000000; + --nt-group-9-dark: #FFFFFF; + --nt-group-9-light: #000000; + --nt-group-9-main-bg: #4CAF50; + --nt-group-9-dark-bg: #087F23; + --nt-group-9-light-bg: #80E27E; + --nt-group-10-main: #000000; + --nt-group-10-dark: #000000; + --nt-group-10-light: #000000; + --nt-group-10-main-bg: #8BC34A; + --nt-group-10-dark-bg: #5A9216; + --nt-group-10-light-bg: #BEF67A; + --nt-group-11-main: #000000; + --nt-group-11-dark: #000000; + --nt-group-11-light: #000000; + --nt-group-11-main-bg: #CDDC39; + --nt-group-11-dark-bg: #99AA00; + --nt-group-11-light-bg: #FFFF6E; + --nt-group-12-main: #000000; + --nt-group-12-dark: #000000; + --nt-group-12-light: #000000; + --nt-group-12-main-bg: #FFEB3B; + --nt-group-12-dark-bg: #C8B900; + --nt-group-12-light-bg: #FFFF72; + --nt-group-13-main: #000000; + --nt-group-13-dark: #000000; + --nt-group-13-light: #000000; + --nt-group-13-main-bg: #FFC107; + --nt-group-13-dark-bg: #C79100; + --nt-group-13-light-bg: #FFF350; + --nt-group-14-main: #000000; + --nt-group-14-dark: #000000; + --nt-group-14-light: #000000; + --nt-group-14-main-bg: #FF9800; + --nt-group-14-dark-bg: #C66900; + --nt-group-14-light-bg: #FFC947; + --nt-group-15-main: #000000; + --nt-group-15-dark: #FFFFFF; + --nt-group-15-light: #000000; + --nt-group-15-main-bg: #FF5722; + --nt-group-15-dark-bg: #C41C00; + --nt-group-15-light-bg: #FF8A50; + --nt-group-16-main: #FFFFFF; + --nt-group-16-dark: #FFFFFF; + --nt-group-16-light: #000000; + --nt-group-16-main-bg: #795548; + --nt-group-16-dark-bg: #4B2C20; + --nt-group-16-light-bg: #A98274; + --nt-group-17-main: #000000; + --nt-group-17-dark: #FFFFFF; + --nt-group-17-light: #000000; + --nt-group-17-main-bg: 
#9E9E9E; + --nt-group-17-dark-bg: #707070; + --nt-group-17-light-bg: #CFCFCF; + --nt-group-18-main: #000000; + --nt-group-18-dark: #FFFFFF; + --nt-group-18-light: #000000; + --nt-group-18-main-bg: #607D8B; + --nt-group-18-dark-bg: #34515E; + --nt-group-18-light-bg: #8EACBB; +} + +.nt-pastello { + --nt-group-0-main: #000000; + --nt-group-0-dark: #000000; + --nt-group-0-light: #000000; + --nt-group-0-main-bg: #EF9A9A; + --nt-group-0-dark-bg: #BA6B6C; + --nt-group-0-light-bg: #FFCCCB; + --nt-group-1-main: #000000; + --nt-group-1-dark: #000000; + --nt-group-1-light: #000000; + --nt-group-1-main-bg: #F48FB1; + --nt-group-1-dark-bg: #BF5F82; + --nt-group-1-light-bg: #FFC1E3; + --nt-group-2-main: #000000; + --nt-group-2-dark: #000000; + --nt-group-2-light: #000000; + --nt-group-2-main-bg: #CE93D8; + --nt-group-2-dark-bg: #9C64A6; + --nt-group-2-light-bg: #FFC4FF; + --nt-group-3-main: #000000; + --nt-group-3-dark: #000000; + --nt-group-3-light: #000000; + --nt-group-3-main-bg: #B39DDB; + --nt-group-3-dark-bg: #836FA9; + --nt-group-3-light-bg: #E6CEFF; + --nt-group-4-main: #000000; + --nt-group-4-dark: #000000; + --nt-group-4-light: #000000; + --nt-group-4-main-bg: #9FA8DA; + --nt-group-4-dark-bg: #6F79A8; + --nt-group-4-light-bg: #D1D9FF; + --nt-group-5-main: #000000; + --nt-group-5-dark: #000000; + --nt-group-5-light: #000000; + --nt-group-5-main-bg: #90CAF9; + --nt-group-5-dark-bg: #5D99C6; + --nt-group-5-light-bg: #C3FDFF; + --nt-group-6-main: #000000; + --nt-group-6-dark: #000000; + --nt-group-6-light: #000000; + --nt-group-6-main-bg: #81D4FA; + --nt-group-6-dark-bg: #4BA3C7; + --nt-group-6-light-bg: #B6FFFF; + --nt-group-7-main: #000000; + --nt-group-7-dark: #000000; + --nt-group-7-light: #000000; + --nt-group-7-main-bg: #80DEEA; + --nt-group-7-dark-bg: #4BACB8; + --nt-group-7-light-bg: #B4FFFF; + --nt-group-8-main: #000000; + --nt-group-8-dark: #000000; + --nt-group-8-light: #000000; + --nt-group-8-main-bg: #80CBC4; + --nt-group-8-dark-bg: #4F9A94; + --nt-group-8-light-bg: #B2FEF7; + --nt-group-9-main: #000000; + --nt-group-9-dark: #000000; + --nt-group-9-light: #000000; + --nt-group-9-main-bg: #A5D6A7; + --nt-group-9-dark-bg: #75A478; + --nt-group-9-light-bg: #D7FFD9; + --nt-group-10-main: #000000; + --nt-group-10-dark: #000000; + --nt-group-10-light: #000000; + --nt-group-10-main-bg: #C5E1A5; + --nt-group-10-dark-bg: #94AF76; + --nt-group-10-light-bg: #F8FFD7; + --nt-group-11-main: #000000; + --nt-group-11-dark: #000000; + --nt-group-11-light: #000000; + --nt-group-11-main-bg: #E6EE9C; + --nt-group-11-dark-bg: #B3BC6D; + --nt-group-11-light-bg: #FFFFCE; + --nt-group-12-main: #000000; + --nt-group-12-dark: #000000; + --nt-group-12-light: #000000; + --nt-group-12-main-bg: #FFF59D; + --nt-group-12-dark-bg: #CBC26D; + --nt-group-12-light-bg: #FFFFCF; + --nt-group-13-main: #000000; + --nt-group-13-dark: #000000; + --nt-group-13-light: #000000; + --nt-group-13-main-bg: #FFE082; + --nt-group-13-dark-bg: #CAAE53; + --nt-group-13-light-bg: #FFFFB3; + --nt-group-14-main: #000000; + --nt-group-14-dark: #000000; + --nt-group-14-light: #000000; + --nt-group-14-main-bg: #FFCC80; + --nt-group-14-dark-bg: #CA9B52; + --nt-group-14-light-bg: #FFFFB0; + --nt-group-15-main: #000000; + --nt-group-15-dark: #000000; + --nt-group-15-light: #000000; + --nt-group-15-main-bg: #FFAB91; + --nt-group-15-dark-bg: #C97B63; + --nt-group-15-light-bg: #FFDDC1; + --nt-group-16-main: #000000; + --nt-group-16-dark: #000000; + --nt-group-16-light: #000000; + --nt-group-16-main-bg: #BCAAA4; + --nt-group-16-dark-bg: #8C7B75; + 
--nt-group-16-light-bg: #EFDCD5; + --nt-group-17-main: #000000; + --nt-group-17-dark: #000000; + --nt-group-17-light: #000000; + --nt-group-17-main-bg: #EEEEEE; + --nt-group-17-dark-bg: #BCBCBC; + --nt-group-17-light-bg: #FFFFFF; + --nt-group-18-main: #000000; + --nt-group-18-dark: #000000; + --nt-group-18-light: #000000; + --nt-group-18-main-bg: #B0BEC5; + --nt-group-18-dark-bg: #808E95; + --nt-group-18-light-bg: #E2F1F8; +} + +.nt-group-0 .nt-plan-group-summary, +.nt-group-0 .nt-timeline-dot { + color: var(--nt-group-0-dark); + background-color: var(--nt-group-0-dark-bg); +} +.nt-group-0 .period { + color: var(--nt-group-0-main); + background-color: var(--nt-group-0-main-bg); +} + +.nt-group-1 .nt-plan-group-summary, +.nt-group-1 .nt-timeline-dot { + color: var(--nt-group-1-dark); + background-color: var(--nt-group-1-dark-bg); +} +.nt-group-1 .period { + color: var(--nt-group-1-main); + background-color: var(--nt-group-1-main-bg); +} + +.nt-group-2 .nt-plan-group-summary, +.nt-group-2 .nt-timeline-dot { + color: var(--nt-group-2-dark); + background-color: var(--nt-group-2-dark-bg); +} +.nt-group-2 .period { + color: var(--nt-group-2-main); + background-color: var(--nt-group-2-main-bg); +} + +.nt-group-3 .nt-plan-group-summary, +.nt-group-3 .nt-timeline-dot { + color: var(--nt-group-3-dark); + background-color: var(--nt-group-3-dark-bg); +} +.nt-group-3 .period { + color: var(--nt-group-3-main); + background-color: var(--nt-group-3-main-bg); +} + +.nt-group-4 .nt-plan-group-summary, +.nt-group-4 .nt-timeline-dot { + color: var(--nt-group-4-dark); + background-color: var(--nt-group-4-dark-bg); +} +.nt-group-4 .period { + color: var(--nt-group-4-main); + background-color: var(--nt-group-4-main-bg); +} + +.nt-group-5 .nt-plan-group-summary, +.nt-group-5 .nt-timeline-dot { + color: var(--nt-group-5-dark); + background-color: var(--nt-group-5-dark-bg); +} +.nt-group-5 .period { + color: var(--nt-group-5-main); + background-color: var(--nt-group-5-main-bg); +} + +.nt-group-6 .nt-plan-group-summary, +.nt-group-6 .nt-timeline-dot { + color: var(--nt-group-6-dark); + background-color: var(--nt-group-6-dark-bg); +} +.nt-group-6 .period { + color: var(--nt-group-6-main); + background-color: var(--nt-group-6-main-bg); +} + +.nt-group-7 .nt-plan-group-summary, +.nt-group-7 .nt-timeline-dot { + color: var(--nt-group-7-dark); + background-color: var(--nt-group-7-dark-bg); +} +.nt-group-7 .period { + color: var(--nt-group-7-main); + background-color: var(--nt-group-7-main-bg); +} + +.nt-group-8 .nt-plan-group-summary, +.nt-group-8 .nt-timeline-dot { + color: var(--nt-group-8-dark); + background-color: var(--nt-group-8-dark-bg); +} +.nt-group-8 .period { + color: var(--nt-group-8-main); + background-color: var(--nt-group-8-main-bg); +} + +.nt-group-9 .nt-plan-group-summary, +.nt-group-9 .nt-timeline-dot { + color: var(--nt-group-9-dark); + background-color: var(--nt-group-9-dark-bg); +} +.nt-group-9 .period { + color: var(--nt-group-9-main); + background-color: var(--nt-group-9-main-bg); +} + +.nt-group-10 .nt-plan-group-summary, +.nt-group-10 .nt-timeline-dot { + color: var(--nt-group-10-dark); + background-color: var(--nt-group-10-dark-bg); +} +.nt-group-10 .period { + color: var(--nt-group-10-main); + background-color: var(--nt-group-10-main-bg); +} + +.nt-group-11 .nt-plan-group-summary, +.nt-group-11 .nt-timeline-dot { + color: var(--nt-group-11-dark); + background-color: var(--nt-group-11-dark-bg); +} +.nt-group-11 .period { + color: var(--nt-group-11-main); + background-color: 
var(--nt-group-11-main-bg); +} + +.nt-group-12 .nt-plan-group-summary, +.nt-group-12 .nt-timeline-dot { + color: var(--nt-group-12-dark); + background-color: var(--nt-group-12-dark-bg); +} +.nt-group-12 .period { + color: var(--nt-group-12-main); + background-color: var(--nt-group-12-main-bg); +} + +.nt-group-13 .nt-plan-group-summary, +.nt-group-13 .nt-timeline-dot { + color: var(--nt-group-13-dark); + background-color: var(--nt-group-13-dark-bg); +} +.nt-group-13 .period { + color: var(--nt-group-13-main); + background-color: var(--nt-group-13-main-bg); +} + +.nt-group-14 .nt-plan-group-summary, +.nt-group-14 .nt-timeline-dot { + color: var(--nt-group-14-dark); + background-color: var(--nt-group-14-dark-bg); +} +.nt-group-14 .period { + color: var(--nt-group-14-main); + background-color: var(--nt-group-14-main-bg); +} + +.nt-group-15 .nt-plan-group-summary, +.nt-group-15 .nt-timeline-dot { + color: var(--nt-group-15-dark); + background-color: var(--nt-group-15-dark-bg); +} +.nt-group-15 .period { + color: var(--nt-group-15-main); + background-color: var(--nt-group-15-main-bg); +} + +.nt-group-16 .nt-plan-group-summary, +.nt-group-16 .nt-timeline-dot { + color: var(--nt-group-16-dark); + background-color: var(--nt-group-16-dark-bg); +} +.nt-group-16 .period { + color: var(--nt-group-16-main); + background-color: var(--nt-group-16-main-bg); +} + +.nt-group-17 .nt-plan-group-summary, +.nt-group-17 .nt-timeline-dot { + color: var(--nt-group-17-dark); + background-color: var(--nt-group-17-dark-bg); +} +.nt-group-17 .period { + color: var(--nt-group-17-main); + background-color: var(--nt-group-17-main-bg); +} + +.nt-group-18 .nt-plan-group-summary, +.nt-group-18 .nt-timeline-dot { + color: var(--nt-group-18-dark); + background-color: var(--nt-group-18-dark-bg); +} +.nt-group-18 .period { + color: var(--nt-group-18-main); + background-color: var(--nt-group-18-main-bg); +} + +/** + * Extra CSS file for MkDocs and the neoteroi.timeline extension. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +.nt-error { + border: 2px dashed darkred; + padding: 0 1rem; + background: #faf9ba; + color: darkred; +} + +.nt-timeline { + margin-top: 30px; +} +.nt-timeline .nt-timeline-title { + font-size: 1.1rem; + margin-top: 0; +} +.nt-timeline .nt-timeline-sub-title { + margin-top: 0; +} +.nt-timeline .nt-timeline-content { + font-size: 0.8rem; + border-bottom: 2px dashed #ccc; + padding-bottom: 1.2rem; +} +.nt-timeline.horizontal .nt-timeline-items { + flex-direction: row; + overflow-x: scroll; +} +.nt-timeline.horizontal .nt-timeline-items > div { + min-width: 400px; + margin-right: 50px; +} +.nt-timeline.horizontal.reverse .nt-timeline-items { + flex-direction: row-reverse; +} +.nt-timeline.horizontal.center .nt-timeline-before { + background-image: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal.center .nt-timeline-after { + background-image: linear-gradient(180deg, rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal.center .nt-timeline-items { + background-image: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal .nt-timeline-dot { + left: 50%; +} +.nt-timeline.horizontal .nt-timeline-dot:not(.bigger) { + top: calc(50% - 4px); +} +.nt-timeline.horizontal .nt-timeline-dot.bigger { + top: calc(50% - 15px); +} +.nt-timeline.vertical .nt-timeline-items { + flex-direction: column; +} +.nt-timeline.vertical.reverse .nt-timeline-items { + flex-direction: column-reverse; +} +.nt-timeline.vertical.center .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-dot { + left: calc(50% - 10px); +} +.nt-timeline.vertical.center .nt-timeline-dot:not(.bigger) { + top: 10px; +} +.nt-timeline.vertical.center .nt-timeline-dot.bigger { + left: calc(50% - 20px); +} +.nt-timeline.vertical.left { + padding-left: 100px; +} +.nt-timeline.vertical.left .nt-timeline-item { + padding-left: 70px; +} +.nt-timeline.vertical.left .nt-timeline-sub-title { + left: -100px; + width: 100px; +} +.nt-timeline.vertical.left .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-dot { + left: 21px; + top: 8px; +} +.nt-timeline.vertical.left .nt-timeline-dot.bigger { + top: 0px; + left: 10px; +} +.nt-timeline.vertical.right { + padding-right: 100px; +} +.nt-timeline.vertical.right .nt-timeline-sub-title { + right: -100px; 
+ text-align: left; + width: 100px; +} +.nt-timeline.vertical.right .nt-timeline-item { + padding-right: 70px; +} +.nt-timeline.vertical.right .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-dot { + right: 21px; + top: 8px; +} +.nt-timeline.vertical.right .nt-timeline-dot.bigger { + top: 10px; + right: 10px; +} + +.nt-timeline-items { + display: flex; + position: relative; +} +.nt-timeline-items > div { + min-height: 100px; + padding-top: 2px; + padding-bottom: 20px; +} + +.nt-timeline-before { + content: ""; + height: 15px; +} + +.nt-timeline-after { + content: ""; + height: 60px; + margin-bottom: 20px; +} + +.nt-timeline-sub-title { + position: absolute; + width: 50%; + top: 4px; + font-size: 18px; + color: var(--nt-color-50); +} + +[data-md-color-scheme=slate] .nt-timeline-sub-title { + color: var(--nt-color-51); +} + +.nt-timeline-item { + position: relative; +} + +.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item { + padding-left: calc(50% + 40px); +} +.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title { + left: 0; + padding-right: 40px; + text-align: right; +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) { + padding-left: calc(50% + 40px); +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title { + left: 0; + padding-right: 40px; + text-align: right; +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) { + text-align: right; + padding-right: calc(50% + 40px); +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title { + right: 0; + padding-left: 40px; + text-align: left; +} + +.nt-timeline-dot { + position: relative; + width: 20px; + height: 20px; + border-radius: 100%; + background-color: #fc5b5b; + position: absolute; + top: 0px; + z-index: 2; + display: flex; + justify-content: center; + align-items: center; + box-shadow: 0 2px 1px -1px rgba(0, 0, 0, 0.2), 0 1px 1px 0 rgba(0, 0, 0, 0.14), 0 1px 3px 0 rgba(0, 0, 0, 0.12); + border: 3px solid white; +} +.nt-timeline-dot:not(.bigger) .icon { + font-size: 10px; +} +.nt-timeline-dot.bigger { + width: 40px; + height: 40px; + padding: 3px; +} +.nt-timeline-dot .icon { + color: white; + position: relative; + top: 1px; +} + +/* Fix for webkit (Chrome, Safari) */ +@supports not (-moz-appearance: none) { + /* + This fix is necessary, for some reason, to render the timeline properly + inside `details` elements used by pymdownx. Firefox doesn't need this fix, + it renders elements properly. 
+ */ + details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title, +details .nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title { + left: -40px; + } + details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title { + right: -40px; + } + details .nt-timeline.vertical.center .nt-timeline-dot { + left: calc(50% - 12px); + } + details .nt-timeline-dot.bigger { + font-size: 1rem !important; + } +} +/* default colors */ +.nt-timeline-item:nth-child(0) .nt-timeline-dot { + background-color: var(--nt-color-0); +} + +.nt-timeline-item:nth-child(1) .nt-timeline-dot { + background-color: var(--nt-color-1); +} + +.nt-timeline-item:nth-child(2) .nt-timeline-dot { + background-color: var(--nt-color-2); +} + +.nt-timeline-item:nth-child(3) .nt-timeline-dot { + background-color: var(--nt-color-3); +} + +.nt-timeline-item:nth-child(4) .nt-timeline-dot { + background-color: var(--nt-color-4); +} + +.nt-timeline-item:nth-child(5) .nt-timeline-dot { + background-color: var(--nt-color-5); +} + +.nt-timeline-item:nth-child(6) .nt-timeline-dot { + background-color: var(--nt-color-6); +} + +.nt-timeline-item:nth-child(7) .nt-timeline-dot { + background-color: var(--nt-color-7); +} + +.nt-timeline-item:nth-child(8) .nt-timeline-dot { + background-color: var(--nt-color-8); +} + +.nt-timeline-item:nth-child(9) .nt-timeline-dot { + background-color: var(--nt-color-9); +} + +.nt-timeline-item:nth-child(10) .nt-timeline-dot { + background-color: var(--nt-color-10); +} + +.nt-timeline-item:nth-child(11) .nt-timeline-dot { + background-color: var(--nt-color-11); +} + +.nt-timeline-item:nth-child(12) .nt-timeline-dot { + background-color: var(--nt-color-12); +} + +.nt-timeline-item:nth-child(13) .nt-timeline-dot { + background-color: var(--nt-color-13); +} + +.nt-timeline-item:nth-child(14) .nt-timeline-dot { + background-color: var(--nt-color-14); +} + +.nt-timeline-item:nth-child(15) .nt-timeline-dot { + background-color: var(--nt-color-15); +} + +.nt-timeline-item:nth-child(16) .nt-timeline-dot { + background-color: var(--nt-color-16); +} + +.nt-timeline-item:nth-child(17) .nt-timeline-dot { + background-color: var(--nt-color-17); +} + +.nt-timeline-item:nth-child(18) .nt-timeline-dot { + background-color: var(--nt-color-18); +} + +.nt-timeline-item:nth-child(19) .nt-timeline-dot { + background-color: var(--nt-color-19); +} + +.nt-timeline-item:nth-child(20) .nt-timeline-dot { + background-color: var(--nt-color-20); +} + +/** + * Extra CSS for the neoteroi.projects.gantt extension. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --nt-scrollbar-color: #2751b0; + --nt-plan-actions-height: 24px; + --nt-units-background: #ff9800; + --nt-months-background: #2751b0; + --nt-plan-vertical-line-color: #a3a3a3ad; +} + +.nt-pastello { + --nt-scrollbar-color: #9fb8f4; + --nt-units-background: #f5dc82; + --nt-months-background: #5b7fd1; +} + +[data-md-color-scheme=slate] { + --nt-units-background: #003773; +} +[data-md-color-scheme=slate] .nt-pastello { + --nt-units-background: #3f4997; +} + +.nt-plan-root { + min-height: 200px; + scrollbar-width: 20px; + scrollbar-color: var(--nt-scrollbar-color); + display: flex; +} +.nt-plan-root ::-webkit-scrollbar { + width: 20px; +} +.nt-plan-root ::-webkit-scrollbar-track { + box-shadow: inset 0 0 5px grey; + border-radius: 10px; +} +.nt-plan-root ::-webkit-scrollbar-thumb { + background: var(--nt-scrollbar-color); + border-radius: 10px; +} +.nt-plan-root .nt-plan { + flex: 80%; +} +.nt-plan-root.no-groups .nt-plan-periods { + padding-left: 0; +} +.nt-plan-root.no-groups .nt-plan-group-summary { + display: none; +} +.nt-plan-root .nt-timeline-dot.bigger { + top: -10px; +} +.nt-plan-root .nt-timeline-dot.bigger[title] { + cursor: help; +} + +.nt-plan { + white-space: nowrap; + overflow-x: auto; + display: flex; +} +.nt-plan .ug-timeline-dot { + left: 368px; + top: -8px; + cursor: help; +} + +.months { + display: flex; +} + +.month { + flex: auto; + display: inline-block; + box-shadow: rgba(0, 0, 0, 0.2) 0px 3px 1px -2px, rgba(0, 0, 0, 0.14) 0px 2px 2px 0px, rgba(0, 0, 0, 0.12) 0px 1px 5px 0px inset; + background-color: var(--nt-months-background); + color: white; + text-transform: uppercase; + font-family: Roboto, Helvetica, Arial, sans-serif; + padding: 2px 5px; + font-size: 12px; + border: 1px solid #000; + width: 150px; + border-radius: 8px; +} + +.nt-plan-group-activities { + flex: auto; + position: relative; +} + +.nt-vline { + border-left: 1px dashed var(--nt-plan-vertical-line-color); + height: 100%; + left: 0; + position: absolute; + margin-left: -0.5px; + top: 0; + -webkit-transition: all 0.5s linear !important; + -moz-transition: all 0.5s linear !important; + -ms-transition: all 0.5s linear !important; + -o-transition: all 0.5s linear !important; + transition: all 0.5s linear !important; + z-index: -2; +} + +.nt-plan-activity { + display: flex; + margin: 2px 0; + background-color: rgba(187, 187, 187, 0.2509803922); +} + +.actions { + height: var(--nt-plan-actions-height); +} + +.actions { + position: relative; +} + +.period { + display: inline-block; + height: var(--nt-plan-actions-height); + width: 120px; + position: absolute; + left: 0px; + background: #1da1f2; + border-radius: 5px; + transition: all 0.5s; + cursor: help; + -webkit-transition: width 1s ease-in-out; + -moz-transition: width 1s ease-in-out; + -o-transition: width 1s ease-in-out; + transition: width 1s ease-in-out; +} +.period .nt-tooltip { + display: none; + top: 30px; + position: relative; + padding: 1rem; + text-align: center; + font-size: 12px; +} +.period:hover .nt-tooltip { + display: inline-block; +} + +.period-0 { + left: 340px; + visibility: visible; + background-color: rgb(69, 97, 101); +} + +.period-1 { + left: 40px; + visibility: visible; + background-color: green; +} + +.period-2 { + left: 120px; + visibility: visible; + background-color: pink; + width: 80px; +} + +.period-3 { + left: 190px; + visibility: visible; + background-color: darkred; + width: 150px; +} + +.weeks > span, +.days > span { + height: 25px; +} + +.weeks > span { + 
display: inline-block; + margin: 0; + padding: 0; + font-weight: bold; +} +.weeks > span .week-text { + font-size: 10px; + position: absolute; + display: inline-block; + padding: 3px 4px; +} + +.days { + z-index: -2; + position: relative; +} + +.day-text { + font-size: 10px; + position: absolute; + display: inline-block; + padding: 3px 4px; +} + +.period span { + font-size: 12px; + vertical-align: top; + margin-left: 4px; + color: black; + background: rgba(255, 255, 255, 0.6588235294); + border-radius: 6px; + padding: 0 4px; +} + +.weeks, +.days { + height: 20px; + display: flex; + box-sizing: content-box; +} + +.months { + display: flex; +} + +.week, +.day { + height: 20px; + position: relative; + border: 1; + flex: auto; + border: 2px solid white; + border-radius: 4px; + background-color: var(--nt-units-background); + cursor: help; +} + +.years { + display: flex; +} + +.year { + text-align: center; + border-right: 1px solid var(--nt-plan-vertical-line-color); + font-weight: bold; +} +.year:first-child { + border-left: 1px solid var(--nt-plan-vertical-line-color); +} +.year:first-child:last-child { + width: 100%; +} + +.quarters { + display: flex; +} + +.quarter { + width: 12.5%; + text-align: center; + border-right: 1px solid var(--nt-plan-vertical-line-color); + font-weight: bold; +} +.quarter:first-child { + border-left: 1px solid var(--nt-plan-vertical-line-color); +} + +.nt-plan-group { + margin: 20px 0; + position: relative; +} + +.nt-plan-group { + display: flex; +} + +.nt-plan-group-summary { + background: #2751b0; + width: 150px; + white-space: normal; + padding: 0.1rem 0.5rem; + border-radius: 5px; + color: #fff; + z-index: 3; +} +.nt-plan-group-summary p { + margin: 0; + padding: 0; + font-size: 0.6rem; + color: #fff; +} + +.nt-plan-group-summary, +.month, +.period, +.week, +.day, +.nt-tooltip { + border: 3px solid white; + box-shadow: 0 2px 3px -1px rgba(0, 0, 0, 0.2), 0 3px 3px 0 rgba(0, 0, 0, 0.14), 0 1px 5px 0 rgba(0, 0, 0, 0.12); +} + +.nt-plan-periods { + padding-left: 150px; +} + +.months { + z-index: 2; + position: relative; +} + +.weeks { + position: relative; + top: -2px; + z-index: 0; +} + +.month, +.quarter, +.year, +.week, +.day, +.nt-tooltip { + font-family: Roboto, Helvetica, Arial, sans-serif; + box-sizing: border-box; +} + +.nt-cards.nt-grid { + display: grid; + grid-auto-columns: 1fr; + gap: 0.5rem; + max-width: 100vw; + overflow-x: auto; + padding: 1px; +} +.nt-cards.nt-grid.cols-1 { + grid-template-columns: repeat(1, 1fr); +} +.nt-cards.nt-grid.cols-2 { + grid-template-columns: repeat(2, 1fr); +} +.nt-cards.nt-grid.cols-3 { + grid-template-columns: repeat(3, 1fr); +} +.nt-cards.nt-grid.cols-4 { + grid-template-columns: repeat(4, 1fr); +} +.nt-cards.nt-grid.cols-5 { + grid-template-columns: repeat(5, 1fr); +} +.nt-cards.nt-grid.cols-6 { + grid-template-columns: repeat(6, 1fr); +} + +@media only screen and (max-width: 400px) { + .nt-cards.nt-grid { + grid-template-columns: repeat(1, 1fr) !important; + } +} +.nt-card { + box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14), 0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12); +} +.nt-card:hover { + box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.24), 0 3px 1px -2px rgba(0, 0, 0, 0.3), 0 1px 5px 0 rgba(0, 0, 0, 0.22); +} + +[data-md-color-scheme=slate] .nt-card { + box-shadow: 0 2px 2px 0 rgba(4, 40, 33, 0.14), 0 3px 1px -2px rgba(40, 86, 94, 0.47), 0 1px 5px 0 rgba(139, 252, 255, 0.64); +} +[data-md-color-scheme=slate] .nt-card:hover { + box-shadow: 0 2px 2px 0 rgba(0, 255, 206, 0.14), 0 3px 1px -2px rgba(33, 
156, 177, 0.47), 0 1px 5px 0 rgba(96, 251, 255, 0.64); +} + +.nt-card > a { + color: var(--md-default-fg-color); +} + +.nt-card > a > div { + cursor: pointer; +} + +.nt-card { + padding: 5px; + margin-bottom: 0.5rem; +} + +.nt-card-title { + font-size: 1rem; + font-weight: bold; + margin: 4px 0 8px 0; + line-height: 22px; +} + +.nt-card-content { + padding: 0.4rem 0.8rem 0.8rem 0.8rem; +} + +.nt-card-text { + font-size: 14px; + padding: 0; + margin: 0; +} + +.nt-card .nt-card-image { + text-align: center; + border-radius: 2px; + background-position: center center; + background-size: cover; + background-repeat: no-repeat; + min-height: 120px; +} + +.nt-card .nt-card-image.tags img { + margin-top: 12px; +} + +.nt-card .nt-card-image img { + height: 105px; + margin-top: 5px; +} + +.nt-card .nt-card-icon { + text-align: center; + padding-top: 12px; + min-height: 120px; +} + +.nt-card .nt-card-icon .icon { + font-size: 95px; + line-height: 1; +} + +.nt-card a:hover, +.nt-card a:focus { + color: var(--md-accent-fg-color); +} + +.nt-card h2 { + margin: 0; +} + +/** + * Extra CSS file recommended for MkDocs and neoteroi.spantable extension. + * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +.span-table-wrapper table { + border-collapse: collapse; + margin-bottom: 2rem; + border-radius: 0.1rem; +} + +.span-table td, +.span-table th { + padding: 0.2rem; + background-color: var(--md-default-bg-color); + font-size: 0.64rem; + max-width: 100%; + overflow: auto; + touch-action: auto; + border-top: 0.05rem solid var(--md-typeset-table-color); + padding: 0.9375em 1.25em; + vertical-align: top; +} + +.span-table tr:first-child td { + font-weight: 700; + min-width: 5rem; + padding: 0.9375em 1.25em; + vertical-align: top; +} + +.span-table td:first-child { + border-left: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table td:last-child { + border-right: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table tr:last-child { + border-bottom: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table [colspan], +.span-table [rowspan] { + font-weight: bold; + border: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table tr:not(:first-child):hover td:not([colspan]):not([rowspan]), +.span-table td[colspan]:hover, +.span-table td[rowspan]:hover { + background-color: rgba(0, 0, 0, 0.035); + box-shadow: 0 0.05rem 0 var(--md-default-bg-color) inset; + transition: background-color 125ms; +} + +.nt-contribs { + margin-top: 2rem; + font-size: small; + border-top: 1px dotted lightgray; + padding-top: 0.5rem; +} +.nt-contribs .nt-contributors { + padding-top: 0.5rem; + display: flex; + flex-wrap: wrap; +} +.nt-contribs .nt-contributor { + background: lightgrey; + background-size: cover; + width: 40px; + height: 40px; + border-radius: 100%; + margin: 0 6px 6px 0; + cursor: help; + opacity: 0.7; +} +.nt-contribs .nt-contributor:hover { + opacity: 1; +} +.nt-contribs .nt-contributors-title { + font-style: italic; + margin-bottom: 0; +} +.nt-contribs .nt-initials { + text-transform: uppercase; + font-size: 20px; + text-align: center; + width: 40px; + height: 40px; + display: inline-block; + vertical-align: middle; + position: relative; + top: 4px; + color: inherit; + font-weight: bold; +} +.nt-contribs .nt-group-0 { + background-color: var(--nt-color-0); +} +.nt-contribs .nt-group-1 { + background-color: var(--nt-color-1); +} +.nt-contribs .nt-group-2 { + background-color: var(--nt-color-2); +} +.nt-contribs .nt-group-3 { + background-color: var(--nt-color-3); +} +.nt-contribs .nt-group-4 
{ + background-color: var(--nt-color-4); +} +.nt-contribs .nt-group-5 { + background-color: var(--nt-color-5); +} +.nt-contribs .nt-group-6 { + background-color: var(--nt-color-6); +} +.nt-contribs .nt-group-7 { + color: #000; + background-color: var(--nt-color-7); +} +.nt-contribs .nt-group-8 { + color: #000; + background-color: var(--nt-color-8); +} +.nt-contribs .nt-group-9 { + background-color: var(--nt-color-9); +} +.nt-contribs .nt-group-10 { + background-color: var(--nt-color-10); +} +.nt-contribs .nt-group-11 { + background-color: var(--nt-color-11); +} +.nt-contribs .nt-group-12 { + background-color: var(--nt-color-12); +} +.nt-contribs .nt-group-13 { + background-color: var(--nt-color-13); +} +.nt-contribs .nt-group-14 { + background-color: var(--nt-color-14); +} +.nt-contribs .nt-group-15 { + color: #000; + background-color: var(--nt-color-15); +} +.nt-contribs .nt-group-16 { + background-color: var(--nt-color-16); +} +.nt-contribs .nt-group-17 { + color: #000; + background-color: var(--nt-color-17); +} +.nt-contribs .nt-group-18 { + background-color: var(--nt-color-18); +} +.nt-contribs .nt-group-19 { + background-color: var(--nt-color-19); +} +.nt-contribs .nt-group-20 { + color: #000; + background-color: var(--nt-color-20); +} +.nt-contribs .nt-group-21 { + color: #000; + background-color: var(--nt-color-21); +} +.nt-contribs .nt-group-22 { + color: #000; + background-color: var(--nt-color-22); +} +.nt-contribs .nt-group-23 { + color: #000; + background-color: var(--nt-color-23); +} +.nt-contribs .nt-group-24 { + color: #000; + background-color: var(--nt-color-24); +} +.nt-contribs .nt-group-25 { + color: #000; + background-color: var(--nt-color-25); +} +.nt-contribs .nt-group-26 { + color: #000; + background-color: var(--nt-color-26); +} +.nt-contribs .nt-group-27 { + background-color: var(--nt-color-27); +} +.nt-contribs .nt-group-28 { + color: #000; + background-color: var(--nt-color-28); +} +.nt-contribs .nt-group-29 { + color: #000; + background-color: var(--nt-color-29); +} +.nt-contribs .nt-group-30 { + background-color: var(--nt-color-30); +} +.nt-contribs .nt-group-31 { + background-color: var(--nt-color-31); +} +.nt-contribs .nt-group-32 { + color: #000; + background-color: var(--nt-color-32); +} +.nt-contribs .nt-group-33 { + background-color: var(--nt-color-33); +} +.nt-contribs .nt-group-34 { + background-color: var(--nt-color-34); +} +.nt-contribs .nt-group-35 { + background-color: var(--nt-color-35); +} +.nt-contribs .nt-group-36 { + background-color: var(--nt-color-36); +} +.nt-contribs .nt-group-37 { + background-color: var(--nt-color-37); +} +.nt-contribs .nt-group-38 { + background-color: var(--nt-color-38); +} +.nt-contribs .nt-group-39 { + color: #000; + background-color: var(--nt-color-39); +} +.nt-contribs .nt-group-40 { + color: #000; + background-color: var(--nt-color-40); +} +.nt-contribs .nt-group-41 { + color: #000; + background-color: var(--nt-color-41); +} +.nt-contribs .nt-group-42 { + color: #000; + background-color: var(--nt-color-42); +} +.nt-contribs .nt-group-43 { + color: #000; + background-color: var(--nt-color-43); +} +.nt-contribs .nt-group-44 { + color: #000; + background-color: var(--nt-color-44); +} +.nt-contribs .nt-group-45 { + background-color: var(--nt-color-45); +} +.nt-contribs .nt-group-46 { + color: #000; + background-color: var(--nt-color-46); +} +.nt-contribs .nt-group-47 { + background-color: var(--nt-color-47); +} +.nt-contribs .nt-group-48 { + background-color: var(--nt-color-48); +} +.nt-contribs .nt-group-49 { + 
background-color: var(--nt-color-49); +} + +/** + * CSS for OpenAPI HTML generated with PyMdown Extensions option. + * + * This CSS file works when using the OAD plugin with pymdownx. + * See here how to use it: + * https://www.neoteroi.dev/mkdocs-plugins/web/oad/ + * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --http-get-color: green; + --http-delete-color: #dc0101; + --http-head-color: slateblue; + --http-options-color: steelblue; + --http-patch-color: darkorange; + --http-post-color: darkblue; + --http-put-color: darkmagenta; + --http-trace-color: darkcyan; + --http-route-param-color: rgb(51, 128, 210); + --oad-operation-separator-border-color: gray; + --oad-block-border-color: #00bfa5; + --oad-small-note-color: #666; + --oad-indent-border-color: #c5c5c5; +} + +@media screen { + /* Slate theme, i.e. dark mode */ + [data-md-color-scheme=slate] { + --http-get-color: #2ea82e; + --http-post-color: #0093c0; + --http-put-color: #c333c3; + --oad-small-note-color: #afafaf; + } +} +.api-tag { + font-weight: bold; +} + +span[class^=http-] { + font-weight: bold; + color: #fff; + padding: 4px 1rem; + border-radius: 2px; + margin-right: 0.5rem; +} + +.http-get { + background-color: var(--http-get-color); +} + +.http-delete { + background-color: var(--http-delete-color); +} + +.http-post { + background-color: var(--http-post-color); +} + +.http-patch { + background-color: var(--http-patch-color); +} + +.http-trace { + background-color: var(--http-trace-color); +} + +.http-put { + background-color: var(--http-put-color); +} + +.http-head { + background-color: var(--http-head-color); +} + +.http-options { + background-color: var(--http-options-color); +} + +.route-param { + color: var(--http-route-param-color); +} + +.operation-separator + h3[id^=get] .route-param { + color: var(--http-get-color); +} + +.operation-separator + h3[id^=delete] .route-param { + color: var(--http-delete-color); +} + +.operation-separator + h3[id^=post] .route-param { + color: var(--http-post-color); +} + +.operation-separator + h3[id^=patch] .route-param { + color: var(--http-patch-color); +} + +.operation-separator + h3[id^=trace] .route-param { + color: var(--http-trace-color); +} + +.operation-separator + h3[id^=put] .route-param { + color: var(--http-put-color); +} + +.operation-separator + h3[id^=head] .route-param { + color: var(--http-head-color); +} + +.operation-separator + h3[id^=options] .route-param { + color: var(--http-options-color); +} + +.api-version { + font-size: 1.2rem; +} + +.operation-separator { + margin: 0 !important; + border-bottom: 2px dotted var(--oad-operation-separator-border-color) !important; + padding-top: 0.5rem; +} + +.operation-separator + h3 { + margin-top: 1rem; +} + +.string-type { + color: var(--md-code-hl-string-color); +} + +.integer-type, .number-type { + color: var(--md-code-hl-number-color); +} + +.boolean-type { + color: var(--md-code-hl-keyword-color); +} + +.format { + color: var(--md-code-hl-name-color); +} + +.null-type { + color: var(--md-code-hl-keyword-color); +} + +a.ref-link { + color: var(--md-code-hl-special-color); +} + +.request-block + div { + padding-left: 1rem; + border-left: 2px dashed var(--oad-block-border-color); +} + +.small-note { + font-size: 14px; + color: var(--oad-small-note-color); +} + +.request-body-title { + margin-bottom: 4px; +} + +.request-body-title + .tabbed-set, +.response-title + .tabbed-set, +.message-separator + .tabbed-set, +.common-response, +.response-section { + margin-top: 2px; + padding-left: 1rem; + border-left: 
2px dotted var(--oad-indent-border-color); +} + +.info-data { + font-size: 0.6rem; +} + +.message-separator { + visibility: hidden; +} + +.sub-section-title { + font-style: italic; + font-size: 14px; +} diff --git a/apidocs/docs.go b/docs/docs.go similarity index 53% rename from apidocs/docs.go rename to docs/docs.go index 4b521b1..f46ac36 100644 --- a/apidocs/docs.go +++ b/docs/docs.go @@ -1,5 +1,5 @@ -// Package apidocs Code generated by swaggo/swag. DO NOT EDIT -package apidocs +// Package docs Code generated by swaggo/swag. DO NOT EDIT +package docs import "github.com/swaggo/swag" @@ -19,7 +19,7 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { - "/backends/llama-cpp/devices": { + "/api/v1/backends/llama-cpp/devices": { "get": { "security": [ { @@ -28,7 +28,7 @@ const docTemplate = `{ ], "description": "Returns a list of available devices for the llama server", "tags": [ - "backends" + "Backends" ], "summary": "List available devices for llama server", "responses": { @@ -47,7 +47,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/help": { + "/api/v1/backends/llama-cpp/help": { "get": { "security": [ { @@ -56,7 +56,7 @@ const docTemplate = `{ ], "description": "Returns the help text for the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get help for llama server", "responses": { @@ -75,7 +75,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/parse-command": { + "/api/v1/backends/llama-cpp/parse-command": { "post": { "security": [ { @@ -90,7 +90,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse llama-server command", "parameters": [ @@ -108,7 +108,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -132,7 +132,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/version": { + "/api/v1/backends/llama-cpp/version": { "get": { "security": [ { @@ -141,7 +141,7 @@ const docTemplate = `{ ], "description": "Returns the version of the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get version of llama server", "responses": { @@ -160,7 +160,7 @@ const docTemplate = `{ } } }, - "/backends/mlx/parse-command": { + "/api/v1/backends/mlx/parse-command": { "post": { "security": [ { @@ -175,7 +175,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse mlx_lm.server command", "parameters": [ @@ -193,7 +193,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -208,7 +208,7 @@ const docTemplate = `{ } } }, - "/backends/vllm/parse-command": { + "/api/v1/backends/vllm/parse-command": { "post": { "security": [ { @@ -223,7 +223,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse vllm serve command", "parameters": [ @@ -241,7 +241,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -256,7 +256,7 @@ const docTemplate = `{ } } }, - "/instances": { + "/api/v1/instances": { "get": { "security": [ { @@ -265,7 +265,7 @@ const docTemplate = `{ ], "description": "Returns a list of all instances managed by the server", 
"tags": [ - "instances" + "Instances" ], "summary": "List all instances", "responses": { @@ -274,7 +274,7 @@ const docTemplate = `{ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } } }, @@ -287,7 +287,7 @@ const docTemplate = `{ } } }, - "/instances/{name}": { + "/api/v1/instances/{name}": { "get": { "security": [ { @@ -296,7 +296,7 @@ const docTemplate = `{ ], "description": "Returns the details of a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Get details of a specific instance", "parameters": [ @@ -312,7 +312,7 @@ const docTemplate = `{ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -340,7 +340,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Update an instance's configuration", "parameters": [ @@ -357,7 +357,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -365,7 +365,7 @@ const docTemplate = `{ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -393,7 +393,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Create and start a new instance", "parameters": [ @@ -410,7 +410,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -418,7 +418,7 @@ const docTemplate = `{ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -443,7 +443,7 @@ const docTemplate = `{ ], "description": "Stops and removes a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Delete an instance", "parameters": [ @@ -474,7 +474,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/logs": { + "/api/v1/instances/{name}/logs": { "get": { "security": [ { @@ -483,7 +483,7 @@ const docTemplate = `{ ], "description": "Returns the logs from a specific instance by name with optional line limit", "tags": [ - "instances" + "Instances" ], "summary": "Get logs from a specific instance", "parameters": [ @@ -523,7 +523,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/proxy": { + "/api/v1/instances/{name}/proxy": { "get": { "security": [ { @@ -532,9 +532,9 @@ const docTemplate = `{ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -576,9 +576,9 @@ const docTemplate = `{ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -613,7 +613,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/restart": { + "/api/v1/instances/{name}/restart": { 
"post": { "security": [ { @@ -622,7 +622,7 @@ const docTemplate = `{ ], "description": "Restarts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Restart a running instance", "parameters": [ @@ -638,7 +638,7 @@ const docTemplate = `{ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -656,7 +656,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/start": { + "/api/v1/instances/{name}/start": { "post": { "security": [ { @@ -665,7 +665,7 @@ const docTemplate = `{ ], "description": "Starts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Start a stopped instance", "parameters": [ @@ -681,7 +681,7 @@ const docTemplate = `{ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -699,7 +699,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/stop": { + "/api/v1/instances/{name}/stop": { "post": { "security": [ { @@ -708,7 +708,7 @@ const docTemplate = `{ ], "description": "Stops a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Stop a running instance", "parameters": [ @@ -724,7 +724,7 @@ const docTemplate = `{ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -742,6 +742,675 @@ const docTemplate = `{ } } }, + "/api/v1/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "Nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "Nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/version": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the version of the llamactl command", + "tags": [ + "System" + ], + "summary": "Get llamactl version", + "responses": { + "200": { + "description": "Version information", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + 
"Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": 
{ + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + 
"ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -754,7 +1423,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "openai" + "OpenAI" ], "summary": "OpenAI-compatible proxy endpoint", "responses": { @@ -785,7 +1454,7 @@ const docTemplate = `{ ], "description": "Returns a list of instances in a format compatible with OpenAI API", "tags": [ - "openai" + "OpenAI" ], "summary": "List instances in OpenAI-compatible format", "responses": { @@ -803,63 +1472,34 @@ const docTemplate = `{ } } } - }, - "/version": { - "get": { - "security": [ - { - "ApiKeyAuth": [] - } - ], - "description": "Returns the version of the llamactl command", - "tags": [ - "version" - ], - "summary": "Get llamactl version", - "responses": { - "200": { - "description": "Version information", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } - } } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", 
+ "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -878,36 +1518,11 @@ const docTemplate = `{ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + "server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, @@ -950,6 +1565,13 @@ const docTemplate = `{ } } } + }, + "securityDefinitions": { + "ApiKeyAuth": { + "type": "apiKey", + "name": "X-API-Key", + "in": "header" + } } }` diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md deleted file mode 100644 index b6846e3..0000000 --- a/docs/getting-started/quick-start.md +++ /dev/null @@ -1,190 +0,0 @@ -# Quick Start - -This guide will help you get Llamactl up and running in just a few minutes. - -## Step 1: Start Llamactl - -Start the Llamactl server: - -```bash -llamactl -``` - -By default, Llamactl will start on `http://localhost:8080`. - -## Step 2: Access the Web UI - -Open your web browser and navigate to: - -``` -http://localhost:8080 -``` - -Login with the management API key. By default it is generated during server startup. Copy it from the terminal output. - -You should see the Llamactl web interface. - -## Step 3: Create Your First Instance - -1. Click the "Add Instance" button -2. Fill in the instance configuration: - - **Name**: Give your instance a descriptive name - - **Backend Type**: Choose from llama.cpp, MLX, or vLLM - - **Model**: Model path or identifier for your chosen backend - - **Additional Options**: Backend-specific parameters - -3. Click "Create Instance" - -## Step 4: Start Your Instance - -Once created, you can: - -- **Start** the instance by clicking the start button -- **Monitor** its status in real-time -- **View logs** by clicking the logs button -- **Stop** the instance when needed - -## Example Configurations - -Here are basic example configurations for each backend: - -**llama.cpp backend:** -```json -{ - "name": "llama2-7b", - "backend_type": "llama_cpp", - "backend_options": { - "model": "/path/to/llama-2-7b-chat.gguf", - "threads": 4, - "ctx_size": 2048, - "gpu_layers": 32 - } -} -``` - -**MLX backend (macOS only):** -```json -{ - "name": "mistral-mlx", - "backend_type": "mlx_lm", - "backend_options": { - "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", - "temp": 0.7, - "max_tokens": 2048 - } -} -``` - -**vLLM backend:** -```json -{ - "name": "dialogpt-vllm", - "backend_type": "vllm", - "backend_options": { - "model": "microsoft/DialoGPT-medium", - "tensor_parallel_size": 2, - "gpu_memory_utilization": 0.9 - } -} -``` - -## Docker Support - -Llamactl can run backends in Docker containers. 
To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below: - -```yaml -backends: - vllm: - command: "vllm" - args: ["serve"] - docker: - enabled: true - image: "vllm/vllm-openai:latest" - args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] -``` - -## Using the API - -You can also manage instances via the REST API: - -```bash -# List all instances -curl http://localhost:8080/api/instances - -# Create a new llama.cpp instance -curl -X POST http://localhost:8080/api/instances/my-model \ - -H "Content-Type: application/json" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/path/to/model.gguf" - } - }' - -# Start an instance -curl -X POST http://localhost:8080/api/instances/my-model/start -``` - -## OpenAI Compatible API - -Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools. - -### Chat Completions - -Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint: - -```bash -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "my-model", - "messages": [ - { - "role": "user", - "content": "Hello! Can you help me write a Python function?" - } - ], - "max_tokens": 150, - "temperature": 0.7 - }' -``` - -### Using with Python OpenAI Client - -You can also use the official OpenAI Python client: - -```python -from openai import OpenAI - -# Point the client to your Llamactl server -client = OpenAI( - base_url="http://localhost:8080/v1", - api_key="not-needed" # Llamactl doesn't require API keys by default -) - -# Create a chat completion -response = client.chat.completions.create( - model="my-model", # Use the name of your instance - messages=[ - {"role": "user", "content": "Explain quantum computing in simple terms"} - ], - max_tokens=200, - temperature=0.7 -) - -print(response.choices[0].message.content) -``` - -### List Available Models - -Get a list of running instances (models) in OpenAI-compatible format: - -```bash -curl http://localhost:8080/v1/models -``` - -## Next Steps - -- Manage instances [Managing Instances](../user-guide/managing-instances.md) -- Explore the [API Reference](../user-guide/api-reference.md) -- Configure advanced settings in the [Configuration](configuration.md) guide diff --git a/docs/images/create_instance.png b/docs/images/create_instance.png index c1ce856..9972f40 100644 Binary files a/docs/images/create_instance.png and b/docs/images/create_instance.png differ diff --git a/docs/images/dashboard.png b/docs/images/dashboard.png index 393f374..55fe728 100644 Binary files a/docs/images/dashboard.png and b/docs/images/dashboard.png differ diff --git a/docs/index.md b/docs/index.md index 501d426..e81fed3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,20 +14,20 @@ Welcome to the Llamactl documentation! 
## Quick Links -- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running -- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options -- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl -- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management -- [API Reference](user-guide/api-reference.md) - Complete API documentation +- [Installation Guide](installation.md) - Get Llamactl up and running +- [Configuration Guide](configuration.md) - Detailed configuration options +- [Quick Start](quick-start.md) - Your first steps with Llamactl +- [Managing Instances](managing-instances.md) - Instance lifecycle management +- [API Reference](api-reference.md) - Complete API documentation ## Getting Help If you need help or have questions: -- Check the [Troubleshooting](user-guide/troubleshooting.md) guide +- Check the [Troubleshooting](troubleshooting.md) guide - Visit the [GitHub repository](https://github.com/lordmathis/llamactl) -- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings +- Review the [Configuration Guide](configuration.md) for advanced settings ## License diff --git a/docs/getting-started/installation.md b/docs/installation.md similarity index 93% rename from docs/getting-started/installation.md rename to docs/installation.md index 413e1fc..1e4f4ae 100644 --- a/docs/getting-started/installation.md +++ b/docs/installation.md @@ -42,15 +42,10 @@ Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc vLLM provides high-throughput distributed serving for LLMs. Install vLLM: ```bash -# Install via pip (requires Python 3.8+, GPU required) -pip install vllm - -# Or in a virtual environment (recommended) +# Install in a virtual environment python -m venv vllm-env source vllm-env/bin/activate pip install vllm - -# For production deployments, consider container-based installation ``` ## Installation Methods @@ -82,7 +77,7 @@ llamactl provides Dockerfiles for creating Docker images with backends pre-insta **Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html). -#### Using Docker Compose +**Using Docker Compose** ```bash # Clone the repository @@ -103,9 +98,9 @@ Access the dashboard at: - llamactl with llama.cpp: http://localhost:8080 - llamactl with vLLM: http://localhost:8081 -#### Using Docker Build and Run +**Using Docker Build and Run** -**llamactl with llama.cpp CUDA:** +1. llamactl with llama.cpp CUDA: ```bash docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda . docker run -d \ @@ -116,7 +111,7 @@ docker run -d \ llamactl:llamacpp-cuda ``` -**llamactl with vLLM CUDA:** +2. llamactl with vLLM CUDA: ```bash docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda . docker run -d \ @@ -127,7 +122,7 @@ docker run -d \ llamactl:vllm-cuda ``` -**llamactl built from source:** +3. llamactl built from source: ```bash docker build -f docker/Dockerfile.source -t llamactl:source . 
docker run -d \ diff --git a/docs/user-guide/managing-instances.md b/docs/managing-instances.md similarity index 65% rename from docs/user-guide/managing-instances.md rename to docs/managing-instances.md index b02de2d..d67c0c8 100644 --- a/docs/user-guide/managing-instances.md +++ b/docs/managing-instances.md @@ -9,13 +9,17 @@ Llamactl provides two ways to manage instances: - **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard - **REST API**: Programmatic access for automation and integration -![Dashboard Screenshot](../images/dashboard.png) +![Dashboard Screenshot](images/dashboard.png) ### Authentication -If authentication is enabled: +Llamactl uses a **Management API Key** to authenticate requests to the management API (creating, starting, stopping instances). All curl examples below use `<token>` as a placeholder - replace this with your actual Management API Key. + +By default, authentication is required. If you don't configure a management API key in your configuration file, llamactl will auto-generate one and print it to the terminal on startup. See the [Configuration](configuration.md) guide for details. + +For Web UI access: 1. Navigate to the web UI -2. Enter your credentials +2. Enter your Management API Key 3. Bearer token is stored for the session ### Theme Support @@ -33,9 +37,9 @@ Each instance is displayed as a card showing: ## Create Instance -### Via Web UI +**Via Web UI** -![Create Instance Screenshot](../images/create_instance.png) +![Create Instance Screenshot](images/create_instance.png) 1. Click the **"Create Instance"** button on the dashboard 2. Enter a unique **Name** for your instance (only required field) @@ -59,14 +63,19 @@ Each instance is displayed as a card showing: - **llama.cpp**: Threads, context size, GPU layers, port, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc. + +!!! tip "Auto-Assignment" + Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. + 8.
Click **"Create"** to save the instance -### Via API +**Via API** ```bash # Create llama.cpp instance with local model file -curl -X POST http://localhost:8080/api/instances/my-llama-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { @@ -74,12 +83,14 @@ curl -X POST http://localhost:8080/api/instances/my-llama-instance \ "threads": 8, "ctx_size": 4096, "gpu_layers": 32 - } + }, + "nodes": ["main"] }' # Create MLX instance (macOS only) -curl -X POST http://localhost:8080/api/instances/my-mlx-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "mlx_lm", "backend_options": { @@ -89,12 +100,14 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \ "max_tokens": 2048 }, "auto_restart": true, - "max_restarts": 3 + "max_restarts": 3, + "nodes": ["main"] }' # Create vLLM instance -curl -X POST http://localhost:8080/api/instances/my-vllm-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "vllm", "backend_options": { @@ -108,24 +121,28 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \ "CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO", "PYTHONPATH": "/custom/path" - } + }, + "nodes": ["main"] }' # Create llama.cpp instance with HuggingFace model -curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ +curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { "hf_repo": "unsloth/gemma-3-27b-it-GGUF", "hf_file": "gemma-3-27b-it-GGUF.gguf", "gpu_layers": 32 - } + }, + "nodes": ["main"] }' # Create instance on specific remote node -curl -X POST http://localhost:8080/api/instances/remote-llama \ +curl -X POST http://localhost:8080/api/v1/instances/remote-llama \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { @@ -134,46 +151,62 @@ curl -X POST http://localhost:8080/api/instances/remote-llama \ }, "nodes": ["worker1"] }' + +# Create instance on multiple nodes for high availability +curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/models/llama-7b.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1", "worker2", "worker3"] + }' ``` ## Start Instance -### Via Web UI +**Via Web UI** 1. Click the **"Start"** button on an instance card 2. Watch the status change to "Unknown" 3. Monitor progress in the logs 4. Instance status changes to "Ready" when ready -### Via API +**Via API** ```bash -curl -X POST http://localhost:8080/api/instances/{name}/start +curl -X POST http://localhost:8080/api/v1/instances/{name}/start \ + -H "Authorization: Bearer <token>" ``` ## Stop Instance -### Via Web UI +**Via Web UI** 1. Click the **"Stop"** button on an instance card 2.
Instance gracefully shuts down -### Via API +**Via API** ```bash -curl -X POST http://localhost:8080/api/instances/{name}/stop +curl -X POST http://localhost:8080/api/v1/instances/{name}/stop \ + -H "Authorization: Bearer <token>" ``` ## Edit Instance -### Via Web UI +**Via Web UI** 1. Click the **"Edit"** button on an instance card 2. Modify settings in the configuration dialog 3. Changes require instance restart to take effect 4. Click **"Update & Restart"** to apply changes -### Via API +**Via API** Modify instance settings: ```bash -curl -X PUT http://localhost:8080/api/instances/{name} \ +curl -X PUT http://localhost:8080/api/v1/instances/{name} \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_options": { "threads": 8, @@ -188,29 +221,31 @@ curl -X PUT http://localhost:8080/api/instances/{name} \ ## View Logs -### Via Web UI +**Via Web UI** 1. Click the **"Logs"** button on any instance card 2. Real-time log viewer opens -### Via API +**Via API** Check instance status in real-time: ```bash -# Get instance details -curl http://localhost:8080/api/instances/{name}/logs +# Get instance logs +curl http://localhost:8080/api/v1/instances/{name}/logs \ + -H "Authorization: Bearer <token>" ``` ## Delete Instance -### Via Web UI +**Via Web UI** 1. Click the **"Delete"** button on an instance card 2. Only stopped instances can be deleted 3. Confirm deletion in the dialog -### Via API +**Via API** ```bash -curl -X DELETE http://localhost:8080/api/instances/{name} +curl -X DELETE http://localhost:8080/api/v1/instances/{name} \ + -H "Authorization: Bearer <token>" ``` ## Instance Proxy @@ -218,8 +253,9 @@ curl -X DELETE http://localhost:8080/api/instances/{name} Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM). ```bash -# Get instance details -curl http://localhost:8080/api/instances/{name}/proxy/ +# Proxy requests to the instance +curl http://localhost:8080/api/v1/instances/{name}/proxy/ \ + -H "Authorization: Bearer <token>" ``` All backends provide OpenAI-compatible endpoints. Check the respective documentation: @@ -229,15 +265,16 @@ All backends provide OpenAI-compatible endpoints. Check the respective documenta ### Instance Health -#### Via Web UI +**Via Web UI** 1. The health status badge is displayed on each instance card -#### Via API +**Via API** Check the health status of your instances: ```bash -curl http://localhost:8080/api/instances/{name}/proxy/health +curl http://localhost:8080/api/v1/instances/{name}/proxy/health \ + -H "Authorization: Bearer <token>" ``` diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000..f65f9b2 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,263 @@ +# Quick Start + +This guide will help you get Llamactl up and running in just a few minutes. + +**Before you begin:** Ensure you have at least one backend installed (llama.cpp, MLX, or vLLM). See the [Installation Guide](installation.md#prerequisites) for backend setup. + +## Core Concepts + +Before you start, let's clarify a few key terms: + +- **Instance**: A running backend server that serves a specific model. Each instance has a unique name and runs independently. +- **Backend**: The inference engine that actually runs the model (llama.cpp, MLX, or vLLM). You need at least one backend installed before creating instances. +- **Node**: In multi-machine setups, a node represents one machine. Most users will just use the default "main" node for single-machine deployments.
+- **Proxy Architecture**: Llamactl acts as a proxy in front of your instances. You make requests to llamactl (e.g., `http://localhost:8080/v1/chat/completions`), and it routes them to the appropriate backend instance. This means you don't need to track individual instance ports or endpoints. + +## Authentication + +Llamactl uses two types of API keys: + +- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances). +- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). + +By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide. + +## Start Llamactl + +Start the Llamactl server: + +```bash +llamactl +``` + +``` +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ MANAGEMENT AUTHENTICATION REQUIRED +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +🔑 Generated Management API Key: + + sk-management-... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ INFERENCE AUTHENTICATION REQUIRED +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +🔑 Generated Inference API Key: + + sk-inference-... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ IMPORTANT +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +• These keys are auto-generated and will change on restart +• For production, add explicit keys to your configuration +• Copy these keys before they disappear from the terminal +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Llamactl server listening on 0.0.0.0:8080 +``` + +Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests. + +By default, Llamactl will start on `http://localhost:8080`. + +## Access the Web UI + +Open your web browser and navigate to: + +``` +http://localhost:8080 +``` + +Login with the management API key from the terminal output. + +You should see the Llamactl web interface. + +## Create Your First Instance + +1. Click the "Add Instance" button +2. Fill in the instance configuration: + - **Name**: Give your instance a descriptive name + - **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups) + - **Backend Type**: Choose from llama.cpp, MLX, or vLLM + - **Model**: Model path or huggingface repo + - **Additional Options**: Backend-specific parameters + + !!! tip "Auto-Assignment" + Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. + + !!! note "Remote Node Deployment" + If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes. + +3. 
Click "Create Instance" + +## Start Your Instance + +Once created, you can: + +- **Start** the instance by clicking the start button +- **Monitor** its status in real-time +- **View logs** by clicking the logs button +- **Stop** the instance when needed + +## Example Configurations + +Here are basic example configurations for each backend: + +**llama.cpp backend:** +```json +{ + "name": "llama2-7b", + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/llama-2-7b-chat.gguf", + "threads": 4, + "ctx_size": 2048, + "gpu_layers": 32 + }, + "nodes": ["main"] +} +``` + +**MLX backend (macOS only):** +```json +{ + "name": "mistral-mlx", + "backend_type": "mlx_lm", + "backend_options": { + "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", + "temp": 0.7, + "max_tokens": 2048 + }, + "nodes": ["main"] +} +``` + +**vLLM backend:** +```json +{ + "name": "dialogpt-vllm", + "backend_type": "vllm", + "backend_options": { + "model": "microsoft/DialoGPT-medium", + "tensor_parallel_size": 2, + "gpu_memory_utilization": 0.9 + }, + "nodes": ["main"] +} +``` + +**Remote node deployment example:** +```json +{ + "name": "distributed-model", + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/model.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1"] +} +``` + +## Docker Support + +Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below: + +```yaml +backends: + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: true + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] +``` + +## Using the API + +You can also manage instances via the REST API: + +```bash +# List all instances +curl http://localhost:8080/api/v1/instances + +# Create a new llama.cpp instance +curl -X POST http://localhost:8080/api/v1/instances/my-model \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/model.gguf" + } + }' + +# Start an instance +curl -X POST http://localhost:8080/api/v1/instances/my-model/start +``` + +## OpenAI Compatible API + +Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools. + +### Chat Completions + +Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint: + +```bash +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "my-model", + "messages": [ + { + "role": "user", + "content": "Hello! Can you help me write a Python function?" + } + ], + "max_tokens": 150, + "temperature": 0.7 + }' +``` + +### Using with Python OpenAI Client + +You can also use the official OpenAI Python client: + +```python +from openai import OpenAI + +# Point the client to your Llamactl server +client = OpenAI( + base_url="http://localhost:8080/v1", + api_key="your-inference-api-key" # Use the inference API key from terminal or config +) + +# Create a chat completion +response = client.chat.completions.create( + model="my-model", # Use the name of your instance + messages=[ + {"role": "user", "content": "Explain quantum computing in simple terms"} + ], + max_tokens=200, + temperature=0.7 +) + +print(response.choices[0].message.content) +``` + +!!! 
note "API Key" + If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup. + +### List Available Models + +Get a list of running instances (models) in OpenAI-compatible format: + +```bash +curl http://localhost:8080/v1/models +``` + +## Next Steps + +- Manage instances [Managing Instances](managing-instances.md) +- Explore the [API Reference](api-reference.md) +- Configure advanced settings in the [Configuration](configuration.md) guide diff --git a/apidocs/swagger.json b/docs/swagger.json similarity index 53% rename from apidocs/swagger.json rename to docs/swagger.json index 71471e6..26f9662 100644 --- a/apidocs/swagger.json +++ b/docs/swagger.json @@ -12,7 +12,7 @@ }, "basePath": "/api/v1", "paths": { - "/backends/llama-cpp/devices": { + "/api/v1/backends/llama-cpp/devices": { "get": { "security": [ { @@ -21,7 +21,7 @@ ], "description": "Returns a list of available devices for the llama server", "tags": [ - "backends" + "Backends" ], "summary": "List available devices for llama server", "responses": { @@ -40,7 +40,7 @@ } } }, - "/backends/llama-cpp/help": { + "/api/v1/backends/llama-cpp/help": { "get": { "security": [ { @@ -49,7 +49,7 @@ ], "description": "Returns the help text for the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get help for llama server", "responses": { @@ -68,7 +68,7 @@ } } }, - "/backends/llama-cpp/parse-command": { + "/api/v1/backends/llama-cpp/parse-command": { "post": { "security": [ { @@ -83,7 +83,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse llama-server command", "parameters": [ @@ -101,7 +101,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -125,7 +125,7 @@ } } }, - "/backends/llama-cpp/version": { + "/api/v1/backends/llama-cpp/version": { "get": { "security": [ { @@ -134,7 +134,7 @@ ], "description": "Returns the version of the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get version of llama server", "responses": { @@ -153,7 +153,7 @@ } } }, - "/backends/mlx/parse-command": { + "/api/v1/backends/mlx/parse-command": { "post": { "security": [ { @@ -168,7 +168,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse mlx_lm.server command", "parameters": [ @@ -186,7 +186,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -201,7 +201,7 @@ } } }, - "/backends/vllm/parse-command": { + "/api/v1/backends/vllm/parse-command": { "post": { "security": [ { @@ -216,7 +216,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse vllm serve command", "parameters": [ @@ -234,7 +234,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -249,7 +249,7 @@ } } }, - "/instances": { + "/api/v1/instances": { "get": { "security": [ { @@ -258,7 +258,7 @@ ], "description": "Returns a list of all instances managed by the server", "tags": [ - "instances" + "Instances" ], "summary": "List all instances", "responses": { @@ -267,7 +267,7 @@ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": 
"#/definitions/instance.Instance" } } }, @@ -280,7 +280,7 @@ } } }, - "/instances/{name}": { + "/api/v1/instances/{name}": { "get": { "security": [ { @@ -289,7 +289,7 @@ ], "description": "Returns the details of a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Get details of a specific instance", "parameters": [ @@ -305,7 +305,7 @@ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -333,7 +333,7 @@ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Update an instance's configuration", "parameters": [ @@ -350,7 +350,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -358,7 +358,7 @@ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -386,7 +386,7 @@ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Create and start a new instance", "parameters": [ @@ -403,7 +403,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -411,7 +411,7 @@ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -436,7 +436,7 @@ ], "description": "Stops and removes a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Delete an instance", "parameters": [ @@ -467,7 +467,7 @@ } } }, - "/instances/{name}/logs": { + "/api/v1/instances/{name}/logs": { "get": { "security": [ { @@ -476,7 +476,7 @@ ], "description": "Returns the logs from a specific instance by name with optional line limit", "tags": [ - "instances" + "Instances" ], "summary": "Get logs from a specific instance", "parameters": [ @@ -516,7 +516,7 @@ } } }, - "/instances/{name}/proxy": { + "/api/v1/instances/{name}/proxy": { "get": { "security": [ { @@ -525,9 +525,9 @@ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -569,9 +569,9 @@ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -606,7 +606,7 @@ } } }, - "/instances/{name}/restart": { + "/api/v1/instances/{name}/restart": { "post": { "security": [ { @@ -615,7 +615,7 @@ ], "description": "Restarts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Restart a running instance", "parameters": [ @@ -631,7 +631,7 @@ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -649,7 +649,7 @@ } } }, - "/instances/{name}/start": { + "/api/v1/instances/{name}/start": { "post": { "security": [ { @@ -658,7 +658,7 @@ ], "description": "Starts a specific instance by name", "tags": [ 
- "instances" + "Instances" ], "summary": "Start a stopped instance", "parameters": [ @@ -674,7 +674,7 @@ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -692,7 +692,7 @@ } } }, - "/instances/{name}/stop": { + "/api/v1/instances/{name}/stop": { "post": { "security": [ { @@ -701,7 +701,7 @@ ], "description": "Stops a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Stop a running instance", "parameters": [ @@ -717,7 +717,7 @@ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -735,6 +735,675 @@ } } }, + "/api/v1/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "Nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "Nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/version": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the version of the llamactl command", + "tags": [ + "System" + ], + "summary": "Get llamactl version", + "responses": { + "200": { + "description": "Version information", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + 
"Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied 
response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { 
+ "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -747,7 +1416,7 @@ "application/json" ], "tags": [ - "openai" + "OpenAI" ], "summary": "OpenAI-compatible proxy endpoint", "responses": { @@ -778,7 +1447,7 @@ ], "description": "Returns a list of instances in a format compatible with OpenAI API", "tags": [ - "openai" + "OpenAI" ], "summary": "List instances in OpenAI-compatible format", "responses": { @@ -796,63 +1465,34 @@ } } } - }, - "/version": { - "get": { - "security": [ - { - "ApiKeyAuth": [] - } - ], - "description": "Returns the version of the llamactl command", - "tags": [ - "version" - ], - "summary": "Get llamactl version", - "responses": { - "200": { - "description": "Version information", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } - } } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", + "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -871,36 +1511,11 @@ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + 
"server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, @@ -943,5 +1558,12 @@ } } } + }, + "securityDefinitions": { + "ApiKeyAuth": { + "type": "apiKey", + "name": "X-API-Key", + "in": "header" + } } } \ No newline at end of file diff --git a/apidocs/swagger.yaml b/docs/swagger.yaml similarity index 50% rename from apidocs/swagger.yaml rename to docs/swagger.yaml index a5db184..7506036 100644 --- a/apidocs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,25 +1,23 @@ basePath: /api/v1 definitions: - backends.BackendType: - enum: - - llama_cpp - - mlx_lm - - vllm - type: string - x-enum-varnames: - - BackendTypeLlamaCpp - - BackendTypeMlxLm - - BackendTypeVllm - instance.CreateInstanceOptions: + instance.Instance: + properties: + created: + description: Unix timestamp when the instance was created + type: integer + name: + type: string + type: object + instance.Options: properties: auto_restart: description: Auto restart type: boolean - backend_options: - additionalProperties: {} + environment: + additionalProperties: + type: string + description: Environment variables type: object - backend_type: - $ref: '#/definitions/backends.BackendType' idle_timeout: description: Idle timeout type: integer @@ -32,27 +30,10 @@ definitions: description: seconds type: integer type: object - instance.InstanceStatus: - enum: - - 0 - - 1 - - 2 - type: integer - x-enum-varnames: - - Stopped - - Running - - Failed - instance.Process: + server.NodeResponse: properties: - created: - description: Creation time - type: integer - name: + address: type: string - status: - allOf: - - $ref: '#/definitions/instance.InstanceStatus' - description: Status type: object server.OpenAIInstance: properties: @@ -88,7 +69,7 @@ info: title: llamactl API version: "1.0" paths: - /backends/llama-cpp/devices: + /api/v1/backends/llama-cpp/devices: get: description: Returns a list of available devices for the llama server responses: @@ -104,8 +85,8 @@ paths: - ApiKeyAuth: [] summary: List available devices for llama server tags: - - backends - /backends/llama-cpp/help: + - Backends + /api/v1/backends/llama-cpp/help: get: description: Returns the help text for the llama server command responses: @@ -121,8 +102,8 @@ paths: - ApiKeyAuth: [] summary: Get help for llama server tags: - - backends - /backends/llama-cpp/parse-command: + - Backends + /api/v1/backends/llama-cpp/parse-command: post: consumes: - application/json @@ -140,7 +121,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -157,8 +138,8 @@ paths: - ApiKeyAuth: [] summary: Parse llama-server command tags: - - backends - /backends/llama-cpp/version: + - Backends + /api/v1/backends/llama-cpp/version: get: description: Returns the version of the llama server command responses: @@ -174,8 +155,8 @@ paths: - ApiKeyAuth: [] summary: Get version of llama server tags: - - backends - /backends/mlx/parse-command: + - Backends + /api/v1/backends/mlx/parse-command: post: consumes: - application/json @@ -193,7 +174,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request 
or command schema: @@ -204,8 +185,8 @@ paths: - ApiKeyAuth: [] summary: Parse mlx_lm.server command tags: - - backends - /backends/vllm/parse-command: + - Backends + /api/v1/backends/vllm/parse-command: post: consumes: - application/json @@ -223,7 +204,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -234,8 +215,8 @@ paths: - ApiKeyAuth: [] summary: Parse vllm serve command tags: - - backends - /instances: + - Backends + /api/v1/instances: get: description: Returns a list of all instances managed by the server responses: @@ -243,7 +224,7 @@ paths: description: List of instances schema: items: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' type: array "500": description: Internal Server Error @@ -253,8 +234,8 @@ paths: - ApiKeyAuth: [] summary: List all instances tags: - - instances - /instances/{name}: + - Instances + /api/v1/instances/{name}: delete: description: Stops and removes a specific instance by name parameters: @@ -278,7 +259,7 @@ paths: - ApiKeyAuth: [] summary: Delete an instance tags: - - instances + - Instances get: description: Returns the details of a specific instance by name parameters: @@ -291,7 +272,7 @@ paths: "200": description: Instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -304,7 +285,7 @@ paths: - ApiKeyAuth: [] summary: Get details of a specific instance tags: - - instances + - Instances post: consumes: - application/json @@ -320,12 +301,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "201": description: Created instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid request body schema: @@ -338,7 +319,7 @@ paths: - ApiKeyAuth: [] summary: Create and start a new instance tags: - - instances + - Instances put: consumes: - application/json @@ -354,12 +335,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "200": description: Updated instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -372,8 +353,8 @@ paths: - ApiKeyAuth: [] summary: Update an instance's configuration tags: - - instances - /instances/{name}/logs: + - Instances + /api/v1/instances/{name}/logs: get: description: Returns the logs from a specific instance by name with optional line limit @@ -404,8 +385,8 @@ paths: - ApiKeyAuth: [] summary: Get logs from a specific instance tags: - - instances - /instances/{name}/proxy: + - Instances + /api/v1/instances/{name}/proxy: get: description: Forwards HTTP requests to the llama-server instance running on a specific port @@ -432,9 +413,10 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - - instances + - Instances post: description: Forwards HTTP requests to the llama-server instance running on a specific port @@ -461,10 +443,11 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific 
instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - - instances - /instances/{name}/restart: + - Instances + /api/v1/instances/{name}/restart: post: description: Restarts a specific instance by name parameters: @@ -477,7 +460,7 @@ paths: "200": description: Restarted instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -490,8 +473,8 @@ paths: - ApiKeyAuth: [] summary: Restart a running instance tags: - - instances - /instances/{name}/start: + - Instances + /api/v1/instances/{name}/start: post: description: Starts a specific instance by name parameters: @@ -504,7 +487,7 @@ paths: "200": description: Started instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -517,8 +500,8 @@ paths: - ApiKeyAuth: [] summary: Start a stopped instance tags: - - instances - /instances/{name}/stop: + - Instances + /api/v1/instances/{name}/stop: post: description: Stops a specific instance by name parameters: @@ -531,7 +514,7 @@ paths: "200": description: Stopped instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -544,7 +527,444 @@ paths: - ApiKeyAuth: [] summary: Stop a running instance tags: - - instances + - Instances + /api/v1/nodes: + get: + description: Returns a map of all nodes configured in the server (node name + -> node config) + responses: + "200": + description: Map of nodes + schema: + additionalProperties: + $ref: '#/definitions/server.NodeResponse' + type: object + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: List all configured nodes + tags: + - Nodes + /api/v1/nodes/{name}: + get: + description: Returns the details of a specific node by name + parameters: + - description: Node Name + in: path + name: name + required: true + type: string + responses: + "200": + description: Node details + schema: + $ref: '#/definitions/server.NodeResponse' + "400": + description: Invalid name format + schema: + type: string + "404": + description: Node not found + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get details of a specific node + tags: + - Nodes + /api/v1/version: + get: + description: Returns the version of the llamactl command + responses: + "200": + description: Version information + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get llamactl version + tags: + - System + /llama-cpp/{name}/: + get: + description: Proxies requests to the llama.cpp UI for the specified instance + parameters: + - description: Instance Name + in: query + name: name + required: true + type: string + produces: + - text/html + responses: + "200": + description: Proxied HTML response + schema: + type: string + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp UI for the instance + tags: + - Llama.cpp + /llama-cpp/{name}/apply-template: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + 
parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/completion: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/detokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/embeddings: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/infill: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/metrics: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + 
schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/props: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/reranking: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/slots: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/tokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp /v1/: post: consumes: @@ -567,7 +987,7 @@ paths: - ApiKeyAuth: [] summary: OpenAI-compatible proxy endpoint tags: - - openai + - OpenAI /v1/models: get: description: Returns a list of instances in a format compatible with OpenAI @@ -585,22 +1005,10 @@ paths: - 
ApiKeyAuth: [] summary: List instances in OpenAI-compatible format tags: - - openai - /version: - get: - description: Returns the version of the llamactl command - responses: - "200": - description: Version information - schema: - type: string - "500": - description: Internal Server Error - schema: - type: string - security: - - ApiKeyAuth: [] - summary: Get llamactl version - tags: - - version + - OpenAI +securityDefinitions: + ApiKeyAuth: + in: header + name: X-API-Key + type: apiKey swagger: "2.0" diff --git a/docs/user-guide/troubleshooting.md b/docs/troubleshooting.md similarity index 54% rename from docs/user-guide/troubleshooting.md rename to docs/troubleshooting.md index ca2b4df..1f33c0a 100644 --- a/docs/user-guide/troubleshooting.md +++ b/docs/troubleshooting.md @@ -26,62 +26,67 @@ Issues specific to Llamactl deployment and operation. ## Instance Management Issues -### Model Loading Failures +### Instance Fails to Start -**Problem:** Instance fails to start with model loading errors - -**Common Solutions:** -- **llama-server not found:** Ensure `llama-server` binary is in PATH -- **Wrong model format:** Ensure model is in GGUF format -- **Insufficient memory:** Use smaller model or reduce context size -- **Path issues:** Use absolute paths to model files - -### Memory Issues - -**Problem:** Out of memory errors or system becomes unresponsive +**Problem:** Instance fails to start or immediately stops **Solutions:** -1. **Reduce context size:** - ```json - { - "n_ctx": 1024 - } + +1. **Check instance logs** to see the actual error: + ```bash + curl http://localhost:8080/api/v1/instances/{name}/logs + # Or check log files directly + tail -f ~/.local/share/llamactl/logs/{instance-name}.log ``` -2. **Use quantized models:** - - Try Q4_K_M instead of higher precision models - - Use smaller model variants (7B instead of 13B) +2. **Verify backend is installed:** + - **llama.cpp**: Ensure `llama-server` is in PATH + - **MLX**: Ensure `mlx-lm` Python package is installed + - **vLLM**: Ensure `vllm` Python package is installed -### GPU Configuration +3. **Check model path and format:** + - Use absolute paths to model files + - Verify model format matches backend (GGUF for llama.cpp, etc.) -**Problem:** GPU not being used effectively +4. **Verify backend command configuration:** + - Check that the backend `command` is correctly configured in the global config + - For virtual environments, specify the full path to the command (e.g., `/path/to/venv/bin/mlx_lm.server`) + - See the [Configuration Guide](configuration.md) for backend configuration details + - Test the backend directly (see [Backend-Specific Issues](#backend-specific-issues) below) -**Solutions:** -1. **Configure GPU layers:** - ```json - { - "n_gpu_layers": 35 - } - ``` +### Backend-Specific Issues -### Advanced Instance Issues +**Problem:** Model loading, memory, GPU, or performance issues -**Problem:** Complex model loading, performance, or compatibility issues +Most model-specific issues (memory, GPU configuration, performance tuning) are backend-specific and should be resolved by consulting the respective backend documentation: -Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. 
For advanced troubleshooting: +**llama.cpp:** +- [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp) +- [llama-server README](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) -**Resources:** -- **llama.cpp Documentation:** [https://github.com/ggml/llama.cpp](https://github.com/ggml/llama.cpp) -- **llama.cpp Issues:** [https://github.com/ggml/llama.cpp/issues](https://github.com/ggml/llama.cpp/issues) -- **llama.cpp Discussions:** [https://github.com/ggml/llama.cpp/discussions](https://github.com/ggml/llama.cpp/discussions) +**MLX:** +- [MLX-LM GitHub](https://github.com/ml-explore/mlx-lm) +- [MLX-LM Server Guide](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md) + +**vLLM:** +- [vLLM Documentation](https://docs.vllm.ai/en/stable/) +- [OpenAI Compatible Server](https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html) +- [vllm serve Command](https://docs.vllm.ai/en/stable/cli/serve.html#vllm-serve) + +**Testing backends directly:** + +Testing your model and configuration directly with the backend helps determine if the issue is with llamactl or the backend itself: -**Testing directly with llama-server:** ```bash -# Test your model and parameters directly with llama-server -llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35 -``` +# llama.cpp +llama-server --model /path/to/model.gguf --port 8081 -This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server. +# MLX +mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081 + +# vLLM +vllm serve microsoft/DialoGPT-medium --port 8081 +``` ## API and Network Issues diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md deleted file mode 100644 index 472cd0b..0000000 --- a/docs/user-guide/api-reference.md +++ /dev/null @@ -1,560 +0,0 @@ -# API Reference - -Complete reference for the Llamactl REST API. - -## Base URL - -All API endpoints are relative to the base URL: - -``` -http://localhost:8080/api/v1 -``` - -## Authentication - -Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header: - -```bash -curl -H "Authorization: Bearer " \ - http://localhost:8080/api/v1/instances -``` - -The server supports two types of API keys: -- **Management API Keys**: Required for instance management operations (CRUD operations on instances) -- **Inference API Keys**: Required for OpenAI-compatible inference endpoints - -## System Endpoints - -### Get Llamactl Version - -Get the version information of the llamactl server. - -```http -GET /api/v1/version -``` - -**Response:** -``` -Version: 1.0.0 -Commit: abc123 -Build Time: 2024-01-15T10:00:00Z -``` - -### Get Llama Server Help - -Get help text for the llama-server command. - -```http -GET /api/v1/server/help -``` - -**Response:** Plain text help output from `llama-server --help` - -### Get Llama Server Version - -Get version information of the llama-server binary. - -```http -GET /api/v1/server/version -``` - -**Response:** Plain text version output from `llama-server --version` - -### List Available Devices - -List available devices for llama-server. - -```http -GET /api/v1/server/devices -``` - -**Response:** Plain text device list from `llama-server --list-devices` - -## Instances - -### List All Instances - -Get a list of all instances. 
- -```http -GET /api/v1/instances -``` - -**Response:** -```json -[ - { - "name": "llama2-7b", - "status": "running", - "created": 1705312200 - } -] -``` - -### Get Instance Details - -Get detailed information about a specific instance. - -```http -GET /api/v1/instances/{name} -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Create Instance - -Create and start a new instance. - -```http -POST /api/v1/instances/{name} -``` - -**Request Body:** JSON object with instance configuration. Common fields include: - -- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`) -- `backend_options`: Backend-specific configuration -- `auto_restart`: Enable automatic restart on failure -- `max_restarts`: Maximum restart attempts -- `restart_delay`: Delay between restarts in seconds -- `on_demand_start`: Start instance when receiving requests -- `idle_timeout`: Idle timeout in minutes -- `environment`: Environment variables as key-value pairs -- `nodes`: Array with single node name to deploy the instance to (for remote deployments) - -See [Managing Instances](managing-instances.md) for complete configuration options. - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Update Instance - -Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options. - -```http -PUT /api/v1/instances/{name} -``` - -**Request Body:** JSON object with configuration fields to update. - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Delete Instance - -Stop and remove an instance. - -```http -DELETE /api/v1/instances/{name} -``` - -**Response:** `204 No Content` - -## Instance Operations - -### Start Instance - -Start a stopped instance. - -```http -POST /api/v1/instances/{name}/start -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -**Error Responses:** -- `409 Conflict`: Maximum number of running instances reached -- `500 Internal Server Error`: Failed to start instance - -### Stop Instance - -Stop a running instance. - -```http -POST /api/v1/instances/{name}/stop -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "stopped", - "created": 1705312200 -} -``` - -### Restart Instance - -Restart an instance (stop then start). - -```http -POST /api/v1/instances/{name}/restart -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Get Instance Logs - -Retrieve instance logs. - -```http -GET /api/v1/instances/{name}/logs -``` - -**Query Parameters:** -- `lines`: Number of lines to return (default: all lines, use -1 for all) - -**Response:** Plain text log output - -**Example:** -```bash -curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100" -``` - -### Proxy to Instance - -Proxy HTTP requests directly to the llama-server instance. - -```http -GET /api/v1/instances/{name}/proxy/* -POST /api/v1/instances/{name}/proxy/* -``` - -This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance. 
- -**Example - Check Instance Health:** -```bash -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model/proxy/health -``` - -This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance. - -**Error Responses:** -- `503 Service Unavailable`: Instance is not running - -## OpenAI-Compatible API - -Llamactl provides OpenAI-compatible endpoints for inference operations. - -### List Models - -List all instances in OpenAI-compatible format. - -```http -GET /v1/models -``` - -**Response:** -```json -{ - "object": "list", - "data": [ - { - "id": "llama2-7b", - "object": "model", - "created": 1705312200, - "owned_by": "llamactl" - } - ] -} -``` - -### Chat Completions, Completions, Embeddings - -All OpenAI-compatible inference endpoints are available: - -```http -POST /v1/chat/completions -POST /v1/completions -POST /v1/embeddings -POST /v1/rerank -POST /v1/reranking -``` - -**Request Body:** Standard OpenAI format with `model` field specifying the instance name - -**Example:** -```json -{ - "model": "llama2-7b", - "messages": [ - { - "role": "user", - "content": "Hello, how are you?" - } - ] -} -``` - -The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md). - -**Error Responses:** -- `400 Bad Request`: Invalid request body or missing instance name -- `503 Service Unavailable`: Instance is not running and on-demand start is disabled -- `409 Conflict`: Cannot start instance due to maximum instances limit - -## Instance Status Values - -Instances can have the following status values: -- `stopped`: Instance is not running -- `running`: Instance is running and ready to accept requests -- `failed`: Instance failed to start or crashed - -## Error Responses - -All endpoints may return error responses in the following format: - -```json -{ - "error": "Error message description" -} -``` - -### Common HTTP Status Codes - -- `200`: Success -- `201`: Created -- `204`: No Content (successful deletion) -- `400`: Bad Request (invalid parameters or request body) -- `401`: Unauthorized (missing or invalid API key) -- `403`: Forbidden (insufficient permissions) -- `404`: Not Found (instance not found) -- `409`: Conflict (instance already exists, max instances reached) -- `500`: Internal Server Error -- `503`: Service Unavailable (instance not running) - -## Examples - -### Complete Instance Lifecycle - -```bash -# Create and start instance -curl -X POST http://localhost:8080/api/v1/instances/my-model \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-2-7b.gguf", - "gpu_layers": 32 - }, - "environment": { - "CUDA_VISIBLE_DEVICES": "0", - "OMP_NUM_THREADS": "8" - } - }' - -# Check instance status -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model - -# Get instance logs -curl -H "Authorization: Bearer your-api-key" \ - "http://localhost:8080/api/v1/instances/my-model/logs?lines=50" - -# Use OpenAI-compatible chat completions -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-inference-api-key" \ - -d '{ - "model": "my-model", - "messages": [ - {"role": "user", "content": 
"Hello!"} - ], - "max_tokens": 100 - }' - -# Stop instance -curl -X POST -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model/stop - -# Delete instance -curl -X DELETE -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model -``` - -### Remote Node Instance Example - -```bash -# Create instance on specific remote node -curl -X POST http://localhost:8080/api/v1/instances/remote-model \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-2-7b.gguf", - "gpu_layers": 32 - }, - "nodes": ["worker1"] - }' - -# Check status of remote instance -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/remote-model - -# Use remote instance with OpenAI-compatible API -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-inference-api-key" \ - -d '{ - "model": "remote-model", - "messages": [ - {"role": "user", "content": "Hello from remote node!"} - ] - }' -``` - -### Using the Proxy Endpoint - -You can also directly proxy requests to the llama-server instance: - -```bash -# Direct proxy to instance (bypasses OpenAI compatibility layer) -curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "prompt": "Hello, world!", - "n_predict": 50 - }' -``` - -## Backend-Specific Endpoints - -### Parse Commands - -Llamactl provides endpoints to parse command strings from different backends into instance configuration options. - -#### Parse Llama.cpp Command - -Parse a llama-server command string into instance options. - -```http -POST /api/v1/backends/llama-cpp/parse-command -``` - -**Request Body:** -```json -{ - "command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "llama_cpp", - "llama_server_options": { - "model": "/path/to/model.gguf", - "ctx_size": 2048, - "port": 8080 - } -} -``` - -#### Parse MLX-LM Command - -Parse an MLX-LM server command string into instance options. - -```http -POST /api/v1/backends/mlx/parse-command -``` - -**Request Body:** -```json -{ - "command": "mlx_lm.server --model /path/to/model --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "mlx_lm", - "mlx_server_options": { - "model": "/path/to/model", - "port": 8080 - } -} -``` - -#### Parse vLLM Command - -Parse a vLLM serve command string into instance options. - -```http -POST /api/v1/backends/vllm/parse-command -``` - -**Request Body:** -```json -{ - "command": "vllm serve /path/to/model --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "vllm", - "vllm_server_options": { - "model": "/path/to/model", - "port": 8080 - } -} -``` - -**Error Responses for Parse Commands:** -- `400 Bad Request`: Invalid request body, empty command, or parse error -- `500 Internal Server Error`: Encoding error - -## Auto-Generated Documentation - -The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation: - -1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest` -2. 
Generate docs: `swag init -g cmd/server/main.go -o apidocs` - -## Swagger Documentation - -If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at: - -``` -http://localhost:8080/swagger/ -``` - -This provides a complete interactive interface for testing all API endpoints. diff --git a/mkdocs.yml b/mkdocs.yml index 70cbef3..df2ded7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,8 +25,8 @@ theme: name: Switch to light mode features: - navigation.tabs - - navigation.sections - - navigation.expand + - navigation.tabs.sticky + - toc.integrate - navigation.top - search.highlight - search.share @@ -49,14 +49,12 @@ markdown_extensions: nav: - Home: index.md - - Getting Started: - - Installation: getting-started/installation.md - - Quick Start: getting-started/quick-start.md - - Configuration: getting-started/configuration.md - - User Guide: - - Managing Instances: user-guide/managing-instances.md - - API Reference: user-guide/api-reference.md - - Troubleshooting: user-guide/troubleshooting.md + - Installation: installation.md + - Quick Start: quick-start.md + - Configuration: configuration.md + - Managing Instances: managing-instances.md + - API Reference: api-reference.md + - Troubleshooting: troubleshooting.md plugins: - search @@ -66,6 +64,8 @@ plugins: css_dir: css javascript_dir: js canonical_version: null + - neoteroi.mkdocsoad: + use_pymdownx: true hooks: - docs/readme_sync.py @@ -78,3 +78,6 @@ extra: social: - icon: fontawesome/brands/github link: https://github.com/lordmathis/llamactl + +extra_css: +- css/css-v1.1.3.css diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go index 47ef02d..390ecb0 100644 --- a/pkg/server/handlers_backends.go +++ b/pkg/server/handlers_backends.go @@ -44,7 +44,7 @@ func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) { // LlamaCppUIProxy godoc // @Summary Proxy requests to llama.cpp UI for the instance // @Description Proxies requests to the llama.cpp UI for the specified instance -// @Tags backends +// @Tags Llama.cpp // @Security ApiKeyAuth // @Produce html // @Param name query string true "Instance Name" @@ -83,14 +83,24 @@ func (h *Handler) LlamaCppUIProxy() http.HandlerFunc { // LlamaCppProxy godoc // @Summary Proxy requests to llama.cpp server instance // @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured -// @Tags backends +// @Tags Llama.cpp // @Security ApiKeyAuth // @Produce json -// @Param name query string true "Instance Name" +// @Param name path string true "Instance Name" // @Success 200 {object} map[string]any "Proxied response" // @Failure 400 {string} string "Invalid instance" // @Failure 500 {string} string "Internal Server Error" -// @Router /llama-cpp/{name}/* [post] +// @Router /llama-cpp/{name}/props [get] +// @Router /llama-cpp/{name}/slots [get] +// @Router /llama-cpp/{name}/apply-template [post] +// @Router /llama-cpp/{name}/completion [post] +// @Router /llama-cpp/{name}/detokenize [post] +// @Router /llama-cpp/{name}/embeddings [post] +// @Router /llama-cpp/{name}/infill [post] +// @Router /llama-cpp/{name}/metrics [post] +// @Router /llama-cpp/{name}/props [post] +// @Router /llama-cpp/{name}/reranking [post] +// @Router /llama-cpp/{name}/tokenize [post] func (h *Handler) LlamaCppProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { @@ -150,7 +160,7 @@ func parseHelper(w http.ResponseWriter, r *http.Request, backend interface { // 
ParseLlamaCommand godoc // @Summary Parse llama-server command // @Description Parses a llama-server command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json @@ -158,7 +168,7 @@ func parseHelper(w http.ResponseWriter, r *http.Request, backend interface { // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" // @Failure 500 {object} map[string]string "Internal Server Error" -// @Router /backends/llama-cpp/parse-command [post] +// @Router /api/v1/backends/llama-cpp/parse-command [post] func (h *Handler) ParseLlamaCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.LlamaServerOptions{}) @@ -180,14 +190,14 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc { // ParseMlxCommand godoc // @Summary Parse mlx_lm.server command // @Description Parses MLX-LM server command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json // @Param request body ParseCommandRequest true "Command to parse" // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" -// @Router /backends/mlx/parse-command [post] +// @Router /api/v1/backends/mlx/parse-command [post] func (h *Handler) ParseMlxCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.MlxServerOptions{}) @@ -209,14 +219,14 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc { // ParseVllmCommand godoc // @Summary Parse vllm serve command // @Description Parses a vLLM serve command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json // @Param request body ParseCommandRequest true "Command to parse" // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" -// @Router /backends/vllm/parse-command [post] +// @Router /api/v1/backends/vllm/parse-command [post] func (h *Handler) ParseVllmCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.VllmServerOptions{}) @@ -251,12 +261,12 @@ func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerF // LlamaServerHelpHandler godoc // @Summary Get help for llama server // @Description Returns the help text for the llama server command -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Help text" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/help [get] +// @Router /api/v1/backends/llama-cpp/help [get] func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--help", "Failed to get help") } @@ -264,12 +274,12 @@ func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { // LlamaServerVersionHandler godoc // @Summary Get version of llama server // @Description Returns the version of the llama server command -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Version information" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/version [get] +// @Router 
/api/v1/backends/llama-cpp/version [get] func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--version", "Failed to get version") } @@ -277,12 +287,12 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { // LlamaServerListDevicesHandler godoc // @Summary List available devices for llama server // @Description Returns a list of available devices for the llama server -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "List of devices" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/devices [get] +// @Router /api/v1/backends/llama-cpp/devices [get] func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--list-devices", "Failed to list devices") } diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go index 24fe3e7..0480f22 100644 --- a/pkg/server/handlers_instances.go +++ b/pkg/server/handlers_instances.go @@ -16,12 +16,12 @@ import ( // ListInstances godoc // @Summary List all instances // @Description Returns a list of all instances managed by the server -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json -// @Success 200 {array} instance.Process "List of instances" +// @Success 200 {array} instance.Instance "List of instances" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances [get] +// @Router /api/v1/instances [get] func (h *Handler) ListInstances() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { instances, err := h.InstanceManager.ListInstances() @@ -37,16 +37,16 @@ func (h *Handler) ListInstances() http.HandlerFunc { // CreateInstance godoc // @Summary Create and start a new instance // @Description Creates a new instance with the provided configuration options -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Accept json // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 201 {object} instance.Process "Created instance details" +// @Success 201 {object} instance.Instance "Created instance details" // @Failure 400 {string} string "Invalid request body" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [post] +// @Router /api/v1/instances/{name} [post] func (h *Handler) CreateInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -75,14 +75,14 @@ func (h *Handler) CreateInstance() http.HandlerFunc { // GetInstance godoc // @Summary Get details of a specific instance // @Description Returns the details of a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Instance details" +// @Success 200 {object} instance.Instance "Instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [get] +// @Router /api/v1/instances/{name} [get] func (h *Handler) GetInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -105,16 +105,16 @@ func (h *Handler) GetInstance() http.HandlerFunc { // UpdateInstance godoc // @Summary Update an instance's configuration 
// @Description Updates the configuration of a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Accept json // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 200 {object} instance.Process "Updated instance details" +// @Success 200 {object} instance.Instance "Updated instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [put] +// @Router /api/v1/instances/{name} [put] func (h *Handler) UpdateInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -143,14 +143,14 @@ func (h *Handler) UpdateInstance() http.HandlerFunc { // StartInstance godoc // @Summary Start a stopped instance // @Description Starts a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Started instance details" +// @Success 200 {object} instance.Instance "Started instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/start [post] +// @Router /api/v1/instances/{name}/start [post] func (h *Handler) StartInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -179,14 +179,14 @@ func (h *Handler) StartInstance() http.HandlerFunc { // StopInstance godoc // @Summary Stop a running instance // @Description Stops a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Stopped instance details" +// @Success 200 {object} instance.Instance "Stopped instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/stop [post] +// @Router /api/v1/instances/{name}/stop [post] func (h *Handler) StopInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -209,14 +209,14 @@ func (h *Handler) StopInstance() http.HandlerFunc { // RestartInstance godoc // @Summary Restart a running instance // @Description Restarts a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Restarted instance details" +// @Success 200 {object} instance.Instance "Restarted instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/restart [post] +// @Router /api/v1/instances/{name}/restart [post] func (h *Handler) RestartInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -239,13 +239,13 @@ func (h *Handler) RestartInstance() http.HandlerFunc { // DeleteInstance godoc // @Summary Delete an instance // @Description Stops and removes a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Success 204 "No Content" // @Failure 400 {string} string "Invalid name 
format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [delete] +// @Router /api/v1/instances/{name} [delete] func (h *Handler) DeleteInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -267,7 +267,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { // GetInstanceLogs godoc // @Summary Get logs from a specific instance // @Description Returns the logs from a specific instance by name with optional line limit -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Param lines query string false "Number of lines to retrieve (default: all lines)" @@ -275,7 +275,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { // @Success 200 {string} string "Instance logs" // @Failure 400 {string} string "Invalid name format or lines parameter" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/logs [get] +// @Router /api/v1/instances/{name}/logs [get] func (h *Handler) GetInstanceLogs() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -310,15 +310,15 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc { // InstanceProxy godoc // @Summary Proxy requests to a specific instance, does not autostart instance if stopped // @Description Forwards HTTP requests to the llama-server instance running on a specific port -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Success 200 "Request successfully proxied to instance" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Failure 503 {string} string "Instance is not running" -// @Router /instances/{name}/proxy [get] -// @Router /instances/{name}/proxy [post] +// @Router /api/v1/instances/{name}/proxy [get] +// @Router /api/v1/instances/{name}/proxy [post] func (h *Handler) InstanceProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { inst, err := h.getInstance(r) diff --git a/pkg/server/handlers_nodes.go b/pkg/server/handlers_nodes.go index 7c84b0a..4aef53a 100644 --- a/pkg/server/handlers_nodes.go +++ b/pkg/server/handlers_nodes.go @@ -14,12 +14,12 @@ type NodeResponse struct { // ListNodes godoc // @Summary List all configured nodes // @Description Returns a map of all nodes configured in the server (node name -> node config) -// @Tags nodes +// @Tags Nodes // @Security ApiKeyAuth // @Produces json // @Success 200 {object} map[string]NodeResponse "Map of nodes" // @Failure 500 {string} string "Internal Server Error" -// @Router /nodes [get] +// @Router /api/v1/nodes [get] func (h *Handler) ListNodes() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { // Convert to sanitized response format (map of name -> NodeResponse) @@ -37,7 +37,7 @@ func (h *Handler) ListNodes() http.HandlerFunc { // GetNode godoc // @Summary Get details of a specific node // @Description Returns the details of a specific node by name -// @Tags nodes +// @Tags Nodes // @Security ApiKeyAuth // @Produces json // @Param name path string true "Node Name" @@ -45,7 +45,7 @@ func (h *Handler) ListNodes() http.HandlerFunc { // @Failure 400 {string} string "Invalid name format" // @Failure 404 {string} string "Node not found" // @Failure 500 {string} string "Internal Server Error" -// @Router /nodes/{name} [get] +// @Router /api/v1/nodes/{name} [get] func 
(h *Handler) GetNode() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") diff --git a/pkg/server/handlers_openai.go b/pkg/server/handlers_openai.go index 35ac746..d221200 100644 --- a/pkg/server/handlers_openai.go +++ b/pkg/server/handlers_openai.go @@ -25,7 +25,7 @@ type OpenAIInstance struct { // OpenAIListInstances godoc // @Summary List instances in OpenAI-compatible format // @Description Returns a list of instances in a format compatible with OpenAI API -// @Tags openai +// @Tags OpenAI // @Security ApiKeyAuth // @Produces json // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances" @@ -61,7 +61,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc { // OpenAIProxy godoc // @Summary OpenAI-compatible proxy endpoint // @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header. -// @Tags openai +// @Tags OpenAI // @Security ApiKeyAuth // @Accept json // @Produces json diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go index 2e61288..46410f3 100644 --- a/pkg/server/handlers_system.go +++ b/pkg/server/handlers_system.go @@ -8,12 +8,12 @@ import ( // VersionHandler godoc // @Summary Get llamactl version // @Description Returns the version of the llamactl command -// @Tags version +// @Tags System // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Version information" // @Failure 500 {string} string "Internal Server Error" -// @Router /version [get] +// @Router /api/v1/version [get] func (h *Handler) VersionHandler() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { versionInfo := fmt.Sprintf("Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime) diff --git a/pkg/server/routes.go b/pkg/server/routes.go index ffe89ec..6587601 100644 --- a/pkg/server/routes.go +++ b/pkg/server/routes.go @@ -8,7 +8,7 @@ import ( "github.com/go-chi/cors" httpSwagger "github.com/swaggo/http-swagger" - _ "llamactl/apidocs" + _ "llamactl/docs" "llamactl/webui" )