diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..8df96b6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,65 @@ +name: Build and Deploy Documentation + +on: + push: + branches: [ main ] + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'docs-requirements.txt' + - '.github/workflows/docs.yml' + pull_request: + branches: [ main ] + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'docs-requirements.txt' + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Needed for git-revision-date-localized plugin + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r docs-requirements.txt + + - name: Build documentation + run: | + mkdocs build --strict + + - name: Upload documentation artifact + if: github.ref == 'refs/heads/main' + uses: actions/upload-pages-artifact@v3 + with: + path: ./site + + deploy: + if: github.ref == 'refs/heads/main' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78c8613..1f4a50e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -129,6 +129,50 @@ Use this format for pull request titles: - Use meaningful component and variable names - Prefer functional components over class components +## Documentation Development + +This project uses MkDocs for documentation. When working on documentation: + +### Setup Documentation Environment + +```bash +# Install documentation dependencies +pip install -r docs-requirements.txt +``` + +### Development Workflow + +```bash +# Serve documentation locally for development +mkdocs serve +``` +The documentation will be available at http://localhost:8000 + +```bash +# Build static documentation site +mkdocs build +``` +The built site will be in the `site/` directory. + +### Documentation Structure + +- `docs/` - Documentation content (Markdown files) +- `mkdocs.yml` - MkDocs configuration +- `docs-requirements.txt` - Python dependencies for documentation + +### Adding New Documentation + +When adding new documentation: + +1. Create Markdown files in the appropriate `docs/` subdirectory +2. Update the navigation in `mkdocs.yml` +3. Test locally with `mkdocs serve` +4. Submit a pull request + +### Documentation Deployment + +Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch. 
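+
+The deploy workflow runs `mkdocs build --strict`, so broken links and other warnings fail the build. As a quick sanity check (a sketch of the same step CI runs), you can run the strict build locally before pushing:
+
+```bash
+# Mirror the CI documentation build; --strict turns warnings into errors
+pip install -r docs-requirements.txt
+mkdocs build --strict
+```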
+ ## Getting Help - Check existing [issues](https://github.com/lordmathis/llamactl/issues) diff --git a/README.md b/README.md index d9edfd5..a2a1e48 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ 💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests 💾 **State Persistence**: Ensure instances remain intact across server restarts -![Dashboard Screenshot](docs/images/screenshot.png) +![Dashboard Screenshot](docs/images/dashboard.png) **Choose llamactl if**: You need authentication, health monitoring, auto-restart, and centralized management of multiple llama-server instances **Choose Ollama if**: You want the simplest setup with strong community ecosystem and third-party integrations @@ -123,7 +123,6 @@ instances: on_demand_start_timeout: 120 # Default on-demand start timeout in seconds timeout_check_interval: 5 # Idle instance timeout check in minutes - auth: require_inference_auth: true # Require auth for inference endpoints inference_keys: [] # Keys for inference endpoints @@ -131,107 +130,7 @@ auth: management_keys: [] # Keys for management endpoints ``` -
Full Configuration Guide - -llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence: - -``` -Defaults < Configuration file < Environment variables -``` - -### Configuration Files - -#### Configuration File Locations - -Configuration files are searched in the following locations (in order of precedence): - -**Linux/macOS:** -- `./llamactl.yaml` or `./config.yaml` (current directory) -- `$HOME/.config/llamactl/config.yaml` -- `/etc/llamactl/config.yaml` - -**Windows:** -- `./llamactl.yaml` or `./config.yaml` (current directory) -- `%APPDATA%\llamactl\config.yaml` -- `%USERPROFILE%\llamactl\config.yaml` -- `%PROGRAMDATA%\llamactl\config.yaml` - -You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable. - -### Configuration Options - -#### Server Configuration - -```yaml -server: - host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0") - port: 8080 # Server port to bind to (default: 8080) - allowed_origins: ["*"] # CORS allowed origins (default: ["*"]) - enable_swagger: false # Enable Swagger UI (default: false) -``` - -**Environment Variables:** -- `LLAMACTL_HOST` - Server host -- `LLAMACTL_PORT` - Server port -- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins -- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false) - -#### Instance Configuration - -```yaml -instances: - port_range: [8000, 9000] # Port range for instances (default: [8000, 9000]) - data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS) - configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances) - logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs) - auto_create_dirs: true # Automatically create data/config/logs directories (default: true) - max_instances: -1 # Maximum instances (-1 = unlimited) - max_running_instances: -1 # Maximum running instances (-1 = unlimited) - enable_lru_eviction: true # Enable LRU eviction for idle instances - llama_executable: "llama-server" # Path to llama-server executable - default_auto_restart: true # Default auto-restart setting - default_max_restarts: 3 # Default maximum restart attempts - default_restart_delay: 5 # Default restart delay in seconds - default_on_demand_start: true # Default on-demand start setting - on_demand_start_timeout: 120 # Default on-demand start timeout in seconds - timeout_check_interval: 5 # Default instance timeout check interval in minutes -``` - -**Environment Variables:** -- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000") -- `LLAMACTL_DATA_DIRECTORY` - Data directory path -- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path -- `LLAMACTL_LOGS_DIR` - Log directory path -- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false) -- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances -- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances -- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances -- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable -- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) -- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts -- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds -- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false) -- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - 
Default on-demand start timeout in seconds -- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes - - -#### Authentication Configuration - -```yaml -auth: - require_inference_auth: true # Require API key for OpenAI endpoints (default: true) - inference_keys: [] # List of valid inference API keys - require_management_auth: true # Require API key for management endpoints (default: true) - management_keys: [] # List of valid management API keys -``` - -**Environment Variables:** -- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false) -- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys -- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false) -- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys - -
+For detailed configuration options including environment variables, file locations, and advanced settings, see the [Configuration Guide](docs/getting-started/configuration.md). ## License diff --git a/apidocs/docs.go b/apidocs/docs.go index 78bd3c0..7ea502e 100644 --- a/apidocs/docs.go +++ b/apidocs/docs.go @@ -884,6 +884,10 @@ const docTemplate = `{ "host": { "type": "string" }, + "idle_timeout": { + "description": "Idle timeout", + "type": "integer" + }, "ignore_eos": { "type": "boolean" }, @@ -1018,6 +1022,10 @@ const docTemplate = `{ "numa": { "type": "string" }, + "on_demand_start": { + "description": "On demand start", + "type": "boolean" + }, "override_kv": { "type": "array", "items": { @@ -1078,8 +1086,7 @@ const docTemplate = `{ "reranking": { "type": "boolean" }, - "restart_delay_seconds": { - "description": "RestartDelay duration in seconds", + "restart_delay": { "type": "integer" }, "rope_freq_base": { @@ -1194,6 +1201,19 @@ const docTemplate = `{ } } }, + "instance.InstanceStatus": { + "type": "integer", + "enum": [ + 0, + 1, + 2 + ], + "x-enum-varnames": [ + "Stopped", + "Running", + "Failed" + ] + }, "instance.Process": { "type": "object", "properties": { @@ -1204,9 +1224,13 @@ const docTemplate = `{ "name": { "type": "string" }, - "running": { + "status": { "description": "Status", - "type": "boolean" + "allOf": [ + { + "$ref": "#/definitions/instance.InstanceStatus" + } + ] } } }, diff --git a/apidocs/swagger.json b/apidocs/swagger.json index 95493f1..be8d193 100644 --- a/apidocs/swagger.json +++ b/apidocs/swagger.json @@ -877,6 +877,10 @@ "host": { "type": "string" }, + "idle_timeout": { + "description": "Idle timeout", + "type": "integer" + }, "ignore_eos": { "type": "boolean" }, @@ -1011,6 +1015,10 @@ "numa": { "type": "string" }, + "on_demand_start": { + "description": "On demand start", + "type": "boolean" + }, "override_kv": { "type": "array", "items": { @@ -1071,8 +1079,7 @@ "reranking": { "type": "boolean" }, - "restart_delay_seconds": { - "description": "RestartDelay duration in seconds", + "restart_delay": { "type": "integer" }, "rope_freq_base": { @@ -1187,6 +1194,19 @@ } } }, + "instance.InstanceStatus": { + "type": "integer", + "enum": [ + 0, + 1, + 2 + ], + "x-enum-varnames": [ + "Stopped", + "Running", + "Failed" + ] + }, "instance.Process": { "type": "object", "properties": { @@ -1197,9 +1217,13 @@ "name": { "type": "string" }, - "running": { + "status": { "description": "Status", - "type": "boolean" + "allOf": [ + { + "$ref": "#/definitions/instance.InstanceStatus" + } + ] } } }, diff --git a/apidocs/swagger.yaml b/apidocs/swagger.yaml index c32e7f5..bc6e4ec 100644 --- a/apidocs/swagger.yaml +++ b/apidocs/swagger.yaml @@ -136,6 +136,9 @@ definitions: type: string host: type: string + idle_timeout: + description: Idle timeout + type: integer ignore_eos: type: boolean jinja: @@ -226,6 +229,9 @@ definitions: type: boolean numa: type: string + on_demand_start: + description: On demand start + type: boolean override_kv: items: type: string @@ -266,8 +272,7 @@ definitions: type: number reranking: type: boolean - restart_delay_seconds: - description: RestartDelay duration in seconds + restart_delay: type: integer rope_freq_base: type: number @@ -344,6 +349,16 @@ definitions: yarn_orig_ctx: type: integer type: object + instance.InstanceStatus: + enum: + - 0 + - 1 + - 2 + type: integer + x-enum-varnames: + - Stopped + - Running + - Failed instance.Process: properties: created: @@ -351,9 +366,10 @@ definitions: type: integer name: type: string - running: + status: 
+ allOf: + - $ref: '#/definitions/instance.InstanceStatus' description: Status - type: boolean type: object server.OpenAIInstance: properties: diff --git a/docs-requirements.txt b/docs-requirements.txt new file mode 100644 index 0000000..256e652 --- /dev/null +++ b/docs-requirements.txt @@ -0,0 +1,4 @@ +mkdocs-material==9.5.3 +mkdocs==1.5.3 +pymdown-extensions==10.7 +mkdocs-git-revision-date-localized-plugin==1.2.4 diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..64b097a --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,150 @@ +# Configuration + +llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence: + +``` +Defaults < Configuration file < Environment variables +``` + +llamactl works out of the box with sensible defaults, but you can customize the behavior to suit your needs. + +## Default Configuration + +Here's the default configuration with all available options: + +```yaml +server: + host: "0.0.0.0" # Server host to bind to + port: 8080 # Server port to bind to + allowed_origins: ["*"] # Allowed CORS origins (default: all) + enable_swagger: false # Enable Swagger UI for API docs + +instances: + port_range: [8000, 9000] # Port range for instances + data_dir: ~/.local/share/llamactl # Data directory (platform-specific, see below) + configs_dir: ~/.local/share/llamactl/instances # Instance configs directory + logs_dir: ~/.local/share/llamactl/logs # Logs directory + auto_create_dirs: true # Auto-create data/config/logs dirs if missing + max_instances: -1 # Max instances (-1 = unlimited) + max_running_instances: -1 # Max running instances (-1 = unlimited) + enable_lru_eviction: true # Enable LRU eviction for idle instances + llama_executable: llama-server # Path to llama-server executable + default_auto_restart: true # Auto-restart new instances by default + default_max_restarts: 3 # Max restarts for new instances + default_restart_delay: 5 # Restart delay (seconds) for new instances + default_on_demand_start: true # Default on-demand start setting + on_demand_start_timeout: 120 # Default on-demand start timeout in seconds + timeout_check_interval: 5 # Idle instance timeout check in minutes + +auth: + require_inference_auth: true # Require auth for inference endpoints + inference_keys: [] # Keys for inference endpoints + require_management_auth: true # Require auth for management endpoints + management_keys: [] # Keys for management endpoints +``` + +## Configuration Files + +### Configuration File Locations + +Configuration files are searched in the following locations (in order of precedence): + +**Linux:** +- `./llamactl.yaml` or `./config.yaml` (current directory) +- `$HOME/.config/llamactl/config.yaml` +- `/etc/llamactl/config.yaml` + +**macOS:** +- `./llamactl.yaml` or `./config.yaml` (current directory) +- `$HOME/Library/Application Support/llamactl/config.yaml` +- `/Library/Application Support/llamactl/config.yaml` + +**Windows:** +- `./llamactl.yaml` or `./config.yaml` (current directory) +- `%APPDATA%\llamactl\config.yaml` +- `%USERPROFILE%\llamactl\config.yaml` +- `%PROGRAMDATA%\llamactl\config.yaml` + +You can specify the path to config file with `LLAMACTL_CONFIG_PATH` environment variable. 
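+
+For example, to start the server with a config file outside the default search locations (the path below is illustrative):
+
+```bash
+# Point llamactl at an explicit config file
+LLAMACTL_CONFIG_PATH=/opt/llamactl/config.yaml llamactl
+```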
+ +## Configuration Options + +### Server Configuration + +```yaml +server: + host: "0.0.0.0" # Server host to bind to (default: "0.0.0.0") + port: 8080 # Server port to bind to (default: 8080) + allowed_origins: ["*"] # CORS allowed origins (default: ["*"]) + enable_swagger: false # Enable Swagger UI (default: false) +``` + +**Environment Variables:** +- `LLAMACTL_HOST` - Server host +- `LLAMACTL_PORT` - Server port +- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins +- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false) + +### Instance Configuration + +```yaml +instances: + port_range: [8000, 9000] # Port range for instances (default: [8000, 9000]) + data_dir: "~/.local/share/llamactl" # Directory for all llamactl data (default varies by OS) + configs_dir: "~/.local/share/llamactl/instances" # Directory for instance configs (default: data_dir/instances) + logs_dir: "~/.local/share/llamactl/logs" # Directory for instance logs (default: data_dir/logs) + auto_create_dirs: true # Automatically create data/config/logs directories (default: true) + max_instances: -1 # Maximum instances (-1 = unlimited) + max_running_instances: -1 # Maximum running instances (-1 = unlimited) + enable_lru_eviction: true # Enable LRU eviction for idle instances + llama_executable: "llama-server" # Path to llama-server executable + default_auto_restart: true # Default auto-restart setting + default_max_restarts: 3 # Default maximum restart attempts + default_restart_delay: 5 # Default restart delay in seconds + default_on_demand_start: true # Default on-demand start setting + on_demand_start_timeout: 120 # Default on-demand start timeout in seconds + timeout_check_interval: 5 # Default instance timeout check interval in minutes +``` + +**Environment Variables:** +- `LLAMACTL_INSTANCE_PORT_RANGE` - Port range (format: "8000-9000" or "8000,9000") +- `LLAMACTL_DATA_DIRECTORY` - Data directory path +- `LLAMACTL_INSTANCES_DIR` - Instance configs directory path +- `LLAMACTL_LOGS_DIR` - Log directory path +- `LLAMACTL_AUTO_CREATE_DATA_DIR` - Auto-create data/config/logs directories (true/false) +- `LLAMACTL_MAX_INSTANCES` - Maximum number of instances +- `LLAMACTL_MAX_RUNNING_INSTANCES` - Maximum number of running instances +- `LLAMACTL_ENABLE_LRU_EVICTION` - Enable LRU eviction for idle instances +- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable +- `LLAMACTL_DEFAULT_AUTO_RESTART` - Default auto-restart setting (true/false) +- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts +- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds +- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false) +- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds +- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes + +### Authentication Configuration + +```yaml +auth: + require_inference_auth: true # Require API key for OpenAI endpoints (default: true) + inference_keys: [] # List of valid inference API keys + require_management_auth: true # Require API key for management endpoints (default: true) + management_keys: [] # List of valid management API keys +``` + +**Environment Variables:** +- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false) +- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys +- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false) +- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys + +## 
Command Line Options + +View all available command line options: + +```bash +llamactl --help +``` + +You can also override configuration using command line flags when starting llamactl. diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..90f78a8 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,70 @@ +# Installation + +This guide will walk you through installing Llamactl on your system. + +## Prerequisites + +You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed: + + +**Quick install methods:** + +```bash +# Homebrew (macOS/Linux) +brew install llama.cpp +# Winget (Windows) +winget install llama.cpp +``` + +Or build from source - see llama.cpp docs + +## Installation Methods + +### Option 1: Download Binary (Recommended) + +Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases): + +```bash +# Linux/macOS - Get latest version and download +LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') +curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz +sudo mv llamactl /usr/local/bin/ + +# Or download manually from: +# https://github.com/lordmathis/llamactl/releases/latest + +# Windows - Download from releases page +``` + +### Option 2: Build from Source + +Requirements: +- Go 1.24 or later +- Node.js 22 or later +- Git + +If you prefer to build from source: + +```bash +# Clone the repository +git clone https://github.com/lordmathis/llamactl.git +cd llamactl + +# Build the web UI +cd webui && npm ci && npm run build && cd .. + +# Build the application +go build -o llamactl ./cmd/server +``` + +## Verification + +Verify your installation by checking the version: + +```bash +llamactl --version +``` + +## Next Steps + +Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running! diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md new file mode 100644 index 0000000..4de1065 --- /dev/null +++ b/docs/getting-started/quick-start.md @@ -0,0 +1,143 @@ +# Quick Start + +This guide will help you get Llamactl up and running in just a few minutes. + +## Step 1: Start Llamactl + +Start the Llamactl server: + +```bash +llamactl +``` + +By default, Llamactl will start on `http://localhost:8080`. + +## Step 2: Access the Web UI + +Open your web browser and navigate to: + +``` +http://localhost:8080 +``` + +Login with the management API key. By default it is generated during server startup. Copy it from the terminal output. + +You should see the Llamactl web interface. + +## Step 3: Create Your First Instance + +1. Click the "Add Instance" button +2. Fill in the instance configuration: + - **Name**: Give your instance a descriptive name + - **Model Path**: Path to your Llama.cpp model file + - **Additional Options**: Any extra Llama.cpp parameters + +3. 
Click "Create Instance"
+
+## Step 4: Start Your Instance
+
+Once created, you can:
+
+- **Start** the instance by clicking the start button
+- **Monitor** its status in real-time
+- **View logs** by clicking the logs button
+- **Stop** the instance when needed
+
+## Example Configuration
+
+Here's a basic example configuration for a Llama 2 model:
+
+```json
+{
+  "name": "llama2-7b",
+  "model_path": "/path/to/llama-2-7b-chat.gguf",
+  "options": {
+    "threads": 4,
+    "context_size": 2048
+  }
+}
+```
+
+## Using the API
+
+You can also manage instances via the REST API:
+
+```bash
+# List all instances
+curl http://localhost:8080/api/instances
+
+# Create a new instance
+curl -X POST http://localhost:8080/api/instances \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "my-model",
+    "model_path": "/path/to/model.gguf"
+  }'
+
+# Start an instance
+curl -X POST http://localhost:8080/api/instances/my-model/start
+```
+
+## OpenAI Compatible API
+
+Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
+
+### Chat Completions
+
+Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
+
+```bash
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "my-model",
+    "messages": [
+      {
+        "role": "user",
+        "content": "Hello! Can you help me write a Python function?"
+      }
+    ],
+    "max_tokens": 150,
+    "temperature": 0.7
+  }'
+```
+
+### Using with Python OpenAI Client
+
+You can also use the official OpenAI Python client:
+
+```python
+from openai import OpenAI
+
+# Point the client to your Llamactl server
+client = OpenAI(
+    base_url="http://localhost:8080/v1",
+    api_key="your-inference-api-key"  # Required if inference auth is enabled (the default)
+)
+
+# Create a chat completion
+response = client.chat.completions.create(
+    model="my-model",  # Use the name of your instance
+    messages=[
+        {"role": "user", "content": "Explain quantum computing in simple terms"}
+    ],
+    max_tokens=200,
+    temperature=0.7
+)
+
+print(response.choices[0].message.content)
+```
+
+### List Available Models
+
+Get a list of running instances (models) in OpenAI-compatible format:
+
+```bash
+curl http://localhost:8080/v1/models
+```
+
+## Next Steps
+
+- Learn more about [Managing Instances](../user-guide/managing-instances.md)
+- Explore the [API Reference](../user-guide/api-reference.md)
+- Configure advanced settings in the [Configuration](configuration.md) guide
diff --git a/docs/images/create_instance.png b/docs/images/create_instance.png
new file mode 100644
index 0000000..c1ce856
Binary files /dev/null and b/docs/images/create_instance.png differ
diff --git a/docs/images/dashboard.png b/docs/images/dashboard.png
new file mode 100644
index 0000000..01cea2a
Binary files /dev/null and b/docs/images/dashboard.png differ
diff --git a/docs/images/screenshot.png b/docs/images/screenshot.png
deleted file mode 100644
index 1c77ed2..0000000
Binary files a/docs/images/screenshot.png and /dev/null differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..d3e7bb9
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,41 @@
+# Llamactl Documentation
+
+Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp instances with OpenAI-compatible API routing.**
+
+![Dashboard Screenshot](images/dashboard.png)
+
+## What is Llamactl?
+
+Llamactl is designed to simplify the deployment and management of llama-server instances.
It provides a modern solution for running multiple large language models with centralized management. + +## Features + +🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) +🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name +🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) +🔐 **API Key Authentication**: Separate keys for management vs inference access +📊 **Instance Monitoring**: Health checks, auto-restart, log management +⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits +💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests +💾 **State Persistence**: Ensure instances remain intact across server restarts + +## Quick Links + +- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running +- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options +- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl +- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management +- [API Reference](user-guide/api-reference.md) - Complete API documentation + + +## Getting Help + +If you need help or have questions: + +- Check the [Troubleshooting](user-guide/troubleshooting.md) guide +- Visit the [GitHub repository](https://github.com/lordmathis/llamactl) +- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings + +## License + +MIT License - see the [LICENSE](https://github.com/lordmathis/llamactl/blob/main/LICENSE) file. diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md new file mode 100644 index 0000000..3f99e53 --- /dev/null +++ b/docs/user-guide/api-reference.md @@ -0,0 +1,412 @@ +# API Reference + +Complete reference for the Llamactl REST API. + +## Base URL + +All API endpoints are relative to the base URL: + +``` +http://localhost:8080/api/v1 +``` + +## Authentication + +Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header: + +```bash +curl -H "Authorization: Bearer " \ + http://localhost:8080/api/v1/instances +``` + +The server supports two types of API keys: +- **Management API Keys**: Required for instance management operations (CRUD operations on instances) +- **Inference API Keys**: Required for OpenAI-compatible inference endpoints + +## System Endpoints + +### Get Llamactl Version + +Get the version information of the llamactl server. + +```http +GET /api/v1/version +``` + +**Response:** +``` +Version: 1.0.0 +Commit: abc123 +Build Time: 2024-01-15T10:00:00Z +``` + +### Get Llama Server Help + +Get help text for the llama-server command. + +```http +GET /api/v1/server/help +``` + +**Response:** Plain text help output from `llama-server --help` + +### Get Llama Server Version + +Get version information of the llama-server binary. + +```http +GET /api/v1/server/version +``` + +**Response:** Plain text version output from `llama-server --version` + +### List Available Devices + +List available devices for llama-server. + +```http +GET /api/v1/server/devices +``` + +**Response:** Plain text device list from `llama-server --list-devices` + +## Instances + +### List All Instances + +Get a list of all instances. 
+ +```http +GET /api/v1/instances +``` + +**Response:** +```json +[ + { + "name": "llama2-7b", + "status": "running", + "created": 1705312200 + } +] +``` + +### Get Instance Details + +Get detailed information about a specific instance. + +```http +GET /api/v1/instances/{name} +``` + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "running", + "created": 1705312200 +} +``` + +### Create Instance + +Create and start a new instance. + +```http +POST /api/v1/instances/{name} +``` + +**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options. + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "running", + "created": 1705312200 +} +``` + +### Update Instance + +Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options. + +```http +PUT /api/v1/instances/{name} +``` + +**Request Body:** JSON object with configuration fields to update. + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "running", + "created": 1705312200 +} +``` + +### Delete Instance + +Stop and remove an instance. + +```http +DELETE /api/v1/instances/{name} +``` + +**Response:** `204 No Content` + +## Instance Operations + +### Start Instance + +Start a stopped instance. + +```http +POST /api/v1/instances/{name}/start +``` + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "starting", + "created": 1705312200 +} +``` + +**Error Responses:** +- `409 Conflict`: Maximum number of running instances reached +- `500 Internal Server Error`: Failed to start instance + +### Stop Instance + +Stop a running instance. + +```http +POST /api/v1/instances/{name}/stop +``` + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "stopping", + "created": 1705312200 +} +``` + +### Restart Instance + +Restart an instance (stop then start). + +```http +POST /api/v1/instances/{name}/restart +``` + +**Response:** +```json +{ + "name": "llama2-7b", + "status": "restarting", + "created": 1705312200 +} +``` + +### Get Instance Logs + +Retrieve instance logs. + +```http +GET /api/v1/instances/{name}/logs +``` + +**Query Parameters:** +- `lines`: Number of lines to return (default: all lines, use -1 for all) + +**Response:** Plain text log output + +**Example:** +```bash +curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100" +``` + +### Proxy to Instance + +Proxy HTTP requests directly to the llama-server instance. + +```http +GET /api/v1/instances/{name}/proxy/* +POST /api/v1/instances/{name}/proxy/* +``` + +This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance. + +**Example - Check Instance Health:** +```bash +curl -H "Authorization: Bearer your-api-key" \ + http://localhost:8080/api/v1/instances/my-model/proxy/health +``` + +This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance. + +**Error Responses:** +- `503 Service Unavailable`: Instance is not running + +## OpenAI-Compatible API + +Llamactl provides OpenAI-compatible endpoints for inference operations. + +### List Models + +List all instances in OpenAI-compatible format. 
+ +```http +GET /v1/models +``` + +**Response:** +```json +{ + "object": "list", + "data": [ + { + "id": "llama2-7b", + "object": "model", + "created": 1705312200, + "owned_by": "llamactl" + } + ] +} +``` + +### Chat Completions, Completions, Embeddings + +All OpenAI-compatible inference endpoints are available: + +```http +POST /v1/chat/completions +POST /v1/completions +POST /v1/embeddings +POST /v1/rerank +POST /v1/reranking +``` + +**Request Body:** Standard OpenAI format with `model` field specifying the instance name + +**Example:** +```json +{ + "model": "llama2-7b", + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + } + ] +} +``` + +The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md). + +**Error Responses:** +- `400 Bad Request`: Invalid request body or missing model name +- `503 Service Unavailable`: Instance is not running and on-demand start is disabled +- `409 Conflict`: Cannot start instance due to maximum instances limit + +## Instance Status Values + +Instances can have the following status values: +- `stopped`: Instance is not running +- `running`: Instance is running and ready to accept requests +- `failed`: Instance failed to start or crashed + +## Error Responses + +All endpoints may return error responses in the following format: + +```json +{ + "error": "Error message description" +} +``` + +### Common HTTP Status Codes + +- `200`: Success +- `201`: Created +- `204`: No Content (successful deletion) +- `400`: Bad Request (invalid parameters or request body) +- `401`: Unauthorized (missing or invalid API key) +- `403`: Forbidden (insufficient permissions) +- `404`: Not Found (instance not found) +- `409`: Conflict (instance already exists, max instances reached) +- `500`: Internal Server Error +- `503`: Service Unavailable (instance not running) + +## Examples + +### Complete Instance Lifecycle + +```bash +# Create and start instance +curl -X POST http://localhost:8080/api/v1/instances/my-model \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-api-key" \ + -d '{ + "model": "/models/llama-2-7b.gguf" + }' + +# Check instance status +curl -H "Authorization: Bearer your-api-key" \ + http://localhost:8080/api/v1/instances/my-model + +# Get instance logs +curl -H "Authorization: Bearer your-api-key" \ + "http://localhost:8080/api/v1/instances/my-model/logs?lines=50" + +# Use OpenAI-compatible chat completions +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-inference-api-key" \ + -d '{ + "model": "my-model", + "messages": [ + {"role": "user", "content": "Hello!"} + ], + "max_tokens": 100 + }' + +# Stop instance +curl -X POST -H "Authorization: Bearer your-api-key" \ + http://localhost:8080/api/v1/instances/my-model/stop + +# Delete instance +curl -X DELETE -H "Authorization: Bearer your-api-key" \ + http://localhost:8080/api/v1/instances/my-model +``` + +### Using the Proxy Endpoint + +You can also directly proxy requests to the llama-server instance: + +```bash +# Direct proxy to instance (bypasses OpenAI compatibility layer) +curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-api-key" \ + -d '{ + "prompt": "Hello, world!", + 
"n_predict": 50 + }' +``` + +## Swagger Documentation + +If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at: + +``` +http://localhost:8080/swagger/ +``` + +This provides a complete interactive interface for testing all API endpoints. diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md new file mode 100644 index 0000000..90e4552 --- /dev/null +++ b/docs/user-guide/managing-instances.md @@ -0,0 +1,190 @@ +# Managing Instances + +Learn how to effectively manage your Llama.cpp instances with Llamactl through both the Web UI and API. + +## Overview + +Llamactl provides two ways to manage instances: + +- **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard +- **REST API**: Programmatic access for automation and integration + +![Dashboard Screenshot](../images/dashboard.png) + +### Authentication + +If authentication is enabled: +1. Navigate to the web UI +2. Enter your credentials +3. Bearer token is stored for the session + +### Theme Support + +- Switch between light and dark themes +- Setting is remembered across sessions + +## Instance Cards + +Each instance is displayed as a card showing: + +- **Instance name** +- **Health status badge** (unknown, ready, error, failed) +- **Action buttons** (start, stop, edit, logs, delete) + +## Create Instance + +### Via Web UI + +![Create Instance Screenshot](../images/create_instance.png) + +1. Click the **"Create Instance"** button on the dashboard +2. Enter a unique **Name** for your instance (only required field) +3. Configure model source (choose one): + - **Model Path**: Full path to your downloaded GGUF model file + - **HuggingFace Repo**: Repository name (e.g., `unsloth/gemma-3-27b-it-GGUF`) + - **HuggingFace File**: Specific file within the repo (optional, uses default if not specified) +4. Configure optional instance management settings: + - **Auto Restart**: Automatically restart instance on failure + - **Max Restarts**: Maximum number of restart attempts + - **Restart Delay**: Delay in seconds between restart attempts + - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint + - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable) +5. Configure optional llama-server backend options: + - **Threads**: Number of CPU threads to use + - **Context Size**: Context window size (ctx_size) + - **GPU Layers**: Number of layers to offload to GPU + - **Port**: Network port (auto-assigned by llamactl if not specified) + - **Additional Parameters**: Any other llama-server command line options (see [llama-server documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md)) +6. Click **"Create"** to save the instance + +### Via API + +```bash +# Create instance with local model file +curl -X POST http://localhost:8080/api/instances/my-instance \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/model.gguf", + "threads": 8, + "ctx_size": 4096 + } + }' + +# Create instance with HuggingFace model +curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "hf_repo": "unsloth/gemma-3-27b-it-GGUF", + "hf_file": "gemma-3-27b-it-GGUF.gguf", + "gpu_layers": 32 + }, + "auto_restart": true, + "max_restarts": 3 + }' +``` + +## Start Instance + +### Via Web UI +1. 
Click the **"Start"** button on an instance card
+2. Watch the status change to "Unknown"
+3. Monitor progress in the logs
+4. Instance status changes to "Ready" once the instance is up
+
+### Via API
+```bash
+curl -X POST http://localhost:8080/api/instances/{name}/start
+```
+
+## Stop Instance
+
+### Via Web UI
+1. Click the **"Stop"** button on an instance card
+2. Instance gracefully shuts down
+
+### Via API
+```bash
+curl -X POST http://localhost:8080/api/instances/{name}/stop
+```
+
+## Edit Instance
+
+### Via Web UI
+1. Click the **"Edit"** button on an instance card
+2. Modify settings in the configuration dialog
+3. Changes require instance restart to take effect
+4. Click **"Update & Restart"** to apply changes
+
+### Via API
+Modify instance settings:
+
+```bash
+curl -X PUT http://localhost:8080/api/instances/{name} \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_options": {
+      "threads": 8,
+      "context_size": 4096
+    }
+  }'
+```
+
+!!! note
+    Configuration changes require restarting the instance to take effect.
+
+
+## View Logs
+
+### Via Web UI
+
+1. Click the **"Logs"** button on any instance card
+2. Real-time log viewer opens
+
+### Via API
+Retrieve logs via the API:
+
+```bash
+# Get instance logs
+curl http://localhost:8080/api/instances/{name}/logs
+```
+
+## Delete Instance
+
+### Via Web UI
+1. Click the **"Delete"** button on an instance card
+2. Only stopped instances can be deleted
+3. Confirm deletion in the dialog
+
+### Via API
+```bash
+curl -X DELETE http://localhost:8080/api/instances/{name}
+```
+
+## Instance Proxy
+
+Llamactl proxies all requests to the underlying llama-server instances.
+
+```bash
+# Send a request to the instance through the proxy
+curl http://localhost:8080/api/instances/{name}/proxy/
+```
+
+Check llama-server [docs](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for more information.
+
+### Instance Health
+
+#### Via Web UI
+
+1. The health status badge is displayed on each instance card
+
+#### Via API
+
+Check the health status of your instances:
+
+```bash
+curl http://localhost:8080/api/instances/{name}/proxy/health
+```
diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md
new file mode 100644
index 0000000..5608139
--- /dev/null
+++ b/docs/user-guide/troubleshooting.md
@@ -0,0 +1,160 @@
+# Troubleshooting
+
+Issues specific to Llamactl deployment and operation.
+
+## Configuration Issues
+
+### Invalid Configuration
+
+**Problem:** Invalid configuration preventing startup
+
+**Solutions:**
+1. Use minimal configuration:
+   ```yaml
+   server:
+     host: "0.0.0.0"
+     port: 8080
+   instances:
+     port_range: [8000, 9000]
+   ```
+
+2. Check data directory permissions:
+   ```bash
+   # Ensure data directory is writable (default: ~/.local/share/llamactl)
+   mkdir -p ~/.local/share/llamactl/{instances,logs}
+   ```
+
+## Instance Management Issues
+
+### Model Loading Failures
+
+**Problem:** Instance fails to start with model loading errors
+
+**Common Solutions:**
+- **llama-server not found:** Ensure `llama-server` binary is in PATH
+- **Wrong model format:** Ensure model is in GGUF format
+- **Insufficient memory:** Use smaller model or reduce context size
+- **Path issues:** Use absolute paths to model files
+
+### Memory Issues
+
+**Problem:** Out of memory errors or system becomes unresponsive
+
+**Solutions:**
+1. **Reduce context size:**
+   ```json
+   {
+     "n_ctx": 1024
+   }
+   ```
+
+2. **Use quantized models:**
+   - Try Q4_K_M instead of higher precision models
+   - Use smaller model variants (7B instead of 13B)
+
+### GPU Configuration
+
+**Problem:** GPU not being used effectively
+
+**Solutions:**
+1. **Configure GPU layers:**
+   ```json
+   {
+     "n_gpu_layers": 35
+   }
+   ```
+
+### Advanced Instance Issues
+
+**Problem:** Complex model loading, performance, or compatibility issues
+
+Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
+
+**Resources:**
+- **llama.cpp Documentation:** [https://github.com/ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
+- **llama.cpp Issues:** [https://github.com/ggml-org/llama.cpp/issues](https://github.com/ggml-org/llama.cpp/issues)
+- **llama.cpp Discussions:** [https://github.com/ggml-org/llama.cpp/discussions](https://github.com/ggml-org/llama.cpp/discussions)
+
+**Testing directly with llama-server:**
+```bash
+# Test your model and parameters directly with llama-server
+llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
+```
+
+This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
+
+## API and Network Issues
+
+### CORS Errors
+
+**Problem:** Web UI shows CORS errors in browser console
+
+**Solutions:**
+1. **Configure allowed origins:**
+   ```yaml
+   server:
+     allowed_origins:
+       - "http://localhost:3000"
+       - "https://yourdomain.com"
+   ```
+
+## Authentication Issues
+
+**Problem:** API requests failing with authentication errors
+
+**Solutions:**
+1. **Disable authentication temporarily:**
+   ```yaml
+   auth:
+     require_management_auth: false
+     require_inference_auth: false
+   ```
+
+2. **Configure API keys:**
+   ```yaml
+   auth:
+     management_keys:
+       - "your-management-key"
+     inference_keys:
+       - "your-inference-key"
+   ```
+
+3. **Use correct Authorization header:**
+   ```bash
+   curl -H "Authorization: Bearer your-api-key" \
+     http://localhost:8080/api/v1/instances
+   ```
+
+## Debugging and Logs
+
+### Viewing Instance Logs
+
+```bash
+# Get instance logs via API
+curl http://localhost:8080/api/v1/instances/{name}/logs
+
+# Or check log files directly
+tail -f ~/.local/share/llamactl/logs/{instance-name}.log
+```
+
+### Enable Debug Logging
+
+```bash
+export LLAMACTL_LOG_LEVEL=debug
+llamactl
+```
+
+## Getting Help
+
+When reporting issues, include:
+
+1. **System information:**
+   ```bash
+   llamactl --version
+   ```
+
+2. **Configuration file** (remove sensitive keys)
+
+3. **Relevant log output**
+
+4. **Steps to reproduce the issue**
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..ed4be3a
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,68 @@
+site_name: Llamactl Documentation
+site_description: User documentation for Llamactl - A management tool for Llama.cpp instances
+site_author: Llamactl Team
+site_url: https://llamactl.org
+
+repo_name: lordmathis/llamactl
+repo_url: https://github.com/lordmathis/llamactl
+
+theme:
+  name: material
+  palette:
+    # Palette toggle for light mode
+    - scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Palette toggle for dark mode
+    - scheme: slate
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+  features:
+    - navigation.tabs
+    - navigation.sections
+    - navigation.expand
+    - navigation.top
+    - search.highlight
+    - search.share
+    - content.code.copy
+
+markdown_extensions:
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - admonition
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: true
+
+nav:
+  - Home: index.md
+  - Getting Started:
+    - Installation: getting-started/installation.md
+    - Quick Start: getting-started/quick-start.md
+    - Configuration: getting-started/configuration.md
+  - User Guide:
+    - Managing Instances: user-guide/managing-instances.md
+    - API Reference: user-guide/api-reference.md
+    - Troubleshooting: user-guide/troubleshooting.md
+
+plugins:
+  - search
+  - git-revision-date-localized
+
+extra:
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/lordmathis/llamactl