Merge pull request #118 from lordmathis/chore/remove-deprecated

chore: Remove deprecated code
Remove deprecated code
2025-12-23 09:34:23 +00:00 · 2025-12-22 21:53:19 +01:00 · 2025-12-22 21:49:37 +01:00 · 2025-12-22 21:23:58 +01:00 · 2025-12-22 21:20:42 +01:00 · 2025-12-22 20:55:46 +01:00
75 changed files with 3730 additions and 1423 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -45,15 +45,23 @@ jobs:
  build:
    name: Build Binaries
    needs: build-webui
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
-        goos: [linux, windows, darwin]
-        goarch: [amd64, arm64]
-        exclude:
-          # Windows ARM64 support is limited
-          - goos: windows
+        include:
+          - goos: linux
+            goarch: amd64
+            runner: ubuntu-latest
+          - goos: linux
            goarch: arm64
+            runner: ubuntu-latest
+            cc: aarch64-linux-gnu-gcc
+          - goos: darwin
+            goarch: arm64
+            runner: macos-latest
+          - goos: windows
+            goarch: amd64
+            runner: windows-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
@@ -70,11 +78,19 @@ jobs:
          name: webui-dist
          path: webui/dist/

+      - name: Install cross-compilation tools (Linux ARM64 only)
+        if: matrix.cc != ''
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y gcc-aarch64-linux-gnu
+
      - name: Build binary
        env:
          GOOS: ${{ matrix.goos }}
          GOARCH: ${{ matrix.goarch }}
-          CGO_ENABLED: 0
+          CGO_ENABLED: 1
+          CC: ${{ matrix.cc }}
+        shell: bash
        run: |
          # Set binary extension for Windows
          BINARY_NAME="llamactl"
@@ -91,8 +107,10 @@ jobs:
            ARCHIVE_OS="macos"
          fi
          ARCHIVE_NAME="llamactl-${{ github.ref_name }}-${ARCHIVE_OS}-${{ matrix.goarch }}"
+          
          if [ "${{ matrix.goos }}" = "windows" ]; then
-            zip "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
+            # Use 7z on Windows (pre-installed)
+            7z a "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
            echo "ASSET_PATH=${ARCHIVE_NAME}.zip" >> $GITHUB_ENV
          else
            tar -czf "${ARCHIVE_NAME}.tar.gz" "${BINARY_NAME}"
@@ -179,4 +197,4 @@ jobs:
        with:
          files: assets/checksums.txt
        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -14,6 +14,7 @@
                "GO_ENV": "development",
                "LLAMACTL_CONFIG_PATH": "${workspaceFolder}/llamactl.dev.yaml"
            },
+            "console": "integratedTerminal",
        }
    ]
 }
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@

 **🚀 Easy Model Management**
 - **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality)
+- **Dynamic Multi-Model Instances**: llama.cpp router mode - serve multiple models from a single instance with on-demand loading
 - **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits
 - **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs

@@ -183,7 +184,6 @@ data_dir: ~/.local/share/llamactl  # Main data directory (database, instances, l

 instances:
  port_range: [8000, 9000]                        # Port range for instances
-  configs_dir: ~/.local/share/llamactl/instances  # Instance configs directory (platform dependent)
  logs_dir: ~/.local/share/llamactl/logs          # Logs directory (platform dependent)
  auto_create_dirs: true                          # Auto-create data/config/logs dirs if missing
  max_instances: -1                               # Max instances (-1 = unlimited)
@@ -195,6 +195,9 @@ instances:
  default_on_demand_start: true                   # Default on-demand start setting
  on_demand_start_timeout: 120                    # Default on-demand start timeout in seconds
  timeout_check_interval: 5                       # Idle instance timeout check in minutes
+  log_rotation_enabled: true                      # Enable log rotation (default: true)
+  log_rotation_max_size: 100                      # Max log file size in MB before rotation (default: 100)
+  log_rotation_compress: false                    # Compress rotated log files (default: false)

 database:
  path: ~/.local/share/llamactl/llamactl.db  # Database file path (platform dependent)
@@ -203,8 +206,7 @@ database:
  connection_max_lifetime: 5m                # Connection max lifetime

 auth:
-  require_inference_auth: true   # Require auth for inference endpoints
-  inference_keys: []             # Keys for inference endpoints
+  require_inference_auth: true   # Require auth for inference endpoints (API keys are created in the web UI)
  require_management_auth: true  # Require auth for management endpoints
  management_keys: []            # Keys for management endpoints
 ```
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -1,6 +1,7 @@
 package main

 import (
+	"context"
 	"fmt"
 	"llamactl/pkg/config"
 	"llamactl/pkg/database"
@@ -11,6 +12,7 @@ import (
 	"os"
 	"os/signal"
 	"syscall"
+	"time"
 )

 // version is set at build time using -ldflags "-X main.version=1.0.0"
@@ -55,11 +57,6 @@ func main() {
 			log.Printf("Error creating data directory %s: %v\nData persistence may not be available.", cfg.DataDir, err)
 		}

-		// Create instances directory
-		if err := os.MkdirAll(cfg.Instances.InstancesDir, 0755); err != nil {
-			log.Printf("Error creating instances directory %s: %v\nPersistence will not be available.", cfg.Instances.InstancesDir, err)
-		}
-
 		// Create logs directory
 		if err := os.MkdirAll(cfg.Instances.LogsDir, 0755); err != nil {
 			log.Printf("Error creating log directory %s: %v\nInstance logs will not be available.", cfg.Instances.LogsDir, err)
@@ -82,11 +79,6 @@ func main() {
 		log.Fatalf("Failed to run database migrations: %v", err)
 	}

-	// Migrate from JSON files if needed (one-time migration)
-	if err := migrateFromJSON(&cfg, db); err != nil {
-		log.Printf("Warning: Failed to migrate from JSON: %v", err)
-	}
-
 	// Initialize the instance manager with dependency injection
 	instanceManager := manager.New(&cfg, db)

@@ -116,14 +108,23 @@ func main() {
 	<-stop
 	fmt.Println("Shutting down server...")

-	if err := server.Close(); err != nil {
+	// Create shutdown context with timeout
+	shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer shutdownCancel()
+
+	// Shutdown HTTP server gracefully
+	if err := server.Shutdown(shutdownCtx); err != nil {
 		log.Printf("Error shutting down server: %v\n", err)
 	} else {
 		fmt.Println("Server shut down gracefully.")
 	}

-	// Wait for all instances to stop
+	// Stop all instances and cleanup
 	instanceManager.Shutdown()

+	if err := db.Close(); err != nil {
+		log.Printf("Error closing database: %v\n", err)
+	}
+
 	fmt.Println("Exiting llamactl.")
 }
--- a/cmd/server/migrate_json.go
+++ b/cmd/server/migrate_json.go
@@ -1,96 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"llamactl/pkg/config"
-	"llamactl/pkg/database"
-	"llamactl/pkg/instance"
-	"log"
-	"os"
-	"path/filepath"
-)
-
-// migrateFromJSON migrates instances from JSON files to SQLite database
-// This is a one-time migration that runs on first startup with existing JSON files.
-func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
-	instancesDir := cfg.Instances.InstancesDir
-	if instancesDir == "" {
-		return nil // No instances directory configured
-	}
-
-	// Check if instances directory exists
-	if _, err := os.Stat(instancesDir); os.IsNotExist(err) {
-		return nil // No instances directory, nothing to migrate
-	}
-
-	// Check if database is empty (no instances)
-	existing, err := db.LoadAll()
-	if err != nil {
-		return fmt.Errorf("failed to check existing instances: %w", err)
-	}
-
-	if len(existing) > 0 {
-		return nil // Database already has instances, skip migration
-	}
-
-	// Find all JSON files
-	files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
-	if err != nil {
-		return fmt.Errorf("failed to list instance files: %w", err)
-	}
-
-	if len(files) == 0 {
-		return nil // No JSON files to migrate
-	}
-
-	log.Printf("Migrating %d instances from JSON to SQLite...", len(files))
-
-	// Migrate each JSON file
-	var migrated int
-	for _, file := range files {
-		if err := migrateJSONFile(file, db); err != nil {
-			log.Printf("Failed to migrate %s: %v", file, err)
-			continue
-		}
-		migrated++
-	}
-
-	log.Printf("Successfully migrated %d/%d instances to SQLite", migrated, len(files))
-
-	// Archive old JSON files
-	if migrated > 0 {
-		archiveDir := filepath.Join(instancesDir, "json_archive")
-		if err := os.MkdirAll(archiveDir, 0755); err == nil {
-			for _, file := range files {
-				newPath := filepath.Join(archiveDir, filepath.Base(file))
-				if err := os.Rename(file, newPath); err != nil {
-					log.Printf("Failed to archive %s: %v", file, err)
-				}
-			}
-			log.Printf("Archived old JSON files to %s", archiveDir)
-		}
-	}
-
-	return nil
-}
-
-// migrateJSONFile migrates a single JSON file to the database
-func migrateJSONFile(filename string, db database.InstanceStore) error {
-	data, err := os.ReadFile(filename)
-	if err != nil {
-		return fmt.Errorf("failed to read file: %w", err)
-	}
-
-	var inst instance.Instance
-	if err := json.Unmarshal(data, &inst); err != nil {
-		return fmt.Errorf("failed to unmarshal instance: %w", err)
-	}
-
-	if err := db.Save(&inst); err != nil {
-		return fmt.Errorf("failed to save instance to database: %w", err)
-	}
-
-	log.Printf("Migrated instance %s from JSON to SQLite", inst.Name)
-	return nil
-}
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -74,7 +74,6 @@ database:

 auth:
  require_inference_auth: true   # Require auth for inference endpoints
-  inference_keys: []             # Keys for inference endpoints
  require_management_auth: true  # Require auth for management endpoints
  management_keys: []            # Keys for management endpoints

@@ -231,6 +230,9 @@ instances:
  default_on_demand_start: true # Default on-demand start setting
  on_demand_start_timeout: 120  # Default on-demand start timeout in seconds
  timeout_check_interval: 5     # Default instance timeout check interval in minutes
+  log_rotation_enabled: true    # Enable log rotation (default: true)
+  log_rotation_max_size: 100    # Max log file size in MB before rotation (default: 100)
+  log_rotation_compress: false  # Compress rotated log files (default: false)
 ```

 **Environment Variables:**
@@ -247,6 +249,9 @@ instances:
 - `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)  
 - `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
 - `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
+- `LLAMACTL_LOG_ROTATION_ENABLED` - Enable log rotation (true/false)
+- `LLAMACTL_LOG_ROTATION_MAX_SIZE` - Max log file size in MB
+- `LLAMACTL_LOG_ROTATION_COMPRESS` - Compress rotated logs (true/false)

 ### Database Configuration

@@ -266,17 +271,33 @@ database:

 ### Authentication Configuration

+llamactl supports two types of authentication:
+
+- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
+- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.
+
 ```yaml
 auth:
  require_inference_auth: true           # Require API key for OpenAI endpoints (default: true)
-  inference_keys: []                     # List of valid inference API keys
  require_management_auth: true          # Require API key for management endpoints (default: true)
  management_keys: []                    # List of valid management API keys
 ```

+**Managing Inference API Keys:**
+
+Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:
+
+1. Open the web UI and log in with a management API key
+2. Navigate to **Settings → API Keys**
+3. Click **Create API Key**
+4. Configure the key:
+   - **Name**: A descriptive name for the key
+   - **Expiration**: Optional expiration date
+   - **Permissions**: Grant access to all instances or specific instances only
+5. Copy the generated key - it won't be shown again
+
 **Environment Variables:**
 - `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
 - `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
 - `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys

--- a/docs/docs.go
+++ b/docs/docs.go
@@ -999,6 +999,156 @@ const docTemplate = `{
                }
            }
        },
+        "/api/v1/llama-cpp/{name}/models": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns a list of models available in the specified llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "List models in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Models list response",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": true
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid instance",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/api/v1/llama-cpp/{name}/models/{model}/load": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Loads the specified model in the given llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "Load a model in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Model Name",
+                        "name": "model",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Success message",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/api/v1/llama-cpp/{name}/models/{model}/unload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Unloads the specified model in the given llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "Unload a model in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Model Name",
+                        "name": "model",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Success message",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
        "/api/v1/nodes": {
            "get": {
                "security": [
@@ -1788,13 +1938,6 @@ const docTemplate = `{
        "config.AuthConfig": {
            "type": "object",
            "properties": {
-                "inference_keys": {
-                    "description": "List of keys for OpenAI compatible inference endpoints",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
                "management_keys": {
                    "description": "List of keys for management endpoints",
                    "type": "array",
@@ -1905,10 +2048,6 @@ const docTemplate = `{
                    "description": "Automatically create the data directory if it doesn't exist",
                    "type": "boolean"
                },
-                "configs_dir": {
-                    "description": "Instance config directory override (relative to data_dir if not absolute)",
-                    "type": "string"
-                },
                "default_auto_restart": {
                    "description": "Default auto-restart setting for new instances",
                    "type": "boolean"
@@ -1929,6 +2068,21 @@ const docTemplate = `{
                    "description": "Enable LRU eviction for instance logs",
                    "type": "boolean"
                },
+                "logRotationCompress": {
+                    "description": "Whether to compress rotated log files",
+                    "type": "boolean",
+                    "default": false
+                },
+                "logRotationEnabled": {
+                    "description": "Log rotation enabled",
+                    "type": "boolean",
+                    "default": true
+                },
+                "logRotationMaxSize": {
+                    "description": "Maximum log file size in MB before rotation",
+                    "type": "integer",
+                    "default": 100
+                },
                "logs_dir": {
                    "description": "Logs directory override (relative to data_dir if not absolute)",
                    "type": "string"
@@ -2063,20 +2217,19 @@ const docTemplate = `{
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
-                "expiresAt": {
-                    "type": "integer",
-                    "format": "int64"
+                "expires_at": {
+                    "type": "integer"
                },
-                "instancePermissions": {
+                "instance_ids": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/server.InstancePermission"
+                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
-                "permissionMode": {
+                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }
@@ -2087,9 +2240,6 @@ const docTemplate = `{
                "created_at": {
                    "type": "integer"
                },
-                "enabled": {
-                    "type": "boolean"
-                },
                "expires_at": {
                    "type": "integer"
                },
@@ -2116,29 +2266,9 @@ const docTemplate = `{
                }
            }
        },
-        "server.InstancePermission": {
-            "type": "object",
-            "properties": {
-                "can_infer": {
-                    "type": "boolean"
-                },
-                "can_view_logs": {
-                    "type": "boolean"
-                },
-                "instance_id": {
-                    "type": "integer"
-                }
-            }
-        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
-                "can_infer": {
-                    "type": "boolean"
-                },
-                "can_view_logs": {
-                    "type": "boolean"
-                },
                "instance_id": {
                    "type": "integer"
                },
@@ -2153,9 +2283,6 @@ const docTemplate = `{
                "created_at": {
                    "type": "integer"
                },
-                "enabled": {
-                    "type": "boolean"
-                },
                "expires_at": {
                    "type": "integer"
                },
--- a/docs/managing-instances.md
+++ b/docs/managing-instances.md
@@ -222,6 +222,100 @@ curl -X DELETE http://localhost:8080/api/v1/instances/{name} \
  -H "Authorization: Bearer <token>"
 ```

+## Multi-Model llama.cpp Instances
+
+!!! info "llama.cpp Router Mode"
+    llama.cpp instances support [**router mode**](https://huggingface.co/blog/ggml-org/model-management-in-llamacpp), allowing a single instance to serve multiple models dynamically. Models are loaded on-demand from the llama.cpp cache without restarting the instance.
+
+### Creating a Multi-Model Instance
+
+**Via Web UI**
+
+1. Click **"Create Instance"**
+2. Select **Backend Type**: "Llama Server"
+3. Leave **Backend Options** empty `{}` or omit the model field
+4. Create the instance
+
+**Via API**
+
+```bash
+# Create instance without specifying a model (router mode)
+curl -X POST http://localhost:8080/api/v1/instances/my-router \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {},
+    "nodes": ["main"]
+  }'
+```
+
+### Managing Models
+
+**Via Web UI**
+
+1. Start the router mode instance
+2. Instance card displays a badge showing loaded/total models (e.g., "2/5 models")
+3. Click the **"Models"** button on the instance card
+4. Models dialog opens showing:
+    - All available models from the llama.cpp instance
+    - Status indicator (loaded, loading, or unloaded)
+    - Load/Unload buttons for each model
+5. Click **"Load"** to load a model into memory
+6. Click **"Unload"** to free up memory
+
+**Via API**
+
+```bash
+# List available models
+curl http://localhost:8080/api/v1/llama-cpp/my-router/models \
+  -H "Authorization: Bearer <token>"
+
+# Load a model
+curl -X POST http://localhost:8080/api/v1/llama-cpp/my-router/models/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf/load \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{"model": "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"}'
+
+# Unload a model
+curl -X POST http://localhost:8080/api/v1/llama-cpp/my-router/models/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf/unload \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{"model": "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"}'
+```
+
+### Using Multi-Model Instances
+
+When making inference requests to a multi-model instance, specify the model using the format `instance_name/model_name`:
+
+```bash
+# OpenAI-compatible chat completion with specific model
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <inference-key>" \
+  -d '{
+    "model": "my-router/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
+    "messages": [
+      {"role": "user", "content": "Hello!"}
+    ]
+  }'
+
+# List all available models (includes multi-model instances)
+curl http://localhost:8080/v1/models \
+  -H "Authorization: Bearer <inference-key>"
+```
+
+The response from `/v1/models` will include each model from a multi-model instance as a separate entry in the format `instance_name/model_name`.
+
+### Model Discovery
+
+Models are automatically discovered from the llama.cpp cache directory. The default cache locations are:
+
+- **Linux/macOS**: `~/.cache/llama.cpp/`
+- **Windows**: `%LOCALAPPDATA%\llama.cpp\`
+
+Place your GGUF model files in the cache directory, and they will appear in the models list when you start a router mode instance.
+
 ## Instance Proxy

 Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -17,10 +17,10 @@ Before you start, let's clarify a few key terms:

 Llamactl uses two types of API keys:

- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
+- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
+- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.

-By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
+By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.

 ## Start Llamactl

@@ -38,24 +38,17 @@ llamactl

    sk-management-...

-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-⚠️  INFERENCE AUTHENTICATION REQUIRED
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-🔑  Generated Inference API Key:
-
-    sk-inference-...
-
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ⚠️  IMPORTANT
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-• These keys are auto-generated and will change on restart
-• For production, add explicit keys to your configuration
-• Copy these keys before they disappear from the terminal
+• This key is auto-generated and will change on restart
+• For production, add explicit management_keys to your configuration
+• Copy this key before it disappears from the terminal
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 Llamactl server listening on 0.0.0.0:8080
 ```

-Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
+Copy the **Management API Key** from the terminal - you'll need it to access the web UI.

 By default, Llamactl will start on `http://localhost:8080`.

@@ -82,7 +75,7 @@ You should see the Llamactl web interface.
     - **Additional Options**: Backend-specific parameters

    !!! tip "Auto-Assignment"
-        Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
+        Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.

    !!! note "Remote Node Deployment"
        If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
@@ -98,6 +91,24 @@ Once created, you can:
 - **View logs** by clicking the logs button
 - **Stop** the instance when needed

+## Create an Inference API Key
+
+To make inference requests to your instances, you'll need an inference API key:
+
+1. In the web UI, click the **Settings** icon (gear icon in the top-right)
+2. Navigate to the **API Keys** tab
+3. Click **Create API Key**
+4. Configure your key:
+   - **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
+   - **Expiration**: Optionally set an expiration date for the key
+   - **Permissions**: Choose whether the key can access all instances or only specific ones
+5. Click **Create**
+6. **Copy the generated key** - it will only be shown once!
+
+The key will look like: `llamactl-...`
+
+You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
+
 ## Example Configurations

 Here are basic example configurations for each backend:
@@ -246,7 +257,7 @@ print(response.choices[0].message.content)
 ```

 !!! note "API Key"
-    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
+    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).

 ### List Available Models

--- a/docs/swagger.json
+++ b/docs/swagger.json
@@ -992,6 +992,156 @@
                }
            }
        },
+        "/api/v1/llama-cpp/{name}/models": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns a list of models available in the specified llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "List models in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Models list response",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": true
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid instance",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/api/v1/llama-cpp/{name}/models/{model}/load": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Loads the specified model in the given llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "Load a model in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Model Name",
+                        "name": "model",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Success message",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/api/v1/llama-cpp/{name}/models/{model}/unload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Unloads the specified model in the given llama.cpp instance",
+                "tags": [
+                    "Llama.cpp"
+                ],
+                "summary": "Unload a model in a llama.cpp instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Model Name",
+                        "name": "model",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Success message",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
        "/api/v1/nodes": {
            "get": {
                "security": [
@@ -1781,13 +1931,6 @@
        "config.AuthConfig": {
            "type": "object",
            "properties": {
-                "inference_keys": {
-                    "description": "List of keys for OpenAI compatible inference endpoints",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
                "management_keys": {
                    "description": "List of keys for management endpoints",
                    "type": "array",
@@ -1898,10 +2041,6 @@
                    "description": "Automatically create the data directory if it doesn't exist",
                    "type": "boolean"
                },
-                "configs_dir": {
-                    "description": "Instance config directory override (relative to data_dir if not absolute)",
-                    "type": "string"
-                },
                "default_auto_restart": {
                    "description": "Default auto-restart setting for new instances",
                    "type": "boolean"
@@ -1922,6 +2061,21 @@
                    "description": "Enable LRU eviction for instance logs",
                    "type": "boolean"
                },
+                "logRotationCompress": {
+                    "description": "Whether to compress rotated log files",
+                    "type": "boolean",
+                    "default": false
+                },
+                "logRotationEnabled": {
+                    "description": "Log rotation enabled",
+                    "type": "boolean",
+                    "default": true
+                },
+                "logRotationMaxSize": {
+                    "description": "Maximum log file size in MB before rotation",
+                    "type": "integer",
+                    "default": 100
+                },
                "logs_dir": {
                    "description": "Logs directory override (relative to data_dir if not absolute)",
                    "type": "string"
@@ -2056,20 +2210,19 @@
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
-                "expiresAt": {
-                    "type": "integer",
-                    "format": "int64"
+                "expires_at": {
+                    "type": "integer"
                },
-                "instancePermissions": {
+                "instance_ids": {
                    "type": "array",
                    "items": {
-                        "$ref": "#/definitions/server.InstancePermission"
+                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
-                "permissionMode": {
+                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }
@@ -2080,9 +2233,6 @@
                "created_at": {
                    "type": "integer"
                },
-                "enabled": {
-                    "type": "boolean"
-                },
                "expires_at": {
                    "type": "integer"
                },
@@ -2109,29 +2259,9 @@
                }
            }
        },
-        "server.InstancePermission": {
-            "type": "object",
-            "properties": {
-                "can_infer": {
-                    "type": "boolean"
-                },
-                "can_view_logs": {
-                    "type": "boolean"
-                },
-                "instance_id": {
-                    "type": "integer"
-                }
-            }
-        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
-                "can_infer": {
-                    "type": "boolean"
-                },
-                "can_view_logs": {
-                    "type": "boolean"
-                },
                "instance_id": {
                    "type": "integer"
                },
@@ -2146,9 +2276,6 @@
                "created_at": {
                    "type": "integer"
                },
-                "enabled": {
-                    "type": "boolean"
-                },
                "expires_at": {
                    "type": "integer"
                },
--- a/docs/swagger.yaml
+++ b/docs/swagger.yaml
@@ -39,11 +39,6 @@ definitions:
    type: object
  config.AuthConfig:
    properties:
-      inference_keys:
-        description: List of keys for OpenAI compatible inference endpoints
-        items:
-          type: string
-        type: array
      management_keys:
        description: List of keys for management endpoints
        items:
@@ -118,10 +113,6 @@ definitions:
      auto_create_dirs:
        description: Automatically create the data directory if it doesn't exist
        type: boolean
-      configs_dir:
-        description: Instance config directory override (relative to data_dir if not
-          absolute)
-        type: string
      default_auto_restart:
        description: Default auto-restart setting for new instances
        type: boolean
@@ -137,6 +128,18 @@ definitions:
      enable_lru_eviction:
        description: Enable LRU eviction for instance logs
        type: boolean
+      logRotationCompress:
+        default: false
+        description: Whether to compress rotated log files
+        type: boolean
+      logRotationEnabled:
+        default: true
+        description: Log rotation enabled
+        type: boolean
+      logRotationMaxSize:
+        default: 100
+        description: Maximum log file size in MB before rotation
+        type: integer
      logs_dir:
        description: Logs directory override (relative to data_dir if not absolute)
        type: string
@@ -232,24 +235,21 @@ definitions:
    type: object
  server.CreateKeyRequest:
    properties:
-      expiresAt:
-        format: int64
+      expires_at:
        type: integer
-      instancePermissions:
+      instance_ids:
        items:
-          $ref: '#/definitions/server.InstancePermission'
+          type: integer
        type: array
      name:
        type: string
-      permissionMode:
+      permission_mode:
        $ref: '#/definitions/auth.PermissionMode'
    type: object
  server.CreateKeyResponse:
    properties:
      created_at:
        type: integer
-      enabled:
-        type: boolean
      expires_at:
        type: integer
      id:
@@ -267,21 +267,8 @@ definitions:
      user_id:
        type: string
    type: object
-  server.InstancePermission:
-    properties:
-      can_infer:
-        type: boolean
-      can_view_logs:
-        type: boolean
-      instance_id:
-        type: integer
-    type: object
  server.KeyPermissionResponse:
    properties:
-      can_infer:
-        type: boolean
-      can_view_logs:
-        type: boolean
      instance_id:
        type: integer
      instance_name:
@@ -291,8 +278,6 @@ definitions:
    properties:
      created_at:
        type: integer
-      enabled:
-        type: boolean
      expires_at:
        type: integer
      id:
@@ -973,6 +958,102 @@ paths:
      summary: Stop a running instance
      tags:
      - Instances
+  /api/v1/llama-cpp/{name}/models:
+    get:
+      description: Returns a list of models available in the specified llama.cpp instance
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      responses:
+        "200":
+          description: Models list response
+          schema:
+            additionalProperties: true
+            type: object
+        "400":
+          description: Invalid instance
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: List models in a llama.cpp instance
+      tags:
+      - Llama.cpp
+  /api/v1/llama-cpp/{name}/models/{model}/load:
+    post:
+      description: Loads the specified model in the given llama.cpp instance
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      - description: Model Name
+        in: path
+        name: model
+        required: true
+        type: string
+      responses:
+        "200":
+          description: Success message
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+        "400":
+          description: Invalid request
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: Load a model in a llama.cpp instance
+      tags:
+      - Llama.cpp
+  /api/v1/llama-cpp/{name}/models/{model}/unload:
+    post:
+      description: Unloads the specified model in the given llama.cpp instance
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      - description: Model Name
+        in: path
+        name: model
+        required: true
+        type: string
+      responses:
+        "200":
+          description: Success message
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+        "400":
+          description: Invalid request
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      security:
+      - ApiKeyAuth: []
+      summary: Unload a model in a llama.cpp instance
+      tags:
+      - Llama.cpp
  /api/v1/nodes:
    get:
      description: Returns a map of all nodes configured in the server (node name
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -115,15 +115,15 @@ vllm serve microsoft/DialoGPT-medium --port 8081
     require_inference_auth: false
   ```

-2. **Configure API keys:**
+2. **Configure management API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
-     inference_keys:
-       - "your-inference-key"
   ```

+   Inference API keys are no longer set in the config file; create them via the web UI (Settings → API Keys) after logging in with your management key.
+
 3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
--- a/go.mod
+++ b/go.mod
@@ -3,13 +3,14 @@ module llamactl
 go 1.24.5

 require (
+	github.com/DeRuina/timberjack v1.3.9
 	github.com/go-chi/chi/v5 v5.2.2
 	github.com/go-chi/cors v1.2.2
 	github.com/golang-migrate/migrate/v4 v4.19.1
 	github.com/mattn/go-sqlite3 v1.14.24
 	github.com/swaggo/http-swagger v1.3.4
 	github.com/swaggo/swag v1.16.5
-	golang.org/x/crypto v0.45.0
+	golang.org/x/crypto v0.46.0
 	gopkg.in/yaml.v3 v3.0.1
 )

@@ -20,11 +21,12 @@ require (
 	github.com/go-openapi/spec v0.21.0 // indirect
 	github.com/go-openapi/swag v0.23.1 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
 	github.com/mailru/easyjson v0.9.0 // indirect
 	github.com/swaggo/files v1.0.1 // indirect
 	golang.org/x/mod v0.29.0 // indirect
 	golang.org/x/net v0.47.0 // indirect
 	golang.org/x/sync v0.18.0 // indirect
-	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/sys v0.39.0 // indirect
 	golang.org/x/tools v0.38.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,7 +1,11 @@
+github.com/DeRuina/timberjack v1.3.9 h1:6UXZ1I7ExPGTX/1UNYawR58LlOJUHKBPiYC7WQ91eBo=
+github.com/DeRuina/timberjack v1.3.9/go.mod h1:RLoeQrwrCGIEF8gO5nV5b/gMD0QIy7bzQhBUgpp1EqE=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
+github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
 github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
 github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
 github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE=
@@ -20,6 +24,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
 github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
 github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -45,8 +51,8 @@ github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
-golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
+golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
 golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
@@ -66,8 +72,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
-golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
--- a/pkg/auth/key.go
+++ b/pkg/auth/key.go
@@ -20,17 +20,14 @@ type APIKey struct {
 	UserID         string
 	PermissionMode PermissionMode
 	ExpiresAt      *int64
-	Enabled        bool
 	CreatedAt      int64
 	UpdatedAt      int64
 	LastUsedAt     *int64
 }

 type KeyPermission struct {
-	KeyID       int
-	InstanceID  int
-	CanInfer    bool
-	CanViewLogs bool
+	KeyID      int
+	InstanceID int
 }

 // GenerateKey generates a cryptographically secure API key with the given prefix
--- a/pkg/backends/backend.go
+++ b/pkg/backends/backend.go
@@ -14,6 +14,7 @@ const (
 	BackendTypeLlamaCpp BackendType = "llama_cpp"
 	BackendTypeMlxLm    BackendType = "mlx_lm"
 	BackendTypeVllm     BackendType = "vllm"
+	BackendTypeUnknown  BackendType = "unknown"
 )

 type backend interface {
@@ -55,13 +56,15 @@ func (o *Options) UnmarshalJSON(data []byte) error {
 	}

 	// Create backend from constructor map
-	if o.BackendOptions != nil {
-		constructor, exists := backendConstructors[o.BackendType]
-		if !exists {
-			return fmt.Errorf("unsupported backend type: %s", o.BackendType)
-		}
+	constructor, exists := backendConstructors[o.BackendType]
+	if !exists {
+		return fmt.Errorf("unsupported backend type: %s", o.BackendType)
+	}

-		backend := constructor()
+	backend := constructor()
+
+	// If backend_options is provided, unmarshal into the backend
+	if o.BackendOptions != nil {
 		optionsData, err := json.Marshal(o.BackendOptions)
 		if err != nil {
 			return fmt.Errorf("failed to marshal backend options: %w", err)
@@ -70,10 +73,11 @@ func (o *Options) UnmarshalJSON(data []byte) error {
 		if err := json.Unmarshal(optionsData, backend); err != nil {
 			return fmt.Errorf("failed to unmarshal backend options: %w", err)
 		}
-
-		// Store in the appropriate typed field for backward compatibility
-		o.setBackendOptions(backend)
 	}
+	// If backend_options is nil or empty, backend remains an empty struct (for router mode)
+
+	// Store in the appropriate typed field
+	o.setBackendOptions(backend)

 	return nil
 }
--- a/pkg/backends/llama.go
+++ b/pkg/backends/llama.go
@@ -327,20 +327,30 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 }

 func (o *LlamaServerOptions) GetPort() int {
+	if o == nil {
+		return 0
+	}
 	return o.Port
 }

 func (o *LlamaServerOptions) SetPort(port int) {
+	if o == nil {
+		return
+	}
 	o.Port = port
 }

 func (o *LlamaServerOptions) GetHost() string {
+	if o == nil {
+		return "localhost"
+	}
 	return o.Host
 }

 func (o *LlamaServerOptions) Validate() error {
+	// Allow nil options for router mode where llama.cpp manages models dynamically
 	if o == nil {
-		return validation.ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
+		return nil
 	}

 	// Use reflection to check all string fields for injection patterns
@@ -370,6 +380,9 @@ func (o *LlamaServerOptions) Validate() error {

 // BuildCommandArgs converts InstanceOptions to command line arguments
 func (o *LlamaServerOptions) BuildCommandArgs() []string {
+	if o == nil {
+		return []string{}
+	}
 	// Llama uses multiple flags for arrays by default (not comma-separated)
 	// Use package-level llamaMultiValuedFlags variable
 	args := BuildCommandArgs(o, llamaMultiValuedFlags)
@@ -381,6 +394,9 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
 }

 func (o *LlamaServerOptions) BuildDockerArgs() []string {
+	if o == nil {
+		return []string{}
+	}
 	// For llama, Docker args are the same as normal args
 	return o.BuildCommandArgs()
 }
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -6,234 +6,18 @@ import (
 	"log"
 	"os"
 	"path/filepath"
-	"runtime"
-	"strconv"
-	"strings"
-	"time"

 	"gopkg.in/yaml.v3"
 )

-// BackendSettings contains structured backend configuration
-type BackendSettings struct {
-	Command         string            `yaml:"command" json:"command"`
-	Args            []string          `yaml:"args" json:"args"`
-	Environment     map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
-	Docker          *DockerSettings   `yaml:"docker,omitempty" json:"docker,omitempty"`
-	ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
-}
-
-// DockerSettings contains Docker-specific configuration
-type DockerSettings struct {
-	Enabled     bool              `yaml:"enabled" json:"enabled"`
-	Image       string            `yaml:"image" json:"image"`
-	Args        []string          `yaml:"args" json:"args"`
-	Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
-}
-
-// BackendConfig contains backend executable configurations
-type BackendConfig struct {
-	LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
-	VLLM     BackendSettings `yaml:"vllm" json:"vllm"`
-	MLX      BackendSettings `yaml:"mlx" json:"mlx"`
-}
-
-// AppConfig represents the configuration for llamactl
-type AppConfig struct {
-	Server    ServerConfig          `yaml:"server" json:"server"`
-	Backends  BackendConfig         `yaml:"backends" json:"backends"`
-	Instances InstancesConfig       `yaml:"instances" json:"instances"`
-	Database  DatabaseConfig        `yaml:"database" json:"database"`
-	Auth      AuthConfig            `yaml:"auth" json:"auth"`
-	LocalNode string                `yaml:"local_node,omitempty" json:"local_node,omitempty"`
-	Nodes     map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
-
-	// Directory where all llamactl data will be stored (database, instances, logs, etc.)
-	DataDir string `yaml:"data_dir" json:"data_dir"`
-
-	Version    string `yaml:"-" json:"version"`
-	CommitHash string `yaml:"-" json:"commit_hash"`
-	BuildTime  string `yaml:"-" json:"build_time"`
-}
-
-// ServerConfig contains HTTP server configuration
-type ServerConfig struct {
-	// Server host to bind to
-	Host string `yaml:"host" json:"host"`
-
-	// Server port to bind to
-	Port int `yaml:"port" json:"port"`
-
-	// Allowed origins for CORS (e.g., "http://localhost:3000")
-	AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
-
-	// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
-	AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
-
-	// Enable Swagger UI for API documentation
-	EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
-
-	// Response headers to send with responses
-	ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
-}
-
-// DatabaseConfig contains database configuration settings
-type DatabaseConfig struct {
-	// Database file path (relative to the top-level data_dir or absolute)
-	Path string `yaml:"path" json:"path"`
-
-	// Connection settings
-	MaxOpenConnections int           `yaml:"max_open_connections" json:"max_open_connections"`
-	MaxIdleConnections int           `yaml:"max_idle_connections" json:"max_idle_connections"`
-	ConnMaxLifetime    time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
-}
-
-// InstancesConfig contains instance management configuration
-type InstancesConfig struct {
-	// Port range for instances (e.g., 8000,9000)
-	PortRange [2]int `yaml:"port_range" json:"port_range"`
-
-
-	// Instance config directory override (relative to data_dir if not absolute)
-	InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
-
-	// Logs directory override (relative to data_dir if not absolute)
-	LogsDir string `yaml:"logs_dir" json:"logs_dir"`
-
-	// Automatically create the data directory if it doesn't exist
-	AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
-
-	// Maximum number of instances that can be created
-	MaxInstances int `yaml:"max_instances" json:"max_instances"`
-
-	// Maximum number of instances that can be running at the same time
-	MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
-
-	// Enable LRU eviction for instance logs
-	EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
-
-	// Default auto-restart setting for new instances
-	DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
-
-	// Default max restarts for new instances
-	DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
-
-	// Default restart delay for new instances (in seconds)
-	DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
-
-	// Default on-demand start setting for new instances
-	DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
-
-	// How long to wait for an instance to start on demand (in seconds)
-	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
-
-	// Interval for checking instance timeouts (in minutes)
-	TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
-}
-
-// AuthConfig contains authentication settings
-type AuthConfig struct {
-
-	// Require authentication for OpenAI compatible inference endpoints
-	RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
-
-	// List of keys for OpenAI compatible inference endpoints
-	InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
-
-	// Require authentication for management endpoints
-	RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
-
-	// List of keys for management endpoints
-	ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
-}
-
-type NodeConfig struct {
-	Address string `yaml:"address" json:"address"`
-	APIKey  string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
-}
-
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
 // 3. Environment variables
 func LoadConfig(configPath string) (AppConfig, error) {
 	// 1. Start with defaults
-	defaultDataDir := getDefaultDataDirectory()
-
-	cfg := AppConfig{
-		Server: ServerConfig{
-			Host:           "0.0.0.0",
-			Port:           8080,
-			AllowedOrigins: []string{"*"}, // Default to allow all origins
-			AllowedHeaders: []string{"*"}, // Default to allow all headers
-			EnableSwagger:  false,
-		},
-		LocalNode: "main",
-		Nodes:     map[string]NodeConfig{},
-		DataDir:   defaultDataDir,
-		Backends: BackendConfig{
-			LlamaCpp: BackendSettings{
-				Command:     "llama-server",
-				Args:        []string{},
-				Environment: map[string]string{},
-				Docker: &DockerSettings{
-					Enabled: false,
-					Image:   "ghcr.io/ggml-org/llama.cpp:server",
-					Args: []string{
-						"run", "--rm", "--network", "host", "--gpus", "all",
-						"-v", filepath.Join(defaultDataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
-					Environment: map[string]string{},
-				},
-			},
-			VLLM: BackendSettings{
-				Command: "vllm",
-				Args:    []string{"serve"},
-				Docker: &DockerSettings{
-					Enabled: false,
-					Image:   "vllm/vllm-openai:latest",
-					Args: []string{
-						"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
-						"-v", filepath.Join(defaultDataDir, "huggingface") + ":/root/.cache/huggingface",
-					},
-					Environment: map[string]string{},
-				},
-			},
-			MLX: BackendSettings{
-				Command: "mlx_lm.server",
-				Args:    []string{},
-				// No Docker section for MLX - not supported
-			},
-		},
-		Instances: InstancesConfig{
-			PortRange: [2]int{8000, 9000},
-			// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
-			// should be relative path to DataDir if not explicitly set.
-			InstancesDir:         "",
-			LogsDir:              "",
-			AutoCreateDirs:       true,
-			MaxInstances:         -1, // -1 means unlimited
-			MaxRunningInstances:  -1, // -1 means unlimited
-			EnableLRUEviction:    true,
-			DefaultAutoRestart:   true,
-			DefaultMaxRestarts:   3,
-			DefaultRestartDelay:  5,
-			DefaultOnDemandStart: true,
-			OnDemandStartTimeout: 120, // 2 minutes
-			TimeoutCheckInterval: 5,   // Check timeouts every 5 minutes
-		},
-		Database: DatabaseConfig{
-			Path:               "", // Will be set to data_dir/llamactl.db if empty
-			MaxOpenConnections: 25,
-			MaxIdleConnections: 5,
-			ConnMaxLifetime:    5 * time.Minute,
-		},
-		Auth: AuthConfig{
-			RequireInferenceAuth:  true,
-			InferenceKeys:         []string{},
-			RequireManagementAuth: true,
-			ManagementKeys:        []string{},
-		},
-	}
+	defaultDataDir := getDefaultDataDir()
+	cfg := getDefaultConfig(defaultDataDir)

 	// 2. Load from config file
 	if err := loadConfigFile(&cfg, configPath); err != nil {
@@ -249,9 +33,6 @@ func LoadConfig(configPath string) (AppConfig, error) {
 	loadEnvVars(&cfg)

 	// Set default directories if not specified
-	if cfg.Instances.InstancesDir == "" {
-		cfg.Instances.InstancesDir = filepath.Join(cfg.DataDir, "instances")
-	}
 	if cfg.Instances.LogsDir == "" {
 		cfg.Instances.LogsDir = filepath.Join(cfg.DataDir, "logs")
 	}
@@ -292,372 +73,6 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
 	return nil
 }

-// loadEnvVars overrides config with environment variables
-func loadEnvVars(cfg *AppConfig) {
-	// Server config
-	if host := os.Getenv("LLAMACTL_HOST"); host != "" {
-		cfg.Server.Host = host
-	}
-	if port := os.Getenv("LLAMACTL_PORT"); port != "" {
-		if p, err := strconv.Atoi(port); err == nil {
-			cfg.Server.Port = p
-		}
-	}
-	if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
-		cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
-	}
-	if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
-		if b, err := strconv.ParseBool(enableSwagger); err == nil {
-			cfg.Server.EnableSwagger = b
-		}
-	}
-
-	// Data config
-	if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
-		cfg.DataDir = dataDir
-	}
-	if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
-		cfg.Instances.InstancesDir = instancesDir
-	}
-	if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
-		cfg.Instances.LogsDir = logsDir
-	}
-	if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
-		if b, err := strconv.ParseBool(autoCreate); err == nil {
-			cfg.Instances.AutoCreateDirs = b
-		}
-	}
-
-	// Instance config
-	if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
-		if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
-			cfg.Instances.PortRange = ports
-		}
-	}
-	if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
-		if m, err := strconv.Atoi(maxInstances); err == nil {
-			cfg.Instances.MaxInstances = m
-		}
-	}
-	if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
-		if m, err := strconv.Atoi(maxRunning); err == nil {
-			cfg.Instances.MaxRunningInstances = m
-		}
-	}
-	if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
-		if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
-			cfg.Instances.EnableLRUEviction = b
-		}
-	}
-	// Backend config
-	// LlamaCpp backend
-	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
-		cfg.Backends.LlamaCpp.Command = llamaCmd
-	}
-	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
-		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
-	}
-	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
-		if cfg.Backends.LlamaCpp.Environment == nil {
-			cfg.Backends.LlamaCpp.Environment = make(map[string]string)
-		}
-		parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
-	}
-	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
-		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
-			if cfg.Backends.LlamaCpp.Docker == nil {
-				cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
-			}
-			cfg.Backends.LlamaCpp.Docker.Enabled = b
-		}
-	}
-	if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
-		if cfg.Backends.LlamaCpp.Docker == nil {
-			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
-		}
-		cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
-	}
-	if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
-		if cfg.Backends.LlamaCpp.Docker == nil {
-			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
-		}
-		cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
-	}
-	if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
-		if cfg.Backends.LlamaCpp.Docker == nil {
-			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
-		}
-		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
-			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
-		}
-		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
-	}
-	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
-		if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
-			cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
-		}
-		parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
-	}
-
-	// vLLM backend
-	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
-		cfg.Backends.VLLM.Command = vllmCmd
-	}
-	if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
-		cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
-	}
-	if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
-		if cfg.Backends.VLLM.Environment == nil {
-			cfg.Backends.VLLM.Environment = make(map[string]string)
-		}
-		parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
-	}
-	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
-		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
-			if cfg.Backends.VLLM.Docker == nil {
-				cfg.Backends.VLLM.Docker = &DockerSettings{}
-			}
-			cfg.Backends.VLLM.Docker.Enabled = b
-		}
-	}
-	if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
-		if cfg.Backends.VLLM.Docker == nil {
-			cfg.Backends.VLLM.Docker = &DockerSettings{}
-		}
-		cfg.Backends.VLLM.Docker.Image = vllmDockerImage
-	}
-	if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
-		if cfg.Backends.VLLM.Docker == nil {
-			cfg.Backends.VLLM.Docker = &DockerSettings{}
-		}
-		cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
-	}
-	if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
-		if cfg.Backends.VLLM.Docker == nil {
-			cfg.Backends.VLLM.Docker = &DockerSettings{}
-		}
-		if cfg.Backends.VLLM.Docker.Environment == nil {
-			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
-		}
-		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
-	}
-	if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
-		if cfg.Backends.VLLM.ResponseHeaders == nil {
-			cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
-		}
-		parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
-	}
-
-	// MLX backend
-	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
-		cfg.Backends.MLX.Command = mlxCmd
-	}
-	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
-		cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
-	}
-	if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
-		if cfg.Backends.MLX.Environment == nil {
-			cfg.Backends.MLX.Environment = make(map[string]string)
-		}
-		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
-	}
-	if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
-		if cfg.Backends.MLX.ResponseHeaders == nil {
-			cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
-		}
-		parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
-	}
-
-	// Instance defaults
-	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
-		if b, err := strconv.ParseBool(autoRestart); err == nil {
-			cfg.Instances.DefaultAutoRestart = b
-		}
-	}
-	if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
-		if m, err := strconv.Atoi(maxRestarts); err == nil {
-			cfg.Instances.DefaultMaxRestarts = m
-		}
-	}
-	if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
-		if seconds, err := strconv.Atoi(restartDelay); err == nil {
-			cfg.Instances.DefaultRestartDelay = seconds
-		}
-	}
-	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
-		if b, err := strconv.ParseBool(onDemandStart); err == nil {
-			cfg.Instances.DefaultOnDemandStart = b
-		}
-	}
-	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
-		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
-			cfg.Instances.OnDemandStartTimeout = seconds
-		}
-	}
-	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
-		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
-			cfg.Instances.TimeoutCheckInterval = minutes
-		}
-	}
-	// Auth config
-	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
-		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
-			cfg.Auth.RequireInferenceAuth = b
-		}
-	}
-	if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
-		cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
-	}
-	if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
-		if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
-			cfg.Auth.RequireManagementAuth = b
-		}
-	}
-	if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
-		cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
-	}
-
-	// Local node config
-	if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
-		cfg.LocalNode = localNode
-	}
-
-	// Database config
-	if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
-		cfg.Database.Path = dbPath
-	}
-	if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
-		if m, err := strconv.Atoi(maxOpenConns); err == nil {
-			cfg.Database.MaxOpenConnections = m
-		}
-	}
-	if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
-		if m, err := strconv.Atoi(maxIdleConns); err == nil {
-			cfg.Database.MaxIdleConnections = m
-		}
-	}
-	if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
-		if d, err := time.ParseDuration(connMaxLifetime); err == nil {
-			cfg.Database.ConnMaxLifetime = d
-		}
-	}
-}
-
-// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
-func ParsePortRange(s string) [2]int {
-	var parts []string
-
-	// Try both separators
-	if strings.Contains(s, "-") {
-		parts = strings.Split(s, "-")
-	} else if strings.Contains(s, ",") {
-		parts = strings.Split(s, ",")
-	}
-
-	// Parse the two parts
-	if len(parts) == 2 {
-		start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
-		end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
-		if err1 == nil && err2 == nil {
-			return [2]int{start, end}
-		}
-	}
-
-	return [2]int{0, 0} // Invalid format
-}
-
-// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
-// and populates the provided environment map
-func parseEnvVars(envString string, envMap map[string]string) {
-	if envString == "" {
-		return
-	}
-	for _, envPair := range strings.Split(envString, ",") {
-		if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
-			envMap[parts[0]] = parts[1]
-		}
-	}
-}
-
-// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
-// and populates the provided environment map
-func parseHeaders(envString string, envMap map[string]string) {
-	if envString == "" {
-		return
-	}
-	for _, envPair := range strings.Split(envString, ";") {
-		if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
-			envMap[parts[0]] = parts[1]
-		}
-	}
-}
-
-// getDefaultDataDirectory returns platform-specific default data directory
-func getDefaultDataDirectory() string {
-	switch runtime.GOOS {
-	case "windows":
-		// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
-		if programData := os.Getenv("PROGRAMDATA"); programData != "" {
-			return filepath.Join(programData, "llamactl")
-		}
-		if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
-			return filepath.Join(localAppData, "llamactl")
-		}
-		return "C:\\ProgramData\\llamactl" // Final fallback
-
-	case "darwin":
-		// For macOS, use user's Application Support directory
-		if homeDir, _ := os.UserHomeDir(); homeDir != "" {
-			return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
-		}
-		return "/usr/local/var/llamactl" // Fallback
-
-	default:
-		// Linux and other Unix-like systems
-		if homeDir, _ := os.UserHomeDir(); homeDir != "" {
-			return filepath.Join(homeDir, ".local", "share", "llamactl")
-		}
-		return "/var/lib/llamactl" // Final fallback
-	}
-}
-
-// getDefaultConfigLocations returns platform-specific config file locations
-func getDefaultConfigLocations() []string {
-	var locations []string
-	// Use ./llamactl.yaml and ./config.yaml as the default config file
-	locations = append(locations, "llamactl.yaml")
-	locations = append(locations, "config.yaml")
-
-	homeDir, _ := os.UserHomeDir()
-
-	switch runtime.GOOS {
-	case "windows":
-		// Windows: Use APPDATA if available, else user home, fallback to ProgramData
-		if appData := os.Getenv("APPDATA"); appData != "" {
-			locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
-		} else if homeDir != "" {
-			locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
-		}
-		locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
-
-	case "darwin":
-		// macOS: Use Application Support in user home, fallback to /Library/Application Support
-		if homeDir != "" {
-			locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
-		}
-		locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
-
-	default:
-		// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
-		if homeDir != "" {
-			locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
-		}
-		locations = append(locations, "/etc/llamactl/config.yaml")
-	}
-
-	return locations
-}
-
 // SanitizedCopy returns a copy of the AppConfig with sensitive information removed
 func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
 	// Deep copy via JSON marshal/unmarshal to avoid concurrent map access
@@ -674,7 +89,6 @@ func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
 	}

 	// Clear sensitive information
-	sanitized.Auth.InferenceKeys = []string{}
 	sanitized.Auth.ManagementKeys = []string{}

 	// Clear API keys from nodes
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -41,9 +41,6 @@ func TestLoadConfig_Defaults(t *testing.T) {
 		t.Fatalf("Failed to get user home directory: %v", err)
 	}

-	if cfg.Instances.InstancesDir != filepath.Join(homedir, ".local", "share", "llamactl", "instances") {
-		t.Errorf("Expected default instances directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "instances"), cfg.Instances.InstancesDir)
-	}
 	if cfg.Instances.LogsDir != filepath.Join(homedir, ".local", "share", "llamactl", "logs") {
 		t.Errorf("Expected default logs directory '%s', got %q", filepath.Join(homedir, ".local", "share", "llamactl", "logs"), cfg.Instances.LogsDir)
 	}
@@ -78,8 +75,8 @@ server:
  port: 9090
 instances:
  port_range: [7000, 8000]
-  logs_dir: "/custom/logs"
  max_instances: 5
+  logs_dir: "/custom/logs"
  llama_executable: "/usr/bin/llama-server"
  default_auto_restart: false
  default_max_restarts: 10
@@ -219,7 +216,6 @@ instances:
 	}
 }

-
 func TestParsePortRange(t *testing.T) {
 	tests := []struct {
 		name     string
@@ -248,7 +244,6 @@ func TestParsePortRange(t *testing.T) {
 	}
 }

-
 func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
 	bc := &config.BackendConfig{
 		LlamaCpp: config.BackendSettings{
@@ -305,7 +300,6 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
 	}
 }

-
 func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
 	// Test that backend environment variables work correctly
 	envVars := map[string]string{
@@ -375,7 +369,6 @@ func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
 	}
 }

-
 func TestLoadConfig_LocalNode(t *testing.T) {
 	t.Run("default local node", func(t *testing.T) {
 		cfg, err := config.LoadConfig("nonexistent-file.yaml")
--- a/pkg/config/defaults.go
+++ b/pkg/config/defaults.go
@@ -0,0 +1,150 @@
+package config
+
+import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"time"
+)
+
+// getDefaultConfig assembles the built-in defaults. dataDir is stored as DataDir and
+// is also used as the host side of the Docker cache volume mounts.
+func getDefaultConfig(dataDir string) AppConfig {
+	// "-v" values handed to the default Docker args: <host dir>:<container cache dir>
+	llamaCacheMount := filepath.Join(dataDir, "llama.cpp") + ":/root/.cache/llama.cpp"
+	hfCacheMount := filepath.Join(dataDir, "huggingface") + ":/root/.cache/huggingface"
+
+	cfg := AppConfig{LocalNode: "main", Nodes: map[string]NodeConfig{}, DataDir: dataDir}
+	cfg.Server = ServerConfig{
+		Host:           "0.0.0.0",
+		Port:           8080,
+		AllowedOrigins: []string{"*"}, // every origin is allowed by default
+		AllowedHeaders: []string{"*"}, // every header is allowed by default
+		EnableSwagger:  false,
+	}
+
+	cfg.Backends.LlamaCpp = BackendSettings{
+		Command:     "llama-server",
+		Args:        []string{},
+		Environment: map[string]string{},
+		Docker: &DockerSettings{
+			Enabled:     false,
+			Image:       "ghcr.io/ggml-org/llama.cpp:server",
+			Args:        []string{"run", "--rm", "--network", "host", "--gpus", "all", "-v", llamaCacheMount},
+			Environment: map[string]string{},
+		},
+	}
+
+	cfg.Backends.VLLM = BackendSettings{
+		Command: "vllm",
+		Args:    []string{"serve"},
+		Docker: &DockerSettings{
+			Enabled: false,
+			Image:   "vllm/vllm-openai:latest",
+			Args: []string{
+				"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
+				"-v", hfCacheMount,
+			},
+			Environment: map[string]string{},
+		},
+	}
+
+	// MLX gets no Docker section - not supported
+	cfg.Backends.MLX = BackendSettings{Command: "mlx_lm.server", Args: []string{}}
+
+	cfg.Instances = InstancesConfig{
+		PortRange:            [2]int{8000, 9000},
+		AutoCreateDirs:       true,
+		MaxInstances:         -1, // -1 means unlimited
+		MaxRunningInstances:  -1, // -1 means unlimited
+		EnableLRUEviction:    true,
+		DefaultAutoRestart:   true,
+		DefaultMaxRestarts:   3,
+		DefaultRestartDelay:  5,
+		DefaultOnDemandStart: true,
+		OnDemandStartTimeout: 120, // 2 minutes
+		TimeoutCheckInterval: 5,   // minutes between timeout checks
+		LogsDir:              "",  // left empty here; becomes data_dir/logs when still empty
+		LogRotationEnabled:   true,
+		LogRotationMaxSize:   100,
+		LogRotationCompress:  false,
+	}
+	cfg.Database = DatabaseConfig{
+		Path:               "", // Will be set to data_dir/llamactl.db if empty
+		MaxOpenConnections: 25,
+		MaxIdleConnections: 5,
+		ConnMaxLifetime:    5 * time.Minute,
+	}
+	cfg.Auth = AuthConfig{
+		RequireInferenceAuth:  true,
+		RequireManagementAuth: true,
+		ManagementKeys:        []string{},
+	}
+	return cfg
+}
+
+// getDefaultDataDir returns platform-specific default data directory
+func getDefaultDataDir() string {
+	switch runtime.GOOS {
+	case "windows":
+		// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
+		if programData := os.Getenv("PROGRAMDATA"); programData != "" {
+			return filepath.Join(programData, "llamactl")
+		}
+		if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
+			return filepath.Join(localAppData, "llamactl")
+		}
+		return "C:\\ProgramData\\llamactl" // Final fallback
+
+	case "darwin":
+		// For macOS, use user's Application Support directory
+		if homeDir, _ := os.UserHomeDir(); homeDir != "" {
+			return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
+		}
+		return "/usr/local/var/llamactl" // Fallback
+
+	default:
+		// Linux and other Unix-like systems
+		if homeDir, _ := os.UserHomeDir(); homeDir != "" {
+			return filepath.Join(homeDir, ".local", "share", "llamactl")
+		}
+		return "/var/lib/llamactl" // Final fallback
+	}
+}
+
+// getDefaultConfigLocations returns platform-specific config file locations
+func getDefaultConfigLocations() []string {
+	var locations []string
+	// Use ./llamactl.yaml and ./config.yaml as the default config file
+	locations = append(locations, "llamactl.yaml")
+	locations = append(locations, "config.yaml")
+
+	homeDir, _ := os.UserHomeDir()
+
+	switch runtime.GOOS {
+	case "windows":
+		// Windows: Use APPDATA if available, else user home, fallback to ProgramData
+		if appData := os.Getenv("APPDATA"); appData != "" {
+			locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
+		} else if homeDir != "" {
+			locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
+		}
+		locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
+
+	case "darwin":
+		// macOS: Use Application Support in user home, fallback to /Library/Application Support
+		if homeDir != "" {
+			locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
+		}
+		locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
+
+	default:
+		// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
+		if homeDir != "" {
+			locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
+		}
+		locations = append(locations, "/etc/llamactl/config.yaml")
+	}
+
+	return locations
+}
--- a/pkg/config/env.go
+++ b/pkg/config/env.go
@@ -0,0 +1,319 @@
+package config
+
+import (
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// loadEnvVars overrides cfg with LLAMACTL_* environment variables; empty or unparsable values leave the current setting untouched
+func loadEnvVars(cfg *AppConfig) {
+	// Server config
+	if host := os.Getenv("LLAMACTL_HOST"); host != "" {
+		cfg.Server.Host = host
+	}
+	if port := os.Getenv("LLAMACTL_PORT"); port != "" {
+		if p, err := strconv.Atoi(port); err == nil {
+			cfg.Server.Port = p
+		}
+	}
+	if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
+		cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
+	}
+	if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
+		if b, err := strconv.ParseBool(enableSwagger); err == nil {
+			cfg.Server.EnableSwagger = b
+		}
+	}
+
+	// Data config
+	if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
+		cfg.DataDir = dataDir
+	}
+	if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
+		cfg.Instances.LogsDir = logsDir
+	}
+	if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
+		if b, err := strconv.ParseBool(autoCreate); err == nil {
+			cfg.Instances.AutoCreateDirs = b
+		}
+	}
+
+	// Instance config
+	if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
+		if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
+			cfg.Instances.PortRange = ports
+		}
+	}
+	if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
+		if m, err := strconv.Atoi(maxInstances); err == nil {
+			cfg.Instances.MaxInstances = m
+		}
+	}
+	if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
+		if m, err := strconv.Atoi(maxRunning); err == nil {
+			cfg.Instances.MaxRunningInstances = m
+		}
+	}
+	if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
+		if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
+			cfg.Instances.EnableLRUEviction = b
+		}
+	}
+	// Backend config
+	// LlamaCpp backend
+	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
+		cfg.Backends.LlamaCpp.Command = llamaCmd
+	}
+	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
+		cfg.Backends.LlamaCpp.Args = strings.Fields(llamaArgs) // Fields: repeated spaces must not yield empty args
+	}
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
+		if cfg.Backends.LlamaCpp.Environment == nil {
+			cfg.Backends.LlamaCpp.Environment = make(map[string]string)
+		}
+		parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
+	}
+	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
+		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
+			if cfg.Backends.LlamaCpp.Docker == nil {
+				cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+			}
+			cfg.Backends.LlamaCpp.Docker.Enabled = b
+		}
+	}
+	if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
+	}
+	if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Args = strings.Fields(llamaDockerArgs) // whitespace-separated, no empty args
+	}
+	if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
+			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
+		}
+		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
+	}
+	if respHeaders := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); respHeaders != "" {
+		if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
+			cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(respHeaders, cfg.Backends.LlamaCpp.ResponseHeaders)
+	}
+
+	// vLLM backend
+	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
+		cfg.Backends.VLLM.Command = vllmCmd
+	}
+	if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
+		cfg.Backends.VLLM.Args = strings.Fields(vllmArgs) // whitespace-separated, no empty args
+	}
+	if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
+		if cfg.Backends.VLLM.Environment == nil {
+			cfg.Backends.VLLM.Environment = make(map[string]string)
+		}
+		parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
+	}
+	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
+		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
+			if cfg.Backends.VLLM.Docker == nil {
+				cfg.Backends.VLLM.Docker = &DockerSettings{}
+			}
+			cfg.Backends.VLLM.Docker.Enabled = b
+		}
+	}
+	if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Image = vllmDockerImage
+	}
+	if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Args = strings.Fields(vllmDockerArgs) // whitespace-separated, no empty args
+	}
+	if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.VLLM.Docker.Environment == nil {
+			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
+		}
+		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
+	}
+	if respHeaders := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); respHeaders != "" {
+		if cfg.Backends.VLLM.ResponseHeaders == nil {
+			cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(respHeaders, cfg.Backends.VLLM.ResponseHeaders)
+	}
+
+	// MLX backend
+	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
+		cfg.Backends.MLX.Command = mlxCmd
+	}
+	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
+		cfg.Backends.MLX.Args = strings.Fields(mlxArgs) // whitespace-separated, no empty args
+	}
+	if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
+		if cfg.Backends.MLX.Environment == nil {
+			cfg.Backends.MLX.Environment = make(map[string]string)
+		}
+		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
+	}
+	if respHeaders := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); respHeaders != "" {
+		if cfg.Backends.MLX.ResponseHeaders == nil {
+			cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(respHeaders, cfg.Backends.MLX.ResponseHeaders)
+	}
+
+	// Instance defaults
+	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
+		if b, err := strconv.ParseBool(autoRestart); err == nil {
+			cfg.Instances.DefaultAutoRestart = b
+		}
+	}
+	if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
+		if m, err := strconv.Atoi(maxRestarts); err == nil {
+			cfg.Instances.DefaultMaxRestarts = m
+		}
+	}
+	if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
+		if seconds, err := strconv.Atoi(restartDelay); err == nil {
+			cfg.Instances.DefaultRestartDelay = seconds
+		}
+	}
+	if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
+		if b, err := strconv.ParseBool(onDemandStart); err == nil {
+			cfg.Instances.DefaultOnDemandStart = b
+		}
+	}
+	if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
+		if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
+			cfg.Instances.OnDemandStartTimeout = seconds
+		}
+	}
+	if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
+		if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
+			cfg.Instances.TimeoutCheckInterval = minutes
+		}
+	}
+	// Auth config
+	if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
+		if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
+			cfg.Auth.RequireInferenceAuth = b
+		}
+	}
+	if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
+		if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
+			cfg.Auth.RequireManagementAuth = b
+		}
+	}
+	if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
+		cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
+	}
+
+	// Local node config
+	if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
+		cfg.LocalNode = localNode
+	}
+
+	// Database config
+	if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
+		cfg.Database.Path = dbPath
+	}
+	if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
+		if m, err := strconv.Atoi(maxOpenConns); err == nil {
+			cfg.Database.MaxOpenConnections = m
+		}
+	}
+	if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
+		if m, err := strconv.Atoi(maxIdleConns); err == nil {
+			cfg.Database.MaxIdleConnections = m
+		}
+	}
+	if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
+		if d, err := time.ParseDuration(connMaxLifetime); err == nil {
+			cfg.Database.ConnMaxLifetime = d
+		}
+	}
+
+	// Log rotation config
+	if logRotationEnabled := os.Getenv("LLAMACTL_LOG_ROTATION_ENABLED"); logRotationEnabled != "" {
+		if b, err := strconv.ParseBool(logRotationEnabled); err == nil {
+			cfg.Instances.LogRotationEnabled = b
+		}
+	}
+	if logRotationMaxSize := os.Getenv("LLAMACTL_LOG_ROTATION_MAX_SIZE"); logRotationMaxSize != "" {
+		if m, err := strconv.Atoi(logRotationMaxSize); err == nil {
+			cfg.Instances.LogRotationMaxSize = m
+		}
+	}
+	if logRotationCompress := os.Getenv("LLAMACTL_LOG_ROTATION_COMPRESS"); logRotationCompress != "" {
+		if b, err := strconv.ParseBool(logRotationCompress); err == nil {
+			cfg.Instances.LogRotationCompress = b
+		}
+	}
+}
+
+// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
+func ParsePortRange(s string) [2]int {
+	var parts []string
+
+	// Try both separators
+	if strings.Contains(s, "-") {
+		parts = strings.Split(s, "-")
+	} else if strings.Contains(s, ",") {
+		parts = strings.Split(s, ",")
+	}
+
+	// Parse the two parts
+	if len(parts) == 2 {
+		start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
+		end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
+		if err1 == nil && err2 == nil {
+			return [2]int{start, end}
+		}
+	}
+
+	return [2]int{0, 0} // Invalid format
+}
+
+// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2" into
+// envMap. Whitespace around each pair and around the variable name is dropped; pairs
+// without "=" or with an empty name are skipped. envMap must be non-nil.
+func parseEnvVars(envString string, envMap map[string]string) {
+	for _, envPair := range strings.Split(envString, ",") {
+		parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2)
+		if len(parts) != 2 || strings.TrimSpace(parts[0]) == "" {
+			continue // no "=" or no variable name
+		}
+		envMap[strings.TrimSpace(parts[0])] = parts[1]
+	}
+}
+
+// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2" into envMap.
+// Whitespace around header names and values is trimmed; entries without "=" or with
+// an empty header name are skipped. envMap must be non-nil.
+func parseHeaders(envString string, envMap map[string]string) {
+	for _, envPair := range strings.Split(envString, ";") {
+		parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2)
+		if len(parts) != 2 || strings.TrimSpace(parts[0]) == "" {
+			continue // no "=" or no header name
+		}
+		envMap[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1])
+	}
+}
--- a/pkg/config/types.go
+++ b/pkg/config/types.go
@@ -0,0 +1,143 @@
+package config
+
+import "time"
+
+// BackendSettings contains structured backend configuration
+type BackendSettings struct {
+	Command         string            `yaml:"command" json:"command"`
+	Args            []string          `yaml:"args" json:"args"`
+	Environment     map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
+	Docker          *DockerSettings   `yaml:"docker,omitempty" json:"docker,omitempty"`
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
+}
+
+// DockerSettings contains Docker-specific configuration
+type DockerSettings struct {
+	Enabled     bool              `yaml:"enabled" json:"enabled"`
+	Image       string            `yaml:"image" json:"image"`
+	Args        []string          `yaml:"args" json:"args"`
+	Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
+}
+
+// BackendConfig contains backend executable configurations
+type BackendConfig struct {
+	LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
+	VLLM     BackendSettings `yaml:"vllm" json:"vllm"`
+	MLX      BackendSettings `yaml:"mlx" json:"mlx"`
+}
+
+// AppConfig represents the configuration for llamactl
+type AppConfig struct {
+	Server    ServerConfig          `yaml:"server" json:"server"`
+	Backends  BackendConfig         `yaml:"backends" json:"backends"`
+	Instances InstancesConfig       `yaml:"instances" json:"instances"`
+	Database  DatabaseConfig        `yaml:"database" json:"database"`
+	Auth      AuthConfig            `yaml:"auth" json:"auth"`
+	LocalNode string                `yaml:"local_node,omitempty" json:"local_node,omitempty"`
+	Nodes     map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
+
+	// Directory where all llamactl data will be stored (database, instances, logs, etc.)
+	DataDir string `yaml:"data_dir" json:"data_dir"`
+
+	Version    string `yaml:"-" json:"version"`
+	CommitHash string `yaml:"-" json:"commit_hash"`
+	BuildTime  string `yaml:"-" json:"build_time"`
+}
+
+// ServerConfig contains HTTP server configuration
+type ServerConfig struct {
+	// Server host to bind to
+	Host string `yaml:"host" json:"host"`
+
+	// Server port to bind to
+	Port int `yaml:"port" json:"port"`
+
+	// Allowed origins for CORS (e.g., "http://localhost:3000")
+	AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
+
+	// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
+	AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
+
+	// Enable Swagger UI for API documentation
+	EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
+
+	// Response headers to send with responses
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
+}
+
+// DatabaseConfig contains database configuration settings
+type DatabaseConfig struct {
+	// Database file path (relative to the top-level data_dir or absolute)
+	Path string `yaml:"path" json:"path"`
+
+	// Connection settings
+	MaxOpenConnections int           `yaml:"max_open_connections" json:"max_open_connections"`
+	MaxIdleConnections int           `yaml:"max_idle_connections" json:"max_idle_connections"`
+	ConnMaxLifetime    time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
+}
+
+// InstancesConfig contains instance management configuration
+type InstancesConfig struct {
+	// Port range for instances (e.g., [8000, 9000])
+	PortRange [2]int `yaml:"port_range" json:"port_range"`
+
+	// Automatically create the data directory if it doesn't exist
+	AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
+
+	// Maximum number of instances that can be created
+	MaxInstances int `yaml:"max_instances" json:"max_instances"`
+
+	// Maximum number of instances that can be running at the same time (-1 disables the limit)
+	MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
+
+	// Evict the least recently used instance when an on-demand start hits the running-instances limit
+	EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
+
+	// Default auto-restart setting for new instances
+	DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
+
+	// Default max restarts for new instances
+	DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
+
+	// Default restart delay for new instances (in seconds)
+	DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
+
+	// Default on-demand start setting for new instances
+	DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
+
+	// How long to wait for an instance to start on demand (in seconds)
+	OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
+
+	// Interval for checking instance timeouts (in minutes)
+	TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
+
+	// Logs directory override (relative to data_dir if not absolute)
+	LogsDir string `yaml:"logs_dir" json:"logs_dir"`
+
+	// Enable log rotation. NOTE(review): this and the two fields below have no json tag, unlike the rest of the struct, so JSON uses the Go field names — confirm intended
+	LogRotationEnabled bool `yaml:"log_rotation_enabled" default:"true"`
+
+	// Maximum log file size in MB before rotation
+	LogRotationMaxSize int `yaml:"log_rotation_max_size" default:"100"`
+
+	// Whether to compress rotated log files
+	LogRotationCompress bool `yaml:"log_rotation_compress" default:"false"`
+}
+
+// AuthConfig contains authentication settings
+type AuthConfig struct {
+
+	// Require authentication for OpenAI compatible inference endpoints
+	RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
+
+	// Require authentication for management endpoints
+	RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
+
+	// List of keys for management endpoints
+	ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
+}
+
+type NodeConfig struct {
+	Address string `yaml:"address" json:"address"`
+	APIKey  string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
+}
--- a/pkg/database/apikeys.go
+++ b/pkg/database/apikeys.go
@@ -18,8 +18,8 @@ func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions

 	// Insert the API key
 	query := `
-		INSERT INTO api_keys (key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at)
-		VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+		INSERT INTO api_keys (key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at)
+		VALUES (?, ?, ?, ?, ?, ?, ?)
 	`

 	var expiresAt sql.NullInt64
@@ -29,7 +29,7 @@ func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions

 	result, err := tx.ExecContext(ctx, query,
 		key.KeyHash, key.Name, key.UserID, key.PermissionMode,
-		expiresAt, key.Enabled, key.CreatedAt, key.UpdatedAt,
+		expiresAt, key.CreatedAt, key.UpdatedAt,
 	)
 	if err != nil {
 		return fmt.Errorf("failed to insert API key: %w", err)
@@ -45,10 +45,10 @@ func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions
 	if key.PermissionMode == auth.PermissionModePerInstance {
 		for _, perm := range permissions {
 			query := `
-				INSERT INTO key_permissions (key_id, instance_id, can_infer, can_view_logs)
-				VALUES (?, ?, ?, ?)
+				INSERT INTO key_permissions (key_id, instance_id)
+				VALUES (?, ?)
 			`
-			_, err := tx.ExecContext(ctx, query, perm.KeyID, perm.InstanceID, perm.CanInfer, perm.CanViewLogs)
+			_, err := tx.ExecContext(ctx, query, key.ID, perm.InstanceID)
 			if err != nil {
 				return fmt.Errorf("failed to insert permission for instance %d: %w", perm.InstanceID, err)
 			}
@@ -61,7 +61,7 @@ func (db *sqliteDB) CreateKey(ctx context.Context, key *auth.APIKey, permissions
 // GetKeyByID retrieves an API key by ID
 func (db *sqliteDB) GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error) {
 	query := `
-		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
+		SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
 		FROM api_keys
 		WHERE id = ?
 	`
@@ -72,7 +72,7 @@ func (db *sqliteDB) GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error

 	err := db.QueryRowContext(ctx, query, id).Scan(
 		&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
-		&expiresAt, &key.Enabled, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
+		&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
 	)
 	if err != nil {
 		if err == sql.ErrNoRows {
@@ -94,7 +94,7 @@ func (db *sqliteDB) GetKeyByID(ctx context.Context, id int) (*auth.APIKey, error
 // GetUserKeys retrieves all API keys for a user
 func (db *sqliteDB) GetUserKeys(ctx context.Context, userID string) ([]*auth.APIKey, error) {
 	query := `
-		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
+		SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
 		FROM api_keys
 		WHERE user_id = ?
 		ORDER BY created_at DESC
@@ -114,7 +114,7 @@ func (db *sqliteDB) GetUserKeys(ctx context.Context, userID string) ([]*auth.API

 		err := rows.Scan(
 			&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
-			&expiresAt, &key.Enabled, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
+			&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
 		)
 		if err != nil {
 			return nil, fmt.Errorf("failed to scan API key: %w", err)
@@ -133,12 +133,12 @@ func (db *sqliteDB) GetUserKeys(ctx context.Context, userID string) ([]*auth.API
 	return keys, nil
 }

-// GetActiveKeys retrieves all enabled, non-expired API keys
+// GetActiveKeys retrieves all non-expired API keys
 func (db *sqliteDB) GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error) {
 	query := `
-		SELECT id, key_hash, name, user_id, permission_mode, expires_at, enabled, created_at, updated_at, last_used_at
+		SELECT id, key_hash, name, user_id, permission_mode, expires_at, created_at, updated_at, last_used_at
 		FROM api_keys
-		WHERE enabled = 1 AND (expires_at IS NULL OR expires_at > ?)
+		WHERE expires_at IS NULL OR expires_at > ?
 		ORDER BY created_at DESC
 	`

@@ -157,7 +157,7 @@ func (db *sqliteDB) GetActiveKeys(ctx context.Context) ([]*auth.APIKey, error) {

 		err := rows.Scan(
 			&key.ID, &key.KeyHash, &key.Name, &key.UserID, &key.PermissionMode,
-			&expiresAt, &key.Enabled, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
+			&expiresAt, &key.CreatedAt, &key.UpdatedAt, &lastUsedAt,
 		)
 		if err != nil {
 			return nil, fmt.Errorf("failed to scan API key: %w", err)
--- a/pkg/database/database.go
+++ b/pkg/database/database.go
@@ -107,6 +107,12 @@ func Open(config *Config) (*sqliteDB, error) {
 func (db *sqliteDB) Close() error {
 	if db.DB != nil {
 		log.Println("Closing database connection")
+
+		// Checkpoint WAL to merge changes back to main database file
+		if _, err := db.DB.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
+			log.Printf("Warning: Failed to checkpoint WAL: %v", err)
+		}
+
 		return db.DB.Close()
 	}
 	return nil
--- a/pkg/database/instances.go
+++ b/pkg/database/instances.go
@@ -45,7 +45,7 @@ func (db *sqliteDB) Create(ctx context.Context, inst *instance.Instance) error {
 		) VALUES (?, ?, ?, ?, ?, ?)
 	`

-	_, err = db.DB.ExecContext(ctx, query,
+	result, err := db.DB.ExecContext(ctx, query,
 		row.Name, row.Status, row.CreatedAt, row.UpdatedAt, row.OptionsJSON, row.OwnerUserID,
 	)

@@ -53,6 +53,14 @@ func (db *sqliteDB) Create(ctx context.Context, inst *instance.Instance) error {
 		return fmt.Errorf("failed to insert instance: %w", err)
 	}

+	// Get the auto-generated ID and set it on the instance
+	id, err := result.LastInsertId()
+	if err != nil {
+		return fmt.Errorf("failed to get last insert ID: %w", err)
+	}
+
+	inst.ID = int(id)
+
 	return nil
 }

@@ -263,6 +271,7 @@ func (db *sqliteDB) rowToInstance(row *instanceRow) (*instance.Instance, error)

 	// Build complete instance JSON with all fields
 	instanceJSON, err := json.Marshal(map[string]any{
+		"id":      row.ID,
 		"name":    row.Name,
 		"created": row.CreatedAt,
 		"status":  row.Status,
--- a/pkg/database/migrations/001_initial_schema.up.sql
+++ b/pkg/database/migrations/001_initial_schema.up.sql
@@ -36,7 +36,6 @@ CREATE TABLE IF NOT EXISTS api_keys (
    user_id TEXT NOT NULL,
    permission_mode TEXT NOT NULL CHECK(permission_mode IN ('allow_all', 'per_instance')) DEFAULT 'per_instance',
    expires_at INTEGER NULL,
-    enabled INTEGER NOT NULL DEFAULT 1,
    created_at INTEGER NOT NULL,
    updated_at INTEGER NOT NULL,
    last_used_at INTEGER NULL
@@ -48,8 +47,6 @@ CREATE TABLE IF NOT EXISTS api_keys (
 CREATE TABLE IF NOT EXISTS key_permissions (
    key_id INTEGER NOT NULL,
    instance_id INTEGER NOT NULL,
-    can_infer INTEGER NOT NULL DEFAULT 0,
-    can_view_logs INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (key_id, instance_id),
    FOREIGN KEY (key_id) REFERENCES api_keys (id) ON DELETE CASCADE,
    FOREIGN KEY (instance_id) REFERENCES instances (id) ON DELETE CASCADE
--- a/pkg/database/permissions.go
+++ b/pkg/database/permissions.go
@@ -10,7 +10,7 @@ import (
 // GetPermissions retrieves all permissions for a key
 func (db *sqliteDB) GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPermission, error) {
 	query := `
-		SELECT key_id, instance_id, can_infer, can_view_logs
+		SELECT key_id, instance_id
 		FROM key_permissions
 		WHERE key_id = ?
 		ORDER BY instance_id
@@ -25,7 +25,7 @@ func (db *sqliteDB) GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPe
 	var permissions []auth.KeyPermission
 	for rows.Next() {
 		var perm auth.KeyPermission
-		err := rows.Scan(&perm.KeyID, &perm.InstanceID, &perm.CanInfer, &perm.CanViewLogs)
+		err := rows.Scan(&perm.KeyID, &perm.InstanceID)
 		if err != nil {
 			return nil, fmt.Errorf("failed to scan key permission: %w", err)
 		}
@@ -38,13 +38,13 @@ func (db *sqliteDB) GetPermissions(ctx context.Context, keyID int) ([]auth.KeyPe
 // HasPermission checks if key has inference permission for instance
 func (db *sqliteDB) HasPermission(ctx context.Context, keyID, instanceID int) (bool, error) {
 	query := `
-		SELECT can_infer 
-		FROM key_permissions 
+		SELECT 1
+		FROM key_permissions
 		WHERE key_id = ? AND instance_id = ?
 	`

-	var canInfer bool
-	err := db.QueryRowContext(ctx, query, keyID, instanceID).Scan(&canInfer)
+	var exists int
+	err := db.QueryRowContext(ctx, query, keyID, instanceID).Scan(&exists)
 	if err != nil {
 		if err == sql.ErrNoRows {
 			// No permission record found, deny access
@@ -53,5 +53,5 @@ func (db *sqliteDB) HasPermission(ctx context.Context, keyID, instanceID int) (b
 		return false, fmt.Errorf("failed to check key permission: %w", err)
 	}

-	return canInfer, nil
+	return true, nil
 }
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -3,10 +3,12 @@ package instance
 import (
 	"encoding/json"
 	"fmt"
-	"llamactl/pkg/config"
 	"log"
 	"net/http"
 	"time"
+
+	"llamactl/pkg/backends"
+	"llamactl/pkg/config"
 )

 // Instance represents a running instance of llama server
@@ -68,7 +70,16 @@ func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusCha

 	// Only create logger, proxy, and process for local instances
 	if !instance.IsRemote() {
-		instance.logger = newLogger(name, globalInstanceSettings.LogsDir)
+		logRotationConfig := &LogRotationConfig{
+			Enabled:  globalInstanceSettings.LogRotationEnabled,
+			MaxSize:  globalInstanceSettings.LogRotationMaxSize,
+			Compress: globalInstanceSettings.LogRotationCompress,
+		}
+		instance.logger = newLogger(
+			name,
+			globalInstanceSettings.LogsDir,
+			logRotationConfig,
+		)
 		instance.process = newProcess(instance)
 	}

@@ -107,6 +118,14 @@ func (i *Instance) WaitForHealthy(timeout int) error {
 	return i.process.waitForHealthy(timeout)
 }

+func (i *Instance) GetBackendType() backends.BackendType {
+	opts := i.GetOptions()
+	if opts == nil {
+		return backends.BackendTypeUnknown
+	}
+	return opts.BackendOptions.BackendType
+}
+
 // GetOptions returns the current options
 func (i *Instance) GetOptions() *Options {
 	if i.options == nil {
--- a/pkg/instance/instance_test.go
+++ b/pkg/instance/instance_test.go
@@ -27,8 +27,8 @@ func TestNewInstance(t *testing.T) {
 			},
 		},
 		Instances: config.InstancesConfig{
-			LogsDir:             "/tmp/test",
 			DefaultAutoRestart:  true,
+			LogsDir:             "/tmp/test",
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
 		},
@@ -120,8 +120,8 @@ func TestSetOptions(t *testing.T) {
 			},
 		},
 		Instances: config.InstancesConfig{
-			LogsDir:             "/tmp/test",
 			DefaultAutoRestart:  true,
+			LogsDir:             "/tmp/test",
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
 		},
--- a/pkg/instance/logger.go
+++ b/pkg/instance/logger.go
@@ -7,66 +7,117 @@ import (
 	"os"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
+
+	timber "github.com/DeRuina/timberjack"
 )

+// LogRotationConfig contains log rotation settings for instances; logger.create() maps them onto a timber.Logger
+type LogRotationConfig struct {
+	Enabled  bool // when false, create() overrides the logger's MaxSize with 0
+	MaxSize  int  // copied to timber.Logger.MaxSize
+	Compress bool // true selects "gzip" compression for the logger, false selects "none"
+}
+
 type logger struct {
 	name        string
 	logDir      string
-	logFile     atomic.Pointer[os.File]
+	logFile     *timber.Logger
 	logFilePath string
 	mu          sync.RWMutex
+	cfg         *LogRotationConfig
 }

-func newLogger(name string, logDir string) *logger {
+func newLogger(name, logDir string, cfg *LogRotationConfig) *logger {
 	return &logger{
 		name:   name,
 		logDir: logDir,
+		cfg:    cfg,
 	}
 }

-// create creates and opens the log files for stdout and stderr
-func (i *logger) create() error {
-	i.mu.Lock()
-	defer i.mu.Unlock()
+func (l *logger) create() error {
+	l.mu.Lock()
+	defer l.mu.Unlock()

-	if i.logDir == "" {
-		return fmt.Errorf("logDir is empty for instance %s", i.name)
+	if l.logDir == "" {
+		return fmt.Errorf("logDir empty for instance %s", l.name)
 	}

-	// Set up instance logs
-	logPath := i.logDir + "/" + i.name + ".log"
-
-	i.logFilePath = logPath
-	if err := os.MkdirAll(i.logDir, 0755); err != nil {
+	if err := os.MkdirAll(l.logDir, 0755); err != nil {
 		return fmt.Errorf("failed to create log directory: %w", err)
 	}

-	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
-	if err != nil {
-		return fmt.Errorf("failed to create stdout log file: %w", err)
+	logPath := fmt.Sprintf("%s/%s.log", l.logDir, l.name)
+	l.logFilePath = logPath
+
+	// Build the timber logger
+	t := &timber.Logger{
+		Filename:   logPath,
+		MaxSize:    l.cfg.MaxSize,
+		MaxBackups: 0, // No limit on backups
+		// Compression: "gzip" if Compress is true, else "none"
+		Compression: func() string {
+			if l.cfg.Compress {
+				return "gzip"
+			}
+			return "none"
+		}(),
+		FileMode:  0644,
+		LocalTime: true,
 	}

-	i.logFile.Store(logFile)
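+	// If rotation is disabled, set MaxSize to 0. NOTE(review): lumberjack-derived loggers treat MaxSize 0 as "use the 100 MB default", not "never rotate" — verify against timberjack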
+	if !l.cfg.Enabled {
+		t.MaxSize = 0
+	}

-	// Write a startup marker to both files
-	timestamp := time.Now().Format("2006-01-02 15:04:05")
-	fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
+	l.logFile = t
+
+	// Write a startup marker
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(t, "\n=== Instance %s started at %s ===\n", l.name, ts)

 	return nil
 }

-// getLogs retrieves the last n lines of logs from the instance
-func (i *logger) getLogs(num_lines int) (string, error) {
-	i.mu.RLock()
-	defer i.mu.RUnlock()
+func (l *logger) readOutput(rc io.ReadCloser) {
+	defer rc.Close() // rc is always closed once scanning stops
+	scanner := bufio.NewScanner(rc)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if lg := l.logFile; lg != nil { // NOTE(review): l.logFile is read without l.mu while create()/close() write it under the lock — possible data race; lines are dropped when it is nil
+			fmt.Fprintln(lg, line)
+		}
+	}
+}

-	if i.logFilePath == "" {
-		return "", fmt.Errorf("log file not created for instance %s", i.name)
+func (l *logger) close() {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	lg := l.logFile
+	if lg == nil {
+		return
 	}

-	file, err := os.Open(i.logFilePath)
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(lg, "=== Instance %s stopped at %s ===\n\n", l.name, ts)
+
+	_ = lg.Close()
+	l.logFile = nil
+}
+
+// getLogs retrieves the last n lines of logs from the instance
+func (l *logger) getLogs(num_lines int) (string, error) {
+	l.mu.RLock()
+	defer l.mu.RUnlock()
+
+	if l.logFilePath == "" {
+		return "", fmt.Errorf("log file not created for instance %s", l.name)
+	}
+
+	file, err := os.Open(l.logFilePath)
 	if err != nil {
 		return "", fmt.Errorf("failed to open log file: %w", err)
 	}
@@ -97,31 +148,3 @@ func (i *logger) getLogs(num_lines int) (string, error) {

 	return strings.Join(lines[start:], "\n"), nil
 }
-
-// close closes the log files
-func (i *logger) close() {
-	i.mu.Lock()
-	defer i.mu.Unlock()
-
-	logFile := i.logFile.Swap(nil)
-	if logFile != nil {
-		timestamp := time.Now().Format("2006-01-02 15:04:05")
-		fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
-		logFile.Sync() // Ensure all buffered data is written to disk
-		logFile.Close()
-	}
-}
-
-// readOutput reads from the given reader and writes lines to the log file
-func (i *logger) readOutput(reader io.ReadCloser) {
-	defer reader.Close()
-
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		line := scanner.Text()
-		// Use atomic load to avoid lock contention on every line
-		if logFile := i.logFile.Load(); logFile != nil {
-			fmt.Fprintln(logFile, line)
-		}
-	}
-}
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -19,7 +19,7 @@ type InstanceManager interface {
 	UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
 	DeleteInstance(name string) error
 	StartInstance(name string) (*instance.Instance, error)
-	IsMaxRunningInstancesReached() bool
+	AtMaxRunning() bool
 	StopInstance(name string) (*instance.Instance, error)
 	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Instance, error)
@@ -114,11 +114,6 @@ func (im *instanceManager) Shutdown() {
 		}
 		wg.Wait()
 		fmt.Println("All instances stopped.")
-
-		// 4. Close database connection
-		if err := im.db.Close(); err != nil {
-			log.Printf("Error closing database: %v\n", err)
-		}
 	})
 }

@@ -181,6 +176,7 @@ func (im *instanceManager) loadInstance(persistedInst *instance.Instance) error
 	inst := instance.New(name, im.globalConfig, options, statusCallback)

 	// Restore persisted fields that NewInstance doesn't set
+	inst.ID = persistedInst.ID
 	inst.Created = persistedInst.Created
 	inst.SetStatus(persistedInst.GetStatus())

--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -202,12 +202,11 @@ func createTestAppConfig(instancesDir string) *config.AppConfig {
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
-			InstancesDir:         instancesDir,
-			LogsDir:              instancesDir,
 			MaxInstances:         10,
 			MaxRunningInstances:  10,
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
+			LogsDir:              instancesDir,
 			DefaultRestartDelay:  5,
 			TimeoutCheckInterval: 5,
 		},
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -37,7 +37,6 @@ func (im *instanceManager) ListInstances() ([]*instance.Instance, error) {
 		if node := im.getNodeForInstance(inst); node != nil {
 			remoteInst, err := im.remote.getInstance(ctx, node, inst.Name)
 			if err != nil {
-				// Log error but continue with stale data
 				// Don't fail the entire list operation due to one remote failure
 				continue
 			}
@@ -384,7 +383,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
 	}

 	// Check max running instances limit for local instances only
-	if im.IsMaxRunningInstancesReached() {
+	if im.AtMaxRunning() {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
 	}

@@ -400,7 +399,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
 	return inst, nil
 }

-func (im *instanceManager) IsMaxRunningInstancesReached() bool {
+func (im *instanceManager) AtMaxRunning() bool {
 	if im.globalConfig.Instances.MaxRunningInstances == -1 {
 		return false
 	}
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -38,7 +38,6 @@ func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
 }

 func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
-	tempDir := t.TempDir()
 	appConfig := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{
@@ -47,7 +46,6 @@ func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
-			InstancesDir:         tempDir,
 			MaxInstances:         1, // Very low limit for testing
 			TimeoutCheckInterval: 5,
 		},
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -96,7 +96,7 @@ func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
 		return fmt.Errorf("instance is not running and on-demand start is not enabled")
 	}

-	if h.InstanceManager.IsMaxRunningInstancesReached() {
+	if h.InstanceManager.AtMaxRunning() {
 		if h.cfg.Instances.EnableLRUEviction {
 			err := h.InstanceManager.EvictLRUInstance()
 			if err != nil {
--- a/pkg/server/handlers_auth.go
+++ b/pkg/server/handlers_auth.go
@@ -11,19 +11,12 @@ import (
 	"github.com/go-chi/chi/v5"
 )

-// InstancePermission defines the permissions for an API key on a specific instance.
-type InstancePermission struct {
-	InstanceID  int  `json:"instance_id"`
-	CanInfer    bool `json:"can_infer"`
-	CanViewLogs bool `json:"can_view_logs"`
-}
-
 // CreateKeyRequest represents the request body for creating a new API key.
 type CreateKeyRequest struct {
-	Name                string
-	PermissionMode      auth.PermissionMode
-	ExpiresAt           *int64
-	InstancePermissions []InstancePermission
+	Name           string              `json:"name"`
+	PermissionMode auth.PermissionMode `json:"permission_mode"`
+	ExpiresAt      *int64              `json:"expires_at,omitempty"`
+	InstanceIDs    []int               `json:"instance_ids,omitempty"`
 }

 // CreateKeyResponse represents the response returned when creating a new API key.
@@ -33,7 +26,6 @@ type CreateKeyResponse struct {
 	UserID         string              `json:"user_id"`
 	PermissionMode auth.PermissionMode `json:"permission_mode"`
 	ExpiresAt      *int64              `json:"expires_at"`
-	Enabled        bool                `json:"enabled"`
 	CreatedAt      int64               `json:"created_at"`
 	UpdatedAt      int64               `json:"updated_at"`
 	LastUsedAt     *int64              `json:"last_used_at"`
@@ -47,7 +39,6 @@ type KeyResponse struct {
 	UserID         string              `json:"user_id"`
 	PermissionMode auth.PermissionMode `json:"permission_mode"`
 	ExpiresAt      *int64              `json:"expires_at"`
-	Enabled        bool                `json:"enabled"`
 	CreatedAt      int64               `json:"created_at"`
 	UpdatedAt      int64               `json:"updated_at"`
 	LastUsedAt     *int64              `json:"last_used_at"`
@@ -57,8 +48,6 @@ type KeyResponse struct {
 type KeyPermissionResponse struct {
 	InstanceID   int    `json:"instance_id"`
 	InstanceName string `json:"instance_name"`
-	CanInfer     bool   `json:"can_infer"`
-	CanViewLogs  bool   `json:"can_view_logs"`
 }

 // CreateKey godoc
@@ -93,8 +82,8 @@ func (h *Handler) CreateKey() http.HandlerFunc {
 			writeError(w, http.StatusBadRequest, "invalid_permission_mode", "Permission mode must be 'allow_all' or 'per_instance'")
 			return
 		}
-		if req.PermissionMode == auth.PermissionModePerInstance && len(req.InstancePermissions) == 0 {
-			writeError(w, http.StatusBadRequest, "missing_permissions", "Instance permissions required when permission mode is 'per_instance'")
+		if req.PermissionMode == auth.PermissionModePerInstance && len(req.InstanceIDs) == 0 {
+			writeError(w, http.StatusBadRequest, "missing_permissions", "Instance IDs required when permission mode is 'per_instance'")
 			return
 		}
 		if req.ExpiresAt != nil && *req.ExpiresAt <= time.Now().Unix() {
@@ -114,16 +103,16 @@ func (h *Handler) CreateKey() http.HandlerFunc {
 				instanceIDMap[inst.ID] = true
 			}

-			for _, perm := range req.InstancePermissions {
-				if !instanceIDMap[perm.InstanceID] {
-					writeError(w, http.StatusBadRequest, "invalid_instance_id", fmt.Sprintf("Instance ID %d does not exist", perm.InstanceID))
+			for _, instanceID := range req.InstanceIDs {
+				if !instanceIDMap[instanceID] {
+					writeError(w, http.StatusBadRequest, "invalid_instance_id", fmt.Sprintf("Instance ID %d does not exist", instanceID))
 					return
 				}
 			}
 		}

 		// Generate plain-text key
-		plainTextKey, err := auth.GenerateKey("llamactl-")
+		plainTextKey, err := auth.GenerateKey("llamactl")
 		if err != nil {
 			writeError(w, http.StatusInternalServerError, "key_generation_failed", "Failed to generate API key")
 			return
@@ -144,19 +133,16 @@ func (h *Handler) CreateKey() http.HandlerFunc {
 			UserID:         "system",
 			PermissionMode: req.PermissionMode,
 			ExpiresAt:      req.ExpiresAt,
-			Enabled:        true,
 			CreatedAt:      now,
 			UpdatedAt:      now,
 		}

-		// Convert InstancePermissions to KeyPermissions
+		// Convert InstanceIDs to KeyPermissions
 		var keyPermissions []auth.KeyPermission
-		for _, perm := range req.InstancePermissions {
+		for _, instanceID := range req.InstanceIDs {
 			keyPermissions = append(keyPermissions, auth.KeyPermission{
-				KeyID:       0, // Will be set by database after key creation
-				InstanceID:  perm.InstanceID,
-				CanInfer:    perm.CanInfer,
-				CanViewLogs: perm.CanViewLogs,
+				KeyID:      0, // Will be set by database after key creation
+				InstanceID: instanceID,
 			})
 		}

@@ -174,7 +160,6 @@ func (h *Handler) CreateKey() http.HandlerFunc {
 			UserID:         apiKey.UserID,
 			PermissionMode: apiKey.PermissionMode,
 			ExpiresAt:      apiKey.ExpiresAt,
-			Enabled:        apiKey.Enabled,
 			CreatedAt:      apiKey.CreatedAt,
 			UpdatedAt:      apiKey.UpdatedAt,
 			LastUsedAt:     apiKey.LastUsedAt,
@@ -213,7 +198,6 @@ func (h *Handler) ListKeys() http.HandlerFunc {
 				UserID:         key.UserID,
 				PermissionMode: key.PermissionMode,
 				ExpiresAt:      key.ExpiresAt,
-				Enabled:        key.Enabled,
 				CreatedAt:      key.CreatedAt,
 				UpdatedAt:      key.UpdatedAt,
 				LastUsedAt:     key.LastUsedAt,
@@ -263,7 +247,6 @@ func (h *Handler) GetKey() http.HandlerFunc {
 			UserID:         key.UserID,
 			PermissionMode: key.PermissionMode,
 			ExpiresAt:      key.ExpiresAt,
-			Enabled:        key.Enabled,
 			CreatedAt:      key.CreatedAt,
 			UpdatedAt:      key.UpdatedAt,
 			LastUsedAt:     key.LastUsedAt,
@@ -362,8 +345,6 @@ func (h *Handler) GetKeyPermissions() http.HandlerFunc {
 			response = append(response, KeyPermissionResponse{
 				InstanceID:   perm.InstanceID,
 				InstanceName: instanceNameMap[perm.InstanceID],
-				CanInfer:     perm.CanInfer,
-				CanViewLogs:  perm.CanViewLogs,
 			})
 		}

--- a/pkg/server/handlers_backends.go
+++ b/pkg/server/handlers_backends.go
@@ -306,3 +306,101 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
 }
+
+// LlamaCppListModels godoc
+// @Summary List models in a llama.cpp instance
+// @Description Returns a list of models available in the specified llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} map[string]any "Models list response"
+// @Failure 400 {string} string "Invalid instance"
+// @Failure 403 {string} string "Permission denied"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is shutting down"
+// @Router /api/v1/llama-cpp/{name}/models [get]
+func (h *Handler) LlamaCppListModels() http.HandlerFunc {
+	return h.proxyLlamaCppModels("/models")
+}
+
+// LlamaCppLoadModel godoc
+// @Summary Load a model in a llama.cpp instance
+// @Description Loads the specified model in the given llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param model path string true "Model Name"
+// @Success 200 {object} map[string]string "Success message"
+// @Failure 400 {string} string "Invalid request"
+// @Failure 403 {string} string "Permission denied"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is shutting down"
+// @Router /api/v1/llama-cpp/{name}/models/{model}/load [post]
+func (h *Handler) LlamaCppLoadModel() http.HandlerFunc {
+	return h.proxyLlamaCppModels("/models/load")
+}
+
+// LlamaCppUnloadModel godoc
+// @Summary Unload a model in a llama.cpp instance
+// @Description Unloads the specified model in the given llama.cpp instance
+// @Tags Llama.cpp
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param model path string true "Model Name"
+// @Success 200 {object} map[string]string "Success message"
+// @Failure 400 {string} string "Invalid request"
+// @Failure 403 {string} string "Permission denied"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is shutting down"
+// @Router /api/v1/llama-cpp/{name}/models/{model}/unload [post]
+func (h *Handler) LlamaCppUnloadModel() http.HandlerFunc {
+	return h.proxyLlamaCppModels("/models/unload")
+}
+
+// proxyLlamaCppModels builds the handler shared by the llama.cpp model endpoints.
+// It validates the instance, checks the caller's permission on it, rejects
+// instances that are shutting down, autostarts a stopped local instance, and then
+// forwards the request to the instance with its path rewritten to upstreamPath.
+//
+// NOTE(review): the {model} segment of the load/unload routes is not read here;
+// the request body is forwarded unchanged, so callers presumably have to name the
+// model in the body as well. TODO confirm against the llama.cpp API and the web UI.
+func (h *Handler) proxyLlamaCppModels(upstreamPath string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		inst, err := h.validateLlamaCppInstance(r)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "invalid instance", err.Error())
+			return
+		}
+
+		// Check instance permissions
+		if err := h.authMiddleware.CheckInstancePermission(r.Context(), inst.ID); err != nil {
+			writeError(w, http.StatusForbidden, "permission_denied", err.Error())
+			return
+		}
+
+		// Check if instance is shutting down before autostart logic
+		if inst.GetStatus() == instance.ShuttingDown {
+			writeError(w, http.StatusServiceUnavailable, "instance_shutting_down", "Instance is shutting down")
+			return
+		}
+
+		if !inst.IsRemote() && !inst.IsRunning() {
+			if err := h.ensureInstanceRunning(inst); err != nil {
+				writeError(w, http.StatusInternalServerError, "instance start failed", err.Error())
+				return
+			}
+		}
+
+		// Modify request path for proxying
+		r.URL.Path = upstreamPath
+
+		// Use instance's ServeHTTP which tracks inflight requests and handles shutting
+		// down state. It writes the error response itself, so the returned error
+		// needs no further handling here.
+		_ = inst.ServeHTTP(w, r)
+	}
+}
--- a/pkg/server/handlers_openai.go
+++ b/pkg/server/handlers_openai.go
@@ -3,10 +3,13 @@ package server
 import (
 	"bytes"
 	"encoding/json"
+	"fmt"
 	"io"
+	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/validation"
 	"net/http"
+	"strings"
 )

 // OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format
@@ -23,6 +26,53 @@ type OpenAIInstance struct {
 	OwnedBy string `json:"owned_by"`
 }

+// LlamaCppModel represents a model available in a llama.cpp instance
+type LlamaCppModel struct {
+	ID      string              `json:"id"` // combined with the instance name into "<instance>/<model>" listing IDs
+	Object  string              `json:"object"`
+	OwnedBy string              `json:"owned_by"`
+	Created int64               `json:"created"`
+	InCache bool                `json:"in_cache"`
+	Path    string              `json:"path"`
+	Status  LlamaCppModelStatus `json:"status"`
+}
+
+// LlamaCppModelStatus is the "status" object of a LlamaCppModel as decoded from JSON
+type LlamaCppModelStatus struct {
+	Value string   `json:"value"` // "loaded" | "loading" | "unloaded"
+	Args  []string `json:"args"`
+}
+
+// fetchLlamaCppModels requests /models directly from the instance's host and port and decodes the list
+func fetchLlamaCppModels(inst *instance.Instance) ([]LlamaCppModel, error) {
+	endpoint := fmt.Sprintf("http://%s:%d/models", inst.GetHost(), inst.GetPort())
+	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	// NOTE(review): http.DefaultClient has no timeout, so an unresponsive instance blocks the caller
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch models: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) // bound the error text
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var result struct {
+		Data []LlamaCppModel `json:"data"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return nil, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	return result.Data, nil
+}
+
 // OpenAIListInstances godoc
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
@@ -40,14 +90,41 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 			return
 		}

-		openaiInstances := make([]OpenAIInstance, len(instances))
-		for i, inst := range instances {
-			openaiInstances[i] = OpenAIInstance{
+		// Non-nil slice so that an empty listing is JSON-encoded as [] rather than null
+		openaiInstances := make([]OpenAIInstance, 0, len(instances))
+
+		// For each llama.cpp instance, try to fetch models and add them as separate entries
+		for _, inst := range instances {
+			if inst.GetBackendType() == backends.BackendTypeLlamaCpp && inst.IsRunning() {
+				// Try to fetch models from the instance
+				models, err := fetchLlamaCppModels(inst)
+				if err != nil {
+					// Not fatal: models is nil here, so the instance is listed by name below
+					fmt.Printf("Failed to fetch models from instance %s: %v\n", inst.Name, err)
+				}
+
+				for _, model := range models {
+					openaiInstances = append(openaiInstances, OpenAIInstance{
+						ID:      inst.Name + "/" + model.ID,
+						Object:  "model",
+						Created: inst.Created,
+						OwnedBy: inst.Name,
+					})
+				}
+
+				if len(models) > 1 {
+					// Skip adding the instance name if multiple models are present
+					continue
+				}
+			}
+
+			// Add instance name as single entry (for non-llama.cpp or if model fetch failed)
+			openaiInstances = append(openaiInstances, OpenAIInstance{
 				ID:      inst.Name,
 				Object:  "model",
 				Created: inst.Created,
 				OwnedBy: "llamactl",
-			}
+			})
 		}

 		openaiResponse := OpenAIListInstancesResponse{
@@ -87,14 +164,28 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

-		modelName, ok := requestBody["model"].(string)
-		if !ok || modelName == "" {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
+		reqModelName, ok := requestBody["model"].(string)
+		if !ok || reqModelName == "" {
+			writeError(w, http.StatusBadRequest, "invalid_request", "Model name is required")
 			return
 		}

+		// Parse instance name and model name from <instance_name>/<model_name> format
+		var instanceName string
+		var modelName string
+
+		// Check if model name contains "/"
+		if idx := strings.Index(reqModelName, "/"); idx != -1 {
+			// Split into instance and model parts
+			instanceName = reqModelName[:idx]
+			modelName = reqModelName[idx+1:]
+		} else {
+			instanceName = reqModelName
+			modelName = reqModelName
+		}
+
 		// Validate instance name at the entry point
-		validatedName, err := validation.ValidateInstanceName(modelName)
+		validatedName, err := validation.ValidateInstanceName(instanceName)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
 			return
@@ -119,6 +210,11 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			return
 		}

+		if inst.IsRemote() {
+			// Don't replace model name for remote instances
+			modelName = reqModelName
+		}
+
 		if !inst.IsRemote() && !inst.IsRunning() {
 			err := h.ensureInstanceRunning(inst)
 			if err != nil {
@@ -127,6 +223,16 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 			}
 		}

+		// Update the request body with just the model name
+		requestBody["model"] = modelName
+
+		// Re-marshal the updated body
+		bodyBytes, err = json.Marshal(requestBody)
+		if err != nil {
+			writeError(w, http.StatusInternalServerError, "marshal_error", "Failed to update request body")
+			return
+		}
+
 		// Recreate the request body from the bytes we read
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
--- a/pkg/server/middleware.go
+++ b/pkg/server/middleware.go
@@ -36,18 +36,12 @@ func NewAPIAuthMiddleware(authCfg config.AuthConfig, authStore database.AuthStor
 		managementKeys[key] = true
 	}

-	// If len(authCfg.InferenceKeys) > 0, log warning
-	if len(authCfg.InferenceKeys) > 0 {
-		log.Println("⚠️ Config-based inference keys are no longer supported and will be ignored.")
-		log.Println("    Please create inference keys in web UI or via management API.")
-	}
-
 	// Handle legacy auto-generation for management keys if none provided and auth is required
 	var generated bool = false
 	const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

 	if authCfg.RequireManagementAuth && len(authCfg.ManagementKeys) == 0 {
-		key, err := auth.GenerateKey("llamactl-mgmt-")
+		key, err := auth.GenerateKey("llamactl-mgmt")
 		if err != nil {
 			log.Printf("Warning: Failed to generate management key: %v", err)
 			// Fallback to PID-based key for safety
--- a/pkg/server/middleware_test.go
+++ b/pkg/server/middleware_test.go
@@ -275,16 +275,3 @@ func TestAutoGenerationScenarios(t *testing.T) {
 		})
 	}
 }
-
-func TestConfigBasedInferenceKeysDeprecationWarning(t *testing.T) {
-	// Test that config-based inference keys trigger a warning (captured in logs)
-	cfg := config.AuthConfig{
-		InferenceKeys: []string{"sk-inference-old"},
-	}
-
-	// Creating middleware should log a warning, but shouldn't fail
-	_ = server.NewAPIAuthMiddleware(cfg, nil)
-
-	// If we get here without panic, the test passes
-	// The warning is logged but not returned as an error
-}
--- a/pkg/server/routes.go
+++ b/pkg/server/routes.go
@@ -26,9 +26,6 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		MaxAge:           300,
 	}))

-	// Add API authentication middleware
-	authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth, handler.authStore)
-
 	if handler.cfg.Server.EnableSwagger {
 		r.Get("/swagger/*", httpSwagger.Handler(
 			httpSwagger.URL("/swagger/doc.json"),
@@ -38,8 +35,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Define routes
 	r.Route("/api/v1", func(r chi.Router) {

-		if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
-			r.Use(authMiddleware.ManagementAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
+			r.Use(handler.authMiddleware.ManagementAuthMiddleware())
 		}

 		r.Get("/version", handler.VersionHandler())
@@ -73,12 +70,19 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			})
 		})

+		// Llama.cpp instance-specific endpoints
+		r.Route("/llama-cpp/{name}", func(r chi.Router) {
+			r.Get("/models", handler.LlamaCppListModels())
+			r.Post("/models/{model}/load", handler.LlamaCppLoadModel())
+			r.Post("/models/{model}/unload", handler.LlamaCppUnloadModel())
+		})
+
 		// Node management endpoints
 		r.Route("/nodes", func(r chi.Router) {
 			r.Get("/", handler.ListNodes()) // List all nodes

 			r.Route("/{name}", func(r chi.Router) {
-				r.Get("/", handler.GetNode())
+				r.Get("/", handler.GetNode()) // Get node details
 			})
 		})

@@ -107,8 +111,8 @@ func SetupRouter(handler *Handler) *chi.Mux {

 	r.Route("/v1", func(r chi.Router) {

-		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-			r.Use(authMiddleware.InferenceAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 		}

 		r.Get("/models", handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
@@ -135,8 +139,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		// Private Routes
 		r.Group(func(r chi.Router) {

-			if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-				r.Use(authMiddleware.InferenceAuthMiddleware())
+			if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+				r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 			}

 			// This handler auto starts the server if it's not running
--- a/test_client.py
+++ b/test_client.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""
+Simple Python script to interact with local LLM server's OpenAI-compatible API
+"""
+
+import requests
+import sys
+
+# Local LLM server configuration
+BASE_URL = "http://localhost:8080"
+API_KEY = None
+MODEL_NAME = None
+
+
+def _auth_headers():
+    """Return the Authorization header as a dict, or an empty dict when no API key is set"""
+    return {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}
+
+
+def get_models():
+    """
+    Fetch available models from /v1/models endpoint
+
+    Returns:
+        list: The "data" entries of the response, or [] if the request fails
+    """
+    try:
+        response = requests.get(
+            f"{BASE_URL}/v1/models", headers=_auth_headers(), timeout=10
+        )
+        response.raise_for_status()
+        return response.json()["data"]
+    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
+        print(f"Error fetching models: {e}")
+        return []
+
+
+def send_message(message):
+    """
+    Send a message to local LLM server API
+
+    Args:
+        message (str): The message to send
+
+    Returns:
+        str: The AI response or error message
+    """
+    headers = {"Content-Type": "application/json", **_auth_headers()}
+    data = {
+        "model": MODEL_NAME,
+        "messages": [{"role": "user", "content": message}],
+        "temperature": 0.7,
+        "max_tokens": 1000,
+        "stream": False,
+    }
+
+    try:
+        response = requests.post(
+            f"{BASE_URL}/v1/chat/completions", headers=headers, json=data, timeout=60
+        )
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+    except requests.RequestException as e:
+        # Connection problems, timeouts and HTTP error statuses
+        return f"Error: request failed: {e}"
+    except (KeyError, IndexError, TypeError, ValueError) as e:
+        # The server answered, but not with the expected chat-completion shape
+        return f"Error: unexpected response format: {e!r}"
+
+
+def interactive_mode():
+    """Run in interactive mode for continuous conversation"""
+    global BASE_URL, API_KEY, MODEL_NAME
+
+    # Get base URL
+    url_input = input(f"Base URL [{BASE_URL}]: ").strip()
+    if url_input:
+        BASE_URL = url_input
+
+    # Get API key (optional)
+    key_input = input("API key (optional): ").strip()
+    if key_input:
+        API_KEY = key_input
+
+    # Fetch and select model
+    models = get_models()
+    if not models:
+        print("No models available. Exiting.")
+        return
+
+    print("\nAvailable models:")
+    for i, m in enumerate(models, 1):
+        print(f"{i}. {m['id']}")
+
+    while True:
+        try:
+            selection = int(input("\nSelect model: "))
+            if 1 <= selection <= len(models):
+                MODEL_NAME = models[selection - 1]["id"]
+                break
+            print(f"Please enter a number between 1 and {len(models)}")
+        except ValueError:
+            print("Please enter a valid number")
+
+    print(f"\nUsing model: {MODEL_NAME}")
+    print("Type 'quit' or 'exit' to stop")
+    print("-" * 40)
+
+    while True:
+        try:
+            user_input = input("\nYou: ").strip()
+
+            if user_input.lower() in ['quit', 'exit', 'q']:
+                print("Goodbye!")
+                break
+
+            if not user_input:
+                continue
+
+            print("AI: ", end="", flush=True)
+            print(send_message(user_input))
+        except (KeyboardInterrupt, EOFError):
+            print("\nGoodbye!")
+            break
+
+
+def main():
+    """Send the command-line arguments as one message, or start interactive mode without any"""
+    global MODEL_NAME
+
+    if len(sys.argv) > 1:
+        # Single message mode: nothing prompts for a model, so use the first one listed
+        if MODEL_NAME is None:
+            models = get_models()
+            if not models:
+                print("No models available. Exiting.")
+                sys.exit(1)
+            MODEL_NAME = models[0]["id"]
+        print(send_message(" ".join(sys.argv[1:])))
+    else:
+        # Interactive mode
+        interactive_mode()
+
+
+if __name__ == "__main__":
+    main()
--- a/webui/package-lock.json
+++ b/webui/package-lock.json
--- a/webui/package.json
+++ b/webui/package.json
@@ -18,28 +18,29 @@
    "lint:fix": "eslint . --ext .ts,.tsx --fix"
  },
  "dependencies": {
-    "@radix-ui/react-checkbox": "^1.3.2",
-    "@radix-ui/react-dialog": "^1.1.14",
-    "@radix-ui/react-label": "^2.1.7",
-    "@radix-ui/react-slot": "^1.2.3",
+    "@radix-ui/react-checkbox": "^1.3.3",
+    "@radix-ui/react-dialog": "^1.1.15",
+    "@radix-ui/react-label": "^2.1.8",
+    "@radix-ui/react-radio-group": "^1.3.8",
+    "@radix-ui/react-slot": "^1.2.4",
    "@tailwindcss/vite": "^4.1.11",
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
-    "lucide-react": "^0.555.0",
+    "date-fns": "^4.1.0",
+    "lucide-react": "^0.561.0",
    "react": "^19.2.0",
    "react-dom": "^19.2.0",
    "sonner": "^2.0.7",
    "tailwind-merge": "^3.4.0",
    "tailwindcss": "^4.1.11",
-    "zod": "^4.1.12"
+    "zod": "^4.2.0"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.1",
    "@testing-library/jest-dom": "^6.9.1",
    "@testing-library/react": "^16.3.0",
    "@testing-library/user-event": "^14.6.1",
-    "@types/eslint__js": "^9.14.0",
-    "@types/node": "^24.10.1",
+    "@types/node": "^25.0.2",
    "@types/react": "^19.2.4",
    "@types/react-dom": "^19.2.3",
    "@vitejs/plugin-react": "^5.1.1",
@@ -48,11 +49,11 @@
    "eslint-plugin-react": "^7.37.5",
    "eslint-plugin-react-hooks": "^7.0.1",
    "eslint-plugin-react-refresh": "^0.4.20",
-    "jsdom": "^27.2.0",
+    "jsdom": "^27.3.0",
    "tw-animate-css": "^1.4.0",
    "typescript": "^5.9.3",
-    "typescript-eslint": "^8.48.0",
-    "vite": "^7.2.2",
+    "typescript-eslint": "^8.50.0",
+    "vite": "^7.3.0",
    "vitest": "^4.0.8"
  }
 }
--- a/webui/src/App.tsx
+++ b/webui/src/App.tsx
@@ -4,6 +4,7 @@ import InstanceList from "@/components/InstanceList";
 import InstanceDialog from "@/components/InstanceDialog";
 import LoginDialog from "@/components/LoginDialog";
 import SystemInfoDialog from "./components/SystemInfoDialog";
+import SettingsDialog from "./components/settings/SettingsDialog";
 import { type CreateInstanceOptions, type Instance } from "@/types/instance";
 import { useInstances } from "@/contexts/InstancesContext";
 import { useAuth } from "@/contexts/AuthContext";
@@ -14,6 +15,7 @@ function App() {
  const { isAuthenticated, isLoading: authLoading } = useAuth();
  const [isInstanceModalOpen, setIsInstanceModalOpen] = useState(false);
  const [isSystemInfoModalOpen, setIsSystemInfoModalOpen] = useState(false);
+  const [isSettingsModalOpen, setIsSettingsModalOpen] = useState(false);
  const [editingInstance, setEditingInstance] = useState<Instance | undefined>(
    undefined
  );
@@ -41,6 +43,10 @@ function App() {
    setIsSystemInfoModalOpen(true);
  };

+  const handleShowSettings = () => {
+    setIsSettingsModalOpen(true);
+  };
+
  // Show loading spinner while checking auth
  if (authLoading) {
    return (
@@ -70,7 +76,11 @@ function App() {
  return (
    <ThemeProvider>
      <div className="min-h-screen bg-background">
-        <Header onCreateInstance={handleCreateInstance} onShowSystemInfo={handleShowSystemInfo} />
+        <Header
+          onCreateInstance={handleCreateInstance}
+          onShowSystemInfo={handleShowSystemInfo}
+          onShowSettings={handleShowSettings}
+        />
        <main className="container mx-auto max-w-4xl px-4 py-8">
          <InstanceList editInstance={handleEditInstance} />
        </main>
@@ -86,7 +96,12 @@ function App() {
          open={isSystemInfoModalOpen}
          onOpenChange={setIsSystemInfoModalOpen}
        />
-        
+
+        <SettingsDialog
+          open={isSettingsModalOpen}
+          onOpenChange={setIsSettingsModalOpen}
+        />
+
        <Toaster />
      </div>
    </ThemeProvider>
--- a/webui/src/tests/App.test.tsx
+++ b/webui/src/tests/App.test.tsx
@@ -75,8 +75,8 @@ function renderApp() {

 describe('App Component - Critical Business Logic Only', () => {
  const mockInstances: Instance[] = [
-    { name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
-    { name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
+    { id: 1, name: 'test-instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+    { id: 2, name: 'test-instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } }
  ]

  beforeEach(() => {
@@ -109,6 +109,7 @@ describe('App Component - Critical Business Logic Only', () => {
    it('creates new instance with correct API call and updates UI', async () => {
      const user = userEvent.setup()
      const newInstance: Instance = {
+        id: 3,
        name: 'new-test-instance',
        status: 'stopped',
        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'new-model.gguf' } }
@@ -151,6 +152,7 @@ describe('App Component - Critical Business Logic Only', () => {
    it('updates existing instance with correct API call', async () => {
      const user = userEvent.setup()
      const updatedInstance: Instance = {
+        id: 1,
        name: 'test-instance-1',
        status: 'stopped',
        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'updated-model.gguf' } }
--- a/webui/src/components/Header.tsx
+++ b/webui/src/components/Header.tsx
@@ -1,14 +1,15 @@
 import { Button } from "@/components/ui/button";
-import { HelpCircle, LogOut, Moon, Sun } from "lucide-react";
+import { HelpCircle, LogOut, Moon, Settings, Sun } from "lucide-react";
 import { useAuth } from "@/contexts/AuthContext";
 import { useTheme } from "@/contexts/ThemeContext";

 interface HeaderProps {
  onCreateInstance: () => void;
  onShowSystemInfo: () => void;
+  onShowSettings: () => void;
 }

-function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
+function Header({ onCreateInstance, onShowSystemInfo, onShowSettings }: HeaderProps) {
  const { logout } = useAuth();
  const { theme, toggleTheme } = useTheme();

@@ -41,6 +42,16 @@ function Header({ onCreateInstance, onShowSystemInfo }: HeaderProps) {
              {theme === 'light' ? <Moon className="h-4 w-4" /> : <Sun className="h-4 w-4" />}
            </Button>

+            <Button
+              variant="outline"
+              size="icon"
+              onClick={onShowSettings}
+              data-testid="settings-button"
+              title="Settings"
+            >
+              <Settings className="h-4 w-4" />
+            </Button>
+
            <Button
              variant="outline"
              size="icon"
--- a/webui/src/components/InstanceCard.tsx
+++ b/webui/src/components/InstanceCard.tsx
@@ -1,14 +1,16 @@
 // ui/src/components/InstanceCard.tsx
 import { Button } from "@/components/ui/button";
 import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Badge } from "@/components/ui/badge";
 import type { Instance } from "@/types/instance";
-import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download } from "lucide-react";
+import { Edit, FileText, Play, Square, Trash2, MoreHorizontal, Download, Boxes } from "lucide-react";
 import LogsDialog from "@/components/LogDialog";
+import ModelsDialog from "@/components/ModelsDialog";
 import HealthBadge from "@/components/HealthBadge";
 import BackendBadge from "@/components/BackendBadge";
-import { useState } from "react";
+import { useState, useEffect } from "react";
 import { useInstanceHealth } from "@/hooks/useInstanceHealth";
-import { instancesApi } from "@/lib/api";
+import { instancesApi, llamaCppApi, type Model } from "@/lib/api";

 interface InstanceCardProps {
  instance: Instance;
@@ -26,9 +28,35 @@ function InstanceCard({
  editInstance,
 }: InstanceCardProps) {
  const [isLogsOpen, setIsLogsOpen] = useState(false);
+  const [isModelsOpen, setIsModelsOpen] = useState(false);
  const [showAllActions, setShowAllActions] = useState(false);
+  const [models, setModels] = useState<Model[]>([]);
  const health = useInstanceHealth(instance.name, instance.status);

+  const running = instance.status === "running";
+  const isLlamaCpp = instance.options?.backend_type === "llama_cpp";
+
+  // Fetch models for llama.cpp instances, dropping results that arrive after cleanup
+  useEffect(() => {
+    if (!isLlamaCpp || !running) {
+      setModels([]);
+      return;
+    }
+
+    let stale = false; // set by the cleanup when the deps change or the card unmounts
+    void (async () => {
+      const next = await llamaCppApi.getModels(instance.name).catch((): Model[] => []);
+      if (!stale) setModels(next);
+    })();
+    return () => {
+      stale = true;
+    };
+  }, [instance.name, isLlamaCpp, running]);
+
+  // Calculate model counts
+  const totalModels = models.length;
+  const loadedModels = models.filter(m => m.status.value === "loaded").length;
+
  const handleStart = () => {
    startInstance(instance.name);
  };
@@ -53,6 +81,10 @@ function InstanceCard({
    setIsLogsOpen(true);
  };

+  const handleModels = () => {
+    setIsModelsOpen(true);
+  };
+
  const handleExport = () => {
    void (async () => {
      try {
@@ -83,8 +115,6 @@ function InstanceCard({
    })();
  };

-  const running = instance.status === "running";
-
  return (
    <>
      <Card className="hover:shadow-md transition-shadow">
@@ -99,6 +129,12 @@ function InstanceCard({
            <div className="flex items-center gap-2 flex-wrap">
              <BackendBadge backend={instance.options?.backend_type} docker={instance.options?.docker_enabled} />
              {running && <HealthBadge health={health} />}
+              {isLlamaCpp && running && totalModels > 0 && (
+                <Badge variant="secondary" className="text-xs">
+                  <Boxes className="h-3 w-3 mr-1" />
+                  {loadedModels}/{totalModels} models
+                </Badge>
+              )}
            </div>
          </div>
        </CardHeader>
@@ -149,26 +185,37 @@ function InstanceCard({

          {/* Secondary actions - collapsible */}
          {showAllActions && (
-            <div className="flex items-center gap-2 pt-2 border-t border-border">
+            <div className="flex items-center gap-2 pt-2 border-t border-border flex-wrap">
              <Button
                size="sm"
                variant="outline"
                onClick={handleLogs}
                title="View logs"
                data-testid="view-logs-button"
-                className="flex-1"
              >
                <FileText className="h-4 w-4 mr-1" />
                Logs
              </Button>

+              {isLlamaCpp && totalModels > 1 && (
+                <Button
+                  size="sm"
+                  variant="outline"
+                  onClick={handleModels}
+                  title="Manage models"
+                  data-testid="manage-models-button"
+                >
+                  <Boxes className="h-4 w-4 mr-1" />
+                  Models
+                </Button>
+              )}
+
              <Button
                size="sm"
                variant="outline"
                onClick={handleExport}
                title="Export instance"
                data-testid="export-instance-button"
-                className="flex-1"
              >
                <Download className="h-4 w-4 mr-1" />
                Export
@@ -195,6 +242,13 @@ function InstanceCard({
        instanceName={instance.name}
        isRunning={running}
      />
+
+      <ModelsDialog
+        open={isModelsOpen}
+        onOpenChange={setIsModelsOpen}
+        instanceName={instance.name}
+        isRunning={running}
+      />
    </>
  );
 }
--- a/webui/src/components/ModelsDialog.tsx
+++ b/webui/src/components/ModelsDialog.tsx
@@ -0,0 +1,303 @@
+import React, { useState, useEffect } from 'react'
+import { Button } from '@/components/ui/button'
+import {
+  Dialog,
+  DialogContent,
+  DialogDescription,
+  DialogHeader,
+  DialogTitle,
+} from '@/components/ui/dialog'
+import {
+  Table,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeader,
+  TableRow,
+} from '@/components/ui/table'
+import { Badge } from '@/components/ui/badge'
+import { llamaCppApi } from '@/lib/api'
+import { RefreshCw, Loader2, AlertCircle } from 'lucide-react'
+
+interface ModelsDialogProps {
+  open: boolean
+  onOpenChange: (open: boolean) => void
+  instanceName: string
+  isRunning: boolean
+}
+
+interface Model {
+  id: string
+  object: string
+  owned_by: string
+  created: number
+  in_cache: boolean
+  path: string
+  status: {
+    value: string // "loaded" | "loading" | "unloaded"
+    args: string[]
+  }
+}
+
+const StatusIcon: React.FC<{ status: string }> = ({ status }) => {
+  // Settled states get a static dot; an in-flight load gets a spinner.
+  if (status === 'loaded') {
+    return (
+      <div className="h-2 w-2 rounded-full bg-green-500" />
+    )
+  }
+  if (status === 'loading') {
+    return (
+      <Loader2 className="h-3 w-3 animate-spin text-yellow-500" />
+    )
+  }
+  if (status === 'unloaded') {
+    return (
+      <div className="h-2 w-2 rounded-full bg-gray-400" />
+    )
+  }
+  // Unrecognised status values render nothing.
+  return null
+}
+
+const ModelsDialog: React.FC<ModelsDialogProps> = ({
+  open,
+  onOpenChange,
+  instanceName,
+  isRunning,
+}) => {
+  const [models, setModels] = useState<Model[]>([])
+  const [loading, setLoading] = useState(false)
+  const [error, setError] = useState<string | null>(null)
+  const [loadingModels, setLoadingModels] = useState<Set<string>>(new Set())
+
+  // Fetch models function
+  const fetchModels = React.useCallback(async () => {
+    if (!instanceName || !isRunning) return
+    // Past this point a request is made: flag loading and clear any previous error.
+    setLoading(true)
+    setError(null)
+    // On success the list is replaced; on failure the previous list stays and the message is stored in `error`.
+    try {
+      const response = await llamaCppApi.getModels(instanceName)
+      setModels(response)
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to fetch models')
+    } finally {
+      setLoading(false)
+    }
+  }, [instanceName, isRunning])
+
+  // Fetch models when dialog opens
+  useEffect(() => {
+    if (!open || !isRunning) return
+
+    // Initial fetch
+    void fetchModels()
+  }, [open, isRunning, fetchModels])
+
+  // Auto-refresh only when models are loading
+  useEffect(() => {
+    if (!open || !isRunning) return
+
+    // Check if any model is in loading state
+    const hasLoadingModel = models.some(m => m.status.value === 'loading')
+
+    if (!hasLoadingModel) return
+
+    // Poll every 2 seconds when there's a loading model
+    const interval = setInterval(() => {
+      void fetchModels()
+    }, 2000)
+
+    return () => clearInterval(interval)
+  }, [open, isRunning, models, fetchModels])
+
+  // Load model
+  const loadModel = async (modelName: string) => {
+    setLoadingModels((prev) => new Set(prev).add(modelName))
+    setError(null)
+
+    try {
+      await llamaCppApi.loadModel(instanceName, modelName)
+      // Wait a bit for the backend to process the load
+      await new Promise(resolve => setTimeout(resolve, 500))
+      // Refresh models list after loading
+      await fetchModels()
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to load model')
+    } finally {
+      setLoadingModels((prev) => {
+        const newSet = new Set(prev)
+        newSet.delete(modelName)
+        return newSet
+      })
+    }
+  }
+
+  // Unload model
+  const unloadModel = async (modelName: string) => {
+    setLoadingModels((prev) => new Set(prev).add(modelName))
+    setError(null)
+
+    try {
+      await llamaCppApi.unloadModel(instanceName, modelName)
+      // Wait a bit for the backend to process the unload
+      await new Promise(resolve => setTimeout(resolve, 500))
+      // Refresh models list after unloading
+      await fetchModels()
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to unload model')
+    } finally {
+      setLoadingModels((prev) => {
+        const newSet = new Set(prev)
+        newSet.delete(modelName)
+        return newSet
+      })
+    }
+  }
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent className="sm:max-w-4xl max-w-[calc(100%-2rem)] max-h-[80vh] flex flex-col">
+        <DialogHeader>
+          <div className="flex items-center justify-between">
+            <div>
+              <DialogTitle className="flex items-center gap-2">
+                Models: {instanceName}
+                <Badge variant={isRunning ? 'default' : 'secondary'}>
+                  {isRunning ? 'Running' : 'Stopped'}
+                </Badge>
+              </DialogTitle>
+              <DialogDescription>
+                Manage models in this llama.cpp instance
+              </DialogDescription>
+            </div>
+
+            <Button
+              variant="outline"
+              size="sm"
+              onClick={() => void fetchModels()}
+              disabled={loading || !isRunning}
+            >
+              {loading ? (
+                <Loader2 className="h-4 w-4 animate-spin" />
+              ) : (
+                <RefreshCw className="h-4 w-4" />
+              )}
+            </Button>
+          </div>
+        </DialogHeader>
+
+        {/* Error Display */}
+        {error && (
+          <div className="flex items-center gap-2 p-3 bg-destructive/10 border border-destructive/20 rounded-lg">
+            <AlertCircle className="h-4 w-4 text-destructive" />
+            <span className="text-sm text-destructive">{error}</span>
+          </div>
+        )}
+
+        {/* Models Table */}
+        <div className="flex-1 flex flex-col min-h-0 overflow-auto">
+          {!isRunning ? (
+            <div className="flex items-center justify-center h-full text-muted-foreground">
+              Instance is not running
+            </div>
+          ) : loading && models.length === 0 ? (
+            <div className="flex items-center justify-center h-full">
+              <Loader2 className="h-6 w-6 animate-spin text-muted-foreground" />
+              <span className="ml-2 text-muted-foreground">
+                Loading models...
+              </span>
+            </div>
+          ) : models.length === 0 ? (
+            <div className="flex items-center justify-center h-full text-muted-foreground">
+              No models found
+            </div>
+          ) : (
+            <Table>
+              <TableHeader>
+                <TableRow>
+                  <TableHead>Model</TableHead>
+                  <TableHead>Status</TableHead>
+                  <TableHead className="text-right">Actions</TableHead>
+                </TableRow>
+              </TableHeader>
+              <TableBody>
+                {models.map((model) => {
+                  const isLoading = loadingModels.has(model.id)
+                  const isModelLoading = model.status.value === 'loading'
+
+                  return (
+                    <TableRow key={model.id}>
+                      <TableCell className="font-mono text-sm">
+                        {model.id}
+                      </TableCell>
+                      <TableCell>
+                        <div className="flex items-center gap-2">
+                          <StatusIcon status={model.status.value} />
+                          <span className="text-sm capitalize">
+                            {model.status.value}
+                          </span>
+                        </div>
+                      </TableCell>
+                      <TableCell className="text-right">
+                        {model.status.value === 'loaded' ? (
+                          <Button
+                            size="sm"
+                            variant="outline"
+                            onClick={() => { void unloadModel(model.id) }}
+                            disabled={!isRunning || isLoading || isModelLoading}
+                          >
+                            {isLoading ? (
+                              <>
+                                <Loader2 className="h-3 w-3 animate-spin mr-1" />
+                                Unloading...
+                              </>
+                            ) : (
+                              'Unload'
+                            )}
+                          </Button>
+                        ) : model.status.value === 'unloaded' ? (
+                          <Button
+                            size="sm"
+                            variant="default"
+                            onClick={() => { void loadModel(model.id) }}
+                            disabled={!isRunning || isLoading || isModelLoading}
+                          >
+                            {isLoading ? (
+                              <>
+                                <Loader2 className="h-3 w-3 animate-spin mr-1" />
+                                Loading...
+                              </>
+                            ) : (
+                              'Load'
+                            )}
+                          </Button>
+                        ) : (
+                          <Button size="sm" variant="ghost" disabled>
+                            Loading...
+                          </Button>
+                        )}
+                      </TableCell>
+                    </TableRow>
+                  )
+                })}
+              </TableBody>
+            </Table>
+          )}
+        </div>
+
+        {/* Auto-refresh indicator - only shown when models are loading */}
+        {isRunning && models.some(m => m.status.value === 'loading') && (
+          <div className="flex items-center gap-2 text-sm text-muted-foreground">
+            <div className="w-2 h-2 bg-yellow-500 rounded-full animate-pulse"></div>
+            Auto-refreshing while models are loading
+          </div>
+        )}
+      </DialogContent>
+    </Dialog>
+  )
+}
+
+export default ModelsDialog
--- a/webui/src/components/tests/InstanceCard.test.tsx
+++ b/webui/src/components/tests/InstanceCard.test.tsx
@@ -21,12 +21,14 @@ describe('InstanceCard - Instance Actions and State', () => {
  const mockEditInstance = vi.fn()

  const stoppedInstance: Instance = {
+    id: 1,
    name: 'test-instance',
    status: 'stopped',
    options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'test-model.gguf' } }
  }

  const runningInstance: Instance = {
+    id: 2,
    name: 'running-instance',
    status: 'running',
    options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'running-model.gguf' } }
@@ -342,6 +344,7 @@ afterEach(() => {
  describe('Error Edge Cases', () => {
    it('handles instance with minimal data', () => {
      const minimalInstance: Instance = {
+        id: 3,
        name: 'minimal',
        status: 'stopped',
        options: {}
@@ -364,6 +367,7 @@ afterEach(() => {

    it('handles instance with undefined options', () => {
      const instanceWithoutOptions: Instance = {
+        id: 4,
        name: 'no-options',
        status: 'running',
        options: undefined
--- a/webui/src/components/tests/InstanceList.test.tsx
+++ b/webui/src/components/tests/InstanceList.test.tsx
@@ -4,8 +4,7 @@ import userEvent from '@testing-library/user-event'
 import InstanceList from '@/components/InstanceList'
 import { InstancesProvider } from '@/contexts/InstancesContext'
 import { instancesApi } from '@/lib/api'
-import type { Instance } from '@/types/instance'
-import { BackendType } from '@/types/instance'
+import { BackendType, type Instance } from '@/types/instance'
 import { AuthProvider } from '@/contexts/AuthContext'

 // Mock the API
@@ -59,9 +58,9 @@ describe('InstanceList - State Management and UI Logic', () => {
  const mockEditInstance = vi.fn()

  const mockInstances: Instance[] = [
-    { name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
-    { name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
-    { name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
+    { id: 1, name: 'instance-1', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model1.gguf' } } },
+    { id: 2, name: 'instance-2', status: 'running', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model2.gguf' } } },
+    { id: 3, name: 'instance-3', status: 'stopped', options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: 'model3.gguf' } } }
  ]

  const DUMMY_API_KEY = 'test-api-key-123'
--- a/webui/src/components/tests/InstanceModal.test.tsx
+++ b/webui/src/components/tests/InstanceModal.test.tsx
@@ -153,6 +153,7 @@ afterEach(() => {

  describe('Edit Mode', () => {
    const mockInstance: Instance = {
+      id: 1,
      name: 'existing-instance',
      status: 'stopped',
      options: {
--- a/webui/src/components/apikeys/CreateApiKeyDialog.tsx
+++ b/webui/src/components/apikeys/CreateApiKeyDialog.tsx
@@ -0,0 +1,238 @@
+import { useState } from "react";
+import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter } from "@/components/ui/dialog";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group";
+import { Checkbox } from "@/components/ui/checkbox";
+import { Alert, AlertDescription } from "@/components/ui/alert";
+import { Loader2 } from "lucide-react";
+import { apiKeysApi } from "@/lib/api";
+import { PermissionMode, type CreateKeyRequest } from "@/types/apiKey";
+import { useInstances } from "@/contexts/InstancesContext";
+import { format } from "date-fns";
+
+interface CreateApiKeyDialogProps {
+  open: boolean;
+  onOpenChange: (open: boolean) => void;
+  onKeyCreated: (plainTextKey: string) => void;
+}
+
+function CreateApiKeyDialog({ open, onOpenChange, onKeyCreated }: CreateApiKeyDialogProps) {
+  const { instances } = useInstances();
+  const [name, setName] = useState("");
+  const [permissionMode, setPermissionMode] = useState<PermissionMode>(PermissionMode.AllowAll);
+  const [expiresAt, setExpiresAt] = useState<string>("");
+  const [instancePermissions, setInstancePermissions] = useState<Record<number, boolean>>({});
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // Parse a date-input value ("YYYY-MM-DD") as local midnight; a bare date string would be read as UTC midnight.
+  const parseLocalDate = (dateString: string) => new Date(`${dateString}T00:00:00`);
+
+  // Human-readable form of the chosen expiry date, or null when empty or unparseable.
+  const formatDisplayDate = (dateString: string) => {
+    if (!dateString) return null;
+    try {
+      return format(parseLocalDate(dateString), "d MMMM yyyy"); // format() throws on an Invalid Date
+    } catch {
+      return null;
+    }
+  };
+
+  // Validate the form, build the CreateKeyRequest and submit it; failures are reported through `error`.
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    setError(null);
+
+    const trimmedName = name.trim();
+    if (!trimmedName) {
+      setError("Name is required");
+      return;
+    }
+    if (trimmedName.length > 100) {
+      setError("Name must be 100 characters or less");
+      return;
+    }
+
+    // Object keys are always strings, so the selected ids are converted back to numbers.
+    const instanceIds: number[] =
+      permissionMode === PermissionMode.PerInstance
+        ? Object.entries(instancePermissions)
+            .filter(([, hasPermission]) => hasPermission)
+            .map(([instanceId]) => Number.parseInt(instanceId, 10))
+        : [];
+    if (permissionMode === PermissionMode.PerInstance && instanceIds.length === 0) {
+      setError("At least one instance permission is required for per-instance mode");
+      return;
+    }
+
+    const request: CreateKeyRequest = {
+      name: trimmedName,
+      permission_mode: permissionMode,
+      instance_ids: instanceIds,
+    };
+
+    // Add expiration if provided (sent as Unix seconds for local midnight of the chosen day)
+    if (expiresAt) {
+      const expirationTime = parseLocalDate(expiresAt).getTime();
+      if (Number.isNaN(expirationTime)) {
+        setError("Expiration date is invalid");
+        return;
+      }
+      if (expirationTime <= Date.now()) {
+        setError("Expiration date must be in the future");
+        return;
+      }
+      request.expires_at = Math.floor(expirationTime / 1000);
+    }
+
+    setLoading(true);
+    try {
+      const response = await apiKeysApi.create(request);
+      onKeyCreated(response.key);
+      // Reset form
+      setName("");
+      setPermissionMode(PermissionMode.AllowAll);
+      setExpiresAt("");
+      setInstancePermissions({});
+    } catch (err) {
+      setError(err instanceof Error ? err.message : "Failed to create API key");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  const handleInstancePermissionChange = (instanceId: number, checked: boolean) => {
+    setInstancePermissions(prev => ({
+      ...prev,
+      [instanceId]: checked,
+    }));
+  };
+
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent className="sm:max-w-2xl">
+        <DialogHeader>
+          <DialogTitle>Create API Key</DialogTitle>
+        </DialogHeader>
+        <form onSubmit={(e) => void handleSubmit(e)} className="space-y-4">
+          {error && (
+            <Alert variant="destructive">
+              <AlertDescription>{error}</AlertDescription>
+            </Alert>
+          )}
+
+          <div className="space-y-2">
+            <Label htmlFor="name">Name</Label>
+            <Input
+              id="name"
+              value={name}
+              onChange={(e) => setName(e.target.value)}
+              placeholder="My API Key"
+              maxLength={100}
+              disabled={loading}
+            />
+          </div>
+
+          <div className="space-y-3">
+            <Label>Permission Mode</Label>
+            <RadioGroup
+              value={permissionMode}
+              onValueChange={(value) => setPermissionMode(value as PermissionMode)}
+              disabled={loading}
+            >
+              <div className="flex items-center space-x-2">
+                <RadioGroupItem value={PermissionMode.AllowAll} id="allow-all" />
+                <Label htmlFor="allow-all" className="font-normal cursor-pointer">
+                  Full Access
+                </Label>
+              </div>
+              <div className="flex items-center space-x-2">
+                <RadioGroupItem value={PermissionMode.PerInstance} id="per-instance" />
+                <Label htmlFor="per-instance" className="font-normal cursor-pointer">
+                  Per-Instance Access
+                </Label>
+              </div>
+            </RadioGroup>
+
+            {permissionMode === PermissionMode.AllowAll && (
+              <p className="text-sm text-muted-foreground">
+                This key will have access to all instances
+              </p>
+            )}
+
+            {permissionMode === PermissionMode.PerInstance && (
+              <div className="space-y-2 border rounded-lg p-4">
+                <Label className="text-sm font-semibold">Instance Permissions</Label>
+                {instances.length === 0 ? (
+                  <p className="text-sm text-muted-foreground">No instances available</p>
+                ) : (
+                  <div className="space-y-2">
+                    {instances.map((instance, index) => {
+                      const isChecked = !!instancePermissions[instance.id];
+                      return (
+                        <div
+                          key={`${instance.name}-${index}`}
+                          className="flex items-center space-x-2"
+                        >
+                          <Checkbox
+                            id={`instance-${instance.id}`}
+                            checked={isChecked}
+                            onCheckedChange={(checked) => {
+                              handleInstancePermissionChange(instance.id, checked as boolean);
+                            }}
+                            disabled={loading}
+                          />
+                          <Label
+                            htmlFor={`instance-${instance.id}`}
+                            className="font-normal cursor-pointer flex-1"
+                          >
+                            {instance.name}
+                          </Label>
+                        </div>
+                      );
+                    })}
+                  </div>
+                )}
+              </div>
+            )}
+          </div>
+
+          <div className="space-y-2">
+            <Label htmlFor="expires-at">Expiration Date (Optional)</Label>
+            <Input
+              id="expires-at"
+              type="date"
+              value={expiresAt}
+              onChange={(e) => setExpiresAt(e.target.value)}
+              disabled={loading}
+            />
+            {expiresAt && formatDisplayDate(expiresAt) && (
+              <p className="text-sm text-muted-foreground">
+                Expires on {formatDisplayDate(expiresAt)}
+              </p>
+            )}
+          </div>
+
+          <DialogFooter>
+            <Button
+              type="button"
+              variant="outline"
+              onClick={() => onOpenChange(false)}
+              disabled={loading}
+            >
+              Cancel
+            </Button>
+            <Button type="submit" disabled={loading}>
+              {loading && <Loader2 className="mr-2 h-4 w-4 animate-spin" />}
+              Create
+            </Button>
+          </DialogFooter>
+        </form>
+      </DialogContent>
+    </Dialog>
+  );
+}
+
+export default CreateApiKeyDialog;
--- a/webui/src/components/form/KeyValueInput.tsx
+++ b/webui/src/components/form/KeyValueInput.tsx
@@ -59,7 +59,7 @@ const KeyValueInput: React.FC<KeyValueInputProps> = ({
      // Reset to single empty row if value is explicitly undefined/null
      setPairs([{ key: '', value: '' }])
    }
-    // eslint-disable-next-line react-hooks/exhaustive-deps
+     
  }, [value])

  // Update parent component when pairs change
--- a/webui/src/components/instance/AutoRestartConfiguration.tsx
+++ b/webui/src/components/instance/AutoRestartConfiguration.tsx
@@ -5,7 +5,7 @@ import NumberInput from '@/components/form/NumberInput'

 interface AutoRestartConfigurationProps {
  formData: CreateInstanceOptions
-  onChange: (key: keyof CreateInstanceOptions, value: any) => void
+  onChange: <K extends keyof CreateInstanceOptions>(key: K, value: CreateInstanceOptions[K]) => void
 }

 const AutoRestartConfiguration: React.FC<AutoRestartConfigurationProps> = ({
--- a/webui/src/components/instance/BackendConfiguration.tsx
+++ b/webui/src/components/instance/BackendConfiguration.tsx
@@ -3,9 +3,11 @@ import type { CreateInstanceOptions } from '@/types/instance'
 import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils'
 import BackendFormField from '@/components/BackendFormField'

+type BackendFieldValue = string | number | boolean | string[] | Record<string, string> | undefined
+
 interface BackendConfigurationProps {
  formData: CreateInstanceOptions
-  onBackendFieldChange: (key: string, value: any) => void
+  onBackendFieldChange: (key: string, value: BackendFieldValue) => void
  showAdvanced?: boolean
 }

@@ -26,7 +28,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
        <BackendFormField
          key={fieldKey}
          fieldKey={fieldKey}
-          value={(formData.backend_options as any)?.[fieldKey]}
+          value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.[fieldKey]}
          onChange={onBackendFieldChange}
        />
      ))}
@@ -41,7 +43,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
              <BackendFormField
                key={fieldKey}
                fieldKey={fieldKey}
-                value={(formData.backend_options as any)?.[fieldKey]}
+                value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.[fieldKey]}
                onChange={onBackendFieldChange}
              />
            ))}
@@ -53,7 +55,7 @@ const BackendConfiguration: React.FC<BackendConfigurationProps> = ({
        <BackendFormField
          key="extra_args"
          fieldKey="extra_args"
-          value={(formData.backend_options as any)?.extra_args}
+          value={(formData.backend_options as Record<string, BackendFieldValue> | undefined)?.extra_args}
          onChange={onBackendFieldChange}
        />
      </div>
--- a/webui/src/components/settings/ApiKeysSection.tsx
+++ b/webui/src/components/settings/ApiKeysSection.tsx
@@ -0,0 +1,270 @@
+import { useEffect, useState, Fragment } from "react";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import { Alert, AlertDescription } from "@/components/ui/alert";
+import { Trash2, Copy, Check, X, ChevronDown, ChevronRight } from "lucide-react";
+import { apiKeysApi } from "@/lib/api";
+import { type ApiKey, type KeyPermissionResponse, PermissionMode } from "@/types/apiKey";
+import CreateApiKeyDialog from "@/components/apikeys/CreateApiKeyDialog";
+import { format, formatDistanceToNow } from "date-fns";
+
+function ApiKeysSection() {
+  const [keys, setKeys] = useState<ApiKey[]>([]);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  const [expandedRowId, setExpandedRowId] = useState<number | null>(null);
+  const [newKeyPlainText, setNewKeyPlainText] = useState<string | null>(null);
+  const [createDialogOpen, setCreateDialogOpen] = useState(false);
+  const [copiedKey, setCopiedKey] = useState(false);
+  const [permissions, setPermissions] = useState<Record<number, KeyPermissionResponse[]>>({});
+  const [loadingPermissions, setLoadingPermissions] = useState<Record<number, boolean>>({});
+
+  useEffect(() => {
+    void fetchKeys();
+  }, []);
+
+  const fetchKeys = async () => {
+    setLoading(true);
+    setError(null);
+    try {
+      const data = await apiKeysApi.list();
+      setKeys(data);
+    } catch (err) {
+      setError(err instanceof Error ? err.message : "Failed to load API keys");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  const fetchPermissions = async (keyId: number) => {
+    if (permissions[keyId]) return; // already cached for this key
+    // Updater functions merge into the latest state, so an overlapping fetch for another key is not overwritten by this closure's stale snapshot.
+    setLoadingPermissions((prev) => ({ ...prev, [keyId]: true }));
+    try {
+      const data = await apiKeysApi.getPermissions(keyId);
+      setPermissions((prev) => ({ ...prev, [keyId]: data }));
+    } catch (err) {
+      console.error("Failed to load permissions:", err);
+    } finally {
+      setLoadingPermissions((prev) => ({ ...prev, [keyId]: false }));
+    }
+  };
+
+  const handleKeyCreated = (plainTextKey: string) => {
+    setNewKeyPlainText(plainTextKey);
+    void fetchKeys();
+    setCreateDialogOpen(false);
+  };
+
+  const dismissSuccessBanner = () => {
+    setNewKeyPlainText(null);
+  };
+
+  const handleCopyKey = async () => {
+    if (newKeyPlainText) {
+      await navigator.clipboard.writeText(newKeyPlainText);
+      setCopiedKey(true);
+      setTimeout(() => setCopiedKey(false), 2000);
+    }
+  };
+
+  const handleDeleteKey = async (id: number, name: string) => {
+    if (!confirm(`Are you sure you want to delete the key '${name}'?\n\nThis action cannot be undone.`)) {
+      return;
+    }
+
+    try {
+      await apiKeysApi.delete(id);
+      void fetchKeys();
+    } catch (err) {
+      alert(err instanceof Error ? err.message : "Failed to delete API key");
+    }
+  };
+
+  const handleRowClick = (key: ApiKey) => {
+    if (expandedRowId === key.id) {
+      setExpandedRowId(null);
+    } else {
+      setExpandedRowId(key.id);
+      if (key.permission_mode === PermissionMode.PerInstance) {
+        void fetchPermissions(key.id);
+      }
+    }
+  };
+
+  const formatDate = (timestamp: number) => {
+    return format(new Date(timestamp * 1000), "MMM d, yyyy");
+  };
+
+  const formatLastUsed = (timestamp: number | null) => {
+    if (!timestamp) return "Never";
+    return formatDistanceToNow(new Date(timestamp * 1000), { addSuffix: true });
+  };
+
+  const isExpired = (expiresAt: number | null) => {
+    if (!expiresAt) return false;
+    return expiresAt * 1000 < Date.now();
+  };
+
+  return (
+    <div className="space-y-6">
+      <div className="flex items-center justify-between">
+        <h3 className="text-lg font-semibold">API Keys</h3>
+        <Button onClick={() => setCreateDialogOpen(true)}>Create API Key</Button>
+      </div>
+
+      {newKeyPlainText && (
+        <Alert className="bg-green-50 dark:bg-green-950 border-green-200 dark:border-green-900">
+          <AlertDescription className="space-y-3">
+            <div className="flex items-start justify-between">
+              <div>
+                <p className="font-semibold text-green-900 dark:text-green-100">API key created successfully</p>
+                <p className="text-sm text-green-800 dark:text-green-200 mt-1">
+                  Make sure to copy this key now. You won't be able to see it again!
+                </p>
+              </div>
+              <Button
+                variant="ghost"
+                size="icon"
+                onClick={dismissSuccessBanner}
+                className="h-6 w-6"
+              >
+                <X className="h-4 w-4" />
+              </Button>
+            </div>
+            <div className="flex items-center gap-2">
+              <code className="flex-1 p-3 bg-white dark:bg-gray-900 border border-green-300 dark:border-green-800 rounded font-mono text-sm break-all">
+                {newKeyPlainText}
+              </code>
+              <Button onClick={() => void handleCopyKey()} variant="outline" size="sm">
+                {copiedKey ? <Check className="h-4 w-4" /> : <Copy className="h-4 w-4" />}
+              </Button>
+            </div>
+          </AlertDescription>
+        </Alert>
+      )}
+
+      {error && (
+        <Alert variant="destructive">
+          <AlertDescription>{error}</AlertDescription>
+        </Alert>
+      )}
+
+      {loading ? (
+        <div className="space-y-2">
+          {[1, 2, 3].map((i) => (
+            <div key={i} className="h-16 bg-muted animate-pulse rounded" />
+          ))}
+        </div>
+      ) : keys.length === 0 ? (
+        <div className="text-center py-12 text-muted-foreground">
+          No API keys yet. Create your first key to get started.
+        </div>
+      ) : (
+        <div className="border rounded-lg overflow-hidden">
+          <table className="w-full">
+            <thead className="bg-muted">
+              <tr>
+                <th className="text-left p-3 font-semibold text-sm">Name</th>
+                <th className="text-left p-3 font-semibold text-sm">Permissions</th>
+                <th className="text-left p-3 font-semibold text-sm">Created</th>
+                <th className="text-left p-3 font-semibold text-sm">Expires</th>
+                <th className="text-left p-3 font-semibold text-sm">Last Accessed</th>
+                <th className="text-left p-3 font-semibold text-sm">Actions</th>
+              </tr>
+            </thead>
+            <tbody>
+              {keys.map((key) => (
+                <Fragment key={key.id}>
+                  <tr
+                    className="border-t hover:bg-muted/50 cursor-pointer"
+                    onClick={() => handleRowClick(key)}
+                  >
+                    <td className="p-3">
+                      <div className="flex items-center gap-2">
+                        {expandedRowId === key.id ? (
+                          <ChevronDown className="h-4 w-4 text-muted-foreground" />
+                        ) : (
+                          <ChevronRight className="h-4 w-4 text-muted-foreground" />
+                        )}
+                        {key.name}
+                      </div>
+                    </td>
+                    <td className="p-3">
+                      {key.permission_mode === PermissionMode.AllowAll ? (
+                        <Badge variant="default">Full Access</Badge>
+                      ) : (
+                        <Badge variant="secondary">Limited Access</Badge>
+                      )}
+                    </td>
+                    <td className="p-3 text-sm text-muted-foreground">{formatDate(key.created_at)}</td>
+                    <td className="p-3">
+                      {key.expires_at ? (
+                        isExpired(key.expires_at) ? (
+                          <Badge variant="destructive">Expired</Badge>
+                        ) : (
+                          <span className="text-sm text-muted-foreground">{formatDate(key.expires_at)}</span>
+                        )
+                      ) : (
+                        <span className="text-sm text-muted-foreground">Never</span>
+                      )}
+                    </td>
+                    <td className="p-3 text-sm text-muted-foreground">{formatLastUsed(key.last_used_at)}</td>
+                    <td className="p-3">
+                      <Button
+                        variant="ghost"
+                        size="icon"
+                        onClick={(e) => {
+                          e.stopPropagation();
+                          void handleDeleteKey(key.id, key.name);
+                        }}
+                        title="Delete key"
+                      >
+                        <Trash2 className="h-4 w-4 text-destructive" />
+                      </Button>
+                    </td>
+                  </tr>
+                  {expandedRowId === key.id && (
+                    <tr key={`${key.id}-expanded`} className="border-t bg-muted/30">
+                      <td colSpan={6} className="p-4">
+                        {key.permission_mode === PermissionMode.AllowAll ? (
+                          <p className="text-sm text-muted-foreground">
+                            This key has full access to all instances
+                          </p>
+                        ) : loadingPermissions[key.id] ? (
+                          <p className="text-sm text-muted-foreground">Loading permissions...</p>
+                        ) : permissions[key.id] ? (
+                          <div className="space-y-2">
+                            <p className="text-sm font-semibold">Allowed Instances:</p>
+                            <ul className="text-sm space-y-1">
+                              {permissions[key.id].map((perm) => (
+                                <li key={perm.instance_id} className="flex items-center gap-2">
+                                  <Check className="h-3 w-3 text-green-600" />
+                                  {perm.instance_name}
+                                </li>
+                              ))}
+                            </ul>
+                          </div>
+                        ) : (
+                          <p className="text-sm text-muted-foreground">No permissions data</p>
+                        )}
+                      </td>
+                    </tr>
+                  )}
+                </Fragment>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      )}
+
+      <CreateApiKeyDialog
+        open={createDialogOpen}
+        onOpenChange={setCreateDialogOpen}
+        onKeyCreated={handleKeyCreated}
+      />
+    </div>
+  );
+}
+
+export default ApiKeysSection;
--- a/webui/src/components/settings/SettingsDialog.tsx
+++ b/webui/src/components/settings/SettingsDialog.tsx
@@ -0,0 +1,25 @@
+import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog";
+import ApiKeysSection from "./ApiKeysSection";
+
+interface SettingsDialogProps {
+  open: boolean;
+  onOpenChange: (open: boolean) => void;
+}
+
+/**
+ * Modal "Settings" dialog.
+ *
+ * `open` and `onOpenChange` are handed straight to `Dialog`, so the caller
+ * owns the open/closed state. The content is a fixed title/description
+ * header followed by `ApiKeysSection`; the content area is capped at 90% of
+ * the viewport height and scrolls vertically when it overflows.
+ */
+function SettingsDialog({ open, onOpenChange }: SettingsDialogProps) {
+  return (
+    <Dialog open={open} onOpenChange={onOpenChange}>
+      <DialogContent className="sm:max-w-5xl max-h-[90vh] overflow-y-auto">
+        <DialogHeader>
+          <DialogTitle>Settings</DialogTitle>
+          <DialogDescription>
+            Manage your application settings and API keys.
+          </DialogDescription>
+        </DialogHeader>
+        <ApiKeysSection />
+      </DialogContent>
+    </Dialog>
+  );
+}
+
+export default SettingsDialog;
--- a/webui/src/components/ui/alert.tsx
+++ b/webui/src/components/ui/alert.tsx
@@ -0,0 +1,66 @@
+import * as React from "react"
+import { cva, type VariantProps } from "class-variance-authority"
+
+import { cn } from "@/lib/utils"
+
+// Class-name builder for `Alert`, created with `cva`: one shared base class
+// string plus a `variant` option ("default" | "destructive"). When no
+// variant is supplied, `defaultVariants` selects "default".
+const alertVariants = cva(
+  "relative w-full rounded-lg border px-4 py-3 text-sm grid has-[>svg]:grid-cols-[calc(var(--spacing)*4)_1fr] grid-cols-[0_1fr] has-[>svg]:gap-x-3 gap-y-0.5 items-start [&>svg]:size-4 [&>svg]:translate-y-0.5 [&>svg]:text-current",
+  {
+    variants: {
+      variant: {
+        default: "bg-card text-card-foreground",
+        destructive:
+          "text-destructive bg-card [&>svg]:text-current *:data-[slot=alert-description]:text-destructive/90",
+      },
+    },
+    defaultVariants: {
+      variant: "default",
+    },
+  }
+)
+
+/**
+ * Alert container: a `div` with `role="alert"` and `data-slot="alert"`.
+ *
+ * @param className Extra classes, passed to `cn` after the classes produced
+ *   by `alertVariants({ variant })`.
+ * @param variant Variant key forwarded to `alertVariants`.
+ * @param props Remaining `div` props. They are spread last, so a caller can
+ *   override `data-slot` or `role`.
+ */
+function Alert({
+  className,
+  variant,
+  ...props
+}: React.ComponentProps<"div"> & VariantProps<typeof alertVariants>) {
+  return (
+    <div
+      data-slot="alert"
+      role="alert"
+      className={cn(alertVariants({ variant }), className)}
+      {...props}
+    />
+  )
+}
+
+/**
+ * Title row of an alert: a `div` tagged `data-slot="alert-title"`, placed in
+ * the second grid column (`col-start-2`) and clamped to one line.
+ * `className` is passed to `cn` after the built-in classes; all other props
+ * are spread onto the `div`.
+ */
+function AlertTitle({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="alert-title"
+      className={cn(
+        "col-start-2 line-clamp-1 min-h-4 font-medium tracking-tight",
+        className
+      )}
+      {...props}
+    />
+  )
+}
+
+/**
+ * Body text of an alert: a `div` tagged `data-slot="alert-description"`.
+ * The destructive variant of `alertVariants` targets this slot name to
+ * recolor the description, so the attribute is part of the styling contract.
+ * `className` is passed to `cn` after the built-in classes; all other props
+ * are spread onto the `div`.
+ */
+function AlertDescription({
+  className,
+  ...props
+}: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="alert-description"
+      className={cn(
+        "text-muted-foreground col-start-2 grid justify-items-start gap-1 text-sm [&_p]:leading-relaxed",
+        className
+      )}
+      {...props}
+    />
+  )
+}
+
+export { Alert, AlertTitle, AlertDescription }
--- a/webui/src/components/ui/badge.tsx
+++ b/webui/src/components/ui/badge.tsx
@@ -5,7 +5,7 @@ import { cva, type VariantProps } from "class-variance-authority"
 import { cn } from "@/lib/utils"

 const badgeVariants = cva(
-  "inline-flex items-center justify-center rounded-md border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden",
+  "inline-flex items-center justify-center rounded-full border px-2 py-0.5 text-xs font-medium w-fit whitespace-nowrap shrink-0 [&>svg]:size-3 gap-1 [&>svg]:pointer-events-none focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive transition-[color,box-shadow] overflow-hidden",
  {
    variants: {
      variant: {
--- a/webui/src/components/ui/button.tsx
+++ b/webui/src/components/ui/button.tsx
@@ -9,14 +9,13 @@ const buttonVariants = cva(
  {
    variants: {
      variant: {
-        default:
-          "bg-primary text-primary-foreground shadow-xs hover:bg-primary/90",
+        default: "bg-primary text-primary-foreground hover:bg-primary/90",
        destructive:
-          "bg-destructive text-white shadow-xs hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
+          "bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:focus-visible:ring-destructive/40 dark:bg-destructive/60",
        outline:
          "border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50",
        secondary:
-          "bg-secondary text-secondary-foreground shadow-xs hover:bg-secondary/80",
+          "bg-secondary text-secondary-foreground hover:bg-secondary/80",
        ghost:
          "hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50",
        link: "text-primary underline-offset-4 hover:underline",
@@ -26,6 +25,8 @@ const buttonVariants = cva(
        sm: "h-8 rounded-md gap-1.5 px-3 has-[>svg]:px-2.5",
        lg: "h-10 rounded-md px-6 has-[>svg]:px-4",
        icon: "size-9",
+        "icon-sm": "size-8",
+        "icon-lg": "size-10",
      },
    },
    defaultVariants: {
--- a/webui/src/components/ui/card.tsx
+++ b/webui/src/components/ui/card.tsx
@@ -20,7 +20,7 @@ function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
    <div
      data-slot="card-header"
      className={cn(
-        "@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-1.5 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
+        "@container/card-header grid auto-rows-min grid-rows-[auto_auto] items-start gap-2 px-6 has-data-[slot=card-action]:grid-cols-[1fr_auto] [.border-b]:pb-6",
        className
      )}
      {...props}
--- a/webui/src/components/ui/checkbox.tsx
+++ b/webui/src/components/ui/checkbox.tsx
@@ -19,7 +19,7 @@ function Checkbox({
    >
      <CheckboxPrimitive.Indicator
        data-slot="checkbox-indicator"
-        className="flex items-center justify-center text-current transition-none"
+        className="grid place-content-center text-current transition-none"
      >
        <CheckIcon className="size-3.5" />
      </CheckboxPrimitive.Indicator>
--- a/webui/src/components/ui/input.tsx
+++ b/webui/src/components/ui/input.tsx
@@ -8,7 +8,7 @@ function Input({ className, type, ...props }: React.ComponentProps<"input">) {
      type={type}
      data-slot="input"
      className={cn(
-        "file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
+        "file:text-foreground placeholder:text-muted-foreground selection:bg-primary selection:text-primary-foreground dark:bg-input/30 border-input h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-base shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50 md:text-sm",
        "focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]",
        "aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive",
        className
--- a/webui/src/components/ui/radio-group.tsx
+++ b/webui/src/components/ui/radio-group.tsx
@@ -0,0 +1,43 @@
+import * as React from "react"
+import * as RadioGroupPrimitive from "@radix-ui/react-radio-group"
+import { CircleIcon } from "lucide-react"
+
+import { cn } from "@/lib/utils"
+
+/**
+ * Styled wrapper around `RadioGroupPrimitive.Root`.
+ * Adds `data-slot="radio-group"` and a default `grid gap-3` layout;
+ * `className` is passed to `cn` after those defaults and every other prop is
+ * forwarded unchanged to the primitive.
+ */
+function RadioGroup({
+  className,
+  ...props
+}: React.ComponentProps<typeof RadioGroupPrimitive.Root>) {
+  return (
+    <RadioGroupPrimitive.Root
+      data-slot="radio-group"
+      className={cn("grid gap-3", className)}
+      {...props}
+    />
+  )
+}
+
+/**
+ * Styled wrapper around `RadioGroupPrimitive.Item` (one radio button).
+ * Renders a 16px round bordered control containing a
+ * `RadioGroupPrimitive.Indicator` with a centered, filled `CircleIcon`.
+ * NOTE(review): the indicator is presumably only shown while the item is
+ * checked; that is Radix behaviour and is not visible in this file.
+ * `className` is passed to `cn` after the built-in classes; all other props
+ * are forwarded to the primitive item.
+ */
+function RadioGroupItem({
+  className,
+  ...props
+}: React.ComponentProps<typeof RadioGroupPrimitive.Item>) {
+  return (
+    <RadioGroupPrimitive.Item
+      data-slot="radio-group-item"
+      className={cn(
+        "border-input text-primary focus-visible:border-ring focus-visible:ring-ring/50 aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive dark:bg-input/30 aspect-square size-4 shrink-0 rounded-full border shadow-xs transition-[color,box-shadow] outline-none focus-visible:ring-[3px] disabled:cursor-not-allowed disabled:opacity-50",
+        className
+      )}
+      {...props}
+    >
+      <RadioGroupPrimitive.Indicator
+        data-slot="radio-group-indicator"
+        className="relative flex items-center justify-center"
+      >
+        <CircleIcon className="fill-primary absolute top-1/2 left-1/2 size-2 -translate-x-1/2 -translate-y-1/2" />
+      </RadioGroupPrimitive.Indicator>
+    </RadioGroupPrimitive.Item>
+  )
+}
+
+export { RadioGroup, RadioGroupItem }
--- a/webui/src/components/ui/table.tsx
+++ b/webui/src/components/ui/table.tsx
@@ -0,0 +1,117 @@
+import * as React from "react"
+
+import { cn } from "@/lib/utils"
+
+// Styled `<table>` wrapped in a full-width `overflow-auto` div so wide
+// tables scroll inside their container. Note that `ref`, `className` and all
+// other props go to the inner `<table>`, not to the wrapper div.
+const Table = React.forwardRef<
+  HTMLTableElement,
+  React.HTMLAttributes<HTMLTableElement>
+>(({ className, ...props }, ref) => (
+  <div className="relative w-full overflow-auto">
+    <table
+      ref={ref}
+      className={cn("w-full caption-bottom text-sm", className)}
+      {...props}
+    />
+  </div>
+))
+Table.displayName = "Table"
+
+// Styled `<thead>`: gives every descendant `tr` a bottom border. The ref and
+// all props are forwarded to the `<thead>` element.
+const TableHeader = React.forwardRef<
+  HTMLTableSectionElement,
+  React.HTMLAttributes<HTMLTableSectionElement>
+>(({ className, ...props }, ref) => (
+  <thead ref={ref} className={cn("[&_tr]:border-b", className)} {...props} />
+))
+TableHeader.displayName = "TableHeader"
+
+// Styled `<tbody>`: drops the border on the last row. The ref and all props
+// are forwarded to the `<tbody>` element.
+const TableBody = React.forwardRef<
+  HTMLTableSectionElement,
+  React.HTMLAttributes<HTMLTableSectionElement>
+>(({ className, ...props }, ref) => (
+  <tbody
+    ref={ref}
+    className={cn("[&_tr:last-child]:border-0", className)}
+    {...props}
+  />
+))
+TableBody.displayName = "TableBody"
+
+// Styled `<tfoot>`: top border, muted background and medium font weight.
+// The ref and all props are forwarded to the `<tfoot>` element.
+const TableFooter = React.forwardRef<
+  HTMLTableSectionElement,
+  React.HTMLAttributes<HTMLTableSectionElement>
+>(({ className, ...props }, ref) => (
+  <tfoot
+    ref={ref}
+    className={cn(
+      "border-t bg-muted/50 font-medium [&>tr]:last:border-b-0",
+      className
+    )}
+    {...props}
+  />
+))
+TableFooter.displayName = "TableFooter"
+
+// Styled `<tr>`: bottom border, hover highlight, and a distinct background
+// when the element carries `data-state="selected"`. The ref and all props are
+// forwarded to the `<tr>` element.
+const TableRow = React.forwardRef<
+  HTMLTableRowElement,
+  React.HTMLAttributes<HTMLTableRowElement>
+>(({ className, ...props }, ref) => (
+  <tr
+    ref={ref}
+    className={cn(
+      "border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",
+      className
+    )}
+    {...props}
+  />
+))
+TableRow.displayName = "TableRow"
+
+// Styled header cell (`<th>`): left-aligned, muted text, fixed height. The
+// right padding is removed when the cell contains a `role="checkbox"`
+// element. The ref and all props are forwarded to the `<th>` element.
+const TableHead = React.forwardRef<
+  HTMLTableCellElement,
+  React.ThHTMLAttributes<HTMLTableCellElement>
+>(({ className, ...props }, ref) => (
+  <th
+    ref={ref}
+    className={cn(
+      "h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0",
+      className
+    )}
+    {...props}
+  />
+))
+TableHead.displayName = "TableHead"
+
+// Styled data cell (`<td>`): uniform padding, vertically centered. The right
+// padding is removed when the cell contains a `role="checkbox"` element. The
+// ref and all props are forwarded to the `<td>` element.
+const TableCell = React.forwardRef<
+  HTMLTableCellElement,
+  React.TdHTMLAttributes<HTMLTableCellElement>
+>(({ className, ...props }, ref) => (
+  <td
+    ref={ref}
+    className={cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)}
+    {...props}
+  />
+))
+TableCell.displayName = "TableCell"
+
+// Styled `<caption>`: small muted text with a top margin. `Table` applies
+// `caption-bottom`, so inside a `Table` the caption is placed below the rows.
+// The ref and all props are forwarded to the `<caption>` element.
+const TableCaption = React.forwardRef<
+  HTMLTableCaptionElement,
+  React.HTMLAttributes<HTMLTableCaptionElement>
+>(({ className, ...props }, ref) => (
+  <caption
+    ref={ref}
+    className={cn("mt-4 text-sm text-muted-foreground", className)}
+    {...props}
+  />
+))
+TableCaption.displayName = "TableCaption"
+
+export {
+  Table,
+  TableHeader,
+  TableBody,
+  TableFooter,
+  TableHead,
+  TableRow,
+  TableCell,
+  TableCaption,
+}
--- a/webui/src/contexts/tests/InstancesContext.test.tsx
+++ b/webui/src/contexts/tests/InstancesContext.test.tsx
@@ -3,8 +3,7 @@ import { render, screen, waitFor } from "@testing-library/react";
 import type { ReactNode } from "react";
 import { InstancesProvider, useInstances } from "@/contexts/InstancesContext";
 import { instancesApi } from "@/lib/api";
-import type { Instance } from "@/types/instance";
-import { BackendType } from "@/types/instance";
+import { BackendType, type Instance } from "@/types/instance";
 import { AuthProvider } from "../AuthContext";

 // Mock the API module
@@ -71,37 +70,37 @@ function TestComponent() {

      {/* Action buttons for testing with specific instances */}
      <button
-        onClick={() => createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
+        onClick={() => void createInstance("new-instance", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } })}
        data-testid="create-instance"
      >
        Create Instance
      </button>
      <button
-        onClick={() => updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
+        onClick={() => void updateInstance("instance1", { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } })}
        data-testid="update-instance"
      >
        Update Instance
      </button>
      <button
-        onClick={() => startInstance("instance2")}
+        onClick={() => void startInstance("instance2")}
        data-testid="start-instance"
      >
        Start Instance2
      </button>
      <button
-        onClick={() => stopInstance("instance1")}
+        onClick={() => void stopInstance("instance1")}
        data-testid="stop-instance"
      >
        Stop Instance1
      </button>
      <button
-        onClick={() => restartInstance("instance1")}
+        onClick={() => void restartInstance("instance1")}
        data-testid="restart-instance"
      >
        Restart Instance1
      </button>
      <button
-        onClick={() => deleteInstance("instance2")}
+        onClick={() => void deleteInstance("instance2")}
        data-testid="delete-instance"
      >
        Delete Instance2
@@ -123,8 +122,8 @@ function renderWithProvider(children: ReactNode) {

 describe("InstancesContext", () => {
  const mockInstances: Instance[] = [
-    { name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
-    { name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
+    { id: 1, name: "instance1", status: "running", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model1.gguf" } } },
+    { id: 2, name: "instance2", status: "stopped", options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "model2.gguf" } } },
  ];

  beforeEach(() => {
@@ -181,6 +180,7 @@ describe("InstancesContext", () => {
  describe("Create Instance", () => {
    it("creates instance and adds it to state", async () => {
      const newInstance: Instance = {
+        id: 3,
        name: "new-instance",
        status: "stopped",
        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "test.gguf" } },
@@ -238,6 +238,7 @@ describe("InstancesContext", () => {
  describe("Update Instance", () => {
    it("updates instance and maintains it in state", async () => {
      const updatedInstance: Instance = {
+        id: 1,
        name: "instance1",
        status: "running",
        options: { backend_type: BackendType.LLAMA_CPP, backend_options: { model: "updated.gguf" } },
@@ -408,6 +409,7 @@ describe("InstancesContext", () => {
    it("maintains consistent state during multiple operations", async () => {
      // Test that operations don't interfere with each other
      const newInstance: Instance = {
+        id: 3,
        name: "new-instance",
        status: "stopped",
        options: {},
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -1,5 +1,6 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
 import type { AppConfig } from "@/types/config";
+import type { ApiKey, CreateKeyRequest, CreateKeyResponse, KeyPermissionResponse } from "@/types/apiKey";
 import { handleApiError } from "./errorUtils";

 // Adding baseURI as a prefix to support being served behind a subpath
@@ -178,3 +179,79 @@ export const instancesApi = {
  // GET /instances/{name}/proxy/health
  getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${encodeURIComponent(name)}/proxy/health`),
 };
+
+// API Keys API functions
+// Thin wrappers over `apiCall` for the `/auth/keys` endpoints. Key ids are
+// typed as `number` and are interpolated into the path without URL-encoding.
+// NOTE(review): error handling and response parsing live in `apiCall`, which
+// is defined outside this hunk.
+export const apiKeysApi = {
+  // GET /auth/keys
+  // Lists API keys.
+  list: () => apiCall<ApiKey[]>("/auth/keys"),
+
+  // GET /auth/keys/{id}
+  // Fetches a single API key by id.
+  get: (id: number) => apiCall<ApiKey>(`/auth/keys/${id}`),
+
+  // POST /auth/keys
+  // Creates a key; the request object is sent as a JSON body and the response
+  // is typed as `CreateKeyResponse` (an `ApiKey` plus a `key` string).
+  create: (request: CreateKeyRequest) =>
+    apiCall<CreateKeyResponse>("/auth/keys", {
+      method: "POST",
+      body: JSON.stringify(request),
+    }),
+
+  // DELETE /auth/keys/{id}
+  // Deletes a key by id; no response payload is expected (`void`).
+  delete: (id: number) =>
+    apiCall<void>(`/auth/keys/${id}`, {
+      method: "DELETE",
+    }),
+
+  // GET /auth/keys/{id}/permissions
+  // Lists the instance permissions attached to a key.
+  getPermissions: (id: number) =>
+    apiCall<KeyPermissionResponse[]>(`/auth/keys/${id}/permissions`),
+};
+
+// Llama.cpp model management types
+// One entry of the model list returned by `llamaCppApi.getModels`.
+// NOTE(review): the field set presumably mirrors the llama.cpp server's
+// models response — confirm against the backend before relying on
+// individual fields.
+export interface Model {
+  id: string;
+  object: string;
+  owned_by: string;
+  created: number;
+  in_cache: boolean;
+  path: string;
+  status: {
+    value: string; // "loaded" | "loading" | "unloaded"
+    args: string[];
+  };
+}
+
+// Response envelope for GET /llama-cpp/{name}/models. Callers of
+// `llamaCppApi.getModels` only ever see the unwrapped `data` array.
+export interface ModelsListResponse {
+  object: string;
+  data: Model[];
+}
+
+// Llama.cpp model management API functions
+// Instance and model names are URL-encoded before being placed in the path.
+export const llamaCppApi = {
+  // GET /llama-cpp/{name}/models
+  // Resolves to the `data` array of the response envelope, not the envelope.
+  getModels: async (instanceName: string): Promise<Model[]> => {
+    const response = await apiCall<ModelsListResponse>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models`
+    );
+    return response.data;
+  },
+
+  // POST /llama-cpp/{name}/models/{model}/load
+  // The model name is sent both in the path and in the JSON body (`model`).
+  loadModel: (instanceName: string, modelName: string) =>
+    apiCall<{ success: boolean }>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models/${encodeURIComponent(modelName)}/load`,
+      {
+        method: "POST",
+        body: JSON.stringify({ model: modelName }),
+      }
+    ),
+
+  // POST /llama-cpp/{name}/models/{model}/unload
+  // Same request shape as `loadModel`, against the `/unload` path.
+  unloadModel: (instanceName: string, modelName: string) =>
+    apiCall<{ success: boolean }>(
+      `/llama-cpp/${encodeURIComponent(instanceName)}/models/${encodeURIComponent(modelName)}/unload`,
+      {
+        method: "POST",
+        body: JSON.stringify({ model: modelName }),
+      }
+    ),
+};
--- a/webui/src/lib/healthService.ts
+++ b/webui/src/lib/healthService.ts
@@ -156,11 +156,14 @@ class HealthService {
      this.callbacks.set(instanceName, new Set())
    }

-    this.callbacks.get(instanceName)!.add(callback)
+    const callbacks = this.callbacks.get(instanceName)
+    if (callbacks) {
+      callbacks.add(callback)

-    // Start health checking if this is the first subscriber
-    if (this.callbacks.get(instanceName)!.size === 1) {
-      this.startHealthCheck(instanceName)
+      // Start health checking if this is the first subscriber
+      if (callbacks.size === 1) {
+        this.startHealthCheck(instanceName)
+      }
    }

    // Return unsubscribe function
@@ -214,22 +217,24 @@ class HealthService {
    }

    // Start new interval with appropriate timing
-    const interval = setInterval(async () => {
-      try {
-        const health = await this.performHealthCheck(instanceName)
-        this.notifyCallbacks(instanceName, health)
+    const interval = setInterval(() => {
+      void (async () => {
+        try {
+          const health = await this.performHealthCheck(instanceName)
+          this.notifyCallbacks(instanceName, health)

-        // Check if state changed and adjust interval
-        const previousState = this.lastHealthState.get(instanceName)
-        this.lastHealthState.set(instanceName, health.state)
+          // Check if state changed and adjust interval
+          const previousState = this.lastHealthState.get(instanceName)
+          this.lastHealthState.set(instanceName, health.state)

-        if (previousState !== health.state) {
-          this.adjustPollingInterval(instanceName, health.state)
+          if (previousState !== health.state) {
+            this.adjustPollingInterval(instanceName, health.state)
+          }
+        } catch (error) {
+          console.error(`Health check failed for ${instanceName}:`, error)
+          // Continue polling even on error
        }
-      } catch (error) {
-        console.error(`Health check failed for ${instanceName}:`, error)
-        // Continue polling even on error
-      }
+      })()
    }, pollInterval)

    this.intervals.set(instanceName, interval)
--- a/webui/src/main.tsx
+++ b/webui/src/main.tsx
@@ -6,7 +6,10 @@ import './index.css'
 import { AuthProvider } from './contexts/AuthContext'
 import { ConfigProvider } from './contexts/ConfigContext'

-ReactDOM.createRoot(document.getElementById('root')!).render(
+const rootElement = document.getElementById('root')
+if (!rootElement) throw new Error('Failed to find the root element')
+
+ReactDOM.createRoot(rootElement).render(
  <React.StrictMode>
    <AuthProvider>
      <ConfigProvider>
--- a/webui/src/test/setup.ts
+++ b/webui/src/test/setup.ts
@@ -1,10 +1,44 @@
 import '@testing-library/jest-dom'
-import { afterEach, vi } from 'vitest'
+import { afterEach, beforeEach } from 'vitest'

-// Mock fetch globally since your app uses fetch
-global.fetch = vi.fn()
+// Create a working localStorage implementation for tests
+// This ensures localStorage works in both CLI and VSCode test runner
+// In-memory `Storage` implementation backed by a `Map`; nothing is persisted.
+class LocalStorageMock implements Storage {
+  private store: Map<string, string> = new Map()
+
+  // Number of stored entries.
+  get length(): number {
+    return this.store.size
+  }
+
+  // Removes every entry.
+  clear(): void {
+    this.store.clear()
+  }
+
+  // Returns the stored value, or null when the key is absent.
+  getItem(key: string): string | null {
+    return this.store.get(key) ?? null
+  }
+
+  // Returns the key at position `index` in the Map's key order (insertion
+  // order), or null when `index` is out of range.
+  key(index: number): string | null {
+    return Array.from(this.store.keys())[index] ?? null
+  }
+
+  // Deletes the entry for `key`; a missing key is a no-op.
+  removeItem(key: string): void {
+    this.store.delete(key)
+  }
+
+  // Stores `value` under `key`, replacing any existing value.
+  setItem(key: string, value: string): void {
+    this.store.set(key, value)
+  }
+}
+
+// Replace global localStorage
+global.localStorage = new LocalStorageMock()
+
+// Clean up before each test
+beforeEach(() => {
+  localStorage.clear()
+})

-// Clean up after each test
 afterEach(() => {
-  vi.clearAllMocks()
+  localStorage.clear()
 })
--- a/webui/src/types/apiKey.ts
+++ b/webui/src/types/apiKey.ts
@@ -0,0 +1,31 @@
+// Access scope of an API key, as a string enum.
+// `AllowAll` is what the settings UI reports as "full access to all
+// instances"; `PerInstance` keys are shown with an explicit list of allowed
+// instances instead.
+export enum PermissionMode {
+  AllowAll = "allow_all",
+  PerInstance = "per_instance"
+}
+
+// An API key record, as returned by the `/auth/keys` endpoints in
+// `@/lib/api`.
+// NOTE(review): the `*_at` fields are plain numbers; the unit (presumably a
+// Unix timestamp) is not visible here — confirm against the backend.
+// `expires_at` and `last_used_at` may be null.
+export interface ApiKey {
+  id: number
+  name: string
+  user_id: string
+  permission_mode: PermissionMode
+  expires_at: number | null
+  created_at: number
+  updated_at: number
+  last_used_at: number | null
+}
+
+// JSON body sent by `apiKeysApi.create` (POST /auth/keys).
+// `expires_at` is optional; `instance_ids` is always required by this type.
+// NOTE(review): `instance_ids` is presumably only meaningful when
+// `permission_mode` is `PerInstance` — confirm against the backend.
+export interface CreateKeyRequest {
+  name: string
+  permission_mode: PermissionMode
+  expires_at?: number
+  instance_ids: number[]
+}
+
+// Response of `apiKeysApi.create`: the created `ApiKey` plus a `key` string.
+// NOTE(review): `key` is presumably the secret key value itself, which the
+// other key endpoints do not return — confirm against the backend.
+export interface CreateKeyResponse extends ApiKey {
+  key: string
+}
+
+// One element of the array returned by `apiKeysApi.getPermissions`
+// (GET /auth/keys/{id}/permissions): an instance the key is allowed to use,
+// identified by numeric id and display name.
+export interface KeyPermissionResponse {
+  instance_id: number
+  instance_name: string
+}
--- a/webui/src/types/config.ts
+++ b/webui/src/types/config.ts
@@ -30,7 +30,6 @@ export interface ServerConfig {

 export interface InstancesConfig {
  port_range: [number, number]
-  configs_dir: string
  logs_dir: string
  auto_create_dirs: boolean
  max_instances: number
@@ -53,7 +52,6 @@ export interface DatabaseConfig {

 export interface AuthConfig {
  require_inference_auth: boolean
-  inference_keys: string[] // Will be empty in sanitized response
  require_management_auth: boolean
  management_keys: string[] // Will be empty in sanitized response
 }
--- a/webui/src/types/instance.ts
+++ b/webui/src/types/instance.ts
@@ -24,6 +24,7 @@ export interface HealthStatus {
 }

 export interface Instance {
+  id: number;
  name: string;
  status: InstanceStatus;
  options?: CreateInstanceOptions;
--- a/webui/tsconfig.node.json
+++ b/webui/tsconfig.node.json
@@ -4,7 +4,8 @@
    "skipLibCheck": true,
    "module": "ESNext",
    "moduleResolution": "bundler",
-    "allowSyntheticDefaultImports": true
+    "allowSyntheticDefaultImports": true,
+    "types": ["node"]
  },
  "include": ["vite.config.ts"]
 }
Author	SHA1	Message	Date
Matúš Námešný	9cea295305	Merge pull request #118 from lordmathis/chore/remove-deprecated chore: Remove deprecated code	2025-12-22 21:53:19 +01:00
LordMathis	1f78d3f780	Remove deprecated code	2025-12-22 21:49:37 +01:00
Matúš Námešný	e7baeb9ece	Merge pull request #117 from lordmathis/docs/llama-router docs: Document llama.cpp router mode	2025-12-22 21:23:58 +01:00
LordMathis	3cec850e74	Document llama.cpp router mode	2025-12-22 21:20:42 +01:00
Matúš Námešný	67098d7801	Merge pull request #113 from lordmathis/feat/llama-cpp-router feat: Integrate native llama.cpp router	2025-12-22 20:55:46 +01:00
LordMathis	3c95e76137	Poll models during loading	2025-12-22 20:38:58 +01:00
LordMathis	761cdfe7d8	Improve InstanceCard to display models for llama.cpp instances	2025-12-22 20:38:58 +01:00
LordMathis	99eba3daa9	Update test client	2025-12-22 20:38:58 +01:00
LordMathis	d9d7b6d814	Allow empty backend options	2025-12-22 20:38:58 +01:00
LordMathis	5062c882de	Update dependencies	2025-12-22 20:38:58 +01:00
LordMathis	ee122d669c	Support llama.cpp router mode for openai endpoints	2025-12-22 20:38:58 +01:00
LordMathis	41d904475c	Remove model registry	2025-12-22 20:38:58 +01:00
LordMathis	7f5292412c	Implement model management for llama.cpp instances	2025-12-22 20:38:58 +01:00
Matúš Námešný	ec84a7d331	Merge pull request #112 from lordmathis/fix/auth-middleware fix: Remove duplicate auth middleware init	2025-12-17 19:09:12 +01:00
LordMathis	b45219a01e	Reuse handler auth middleware	2025-12-17 19:06:04 +01:00
Matúš Námešný	463bb561e1	Merge pull request #111 from lordmathis/fix/cgo-enabled-build fix: Add multiplatform CGO_ENABLED=1 build	2025-12-17 14:38:27 +01:00
Matúš Námešný	ebdb9143c0	Remove separate windows build step	2025-12-17 14:32:10 +01:00
Matúš Námešný	4269d04381	Update release.yaml	2025-12-17 14:25:50 +01:00
Matúš Námešný	c734329a62	Merge pull request #109 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-62cd70be13 chore: bump the npm-production group in /webui with 2 updates	2025-12-15 23:50:27 +01:00
Matúš Námešný	15fcf7c377	Merge pull request #110 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-8fdc2c429d chore: bump the npm-development group in /webui with 3 updates	2025-12-15 23:48:42 +01:00
dependabot[bot]	795f530956	chore: bump the npm-development group in /webui with 3 updates Bumps the npm-development group in /webui with 3 updates: [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node), [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) and [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite). Updates `@types/node` from 24.10.1 to 25.0.2 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node) Updates `typescript-eslint` from 8.49.0 to 8.50.0 - [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases) - [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md) - [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.50.0/packages/typescript-eslint) Updates `vite` from 7.2.2 to 7.3.0 - [Release notes](https://github.com/vitejs/vite/releases) - [Changelog](https://github.com/vitejs/vite/blob/v7.3.0/packages/vite/CHANGELOG.md) - [Commits](https://github.com/vitejs/vite/commits/v7.3.0/packages/vite) --- updated-dependencies: - dependency-name: "@types/node" dependency-version: 25.0.2 dependency-type: direct:development update-type: version-update:semver-major dependency-group: npm-development - dependency-name: typescript-eslint dependency-version: 8.50.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: npm-development - dependency-name: vite dependency-version: 7.3.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: npm-development ... Signed-off-by: dependabot[bot] <support@github.com>	2025-12-15 22:06:41 +00:00
dependabot[bot]	4507358310	chore: bump the npm-production group in /webui with 2 updates Bumps the npm-production group in /webui with 2 updates: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) and [zod](https://github.com/colinhacks/zod). Updates `lucide-react` from 0.560.0 to 0.561.0 - [Release notes](https://github.com/lucide-icons/lucide/releases) - [Commits](https://github.com/lucide-icons/lucide/commits/0.561.0/packages/lucide-react) Updates `zod` from 4.1.12 to 4.2.0 - [Release notes](https://github.com/colinhacks/zod/releases) - [Commits](https://github.com/colinhacks/zod/compare/v4.1.12...v4.2.0) --- updated-dependencies: - dependency-name: lucide-react dependency-version: 0.561.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: npm-production - dependency-name: zod dependency-version: 4.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: npm-production ... Signed-off-by: dependabot[bot] <support@github.com>	2025-12-15 22:06:01 +00:00
Matúš Námešný	f3c02b4939	Merge pull request #108 from lordmathis/refactor/config refactor: Split large config file	2025-12-13 14:27:25 +01:00
LordMathis	0a85409deb	Split large config file	2025-12-13 13:50:59 +01:00
Matúš Námešný	22fd295250	Merge pull request #107 from lordmathis/feat/logrotate feat: Add log rotation for instance logs	2025-12-13 13:30:20 +01:00
LordMathis	c0cecdd377	Clean up logger	2025-12-13 13:18:30 +01:00
LordMathis	4d57b37a5d	Remove verbose _mb suffix	2025-12-13 13:06:22 +01:00
LordMathis	c13b71d07f	Document new log rotation config options	2025-12-13 13:02:22 +01:00
LordMathis	406a711682	Move LogRotationConfig to logger package	2025-12-13 12:48:50 +01:00
LordMathis	0b3d654945	Simplify logging config	2025-12-13 12:48:50 +01:00
LordMathis	e2a49402d6	Implement instance log rotation	2025-12-13 12:48:50 +01:00
Matúš Námešný	48836c9c12	Merge pull request #105 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-3fe24f4500 chore: bump lucide-react from 0.555.0 to 0.556.0 in /webui in the npm-production group	2025-12-12 10:43:12 +01:00
Matúš Námešný	4200b8eed9	Merge pull request #104 from lordmathis/dependabot/go_modules/go-dependencies-f180a085e8 chore: bump golang.org/x/crypto from 0.45.0 to 0.46.0 in the go-dependencies group	2025-12-11 18:51:40 +01:00
dependabot[bot]	9a7ae87df8	chore: bump lucide-react in /webui in the npm-production group Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react). Updates `lucide-react` from 0.555.0 to 0.556.0 - [Release notes](https://github.com/lucide-icons/lucide/releases) - [Commits](https://github.com/lucide-icons/lucide/commits/0.556.0/packages/lucide-react) --- updated-dependencies: - dependency-name: lucide-react dependency-version: 0.556.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: npm-production ... Signed-off-by: dependabot[bot] <support@github.com>	2025-12-11 17:17:18 +00:00
Matúš Námešný	e54c495528	Merge pull request #106 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-808d3127cd chore: bump the npm-development group in /webui with 2 updates	2025-12-11 18:12:15 +01:00
dependabot[bot]	83006968ca	chore: bump the npm-development group in /webui with 2 updates Bumps the npm-development group in /webui with 2 updates: [jsdom](https://github.com/jsdom/jsdom) and [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint). Updates `jsdom` from 27.2.0 to 27.3.0 - [Release notes](https://github.com/jsdom/jsdom/releases) - [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md) - [Commits](https://github.com/jsdom/jsdom/compare/27.2.0...27.3.0) Updates `typescript-eslint` from 8.48.0 to 8.49.0 - [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases) - [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md) - [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.49.0/packages/typescript-eslint) --- updated-dependencies: - dependency-name: jsdom dependency-version: 27.3.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: npm-development - dependency-name: typescript-eslint dependency-version: 8.49.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: npm-development ... Signed-off-by: dependabot[bot] <support@github.com>	2025-12-08 21:31:36 +00:00
dependabot[bot]	c8d9c6907c	chore: bump golang.org/x/crypto in the go-dependencies group Bumps the go-dependencies group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto). Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0 - [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-version: 0.46.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: go-dependencies ... Signed-off-by: dependabot[bot] <support@github.com>	2025-12-08 21:30:38 +00:00
Matúš Námešný	c776785f30	Merge pull request #103 from lordmathis/docs/api-keys docs: Improve API key management documentation	2025-12-08 19:23:39 +01:00
LordMathis	1cfbd42eda	Update swagger docs	2025-12-08 19:16:02 +01:00
LordMathis	8fee27054d	Update docs for API key management	2025-12-08 19:15:42 +01:00
Matúš Námešný	fd33837026	Merge pull request #101 from lordmathis/feat/api-key-mgmt feat: Add inference api key management	2025-12-08 18:49:49 +01:00
LordMathis	3c4ebf7403	Addsimple python LLM test client	2025-12-08 18:44:28 +01:00
LordMathis	b7a0f7e3d8	Unhide migrated directory	2025-12-08 18:08:22 +01:00
LordMathis	d5b68a900f	Add .migrated directory for migrated json files	2025-12-08 18:06:15 +01:00
LordMathis	00cd8c8877	Update shadcn componments	2025-12-07 18:50:52 +01:00
LordMathis	4b1b12a7a8	Fix lint errors	2025-12-07 18:28:01 +01:00
LordMathis	0ce9016488	Fix some lint issues	2025-12-07 17:40:09 +01:00
LordMathis	1acbcafe1c	Add DialogDescription to SettingsDialog	2025-12-07 17:26:38 +01:00
LordMathis	00a502a268	Implement LocalStorageMock for testing	2025-12-07 17:16:40 +01:00
LordMathis	54fe0f7421	Fix eslint issues	2025-12-07 16:16:13 +01:00
LordMathis	cd1bd64889	Refactor CreateApiKeyDialog to use instance IDs	2025-12-06 22:20:39 +01:00
LordMathis	0fee7abc7c	Simplify create key request format	2025-12-06 22:20:05 +01:00
LordMathis	02193bd309	Add instance ID to JSON output	2025-12-06 21:28:17 +01:00
LordMathis	0217f7cc4e	Fix instance creation to retrieve and set the auto-generated ID	2025-12-06 20:58:17 +01:00
LordMathis	fa311c46ac	Improve server shutdown process	2025-12-06 19:52:40 +01:00
LordMathis	99927160c2	Remove 'can_infer' field	2025-12-06 18:07:01 +01:00
LordMathis	c37c1b8161	Remove 'enabled' field from API key model and related database operations	2025-12-06 17:59:11 +01:00
LordMathis	80d5d44a0b	Add inference api key frontend integration	2025-12-04 23:26:32 +01:00
LordMathis	2d0acc60f2	Fix double dash in generated keys	2025-12-04 23:25:51 +01:00
LordMathis	a1b6f0c1b0	Remove JSON file archiving from migration process	2025-12-04 23:02:06 +01:00
LordMathis	991ce3c678	Remove unnecessary canviewlogs permission	2025-12-04 22:18:29 +01:00
LordMathis	d9c666a245	Update deprication warnings	2025-12-04 21:23:22 +01:00