31 Commits

Author SHA1 Message Date
ec84a7d331 Merge pull request #112 from lordmathis/fix/auth-middleware
fix: Remove duplicate auth middleware init
2025-12-17 19:09:12 +01:00
b45219a01e Reuse handler auth middleware 2025-12-17 19:06:04 +01:00
463bb561e1 Merge pull request #111 from lordmathis/fix/cgo-enabled-build
fix: Add multiplatform CGO_ENABLED=1 build
2025-12-17 14:38:27 +01:00
ebdb9143c0 Remove separate windows build step 2025-12-17 14:32:10 +01:00
4269d04381 Update release.yaml 2025-12-17 14:25:50 +01:00
c734329a62 Merge pull request #109 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-62cd70be13
chore: bump the npm-production group in /webui with 2 updates
2025-12-15 23:50:27 +01:00
15fcf7c377 Merge pull request #110 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-8fdc2c429d
chore: bump the npm-development group in /webui with 3 updates
2025-12-15 23:48:42 +01:00
dependabot[bot]
795f530956 chore: bump the npm-development group in /webui with 3 updates
Bumps the npm-development group in /webui with 3 updates: [@types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node), [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) and [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite).


Updates `@types/node` from 24.10.1 to 25.0.2
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/node)

Updates `typescript-eslint` from 8.49.0 to 8.50.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.50.0/packages/typescript-eslint)

Updates `vite` from 7.2.2 to 7.3.0
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/v7.3.0/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v7.3.0/packages/vite)

---
updated-dependencies:
- dependency-name: "@types/node"
  dependency-version: 25.0.2
  dependency-type: direct:development
  update-type: version-update:semver-major
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.50.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: vite
  dependency-version: 7.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:41 +00:00
dependabot[bot]
4507358310 chore: bump the npm-production group in /webui with 2 updates
Bumps the npm-production group in /webui with 2 updates: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react) and [zod](https://github.com/colinhacks/zod).


Updates `lucide-react` from 0.560.0 to 0.561.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.561.0/packages/lucide-react)

Updates `zod` from 4.1.12 to 4.2.0
- [Release notes](https://github.com/colinhacks/zod/releases)
- [Commits](https://github.com/colinhacks/zod/compare/v4.1.12...v4.2.0)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.561.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
- dependency-name: zod
  dependency-version: 4.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-15 22:06:01 +00:00
f3c02b4939 Merge pull request #108 from lordmathis/refactor/config
refactor: Split large config file
2025-12-13 14:27:25 +01:00
0a85409deb Split large config file 2025-12-13 13:50:59 +01:00
22fd295250 Merge pull request #107 from lordmathis/feat/logrotate
feat: Add log rotation for instance logs
2025-12-13 13:30:20 +01:00
c0cecdd377 Clean up logger 2025-12-13 13:18:30 +01:00
4d57b37a5d Remove verbose _mb suffix 2025-12-13 13:06:22 +01:00
c13b71d07f Document new log rotation config options 2025-12-13 13:02:22 +01:00
406a711682 Move LogRotationConfig to logger package 2025-12-13 12:48:50 +01:00
0b3d654945 Simplify logging config 2025-12-13 12:48:50 +01:00
e2a49402d6 Implement instance log rotation 2025-12-13 12:48:50 +01:00
48836c9c12 Merge pull request #105 from lordmathis/dependabot/npm_and_yarn/webui/npm-production-3fe24f4500
chore: bump lucide-react from 0.555.0 to 0.556.0 in /webui in the npm-production group
2025-12-12 10:43:12 +01:00
4200b8eed9 Merge pull request #104 from lordmathis/dependabot/go_modules/go-dependencies-f180a085e8
chore: bump golang.org/x/crypto from 0.45.0 to 0.46.0 in the go-dependencies group
2025-12-11 18:51:40 +01:00
dependabot[bot]
9a7ae87df8 chore: bump lucide-react in /webui in the npm-production group
Bumps the npm-production group in /webui with 1 update: [lucide-react](https://github.com/lucide-icons/lucide/tree/HEAD/packages/lucide-react).


Updates `lucide-react` from 0.555.0 to 0.556.0
- [Release notes](https://github.com/lucide-icons/lucide/releases)
- [Commits](https://github.com/lucide-icons/lucide/commits/0.556.0/packages/lucide-react)

---
updated-dependencies:
- dependency-name: lucide-react
  dependency-version: 0.556.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: npm-production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-11 17:17:18 +00:00
e54c495528 Merge pull request #106 from lordmathis/dependabot/npm_and_yarn/webui/npm-development-808d3127cd
chore: bump the npm-development group in /webui with 2 updates
2025-12-11 18:12:15 +01:00
dependabot[bot]
83006968ca chore: bump the npm-development group in /webui with 2 updates
Bumps the npm-development group in /webui with 2 updates: [jsdom](https://github.com/jsdom/jsdom) and [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint).


Updates `jsdom` from 27.2.0 to 27.3.0
- [Release notes](https://github.com/jsdom/jsdom/releases)
- [Changelog](https://github.com/jsdom/jsdom/blob/main/Changelog.md)
- [Commits](https://github.com/jsdom/jsdom/compare/27.2.0...27.3.0)

Updates `typescript-eslint` from 8.48.0 to 8.49.0
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.49.0/packages/typescript-eslint)

---
updated-dependencies:
- dependency-name: jsdom
  dependency-version: 27.3.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
- dependency-name: typescript-eslint
  dependency-version: 8.49.0
  dependency-type: direct:development
  update-type: version-update:semver-minor
  dependency-group: npm-development
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:31:36 +00:00
dependabot[bot]
c8d9c6907c chore: bump golang.org/x/crypto in the go-dependencies group
Bumps the go-dependencies group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0
- [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.46.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: go-dependencies
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-08 21:30:38 +00:00
c776785f30 Merge pull request #103 from lordmathis/docs/api-keys
docs: Improve API key management documentation
2025-12-08 19:23:39 +01:00
1cfbd42eda Update swagger docs 2025-12-08 19:16:02 +01:00
8fee27054d Update docs for API key management 2025-12-08 19:15:42 +01:00
fd33837026 Merge pull request #101 from lordmathis/feat/api-key-mgmt
feat: Add inference api key management
2025-12-08 18:49:49 +01:00
3c4ebf7403 Add simple python LLM test client 2025-12-08 18:44:28 +01:00
b7a0f7e3d8 Unhide migrated directory 2025-12-08 18:08:22 +01:00
d5b68a900f Add .migrated directory for migrated json files 2025-12-08 18:06:15 +01:00
24 changed files with 1248 additions and 1038 deletions

View File

@@ -45,15 +45,23 @@ jobs:
build:
name: Build Binaries
needs: build-webui
runs-on: ubuntu-latest
runs-on: ${{ matrix.runner }}
strategy:
matrix:
goos: [linux, windows, darwin]
goarch: [amd64, arm64]
exclude:
# Windows ARM64 support is limited
- goos: windows
include:
- goos: linux
goarch: amd64
runner: ubuntu-latest
- goos: linux
goarch: arm64
runner: ubuntu-latest
cc: aarch64-linux-gnu-gcc
- goos: darwin
goarch: arm64
runner: macos-latest
- goos: windows
goarch: amd64
runner: windows-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -70,11 +78,19 @@ jobs:
name: webui-dist
path: webui/dist/
- name: Install cross-compilation tools (Linux ARM64 only)
if: matrix.cc != ''
run: |
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu
- name: Build binary
env:
GOOS: ${{ matrix.goos }}
GOARCH: ${{ matrix.goarch }}
CGO_ENABLED: 0
CGO_ENABLED: 1
CC: ${{ matrix.cc }}
shell: bash
run: |
# Set binary extension for Windows
BINARY_NAME="llamactl"
@@ -91,8 +107,10 @@ jobs:
ARCHIVE_OS="macos"
fi
ARCHIVE_NAME="llamactl-${{ github.ref_name }}-${ARCHIVE_OS}-${{ matrix.goarch }}"
if [ "${{ matrix.goos }}" = "windows" ]; then
zip "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
# Use 7z on Windows (pre-installed)
7z a "${ARCHIVE_NAME}.zip" "${BINARY_NAME}"
echo "ASSET_PATH=${ARCHIVE_NAME}.zip" >> $GITHUB_ENV
else
tar -czf "${ARCHIVE_NAME}.tar.gz" "${BINARY_NAME}"
@@ -179,4 +197,4 @@ jobs:
with:
files: assets/checksums.txt
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -183,7 +183,7 @@ data_dir: ~/.local/share/llamactl # Main data directory (database, instances, l
instances:
port_range: [8000, 9000] # Port range for instances
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent)
configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent) [deprecated]
logs_dir: ~/.local/share/llamactl/logs # Logs directory (platform dependent)
auto_create_dirs: true # Auto-create data/config/logs dirs if missing
max_instances: -1 # Max instances (-1 = unlimited)
@@ -195,6 +195,9 @@ instances:
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Idle instance timeout check in minutes
log_rotation_enabled: true # Enable log rotation (default: true)
log_rotation_max_size: 100 # Max log file size in MB before rotation (default: 100)
log_rotation_compress: false # Compress rotated log files (default: false)
database:
path: ~/.local/share/llamactl/llamactl.db # Database file path (platform dependent)
@@ -203,8 +206,7 @@ database:
connection_max_lifetime: 5m # Connection max lifetime
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_inference_auth: true # Require auth for inference endpoints, API keys are created in web UI
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
```
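
The three new log rotation options map onto the `github.com/DeRuina/timberjack` dependency added in go.mod later in this diff. Below is a minimal sketch of wiring an instance log through a rotating writer, assuming timberjack keeps the field names of the lumberjack API it forks (`Filename`, `MaxSize` in megabytes, `Compress`); the path and wiring are illustrative, not llamactl's actual logger code:

```go
package main

import (
	"log"

	"github.com/DeRuina/timberjack"
)

func main() {
	// Rotating writer: roll the file once it exceeds MaxSize megabytes and
	// optionally gzip the rotated copy. Field names assume timberjack
	// mirrors the lumberjack API it forks.
	w := &timberjack.Logger{
		Filename: "/var/log/llamactl/my-instance.log", // illustrative path
		MaxSize:  100,   // log_rotation_max_size (MB)
		Compress: false, // log_rotation_compress
	}
	defer w.Close()

	logger := log.New(w, "", log.LstdFlags)
	logger.Println("instance started")
}
```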

View File

@@ -13,6 +13,7 @@ import (
// migrateFromJSON migrates instances from JSON files to SQLite database
// This is a one-time migration that runs on first startup with existing JSON files.
// Migrated files are moved to a migrated subdirectory to avoid re-importing.
func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
instancesDir := cfg.Instances.InstancesDir
if instancesDir == "" {
@@ -24,16 +25,6 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
return nil // No instances directory, nothing to migrate
}
// Check if database is empty (no instances)
existing, err := db.LoadAll()
if err != nil {
return fmt.Errorf("failed to check existing instances: %w", err)
}
if len(existing) > 0 {
return nil // Database already has instances, skip migration
}
// Find all JSON files
files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
if err != nil {
@@ -46,6 +37,12 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
log.Printf("Migrating %d instances from JSON to SQLite...", len(files))
// Create migrated directory
migratedDir := filepath.Join(instancesDir, "migrated")
if err := os.MkdirAll(migratedDir, 0755); err != nil {
return fmt.Errorf("failed to create migrated directory: %w", err)
}
// Migrate each JSON file
var migrated int
for _, file := range files {
@@ -53,6 +50,14 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
log.Printf("Failed to migrate %s: %v", file, err)
continue
}
// Move the file to the migrated directory
destPath := filepath.Join(migratedDir, filepath.Base(file))
if err := os.Rename(file, destPath); err != nil {
log.Printf("Warning: Failed to move %s to migrated directory: %v", file, err)
// Don't fail the migration if we can't move the file
}
migrated++
}
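
Pieced together from the hunks above, the migrate-and-move flow looks roughly like this; `importFile` is a hypothetical stand-in for the real per-file import into SQLite:

```go
package main

import (
	"fmt"
	"log"
	"os"
	"path/filepath"
)

// migrateAndMove sketches the flow from the diff: import each JSON file,
// then move it into a migrated/ subdirectory.
func migrateAndMove(instancesDir string, importFile func(path string) error) (int, error) {
	files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
	if err != nil {
		return 0, fmt.Errorf("failed to list JSON files: %w", err)
	}
	migratedDir := filepath.Join(instancesDir, "migrated")
	if err := os.MkdirAll(migratedDir, 0755); err != nil {
		return 0, fmt.Errorf("failed to create migrated directory: %w", err)
	}
	var migrated int
	for _, file := range files {
		if err := importFile(file); err != nil {
			log.Printf("Failed to migrate %s: %v", file, err)
			continue
		}
		// A failed move is logged but does not fail the migration.
		dest := filepath.Join(migratedDir, filepath.Base(file))
		if err := os.Rename(file, dest); err != nil {
			log.Printf("Warning: failed to move %s: %v", file, err)
		}
		migrated++
	}
	return migrated, nil
}

func main() {
	n, err := migrateAndMove("./instances", func(path string) error {
		fmt.Println("would import", path) // stand-in importer
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("migrated %d files\n", n)
}
```

Moving files into migrated/ is what makes the run idempotent: a second startup finds no *.json files left, which is why the database-emptiness check above could be removed.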

View File

@@ -74,7 +74,6 @@ database:
auth:
require_inference_auth: true # Require auth for inference endpoints
inference_keys: [] # Keys for inference endpoints
require_management_auth: true # Require auth for management endpoints
management_keys: [] # Keys for management endpoints
@@ -231,6 +230,9 @@ instances:
default_on_demand_start: true # Default on-demand start setting
on_demand_start_timeout: 120 # Default on-demand start timeout in seconds
timeout_check_interval: 5 # Default instance timeout check interval in minutes
log_rotation_enabled: true # Enable log rotation (default: true)
log_rotation_max_size: 100 # Max log file size in MB before rotation (default: 100)
log_rotation_compress: false # Compress rotated log files (default: false)
```
**Environment Variables:**
@@ -247,6 +249,9 @@ instances:
- `LLAMACTL_DEFAULT_ON_DEMAND_START` - Default on-demand start setting (true/false)
- `LLAMACTL_ON_DEMAND_START_TIMEOUT` - Default on-demand start timeout in seconds
- `LLAMACTL_TIMEOUT_CHECK_INTERVAL` - Default instance timeout check interval in minutes
- `LLAMACTL_LOG_ROTATION_ENABLED` - Enable log rotation (true/false)
- `LLAMACTL_LOG_ROTATION_MAX_SIZE` - Max log file size in MB
- `LLAMACTL_LOG_ROTATION_COMPRESS` - Compress rotated logs (true/false)
### Database Configuration
@@ -266,17 +271,33 @@ database:
### Authentication Configuration
llamactl supports two types of authentication:
- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Managing Inference API Keys:**
Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:
1. Open the web UI and log in with a management API key
2. Navigate to **Settings → API Keys**
3. Click **Create API Key**
4. Configure the key:
- **Name**: A descriptive name for the key
- **Expiration**: Optional expiration date
- **Permissions**: Grant access to all instances or specific instances only
5. Copy the generated key - it won't be shown again
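
As a concrete illustration of this flow, here is a minimal sketch of an authenticated request to the OpenAI-compatible endpoint using an inference key created in the web UI; the key value and model name are placeholders:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	// Placeholder key and model: substitute the llamactl-... key created in
	// the web UI and the name of a running instance.
	body := []byte(`{"model": "my-instance", "messages": [{"role": "user", "content": "Hello"}]}`)

	req, err := http.NewRequest("POST", "http://localhost:8080/v1/chat/completions", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Authorization", "Bearer llamactl-...")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(out))
}
```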
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys

View File

@@ -2063,20 +2063,19 @@ const docTemplate = `{
"server.CreateKeyRequest": {
"type": "object",
"properties": {
"expiresAt": {
"type": "integer",
"format": "int64"
"expires_at": {
"type": "integer"
},
"instancePermissions": {
"instance_ids": {
"type": "array",
"items": {
"$ref": "#/definitions/server.InstancePermission"
"type": "integer"
}
},
"name": {
"type": "string"
},
"permissionMode": {
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
}
}
@@ -2087,9 +2086,6 @@ const docTemplate = `{
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
@@ -2116,29 +2112,9 @@ const docTemplate = `{
}
}
},
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
},
@@ -2153,9 +2129,6 @@ const docTemplate = `{
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
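
Read back as a Go type, the reworked `server.CreateKeyRequest` schema corresponds roughly to the following; the type and field names are assumptions, only the JSON keys and value types come from the swagger definition above:

```go
// Hypothetical reconstruction of the wire type; only the JSON keys and
// value types are taken from the swagger schema.
package server

// PermissionMode mirrors auth.PermissionMode referenced by the schema; its
// concrete values are not visible in this diff.
type PermissionMode string

// CreateKeyRequest uses snake_case keys; instance_ids replaces the removed
// server.InstancePermission objects with plain instance IDs, and expires_at
// is now a plain integer (likely a Unix timestamp).
type CreateKeyRequest struct {
	Name           string         `json:"name"`
	ExpiresAt      int64          `json:"expires_at,omitempty"`
	InstanceIDs    []int          `json:"instance_ids,omitempty"`
	PermissionMode PermissionMode `json:"permission_mode,omitempty"`
}
```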

View File

@@ -17,10 +17,10 @@ Before you start, let's clarify a few key terms:
Llamactl uses two types of API keys:
- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.
By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.
## Start Llamactl
@@ -38,24 +38,17 @@ llamactl
sk-management-...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ INFERENCE AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Inference API Key:
sk-inference-...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• These keys are auto-generated and will change on restart
• For production, add explicit keys to your configuration
• Copy these keys before they disappear from the terminal
• This key is auto-generated and will change on restart
• For production, add explicit management_keys to your configuration
• Copy this key before it disappears from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Llamactl server listening on 0.0.0.0:8080
```
Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
Copy the **Management API Key** from the terminal - you'll need it to access the web UI.
By default, Llamactl will start on `http://localhost:8080`.
@@ -82,7 +75,7 @@ You should see the Llamactl web interface.
- **Additional Options**: Backend-specific parameters
!!! tip "Auto-Assignment"
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.
!!! note "Remote Node Deployment"
If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
@@ -98,6 +91,24 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed
## Create an Inference API Key
To make inference requests to your instances, you'll need an inference API key:
1. In the web UI, click the **Settings** icon (gear icon in the top-right)
2. Navigate to the **API Keys** tab
3. Click **Create API Key**
4. Configure your key:
- **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
- **Expiration**: Optionally set an expiration date for the key
- **Permissions**: Choose whether the key can access all instances or only specific ones
5. Click **Create**
6. **Copy the generated key** - it will only be shown once!
The key will look like: `llamactl-...`
You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
## Example Configurations
Here are basic example configurations for each backend:
@@ -246,7 +257,7 @@ print(response.choices[0].message.content)
```
!!! note "API Key"
If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).
### List Available Models
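
A companion sketch for this step, assuming the standard OpenAI-compatible `GET /v1/models` route; host and key are placeholders:

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
)

func main() {
	req, err := http.NewRequest("GET", "http://localhost:8080/v1/models", nil)
	if err != nil {
		log.Fatal(err)
	}
	// Use the inference key created via Settings → API Keys (placeholder).
	req.Header.Set("Authorization", "Bearer llamactl-...")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))
}
```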

View File

@@ -2056,20 +2056,19 @@
"server.CreateKeyRequest": {
"type": "object",
"properties": {
"expiresAt": {
"type": "integer",
"format": "int64"
"expires_at": {
"type": "integer"
},
"instancePermissions": {
"instance_ids": {
"type": "array",
"items": {
"$ref": "#/definitions/server.InstancePermission"
"type": "integer"
}
},
"name": {
"type": "string"
},
"permissionMode": {
"permission_mode": {
"$ref": "#/definitions/auth.PermissionMode"
}
}
@@ -2080,9 +2079,6 @@
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},
@@ -2109,29 +2105,9 @@
}
}
},
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
},
@@ -2146,9 +2122,6 @@
"created_at": {
"type": "integer"
},
"enabled": {
"type": "boolean"
},
"expires_at": {
"type": "integer"
},

View File

@@ -232,24 +232,21 @@ definitions:
type: object
server.CreateKeyRequest:
properties:
expiresAt:
format: int64
expires_at:
type: integer
instancePermissions:
instance_ids:
items:
$ref: '#/definitions/server.InstancePermission'
type: integer
type: array
name:
type: string
permissionMode:
permission_mode:
$ref: '#/definitions/auth.PermissionMode'
type: object
server.CreateKeyResponse:
properties:
created_at:
type: integer
enabled:
type: boolean
expires_at:
type: integer
id:
@@ -267,21 +264,8 @@ definitions:
user_id:
type: string
type: object
server.InstancePermission:
properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id:
type: integer
type: object
server.KeyPermissionResponse:
properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id:
type: integer
instance_name:
@@ -291,8 +275,6 @@ definitions:
properties:
created_at:
type: integer
enabled:
type: boolean
expires_at:
type: integer
id:

View File

@@ -115,15 +115,15 @@ vllm serve microsoft/DialoGPT-medium --port 8081
require_inference_auth: false
```
2. **Configure API keys:**
2. **Configure management API keys:**
```yaml
auth:
management_keys:
- "your-management-key"
inference_keys:
- "your-inference-key"
```
For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key.
3. **Use correct Authorization header:**
```bash
curl -H "Authorization: Bearer your-api-key" \

go.mod (6 changed lines)
View File

@@ -3,13 +3,14 @@ module llamactl
go 1.24.5
require (
github.com/DeRuina/timberjack v1.3.9
github.com/go-chi/chi/v5 v5.2.2
github.com/go-chi/cors v1.2.2
github.com/golang-migrate/migrate/v4 v4.19.1
github.com/mattn/go-sqlite3 v1.14.24
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.5
golang.org/x/crypto v0.45.0
golang.org/x/crypto v0.46.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -20,11 +21,12 @@ require (
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/swaggo/files v1.0.1 // indirect
golang.org/x/mod v0.29.0 // indirect
golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/sys v0.39.0 // indirect
golang.org/x/tools v0.38.0 // indirect
)

go.sum (14 changed lines)
View File

@@ -1,7 +1,11 @@
github.com/DeRuina/timberjack v1.3.9 h1:6UXZ1I7ExPGTX/1UNYawR58LlOJUHKBPiYC7WQ91eBo=
github.com/DeRuina/timberjack v1.3.9/go.mod h1:RLoeQrwrCGIEF8gO5nV5b/gMD0QIy7bzQhBUgpp1EqE=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/go-chi/chi/v5 v5.2.2 h1:CMwsvRVTbXVytCk1Wd72Zy1LAsAh9GxMmSNWLHCG618=
github.com/go-chi/chi/v5 v5.2.2/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE=
@@ -20,6 +24,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
@@ -45,8 +51,8 @@ github.com/swaggo/swag v1.16.5/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
@@ -66,8 +72,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=

View File

@@ -6,233 +6,18 @@ import (
"log"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Instance config directory override (relative to data_dir if not absolute)
InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction for instance logs
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (AppConfig, error) {
// 1. Start with defaults
defaultDataDir := getDefaultDataDirectory()
cfg := AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
DataDir: defaultDataDir,
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(defaultDataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(defaultDataDir, "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
// should be relative path to DataDir if not explicitly set.
InstancesDir: "",
LogsDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
},
Database: DatabaseConfig{
Path: "", // Will be set to data_dir/llamactl.db if empty
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
defaultDataDir := getDefaultDataDir()
cfg := getDefaultConfig(defaultDataDir)
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
@@ -300,372 +85,6 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
// SanitizedCopy returns a copy of the AppConfig with sensitive information removed
func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) {
// Deep copy via JSON marshal/unmarshal to avoid concurrent map access
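
ParsePortRange survives this split with its contract unchanged (it appears to move into pkg/config/env.go below); a quick usage sketch against this module:

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
)

func main() {
	// Both separators are accepted; anything else yields [0 0], which
	// loadEnvVars treats as "ignore and keep the default".
	fmt.Println(config.ParsePortRange("8000-9000")) // [8000 9000]
	fmt.Println(config.ParsePortRange("8000,9000")) // [8000 9000]
	fmt.Println(config.ParsePortRange("oops"))      // [0 0]
}
```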

View File

@@ -78,8 +78,8 @@ server:
port: 9090
instances:
port_range: [7000, 8000]
logs_dir: "/custom/logs"
max_instances: 5
logs_dir: "/custom/logs"
llama_executable: "/usr/bin/llama-server"
default_auto_restart: false
default_max_restarts: 10
@@ -219,7 +219,6 @@ instances:
}
}
func TestParsePortRange(t *testing.T) {
tests := []struct {
name string
@@ -248,7 +247,6 @@ func TestParsePortRange(t *testing.T) {
}
}
func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
bc := &config.BackendConfig{
LlamaCpp: config.BackendSettings{
@@ -305,7 +303,6 @@ func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
}
}
func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
// Test that backend environment variables work correctly
envVars := map[string]string{
@@ -375,7 +372,6 @@ func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
}
}
func TestLoadConfig_LocalNode(t *testing.T) {
t.Run("default local node", func(t *testing.T) {
cfg, err := config.LoadConfig("nonexistent-file.yaml")

pkg/config/defaults.go (new file, 154 lines)
View File

@@ -0,0 +1,154 @@
package config
import (
"os"
"path/filepath"
"runtime"
"time"
)
func getDefaultConfig(dataDir string) AppConfig {
return AppConfig{
Server: ServerConfig{
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
AllowedHeaders: []string{"*"}, // Default to allow all headers
EnableSwagger: false,
},
LocalNode: "main",
Nodes: map[string]NodeConfig{},
DataDir: dataDir,
Backends: BackendConfig{
LlamaCpp: BackendSettings{
Command: "llama-server",
Args: []string{},
Environment: map[string]string{},
Docker: &DockerSettings{
Enabled: false,
Image: "ghcr.io/ggml-org/llama.cpp:server",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all",
"-v", filepath.Join(dataDir, "llama.cpp") + ":/root/.cache/llama.cpp"},
Environment: map[string]string{},
},
},
VLLM: BackendSettings{
Command: "vllm",
Args: []string{"serve"},
Docker: &DockerSettings{
Enabled: false,
Image: "vllm/vllm-openai:latest",
Args: []string{
"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
"-v", filepath.Join(dataDir, "huggingface") + ":/root/.cache/huggingface",
},
Environment: map[string]string{},
},
},
MLX: BackendSettings{
Command: "mlx_lm.server",
Args: []string{},
// No Docker section for MLX - not supported
},
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
// NOTE: empty string is set as placeholder value since InstancesDir
// should be relative path to DataDir if not explicitly set.
InstancesDir: "",
AutoCreateDirs: true,
MaxInstances: -1, // -1 means unlimited
MaxRunningInstances: -1, // -1 means unlimited
EnableLRUEviction: true,
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
DefaultOnDemandStart: true,
OnDemandStartTimeout: 120, // 2 minutes
TimeoutCheckInterval: 5, // Check timeouts every 5 minutes
LogsDir: "", // Will be set to data_dir/logs if empty
LogRotationEnabled: true,
LogRotationMaxSize: 100,
LogRotationCompress: false,
},
Database: DatabaseConfig{
Path: "", // Will be set to data_dir/llamactl.db if empty
MaxOpenConnections: 25,
MaxIdleConnections: 5,
ConnMaxLifetime: 5 * time.Minute,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
}
// getDefaultDataDir returns platform-specific default data directory
func getDefaultDataDir() string {
switch runtime.GOOS {
case "windows":
// Try PROGRAMDATA first (system-wide), fallback to LOCALAPPDATA (user)
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
return filepath.Join(programData, "llamactl")
}
if localAppData := os.Getenv("LOCALAPPDATA"); localAppData != "" {
return filepath.Join(localAppData, "llamactl")
}
return "C:\\ProgramData\\llamactl" // Final fallback
case "darwin":
// For macOS, use user's Application Support directory
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, "Library", "Application Support", "llamactl")
}
return "/usr/local/var/llamactl" // Fallback
default:
// Linux and other Unix-like systems
if homeDir, _ := os.UserHomeDir(); homeDir != "" {
return filepath.Join(homeDir, ".local", "share", "llamactl")
}
return "/var/lib/llamactl" // Final fallback
}
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Use ./llamactl.yaml and ./config.yaml as the default config file
locations = append(locations, "llamactl.yaml")
locations = append(locations, "config.yaml")
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA if available, else user home, fallback to ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
} else if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
locations = append(locations, filepath.Join(os.Getenv("PROGRAMDATA"), "llamactl", "config.yaml"))
case "darwin":
// macOS: Use Application Support in user home, fallback to /Library/Application Support
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"))
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
default:
// Linux/Unix: Use ~/.config/llamactl/config.yaml, fallback to /etc/llamactl/config.yaml
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, ".config", "llamactl", "config.yaml"))
}
locations = append(locations, "/etc/llamactl/config.yaml")
}
return locations
}
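
With defaults now isolated in getDefaultConfig, LoadConfig's documented precedence (defaults, then config file, then environment variables) can be sketched like this; the port override is illustrative:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"llamactl/pkg/config"
)

func main() {
	// Defaults give Server.Port = 8080; an env var set before LoadConfig
	// wins over both the defaults and any config file value.
	os.Setenv("LLAMACTL_PORT", "9090")

	// A missing config file falls back to defaults (see the LocalNode test
	// above, which loads a nonexistent file without error).
	cfg, err := config.LoadConfig("llamactl.yaml")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Server.Port) // 9090
}
```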

pkg/config/env.go (new file, 325 lines)
View File

@@ -0,0 +1,325 @@
package config
import (
"os"
"strconv"
"strings"
"time"
)
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *AppConfig) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Data config
if dataDir := os.Getenv("LLAMACTL_DATA_DIRECTORY"); dataDir != "" {
cfg.DataDir = dataDir
}
if instancesDir := os.Getenv("LLAMACTL_INSTANCES_DIR"); instancesDir != "" {
cfg.Instances.InstancesDir = instancesDir
}
if logsDir := os.Getenv("LLAMACTL_LOGS_DIR"); logsDir != "" {
cfg.Instances.LogsDir = logsDir
}
if autoCreate := os.Getenv("LLAMACTL_AUTO_CREATE_DATA_DIR"); autoCreate != "" {
if b, err := strconv.ParseBool(autoCreate); err == nil {
cfg.Instances.AutoCreateDirs = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := ParsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if maxRunning := os.Getenv("LLAMACTL_MAX_RUNNING_INSTANCES"); maxRunning != "" {
if m, err := strconv.Atoi(maxRunning); err == nil {
cfg.Instances.MaxRunningInstances = m
}
}
if enableLRUEviction := os.Getenv("LLAMACTL_ENABLE_LRU_EVICTION"); enableLRUEviction != "" {
if b, err := strconv.ParseBool(enableLRUEviction); err == nil {
cfg.Instances.EnableLRUEviction = b
}
}
// Backend config
// LlamaCpp backend
if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
cfg.Backends.LlamaCpp.Command = llamaCmd
}
if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.Environment == nil {
cfg.Backends.LlamaCpp.Environment = make(map[string]string)
}
parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
}
if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Enabled = b
}
}
if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
}
if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
}
if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
if cfg.Backends.LlamaCpp.Docker == nil {
cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
}
if cfg.Backends.LlamaCpp.Docker.Environment == nil {
cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
cfg.Backends.VLLM.Command = vllmCmd
}
if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
}
if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
if cfg.Backends.VLLM.Environment == nil {
cfg.Backends.VLLM.Environment = make(map[string]string)
}
parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
}
if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Enabled = b
}
}
if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Image = vllmDockerImage
}
if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
}
if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
if cfg.Backends.VLLM.Docker == nil {
cfg.Backends.VLLM.Docker = &DockerSettings{}
}
if cfg.Backends.VLLM.Docker.Environment == nil {
cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
cfg.Backends.MLX.Command = mlxCmd
}
if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
}
if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
if cfg.Backends.MLX.Environment == nil {
cfg.Backends.MLX.Environment = make(map[string]string)
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
if onDemandStart := os.Getenv("LLAMACTL_DEFAULT_ON_DEMAND_START"); onDemandStart != "" {
if b, err := strconv.ParseBool(onDemandStart); err == nil {
cfg.Instances.DefaultOnDemandStart = b
}
}
if onDemandTimeout := os.Getenv("LLAMACTL_ON_DEMAND_START_TIMEOUT"); onDemandTimeout != "" {
if seconds, err := strconv.Atoi(onDemandTimeout); err == nil {
cfg.Instances.OnDemandStartTimeout = seconds
}
}
if timeoutCheckInterval := os.Getenv("LLAMACTL_TIMEOUT_CHECK_INTERVAL"); timeoutCheckInterval != "" {
if minutes, err := strconv.Atoi(timeoutCheckInterval); err == nil {
cfg.Instances.TimeoutCheckInterval = minutes
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
// Local node config
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
cfg.LocalNode = localNode
}
// Database config
if dbPath := os.Getenv("LLAMACTL_DATABASE_PATH"); dbPath != "" {
cfg.Database.Path = dbPath
}
if maxOpenConns := os.Getenv("LLAMACTL_DATABASE_MAX_OPEN_CONNECTIONS"); maxOpenConns != "" {
if m, err := strconv.Atoi(maxOpenConns); err == nil {
cfg.Database.MaxOpenConnections = m
}
}
if maxIdleConns := os.Getenv("LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS"); maxIdleConns != "" {
if m, err := strconv.Atoi(maxIdleConns); err == nil {
cfg.Database.MaxIdleConnections = m
}
}
if connMaxLifetime := os.Getenv("LLAMACTL_DATABASE_CONN_MAX_LIFETIME"); connMaxLifetime != "" {
if d, err := time.ParseDuration(connMaxLifetime); err == nil {
cfg.Database.ConnMaxLifetime = d
}
}
// Log rotation config
if logRotationEnabled := os.Getenv("LLAMACTL_LOG_ROTATION_ENABLED"); logRotationEnabled != "" {
if b, err := strconv.ParseBool(logRotationEnabled); err == nil {
cfg.Instances.LogRotationEnabled = b
}
}
if logRotationMaxSize := os.Getenv("LLAMACTL_LOG_ROTATION_MAX_SIZE"); logRotationMaxSize != "" {
if m, err := strconv.Atoi(logRotationMaxSize); err == nil {
cfg.Instances.LogRotationMaxSize = m
}
}
if logRotationCompress := os.Getenv("LLAMACTL_LOG_ROTATION_COMPRESS"); logRotationCompress != "" {
if b, err := strconv.ParseBool(logRotationCompress); err == nil {
cfg.Instances.LogRotationCompress = b
}
}
}
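Because every override is guarded by a successful parse, malformed values are silently ignored rather than zeroing fields. A quick illustration as a package-local test (hypothetical, not part of this diff):

func TestLoadEnvVarsOverride(t *testing.T) {
	t.Setenv("LLAMACTL_PORT", "9090")
	t.Setenv("LLAMACTL_MAX_INSTANCES", "not-a-number") // fails strconv.Atoi, so it is ignored

	var cfg AppConfig
	cfg.Server.Port = 8080
	cfg.Instances.MaxInstances = -1

	loadEnvVars(&cfg)

	if cfg.Server.Port != 9090 {
		t.Errorf("expected port 9090, got %d", cfg.Server.Port)
	}
	if cfg.Instances.MaxInstances != -1 {
		t.Errorf("malformed override should be ignored, got %d", cfg.Instances.MaxInstances)
	}
}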
// ParsePortRange parses a port range from strings like "8000-9000" or "8000,9000"
func ParsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
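Both separator styles yield the same pair, and anything else falls back to the zero value. For illustration:

fmt.Println(config.ParsePortRange("8000-9000")) // [8000 9000]
fmt.Println(config.ParsePortRange("8000,9000")) // [8000 9000]
fmt.Println(config.ParsePortRange("8000"))      // [0 0] (invalid format)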
// parseEnvVars parses environment variables in the format "KEY1=value1,KEY2=value2"
// and populates the provided environment map
func parseEnvVars(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ",") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// parseHeaders parses HTTP headers in the format "KEY1=value1;KEY2=value2"
// and populates the provided headers map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
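Note the deliberate difference in separators: env pairs split on commas, while headers split on semicolons, presumably because header values may themselves contain commas. For illustration (both helpers are package-private, so this runs inside pkg/config):

env := map[string]string{}
parseEnvVars("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=8", env)
// env: CUDA_VISIBLE_DEVICES=0, OMP_NUM_THREADS=8

headers := map[string]string{}
parseHeaders("X-Served-By=llamactl;Cache-Control=no-store", headers)
// headers: X-Served-By=llamactl, Cache-Control=no-store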

pkg/config/types.go

@@ -0,0 +1,149 @@
package config
import "time"
// BackendSettings contains structured backend configuration
type BackendSettings struct {
Command string `yaml:"command" json:"command"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Image string `yaml:"image" json:"image"`
Args []string `yaml:"args" json:"args"`
Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"`
}
// BackendConfig contains backend executable configurations
type BackendConfig struct {
LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"`
VLLM BackendSettings `yaml:"vllm" json:"vllm"`
MLX BackendSettings `yaml:"mlx" json:"mlx"`
}
// AppConfig represents the configuration for llamactl
type AppConfig struct {
Server ServerConfig `yaml:"server" json:"server"`
Backends BackendConfig `yaml:"backends" json:"backends"`
Instances InstancesConfig `yaml:"instances" json:"instances"`
Database DatabaseConfig `yaml:"database" json:"database"`
Auth AuthConfig `yaml:"auth" json:"auth"`
LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"`
Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"`
// Directory where all llamactl data will be stored (database, instances, logs, etc.)
DataDir string `yaml:"data_dir" json:"data_dir"`
Version string `yaml:"-" json:"version"`
CommitHash string `yaml:"-" json:"commit_hash"`
BuildTime string `yaml:"-" json:"build_time"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host" json:"host"`
// Server port to bind to
Port int `yaml:"port" json:"port"`
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"`
// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"`
}
// DatabaseConfig contains database configuration settings
type DatabaseConfig struct {
// Database file path (relative to the top-level data_dir or absolute)
Path string `yaml:"path" json:"path"`
// Connection settings
MaxOpenConnections int `yaml:"max_open_connections" json:"max_open_connections"`
MaxIdleConnections int `yaml:"max_idle_connections" json:"max_idle_connections"`
ConnMaxLifetime time.Duration `yaml:"connection_max_lifetime" json:"connection_max_lifetime" swaggertype:"string" example:"1h"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range" json:"port_range"`
// Instance config directory override (relative to data_dir if not absolute)
InstancesDir string `yaml:"configs_dir" json:"configs_dir"`
// Automatically create the data directory if it doesn't exist
AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances" json:"max_instances"`
// Maximum number of instances that can be running at the same time
MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"`
// Enable LRU eviction of idle instances when the max_running_instances limit is reached
EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"`
// Default on-demand start setting for new instances
DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"`
// How long to wait for an instance to start on demand (in seconds)
OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"`
// Interval for checking instance timeouts (in minutes)
TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"`
// Logs directory override (relative to data_dir if not absolute)
LogsDir string `yaml:"logs_dir" json:"logs_dir"`
// Log rotation enabled
LogRotationEnabled bool `yaml:"log_rotation_enabled" default:"true"`
// Maximum log file size in MB before rotation
LogRotationMaxSize int `yaml:"log_rotation_max_size" default:"100"`
// Whether to compress rotated log files
LogRotationCompress bool `yaml:"log_rotation_compress" default:"false"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys" json:"management_keys"`
}
type NodeConfig struct {
Address string `yaml:"address" json:"address"`
APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"`
}
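Since these structs carry yaml tags throughout, a config file maps onto them directly. A minimal decoding sketch, assuming gopkg.in/yaml.v3 as the decoder (the actual loader is not part of this diff):

package main

import (
	"fmt"
	"log"

	"gopkg.in/yaml.v3"
	"llamactl/pkg/config"
)

func main() {
	raw := []byte(`
server:
  host: 0.0.0.0
  port: 8080
instances:
  port_range: [8000, 9000]
  max_instances: -1
nodes:
  worker1:
    address: http://10.0.0.2:8080
`)
	var cfg config.AppConfig
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		log.Fatal(err)
	}
	fmt.Println(cfg.Server.Port, cfg.Nodes["worker1"].Address) // 8080 http://10.0.0.2:8080
}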


@@ -3,10 +3,11 @@ package instance
 import (
 	"encoding/json"
 	"fmt"
-	"llamactl/pkg/config"
 	"log"
 	"net/http"
 	"time"
+
+	"llamactl/pkg/config"
 )

 // Instance represents a running instance of llama server
@@ -68,7 +69,16 @@ func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusCha
 	// Only create logger, proxy, and process for local instances
 	if !instance.IsRemote() {
-		instance.logger = newLogger(name, globalInstanceSettings.LogsDir)
+		logRotationConfig := &LogRotationConfig{
+			Enabled:  globalInstanceSettings.LogRotationEnabled,
+			MaxSize:  globalInstanceSettings.LogRotationMaxSize,
+			Compress: globalInstanceSettings.LogRotationCompress,
+		}
+		instance.logger = newLogger(
+			name,
+			globalInstanceSettings.LogsDir,
+			logRotationConfig,
+		)
 		instance.process = newProcess(instance)
 	}


@@ -27,8 +27,8 @@ func TestNewInstance(t *testing.T) {
 		},
 	},
 	Instances: config.InstancesConfig{
-		LogsDir:             "/tmp/test",
 		DefaultAutoRestart:  true,
+		LogsDir:             "/tmp/test",
 		DefaultMaxRestarts:  3,
 		DefaultRestartDelay: 5,
 	},
@@ -120,8 +120,8 @@ func TestSetOptions(t *testing.T) {
 		},
 	},
 	Instances: config.InstancesConfig{
-		LogsDir:             "/tmp/test",
 		DefaultAutoRestart:  true,
+		LogsDir:             "/tmp/test",
 		DefaultMaxRestarts:  3,
 		DefaultRestartDelay: 5,
 	},


@@ -7,66 +7,117 @@ import (
 	"os"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
+
+	timber "github.com/DeRuina/timberjack"
 )

+// LogRotationConfig contains log rotation settings for instances
+type LogRotationConfig struct {
+	Enabled  bool
+	MaxSize  int
+	Compress bool
+}
+
 type logger struct {
 	name        string
 	logDir      string
-	logFile     atomic.Pointer[os.File]
+	logFile     *timber.Logger
 	logFilePath string
 	mu          sync.RWMutex
+	cfg         *LogRotationConfig
 }

-func newLogger(name string, logDir string) *logger {
+func newLogger(name, logDir string, cfg *LogRotationConfig) *logger {
 	return &logger{
 		name:   name,
 		logDir: logDir,
+		cfg:    cfg,
 	}
 }

 // create creates and opens the log files for stdout and stderr
-func (i *logger) create() error {
-	i.mu.Lock()
-	defer i.mu.Unlock()
+func (l *logger) create() error {
+	l.mu.Lock()
+	defer l.mu.Unlock()

-	if i.logDir == "" {
-		return fmt.Errorf("logDir is empty for instance %s", i.name)
+	if l.logDir == "" {
+		return fmt.Errorf("logDir empty for instance %s", l.name)
 	}

-	// Set up instance logs
-	logPath := i.logDir + "/" + i.name + ".log"
-	i.logFilePath = logPath
-
-	if err := os.MkdirAll(i.logDir, 0755); err != nil {
+	if err := os.MkdirAll(l.logDir, 0755); err != nil {
 		return fmt.Errorf("failed to create log directory: %w", err)
 	}

-	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
-	if err != nil {
-		return fmt.Errorf("failed to create stdout log file: %w", err)
-	}
-	i.logFile.Store(logFile)
+	logPath := fmt.Sprintf("%s/%s.log", l.logDir, l.name)
+	l.logFilePath = logPath
+
+	// Build the timber logger
+	t := &timber.Logger{
+		Filename:   logPath,
+		MaxSize:    l.cfg.MaxSize,
+		MaxBackups: 0, // No limit on backups
+		// Compression: "gzip" if Compress is true, else "none"
+		Compression: func() string {
+			if l.cfg.Compress {
+				return "gzip"
+			}
+			return "none"
+		}(),
+		FileMode:  0644,
+		LocalTime: true,
+	}
+
+	// If rotation is disabled, set MaxSize to 0 so no rotation occurs
+	if !l.cfg.Enabled {
+		t.MaxSize = 0
+	}
+
+	l.logFile = t

-	// Write a startup marker to both files
-	timestamp := time.Now().Format("2006-01-02 15:04:05")
-	fmt.Fprintf(logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
+	// Write a startup marker
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(t, "\n=== Instance %s started at %s ===\n", l.name, ts)

 	return nil
 }

-// getLogs retrieves the last n lines of logs from the instance
-func (i *logger) getLogs(num_lines int) (string, error) {
-	i.mu.RLock()
-	defer i.mu.RUnlock()
+func (l *logger) readOutput(rc io.ReadCloser) {
+	defer rc.Close()
+	scanner := bufio.NewScanner(rc)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if lg := l.logFile; lg != nil {
+			fmt.Fprintln(lg, line)
+		}
+	}
+}

-	if i.logFilePath == "" {
-		return "", fmt.Errorf("log file not created for instance %s", i.name)
+func (l *logger) close() {
+	l.mu.Lock()
+	defer l.mu.Unlock()
+
+	lg := l.logFile
+	if lg == nil {
+		return
 	}

-	file, err := os.Open(i.logFilePath)
+	ts := time.Now().Format("2006-01-02 15:04:05")
+	fmt.Fprintf(lg, "=== Instance %s stopped at %s ===\n\n", l.name, ts)
+	_ = lg.Close()
+	l.logFile = nil
+}
+
+// getLogs retrieves the last n lines of logs from the instance
+func (l *logger) getLogs(num_lines int) (string, error) {
+	l.mu.RLock()
+	defer l.mu.RUnlock()
+
+	if l.logFilePath == "" {
+		return "", fmt.Errorf("log file not created for instance %s", l.name)
+	}
+
+	file, err := os.Open(l.logFilePath)
 	if err != nil {
 		return "", fmt.Errorf("failed to open log file: %w", err)
 	}
@@ -97,31 +148,3 @@ func (i *logger) getLogs(num_lines int) (string, error) {
 	return strings.Join(lines[start:], "\n"), nil
 }
-
-// close closes the log files
-func (i *logger) close() {
-	i.mu.Lock()
-	defer i.mu.Unlock()
-
-	logFile := i.logFile.Swap(nil)
-	if logFile != nil {
-		timestamp := time.Now().Format("2006-01-02 15:04:05")
-		fmt.Fprintf(logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
-		logFile.Sync() // Ensure all buffered data is written to disk
-		logFile.Close()
-	}
-}
-
-// readOutput reads from the given reader and writes lines to the log file
-func (i *logger) readOutput(reader io.ReadCloser) {
-	defer reader.Close()
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		line := scanner.Text()
-		// Use atomic load to avoid lock contention on every line
-		if logFile := i.logFile.Load(); logFile != nil {
-			fmt.Fprintln(logFile, line)
-		}
-	}
-}
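Net effect of the refactor: file handling and rotation move into timberjack, and callers pass rotation settings explicitly. A minimal usage sketch under the new API (stdoutPipe is a hypothetical io.ReadCloser obtained from the instance process; error handling abbreviated):

cfg := &LogRotationConfig{Enabled: true, MaxSize: 100, Compress: true}
lg := newLogger("my-model", "/var/lib/llamactl/logs", cfg)
if err := lg.create(); err != nil {
	log.Fatal(err)
}
go lg.readOutput(stdoutPipe) // stream process output into the rotating log
defer lg.close()             // writes the stop marker and closes the file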


@@ -203,11 +203,11 @@ func createTestAppConfig(instancesDir string) *config.AppConfig {
 	Instances: config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		InstancesDir:         instancesDir,
-		LogsDir:              instancesDir,
 		MaxInstances:         10,
 		MaxRunningInstances:  10,
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
+		LogsDir:              instancesDir,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	},


@@ -26,9 +26,6 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		MaxAge: 300,
 	}))

-	// Add API authentication middleware
-	authMiddleware := NewAPIAuthMiddleware(handler.cfg.Auth, handler.authStore)
-
 	if handler.cfg.Server.EnableSwagger {
 		r.Get("/swagger/*", httpSwagger.Handler(
 			httpSwagger.URL("/swagger/doc.json"),
@@ -38,8 +35,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Define routes
 	r.Route("/api/v1", func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
-			r.Use(authMiddleware.ManagementAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireManagementAuth {
+			r.Use(handler.authMiddleware.ManagementAuthMiddleware())
 		}

 		r.Get("/version", handler.VersionHandler())
@@ -107,8 +104,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	r.Route("/v1", func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-			r.Use(authMiddleware.InferenceAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 		}

 		r.Get("/models", handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
@@ -135,8 +132,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	// Private Routes
 	r.Group(func(r chi.Router) {
-		if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
-			r.Use(authMiddleware.InferenceAuthMiddleware())
+		if handler.authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+			r.Use(handler.authMiddleware.InferenceAuthMiddleware())
 		}

 		// This handler auto starts the server if it's not running
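Per the merge commit, the fix replaces the middleware constructed inside SetupRouter with one initialized once on the handler. A sketch of the assumed handler wiring (field and constructor names beyond those visible in the diff are assumptions):

type Handler struct {
	cfg            *config.AppConfig
	authStore      AuthStore // assumed type name
	authMiddleware *APIAuthMiddleware
}

func NewHandler(cfg *config.AppConfig, store AuthStore) *Handler {
	return &Handler{
		cfg:            cfg,
		authStore:      store,
		authMiddleware: NewAPIAuthMiddleware(cfg.Auth, store),
	}
}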

test_client.py

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
Simple Python script to interact with a local LLM server's OpenAI-compatible API
"""
import requests
import json
import sys
# Local LLM server configuration
BASE_URL = "http://localhost:8080"
API_KEY = None
MODEL_NAME = None
def get_models():
"""Fetch available models from /v1/models endpoint"""
headers = {}
if API_KEY:
headers["Authorization"] = f"Bearer {API_KEY}"
try:
response = requests.get(f"{BASE_URL}/v1/models", headers=headers, timeout=10)
response.raise_for_status()
return response.json()["data"]
except Exception as e:
print(f"Error fetching models: {e}")
return []
def send_message(message):
"""
Send a message to the local LLM server API
Args:
message (str): The message to send
Returns:
str: The AI response or error message
"""
headers = {
"Content-Type": "application/json",
}
if API_KEY:
headers["Authorization"] = f"Bearer {API_KEY}"
data = {
"model": MODEL_NAME,
"messages": [
{
"role": "user",
"content": message
}
],
"temperature": 0.7,
"max_tokens": 1000,
"stream": False,
}
response = requests.post(f"{BASE_URL}/v1/chat/completions", headers=headers, json=data, timeout=60)
response.raise_for_status()
return response.json()["choices"][0]["message"]["content"]
def interactive_mode():
"""Run in interactive mode for continuous conversation"""
global BASE_URL, API_KEY, MODEL_NAME
# Get base URL
url_input = input(f"Base URL [{BASE_URL}]: ").strip()
if url_input:
BASE_URL = url_input
# Get API key (optional)
key_input = input("API key (optional): ").strip()
if key_input:
API_KEY = key_input
# Fetch and select model
models = get_models()
if not models:
print("No models available. Exiting.")
return
print("\nAvailable models:")
for i, m in enumerate(models, 1):
print(f"{i}. {m['id']}")
while True:
try:
selection = int(input("\nSelect model: "))
if 1 <= selection <= len(models):
MODEL_NAME = models[selection - 1]["id"]
break
print(f"Please enter a number between 1 and {len(models)}")
except ValueError:
print("Please enter a valid number")
print(f"\nUsing model: {MODEL_NAME}")
print("Type 'quit' or 'exit' to stop")
print("-" * 40)
while True:
try:
user_input = input("\nYou: ").strip()
if user_input.lower() in ['quit', 'exit', 'q']:
print("Goodbye!")
break
if not user_input:
continue
print("AI: ", end="", flush=True)
response = send_message(user_input)
print(response)
except KeyboardInterrupt:
print("\nGoodbye!")
break
except EOFError:
print("\nGoodbye!")
break
def main():
"""Main function"""
if len(sys.argv) > 1:
# Single message mode
message = " ".join(sys.argv[1:])
response = send_message(message)
print(response)
else:
# Interactive mode
interactive_mode()
if __name__ == "__main__":
main()
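Usage note: running python test_client.py "your prompt here" sends a single message and prints the reply; running it with no arguments starts the interactive model-selection loop.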

webui/package-lock.json (generated; diff suppressed because it is too large)

webui/package.json

@@ -27,20 +27,20 @@
   "class-variance-authority": "^0.7.1",
   "clsx": "^2.1.1",
   "date-fns": "^4.1.0",
-  "lucide-react": "^0.555.0",
+  "lucide-react": "^0.561.0",
   "react": "^19.2.0",
   "react-dom": "^19.2.0",
   "sonner": "^2.0.7",
   "tailwind-merge": "^3.4.0",
   "tailwindcss": "^4.1.11",
-  "zod": "^4.1.12"
+  "zod": "^4.2.0"
 },
 "devDependencies": {
   "@eslint/js": "^9.39.1",
   "@testing-library/jest-dom": "^6.9.1",
   "@testing-library/react": "^16.3.0",
   "@testing-library/user-event": "^14.6.1",
-  "@types/node": "^24.10.1",
+  "@types/node": "^25.0.2",
   "@types/react": "^19.2.4",
   "@types/react-dom": "^19.2.3",
   "@vitejs/plugin-react": "^5.1.1",
@@ -49,11 +49,11 @@
   "eslint-plugin-react": "^7.37.5",
   "eslint-plugin-react-hooks": "^7.0.1",
   "eslint-plugin-react-refresh": "^0.4.20",
-  "jsdom": "^27.2.0",
+  "jsdom": "^27.3.0",
   "tw-animate-css": "^1.4.0",
   "typescript": "^5.9.3",
-  "typescript-eslint": "^8.48.0",
-  "vite": "^7.2.2",
+  "typescript-eslint": "^8.50.0",
+  "vite": "^7.3.0",
   "vitest": "^4.0.8"
 }
 }