Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-06 09:04:27 +00:00)
Merge pull request #7 from lordmathis/feat/api-key-auth
Feature: Add API key authentication for the OpenAI-compatible endpoints and the llamactl management API endpoints on the backend
README.md (44 changes)

@@ -54,12 +54,14 @@ go build -o llamactl ./cmd/server
 
 ## Configuration
 
+
 llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
 
 1. Hardcoded defaults
 2. Configuration file
 3. Environment variables
 
+
 ### Configuration Files
 
 Configuration files are searched in the following locations:
@@ -76,19 +78,35 @@ Configuration files are searched in the following locations:
 
 You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
 
+## API Key Authentication
+
+llamactl now supports API key authentication for both management and inference (OpenAI-compatible) endpoints. There are separate keys for the management and inference APIs: management keys grant full access, while inference keys grant access only to the OpenAI-compatible endpoints.
+
+**How to Use:**
+- Pass your API key in requests using one of:
+  - `Authorization: Bearer <key>` header
+  - `X-API-Key: <key>` header
+  - `api_key=<key>` query parameter
+
+**Auto-generated keys**: If no keys are set and authentication is required, a key will be generated and printed to the terminal at startup. For production, set your own keys in the config file or via environment variables.
+
 ### Configuration Options
 
 #### Server Configuration
 
 ```yaml
 server:
-  host: ""                # Server host to bind to (default: "")
+  host: "0.0.0.0"         # Server host to bind to (default: "0.0.0.0")
   port: 8080              # Server port to bind to (default: 8080)
+  allowed_origins: ["*"]  # CORS allowed origins (default: ["*"])
+  enable_swagger: false   # Enable Swagger UI (default: false)
 ```
 
 **Environment Variables:**
 - `LLAMACTL_HOST` - Server host
 - `LLAMACTL_PORT` - Server port
+- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
+- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
 
 #### Instance Configuration
 
@@ -112,6 +130,22 @@ instances:
 - `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
 - `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
 
+#### Auth Configuration
+
+```yaml
+auth:
+  require_inference_auth: true   # Require API key for OpenAI endpoints (default: true)
+  inference_keys: []             # List of valid inference API keys
+  require_management_auth: true  # Require API key for management endpoints (default: true)
+  management_keys: []            # List of valid management API keys
+```
+
+**Environment Variables:**
+- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
+- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
+- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
+- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
+
 ### Example Configuration
 
 ```yaml
@@ -127,6 +161,12 @@ instances:
   default_auto_restart: true
   default_max_restarts: 5
   default_restart_delay: 10
+
+auth:
+  require_inference_auth: true
+  inference_keys: ["sk-inference-abc123"]
+  require_management_auth: true
+  management_keys: ["sk-management-xyz456"]
 ```
 
 ## Usage
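The authentication flow documented above is easiest to see with a small client. The sketch below is illustrative only: it calls the OpenAI-compatible `/v1/models` endpoint with the `Authorization: Bearer <key>` header, reusing the placeholder key and default port from the example configuration.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder values taken from the example configuration above.
	const baseURL = "http://localhost:8080"
	const inferenceKey = "sk-inference-abc123"

	req, err := http.NewRequest("GET", baseURL+"/v1/models", nil)
	if err != nil {
		panic(err)
	}
	// Any of the three documented methods works; the Bearer header is shown here.
	req.Header.Set("Authorization", "Bearer "+inferenceKey)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status)
	fmt.Println(string(body))
}
```

The `X-API-Key` header or the `api_key` query parameter can be substituted for the Bearer header without changing anything else.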
docs/docs.go (82 changes)

@@ -21,6 +21,11 @@ const docTemplate = `{
     "paths": {
         "/instances": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of all instances managed by the server",
                 "tags": [
                     "instances"
@@ -47,6 +52,11 @@ const docTemplate = `{
         },
         "/instances/{name}": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the details of a specific instance by name",
                 "tags": [
                     "instances"
@@ -83,6 +93,11 @@ const docTemplate = `{
                 }
             },
             "put": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Updates the configuration of a specific instance by name",
                 "consumes": [
                     "application/json"
@@ -131,6 +146,11 @@ const docTemplate = `{
                 }
             },
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Creates a new instance with the provided configuration options",
                 "consumes": [
                     "application/json"
@@ -179,6 +199,11 @@ const docTemplate = `{
                 }
             },
             "delete": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Stops and removes a specific instance by name",
                 "tags": [
                     "instances"
@@ -214,6 +239,11 @@ const docTemplate = `{
         },
         "/instances/{name}/logs": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the logs from a specific instance by name with optional line limit",
                 "tags": [
                     "instances"
@@ -258,6 +288,11 @@ const docTemplate = `{
         },
         "/instances/{name}/proxy": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                 "tags": [
                     "instances"
@@ -297,6 +332,11 @@ const docTemplate = `{
                 }
             },
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                 "tags": [
                     "instances"
@@ -338,6 +378,11 @@ const docTemplate = `{
         },
         "/instances/{name}/restart": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Restarts a specific instance by name",
                 "tags": [
                     "instances"
@@ -376,6 +421,11 @@ const docTemplate = `{
         },
         "/instances/{name}/start": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Starts a specific instance by name",
                 "tags": [
                     "instances"
@@ -414,6 +464,11 @@ const docTemplate = `{
         },
         "/instances/{name}/stop": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Stops a specific instance by name",
                 "tags": [
                     "instances"
@@ -452,6 +507,11 @@ const docTemplate = `{
         },
         "/server/devices": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of available devices for the llama server",
                 "tags": [
                     "server"
@@ -475,6 +535,11 @@ const docTemplate = `{
         },
         "/server/help": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the help text for the llama server command",
                 "tags": [
                     "server"
@@ -498,6 +563,11 @@ const docTemplate = `{
         },
         "/server/version": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the version of the llama server command",
                 "tags": [
                     "server"
@@ -521,7 +591,12 @@ const docTemplate = `{
         },
         "/v1/": {
             "post": {
-                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body",
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the ` + "`" + `Authorization` + "`" + ` header.",
                 "consumes": [
                     "application/json"
                 ],
@@ -550,6 +625,11 @@ const docTemplate = `{
         },
         "/v1/models": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of instances in a format compatible with OpenAI API",
                 "tags": [
                     "openai"

@@ -14,6 +14,11 @@
     "paths": {
         "/instances": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of all instances managed by the server",
                 "tags": [
                     "instances"
@@ -40,6 +45,11 @@
         },
         "/instances/{name}": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the details of a specific instance by name",
                 "tags": [
                     "instances"
@@ -76,6 +86,11 @@
                 }
             },
             "put": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Updates the configuration of a specific instance by name",
                 "consumes": [
                     "application/json"
@@ -124,6 +139,11 @@
                 }
             },
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Creates a new instance with the provided configuration options",
                 "consumes": [
                     "application/json"
@@ -172,6 +192,11 @@
                 }
             },
             "delete": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Stops and removes a specific instance by name",
                 "tags": [
                     "instances"
@@ -207,6 +232,11 @@
         },
         "/instances/{name}/logs": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the logs from a specific instance by name with optional line limit",
                 "tags": [
                     "instances"
@@ -251,6 +281,11 @@
         },
         "/instances/{name}/proxy": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                 "tags": [
                     "instances"
@@ -290,6 +325,11 @@
                 }
             },
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
                 "tags": [
                     "instances"
@@ -331,6 +371,11 @@
         },
         "/instances/{name}/restart": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Restarts a specific instance by name",
                 "tags": [
                     "instances"
@@ -369,6 +414,11 @@
         },
         "/instances/{name}/start": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Starts a specific instance by name",
                 "tags": [
                     "instances"
@@ -407,6 +457,11 @@
         },
         "/instances/{name}/stop": {
             "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Stops a specific instance by name",
                 "tags": [
                     "instances"
@@ -445,6 +500,11 @@
         },
         "/server/devices": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of available devices for the llama server",
                 "tags": [
                     "server"
@@ -468,6 +528,11 @@
         },
         "/server/help": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the help text for the llama server command",
                 "tags": [
                     "server"
@@ -491,6 +556,11 @@
         },
         "/server/version": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns the version of the llama server command",
                 "tags": [
                     "server"
@@ -514,7 +584,12 @@
         },
         "/v1/": {
             "post": {
-                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body",
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.",
                 "consumes": [
                     "application/json"
                 ],
@@ -543,6 +618,11 @@
         },
         "/v1/models": {
             "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
                 "description": "Returns a list of instances in a format compatible with OpenAI API",
                 "tags": [
                     "openai"

@@ -399,6 +399,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List all instances
       tags:
       - instances
@@ -422,6 +424,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Delete an instance
       tags:
       - instances
@@ -446,6 +450,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get details of a specific instance
       tags:
       - instances
@@ -478,6 +484,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Create and start a new instance
       tags:
       - instances
@@ -510,6 +518,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Update an instance's configuration
       tags:
       - instances
@@ -540,6 +550,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get logs from a specific instance
       tags:
       - instances
@@ -568,6 +580,8 @@ paths:
           description: Instance is not running
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Proxy requests to a specific instance
       tags:
       - instances
@@ -595,6 +609,8 @@ paths:
           description: Instance is not running
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Proxy requests to a specific instance
       tags:
       - instances
@@ -620,6 +636,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Restart a running instance
       tags:
       - instances
@@ -645,6 +663,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Start a stopped instance
       tags:
       - instances
@@ -670,6 +690,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Stop a running instance
       tags:
       - instances
@@ -685,6 +707,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List available devices for llama server
       tags:
       - server
@@ -700,6 +724,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get help for llama server
       tags:
       - server
@@ -715,6 +741,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: Get version of llama server
       tags:
       - server
@@ -723,7 +751,8 @@ paths:
       consumes:
       - application/json
       description: Handles all POST requests to /v1/*, routing to the appropriate
-        instance based on the request body
+        instance based on the request body. Requires API key authentication via the
+        `Authorization` header.
       responses:
         "200":
           description: OpenAI response
@@ -735,6 +764,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: OpenAI-compatible proxy endpoint
       tags:
       - openai
@@ -751,6 +782,8 @@ paths:
           description: Internal Server Error
           schema:
             type: string
+      security:
+      - ApiKeyAuth: []
       summary: List instances in OpenAI-compatible format
       tags:
       - openai

@@ -14,6 +14,7 @@ import (
 type Config struct {
     Server    ServerConfig    `yaml:"server"`
     Instances InstancesConfig `yaml:"instances"`
+    Auth      AuthConfig      `yaml:"auth"`
 }
 
 // ServerConfig contains HTTP server configuration
@@ -26,6 +27,9 @@ type ServerConfig struct {
 
     // Allowed origins for CORS (e.g., "http://localhost:3000")
     AllowedOrigins []string `yaml:"allowed_origins"`
+
+    // Enable Swagger UI for API documentation
+    EnableSwagger bool `yaml:"enable_swagger"`
 }
 
 // InstancesConfig contains instance management configuration
@@ -52,6 +56,22 @@ type InstancesConfig struct {
     DefaultRestartDelay int `yaml:"default_restart_delay"`
 }
 
+// AuthConfig contains authentication settings
+type AuthConfig struct {
+
+    // Require authentication for OpenAI compatible inference endpoints
+    RequireInferenceAuth bool `yaml:"require_inference_auth"`
+
+    // List of keys for OpenAI compatible inference endpoints
+    InferenceKeys []string `yaml:"inference_keys"`
+
+    // Require authentication for management endpoints
+    RequireManagementAuth bool `yaml:"require_management_auth"`
+
+    // List of keys for management endpoints
+    ManagementKeys []string `yaml:"management_keys"`
+}
+
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
@@ -63,6 +83,7 @@ func LoadConfig(configPath string) (Config, error) {
             Host:           "0.0.0.0",
             Port:           8080,
             AllowedOrigins: []string{"*"}, // Default to allow all origins
+            EnableSwagger:  false,
         },
         Instances: InstancesConfig{
             PortRange: [2]int{8000, 9000},
@@ -73,6 +94,12 @@ func LoadConfig(configPath string) (Config, error) {
             DefaultMaxRestarts:  3,
             DefaultRestartDelay: 5,
         },
+        Auth: AuthConfig{
+            RequireInferenceAuth:  true,
+            InferenceKeys:         []string{},
+            RequireManagementAuth: true,
+            ManagementKeys:        []string{},
+        },
     }
 
     // 2. Load from config file
@@ -121,6 +148,14 @@ func loadEnvVars(cfg *Config) {
             cfg.Server.Port = p
         }
     }
+    if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
+        cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
+    }
+    if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
+        if b, err := strconv.ParseBool(enableSwagger); err == nil {
+            cfg.Server.EnableSwagger = b
+        }
+    }
 
     // Instance config
     if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
@@ -154,6 +189,23 @@ func loadEnvVars(cfg *Config) {
             cfg.Instances.DefaultRestartDelay = seconds
         }
     }
+    // Auth config
+    if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
+        if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
+            cfg.Auth.RequireInferenceAuth = b
+        }
+    }
+    if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
+        cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
+    }
+    if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
+        if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
+            cfg.Auth.RequireManagementAuth = b
+        }
+    }
+    if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
+        cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
+    }
 }
 
 // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"

@@ -29,6 +29,7 @@ func NewHandler(im InstanceManager, config Config) *Handler {
 // @Summary Get help for llama server
 // @Description Returns the help text for the llama server command
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Help text"
 // @Failure 500 {string} string "Internal Server Error"
@@ -50,6 +51,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
 // @Summary Get version of llama server
 // @Description Returns the version of the llama server command
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
@@ -71,6 +73,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags server
+// @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
@@ -92,6 +95,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
 // @Summary List all instances
 // @Description Returns a list of all instances managed by the server
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {array} Instance "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
@@ -116,6 +120,7 @@ func (h *Handler) ListInstances() http.HandlerFunc {
 // @Summary Create and start a new instance
 // @Description Creates a new instance with the provided configuration options
 // @Tags instances
+// @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
@@ -157,6 +162,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
 // @Summary Get details of a specific instance
 // @Description Returns the details of a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} Instance "Instance details"
@@ -189,6 +195,7 @@ func (h *Handler) GetInstance() http.HandlerFunc {
 // @Summary Update an instance's configuration
 // @Description Updates the configuration of a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
@@ -229,6 +236,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
 // @Summary Start a stopped instance
 // @Description Starts a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} Instance "Started instance details"
@@ -261,6 +269,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
 // @Summary Stop a running instance
 // @Description Stops a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} Instance "Stopped instance details"
@@ -293,6 +302,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
 // @Summary Restart a running instance
 // @Description Restarts a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} Instance "Restarted instance details"
@@ -325,6 +335,7 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
 // @Summary Delete an instance
 // @Description Stops and removes a specific instance by name
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 204 "No Content"
 // @Failure 400 {string} string "Invalid name format"
@@ -351,6 +362,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc {
 // @Summary Get logs from a specific instance
 // @Description Returns the logs from a specific instance by name with optional line limit
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Param lines query string false "Number of lines to retrieve (default: all lines)"
 // @Produces text/plain
@@ -398,6 +410,7 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 // @Summary Proxy requests to a specific instance
 // @Description Forwards HTTP requests to the llama-server instance running on a specific port
 // @Tags instances
+// @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 200 "Request successfully proxied to instance"
 // @Failure 400 {string} string "Invalid name format"
@@ -462,6 +475,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
 // @Tags openai
+// @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
 // @Failure 500 {string} string "Internal Server Error"
@@ -499,8 +513,9 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 
 // OpenAIProxy godoc
 // @Summary OpenAI-compatible proxy endpoint
-// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body
+// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
 // @Tags openai
+// @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Success 200 "OpenAI response"
pkg/middleware.go (new file, 188 lines)

@@ -0,0 +1,188 @@
package llamactl

import (
    "crypto/rand"
    "crypto/subtle"
    "encoding/hex"
    "fmt"
    "log"
    "net/http"
    "os"
    "strings"
)

type KeyType int

const (
    KeyTypeInference KeyType = iota
    KeyTypeManagement
)

type APIAuthMiddleware struct {
    requireInferenceAuth  bool
    inferenceKeys         map[string]bool
    requireManagementAuth bool
    managementKeys        map[string]bool
}

// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {

    var generated bool = false

    inferenceAPIKeys := make(map[string]bool)
    managementAPIKeys := make(map[string]bool)

    const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    if config.RequireManagementAuth && len(config.ManagementKeys) == 0 {
        key := generateAPIKey(KeyTypeManagement)
        managementAPIKeys[key] = true
        generated = true
        fmt.Printf("%s\n⚠️ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
        fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
    }
    for _, key := range config.ManagementKeys {
        managementAPIKeys[key] = true
    }

    if config.RequireInferenceAuth && len(config.InferenceKeys) == 0 {
        key := generateAPIKey(KeyTypeInference)
        inferenceAPIKeys[key] = true
        generated = true
        fmt.Printf("%s\n⚠️ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
        fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
    }
    for _, key := range config.InferenceKeys {
        inferenceAPIKeys[key] = true
    }

    if generated {
        fmt.Printf("%s\n⚠️ IMPORTANT\n%s\n", banner, banner)
        fmt.Println("• These keys are auto-generated and will change on restart")
        fmt.Println("• For production, add explicit keys to your configuration")
        fmt.Println("• Copy these keys before they disappear from the terminal")
        fmt.Println(banner)
    }

    return &APIAuthMiddleware{
        requireInferenceAuth:  config.RequireInferenceAuth,
        inferenceKeys:         inferenceAPIKeys,
        requireManagementAuth: config.RequireManagementAuth,
        managementKeys:        managementAPIKeys,
    }
}

// generateAPIKey creates a cryptographically secure API key
func generateAPIKey(keyType KeyType) string {
    // Generate 32 random bytes (256 bits)
    randomBytes := make([]byte, 32)

    var prefix string

    switch keyType {
    case KeyTypeInference:
        prefix = "sk-inference"
    case KeyTypeManagement:
        prefix = "sk-management"
    default:
        prefix = "sk-unknown"
    }

    if _, err := rand.Read(randomBytes); err != nil {
        log.Printf("Warning: Failed to generate secure random key, using fallback")
        // Fallback to a less secure method if crypto/rand fails
        return fmt.Sprintf("%s-fallback-%d", prefix, os.Getpid())
    }

    // Convert to hex and add prefix
    return fmt.Sprintf("%s-%s", prefix, hex.EncodeToString(randomBytes))
}

// AuthMiddleware returns a middleware that checks API keys for the given key type
func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) http.Handler {
    return func(next http.Handler) http.Handler {
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            if r.Method == "OPTIONS" {
                next.ServeHTTP(w, r)
                return
            }

            apiKey := a.extractAPIKey(r)
            if apiKey == "" {
                a.unauthorized(w, "Missing API key")
                return
            }

            var isValid bool
            switch keyType {
            case KeyTypeInference:
                // Management keys also work for OpenAI endpoints (higher privilege)
                isValid = a.isValidKey(apiKey, KeyTypeInference) || a.isValidKey(apiKey, KeyTypeManagement)
            case KeyTypeManagement:
                isValid = a.isValidKey(apiKey, KeyTypeManagement)
            default:
                isValid = false
            }

            if !isValid {
                a.unauthorized(w, "Invalid API key")
                return
            }

            next.ServeHTTP(w, r)
        })
    }
}

// extractAPIKey extracts the API key from the request
func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
    // Check Authorization header: "Bearer sk-..."
    if auth := r.Header.Get("Authorization"); auth != "" {
        if after, ok := strings.CutPrefix(auth, "Bearer "); ok {
            return after
        }
    }

    // Check X-API-Key header
    if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
        return apiKey
    }

    // Check query parameter
    if apiKey := r.URL.Query().Get("api_key"); apiKey != "" {
        return apiKey
    }

    return ""
}

// isValidKey checks if the provided API key is valid for the given key type
func (a *APIAuthMiddleware) isValidKey(providedKey string, keyType KeyType) bool {
    var validKeys map[string]bool

    switch keyType {
    case KeyTypeInference:
        validKeys = a.inferenceKeys
    case KeyTypeManagement:
        validKeys = a.managementKeys
    default:
        return false
    }

    for validKey := range validKeys {
        if len(providedKey) == len(validKey) &&
            subtle.ConstantTimeCompare([]byte(providedKey), []byte(validKey)) == 1 {
            return true
        }
    }
    return false
}

// unauthorized sends an unauthorized response
func (a *APIAuthMiddleware) unauthorized(w http.ResponseWriter, message string) {
    w.Header().Set("Content-Type", "application/json")
    w.WriteHeader(http.StatusUnauthorized)
    response := fmt.Sprintf(`{"error": {"message": "%s", "type": "authentication_error"}}`, message)
    w.Write([]byte(response))
}
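For context, here is a minimal sketch of how this middleware could sit in front of handlers. The routes and handler bodies below are illustrative stand-ins, not the project's actual router wiring; only `AuthConfig`, `NewAPIAuthMiddleware`, `AuthMiddleware`, and the key types come from the code above.

```go
package main

import (
	"log"
	"net/http"

	llamactl "llamactl/pkg"
)

func main() {
	auth := llamactl.NewAPIAuthMiddleware(llamactl.AuthConfig{
		RequireInferenceAuth:  true,
		InferenceKeys:         []string{"sk-inference-abc123"}, // placeholder keys
		RequireManagementAuth: true,
		ManagementKeys:        []string{"sk-management-xyz456"},
	})

	// Hypothetical handlers standing in for the real management and OpenAI routes.
	instances := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`[]`))
	})
	models := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte(`{"object":"list","data":[]}`))
	})

	mux := http.NewServeMux()
	// Management endpoints require a management key; inference endpoints accept
	// an inference key or a management key (see AuthMiddleware above).
	mux.Handle("/instances", auth.AuthMiddleware(llamactl.KeyTypeManagement)(instances))
	mux.Handle("/v1/models", auth.AuthMiddleware(llamactl.KeyTypeInference)(models))

	log.Fatal(http.ListenAndServe(":8080", mux))
}
```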
354
pkg/middleware_test.go
Normal file
354
pkg/middleware_test.go
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
package llamactl_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
llamactl "llamactl/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAuthMiddleware(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
keyType llamactl.KeyType
|
||||||
|
inferenceKeys []string
|
||||||
|
managementKeys []string
|
||||||
|
requestKey string
|
||||||
|
method string
|
||||||
|
expectedStatus int
|
||||||
|
}{
|
||||||
|
// Valid key tests
|
||||||
|
{
|
||||||
|
name: "valid inference key for inference",
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
inferenceKeys: []string{"sk-inference-valid123"},
|
||||||
|
requestKey: "sk-inference-valid123",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid management key for inference", // Management keys work for inference
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
managementKeys: []string{"sk-management-admin123"},
|
||||||
|
requestKey: "sk-management-admin123",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid management key for management",
|
||||||
|
keyType: llamactl.KeyTypeManagement,
|
||||||
|
managementKeys: []string{"sk-management-admin123"},
|
||||||
|
requestKey: "sk-management-admin123",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
|
||||||
|
// Invalid key tests
|
||||||
|
{
|
||||||
|
name: "inference key for management should fail",
|
||||||
|
keyType: llamactl.KeyTypeManagement,
|
||||||
|
inferenceKeys: []string{"sk-inference-user123"},
|
||||||
|
requestKey: "sk-inference-user123",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusUnauthorized,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid inference key",
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
inferenceKeys: []string{"sk-inference-valid123"},
|
||||||
|
requestKey: "sk-inference-invalid",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusUnauthorized,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing inference key",
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
inferenceKeys: []string{"sk-inference-valid123"},
|
||||||
|
requestKey: "",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusUnauthorized,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid management key",
|
||||||
|
keyType: llamactl.KeyTypeManagement,
|
||||||
|
managementKeys: []string{"sk-management-valid123"},
|
||||||
|
requestKey: "sk-management-invalid",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusUnauthorized,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing management key",
|
||||||
|
keyType: llamactl.KeyTypeManagement,
|
||||||
|
managementKeys: []string{"sk-management-valid123"},
|
||||||
|
requestKey: "",
|
||||||
|
method: "GET",
|
||||||
|
expectedStatus: http.StatusUnauthorized,
|
||||||
|
},
|
||||||
|
|
||||||
|
// OPTIONS requests should always pass
|
||||||
|
{
|
||||||
|
name: "OPTIONS request bypasses inference auth",
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
inferenceKeys: []string{"sk-inference-valid123"},
|
||||||
|
requestKey: "",
|
||||||
|
method: "OPTIONS",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "OPTIONS request bypasses management auth",
|
||||||
|
keyType: llamactl.KeyTypeManagement,
|
||||||
|
managementKeys: []string{"sk-management-valid123"},
|
||||||
|
requestKey: "",
|
||||||
|
method: "OPTIONS",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
|
||||||
|
// Cross-key-type validation
|
||||||
|
{
|
||||||
|
name: "management key works for inference endpoint",
|
||||||
|
keyType: llamactl.KeyTypeInference,
|
||||||
|
inferenceKeys: []string{},
|
||||||
|
managementKeys: []string{"sk-management-admin"},
|
||||||
|
requestKey: "sk-management-admin",
|
||||||
|
method: "POST",
|
||||||
|
expectedStatus: http.StatusOK,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
config := llamactl.AuthConfig{
|
||||||
|
InferenceKeys: tt.inferenceKeys,
|
||||||
|
ManagementKeys: tt.managementKeys,
|
||||||
|
}
|
||||||
|
middleware := llamactl.NewAPIAuthMiddleware(config)
|
||||||
|
|
||||||
|
// Create test request
|
||||||
|
req := httptest.NewRequest(tt.method, "/test", nil)
|
||||||
|
if tt.requestKey != "" {
|
||||||
|
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create test handler using the appropriate middleware
|
||||||
|
var handler http.Handler
|
||||||
|
if tt.keyType == llamactl.KeyTypeInference {
|
||||||
|
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
} else {
|
||||||
|
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute request
|
||||||
|
recorder := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(recorder, req)
|
||||||
|
|
||||||
|
if recorder.Code != tt.expectedStatus {
|
||||||
|
t.Errorf("AuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check that unauthorized responses have proper format
|
||||||
|
if recorder.Code == http.StatusUnauthorized {
|
||||||
|
contentType := recorder.Header().Get("Content-Type")
|
||||||
|
if contentType != "application/json" {
|
||||||
|
t.Errorf("Unauthorized response Content-Type = %v, expected application/json", contentType)
|
||||||
|
}
|
||||||
|
|
||||||
|
body := recorder.Body.String()
|
||||||
|
if !strings.Contains(body, `"type": "authentication_error"`) {
|
||||||
|
t.Errorf("Unauthorized response missing proper error type: %v", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGenerateAPIKey(t *testing.T) {
	tests := []struct {
		name    string
		keyType llamactl.KeyType
	}{
		{"inference key generation", llamactl.KeyTypeInference},
		{"management key generation", llamactl.KeyTypeManagement},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Test auto-generation by creating config that will trigger it
			var config llamactl.AuthConfig
			if tt.keyType == llamactl.KeyTypeInference {
				config.RequireInferenceAuth = true
				config.InferenceKeys = []string{} // Empty to trigger generation
			} else {
				config.RequireManagementAuth = true
				config.ManagementKeys = []string{} // Empty to trigger generation
			}

			// Create middleware - this should trigger key generation
			middleware := llamactl.NewAPIAuthMiddleware(config)

			// Test that auth is required (meaning a key was generated)
			req := httptest.NewRequest("GET", "/", nil)
			recorder := httptest.NewRecorder()

			var handler http.Handler
			if tt.keyType == llamactl.KeyTypeInference {
				handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))
			} else {
				handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))
			}

			handler.ServeHTTP(recorder, req)

			// Should be unauthorized without a key (proving that a key was generated and auth is working)
			if recorder.Code != http.StatusUnauthorized {
				t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
			}

			// Test uniqueness by creating another middleware instance
			middleware2 := llamactl.NewAPIAuthMiddleware(config)

			req2 := httptest.NewRequest("GET", "/", nil)
			recorder2 := httptest.NewRecorder()

			if tt.keyType == llamactl.KeyTypeInference {
				handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))
				handler2.ServeHTTP(recorder2, req2)
			} else {
				handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))
				handler2.ServeHTTP(recorder2, req2)
			}

			// Both should require auth (proving keys were generated for both instances)
			if recorder2.Code != http.StatusUnauthorized {
				t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
			}
		})
	}
}

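TestGenerateAPIKey verifies generation only indirectly: a middleware built with auth required but no keys configured must still reject unauthenticated requests. The generator itself is not part of the code shown here; the following is a rough sketch of the kind of helper it implies, assuming random keys with an `sk-`-style prefix like the test fixtures above. It is hypothetical and not the llamactl implementation:

```go
package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// generateAPIKeySketch is a hypothetical helper, not the llamactl implementation:
// it returns a random key with an "sk-"-style prefix, similar to the fixtures above.
func generateAPIKeySketch(prefix string) (string, error) {
	buf := make([]byte, 32)
	if _, err := rand.Read(buf); err != nil {
		return "", err
	}
	return prefix + hex.EncodeToString(buf), nil
}

func main() {
	key, err := generateAPIKeySketch("sk-inference-")
	if err != nil {
		panic(err)
	}
	fmt.Println(key) // e.g. sk-inference-<64 hex characters>
}
```
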
func TestAutoGeneration(t *testing.T) {
	tests := []struct {
		name               string
		requireInference   bool
		requireManagement  bool
		providedInference  []string
		providedManagement []string
		shouldGenerateInf  bool // Whether inference key should be generated
		shouldGenerateMgmt bool // Whether management key should be generated
	}{
		{
			name:               "inference auth required, keys provided - no generation",
			requireInference:   true,
			requireManagement:  false,
			providedInference:  []string{"sk-inference-provided"},
			providedManagement: []string{},
			shouldGenerateInf:  false,
			shouldGenerateMgmt: false,
		},
		{
			name:               "inference auth required, no keys - should auto-generate",
			requireInference:   true,
			requireManagement:  false,
			providedInference:  []string{},
			providedManagement: []string{},
			shouldGenerateInf:  true,
			shouldGenerateMgmt: false,
		},
		{
			name:               "management auth required, keys provided - no generation",
			requireInference:   false,
			requireManagement:  true,
			providedInference:  []string{},
			providedManagement: []string{"sk-management-provided"},
			shouldGenerateInf:  false,
			shouldGenerateMgmt: false,
		},
		{
			name:               "management auth required, no keys - should auto-generate",
			requireInference:   false,
			requireManagement:  true,
			providedInference:  []string{},
			providedManagement: []string{},
			shouldGenerateInf:  false,
			shouldGenerateMgmt: true,
		},
		{
			name:               "both required, both provided - no generation",
			requireInference:   true,
			requireManagement:  true,
			providedInference:  []string{"sk-inference-provided"},
			providedManagement: []string{"sk-management-provided"},
			shouldGenerateInf:  false,
			shouldGenerateMgmt: false,
		},
		{
			name:               "both required, none provided - should auto-generate both",
			requireInference:   true,
			requireManagement:  true,
			providedInference:  []string{},
			providedManagement: []string{},
			shouldGenerateInf:  true,
			shouldGenerateMgmt: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			config := llamactl.AuthConfig{
				RequireInferenceAuth:  tt.requireInference,
				RequireManagementAuth: tt.requireManagement,
				InferenceKeys:         tt.providedInference,
				ManagementKeys:        tt.providedManagement,
			}

			middleware := llamactl.NewAPIAuthMiddleware(config)

			// Test inference behavior if inference auth is required
			if tt.requireInference {
				req := httptest.NewRequest("GET", "/v1/models", nil)
				recorder := httptest.NewRecorder()

				handler := middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))

				handler.ServeHTTP(recorder, req)

				// Should always be unauthorized without a key (since middleware assumes auth is required)
				if recorder.Code != http.StatusUnauthorized {
					t.Errorf("Expected unauthorized for inference without key, got status %v", recorder.Code)
				}
			}

			// Test management behavior if management auth is required
			if tt.requireManagement {
				req := httptest.NewRequest("GET", "/api/v1/instances", nil)
				recorder := httptest.NewRecorder()

				handler := middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					w.WriteHeader(http.StatusOK)
				}))

				handler.ServeHTTP(recorder, req)

				// Should always be unauthorized without a key (since middleware assumes auth is required)
				if recorder.Code != http.StatusUnauthorized {
					t.Errorf("Expected unauthorized for management without key, got status %v", recorder.Code)
				}
			}
		})
	}
}

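Taken together, the tests above pin down the middleware's observable contract: the visible OPTIONS case expects 200, a management key is accepted on an inference endpoint, and rejected requests must return `application/json` containing `"type": "authentication_error"`. Below is a minimal sketch that satisfies that contract under those assumptions; it is illustrative only and not the code added by this commit:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
)

// KeyType mirrors the two key classes exercised by the tests above.
type KeyType int

const (
	KeyTypeInference KeyType = iota
	KeyTypeManagement
)

// authSketch is a hypothetical middleware illustrating the behavior the tests
// check for; it is not the llamactl implementation.
type authSketch struct {
	inferenceKeys  map[string]bool
	managementKeys map[string]bool
}

func (a *authSketch) Middleware(kt KeyType) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Assumed from the OPTIONS test case: CORS preflight requests pass through.
			if r.Method == http.MethodOptions {
				next.ServeHTTP(w, r)
				return
			}

			key := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")

			// Management keys also grant access to inference endpoints,
			// matching the cross-key-type test case above.
			ok := a.managementKeys[key] || (kt == KeyTypeInference && a.inferenceKeys[key])
			if !ok {
				w.Header().Set("Content-Type", "application/json")
				w.WriteHeader(http.StatusUnauthorized)
				fmt.Fprint(w, `{"error": {"type": "authentication_error", "message": "invalid or missing API key"}}`)
				return
			}
			next.ServeHTTP(w, r)
		})
	}
}

func main() {
	mw := &authSketch{
		inferenceKeys:  map[string]bool{"sk-inference-example": true},
		managementKeys: map[string]bool{"sk-management-admin": true},
	}
	handler := mw.Middleware(KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))

	req := httptest.NewRequest("POST", "/v1/chat/completions", nil)
	req.Header.Set("Authorization", "Bearer sk-management-admin")
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, req)
	fmt.Println(rec.Code) // 200: a management key is accepted on an inference endpoint
}
```
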
@@ -26,12 +26,22 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		MaxAge: 300,
 	}))
 
-	r.Get("/swagger/*", httpSwagger.Handler(
-		httpSwagger.URL("/swagger/doc.json"),
-	))
+	// Add API authentication middleware
+	authMiddleware := NewAPIAuthMiddleware(handler.config.Auth)
+
+	if handler.config.Server.EnableSwagger {
+		r.Get("/swagger/*", httpSwagger.Handler(
+			httpSwagger.URL("/swagger/doc.json"),
+		))
+	}
 
 	// Define routes
 	r.Route("/api/v1", func(r chi.Router) {
+
+		if authMiddleware != nil && handler.config.Auth.RequireManagementAuth {
+			r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
+		}
+
 		r.Route("/server", func(r chi.Router) {
 			r.Get("/help", handler.HelpHandler())
 			r.Get("/version", handler.VersionHandler())
@@ -61,17 +71,25 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		})
 	})
 
-	r.Get(("/v1/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
-
-	// OpenAI-compatible proxy endpoint
-	// Handles all POST requests to /v1/*, including:
-	// - /v1/completions
-	// - /v1/chat/completions
-	// - /v1/embeddings
-	// - /v1/rerank
-	// - /v1/reranking
-	// The instance/model to use is determined by the request body.
-	r.Post("/v1/*", handler.OpenAIProxy())
+	r.Route(("/v1"), func(r chi.Router) {
+
+		if authMiddleware != nil && handler.config.Auth.RequireInferenceAuth {
+			r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
+		}
+
+		r.Get(("/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
+
+		// OpenAI-compatible proxy endpoint
+		// Handles all POST requests to /v1/*, including:
+		// - /v1/completions
+		// - /v1/chat/completions
+		// - /v1/embeddings
+		// - /v1/rerank
+		// - /v1/reranking
+		// The instance/model to use is determined by the request body.
+		r.Post("/*", handler.OpenAIProxy())
+
+	})
 
 	// Serve WebUI files
 	if err := webui.SetupWebUI(r); err != nil {