Merge pull request #7 from lordmathis/feat/api-key-auth

Feature: Add API key auth for OpenAI compatible endpoints and llamactl management API endpoints on backend
This commit is contained in:
2025-07-30 21:36:40 +02:00
committed by GitHub
9 changed files with 879 additions and 19 deletions

View File

@@ -54,12 +54,14 @@ go build -o llamactl ./cmd/server
## Configuration
llamactl can be configured via configuration files or environment variables. Configuration is loaded in the following order of precedence:
1. Hardcoded defaults
2. Configuration file
3. Environment variables
### Configuration Files
Configuration files are searched in the following locations:
@@ -76,19 +78,35 @@ Configuration files are searched in the following locations:
You can specify the path to the config file with the `LLAMACTL_CONFIG_PATH` environment variable.
## API Key Authentication
llamactl now supports API key authentication for both management and inference (OpenAI-compatible) endpoints. There are separate keys for management and inference APIs: management keys grant full access, while inference keys grant access only to the OpenAI-compatible endpoints.
**How to Use:**
- Pass your API key in requests using one of:
- `Authorization: Bearer <key>` header
- `X-API-Key: <key>` header
- `api_key=<key>` query parameter
**Auto-generated keys**: If no keys are set and authentication is required, a key will be generated and printed to the terminal at startup. For production, set your own keys in config or environment variables.
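As a rough illustration (placeholder values: llamactl listening on `localhost:8080`, `sk-inference-abc123` configured as an inference key), each of the three methods above looks like this with curl:

```bash
# Authorization header
curl -H "Authorization: Bearer sk-inference-abc123" http://localhost:8080/v1/models

# X-API-Key header
curl -H "X-API-Key: sk-inference-abc123" http://localhost:8080/v1/models

# Query parameter
curl "http://localhost:8080/v1/models?api_key=sk-inference-abc123"
```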
### Configuration Options
#### Server Configuration
```yaml
server:
  host: "0.0.0.0"         # Server host to bind to (default: "0.0.0.0")
  port: 8080              # Server port to bind to (default: 8080)
  allowed_origins: ["*"]  # CORS allowed origins (default: ["*"])
  enable_swagger: false   # Enable Swagger UI (default: false)
```
**Environment Variables:**
- `LLAMACTL_HOST` - Server host
- `LLAMACTL_PORT` - Server port
- `LLAMACTL_ALLOWED_ORIGINS` - Comma-separated CORS origins
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
#### Instance Configuration
@@ -112,6 +130,22 @@ instances:
- `LLAMACTL_DEFAULT_MAX_RESTARTS` - Default maximum restarts
- `LLAMACTL_DEFAULT_RESTART_DELAY` - Default restart delay in seconds
#### Auth Configuration
```yaml
auth:
require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
inference_keys: [] # List of valid inference API keys
require_management_auth: true # Require API key for management endpoints (default: true)
management_keys: [] # List of valid management API keys
```
**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
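As a sketch (illustrative key values, assuming the `llamactl` binary built earlier), the same settings can be supplied via the environment instead of a config file:

```bash
export LLAMACTL_REQUIRE_MANAGEMENT_AUTH=true
export LLAMACTL_MANAGEMENT_KEYS="sk-management-xyz456"
export LLAMACTL_REQUIRE_INFERENCE_AUTH=true
export LLAMACTL_INFERENCE_KEYS="sk-inference-abc123,sk-inference-def456"
./llamactl
```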
### Example Configuration
```yaml
@@ -127,6 +161,12 @@ instances:
  default_auto_restart: true
  default_max_restarts: 5
  default_restart_delay: 10

auth:
  require_inference_auth: true
  inference_keys: ["sk-inference-abc123"]
  require_management_auth: true
  management_keys: ["sk-management-xyz456"]
```
## Usage

View File

@@ -21,6 +21,11 @@ const docTemplate = `{
"paths": {
"/instances": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all instances managed by the server",
"tags": [
"instances"
@@ -47,6 +52,11 @@ const docTemplate = `{
},
"/instances/{name}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the details of a specific instance by name",
"tags": [
"instances"
@@ -83,6 +93,11 @@ const docTemplate = `{
}
},
"put": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Updates the configuration of a specific instance by name",
"consumes": [
"application/json"
@@ -131,6 +146,11 @@ const docTemplate = `{
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
@@ -179,6 +199,11 @@ const docTemplate = `{
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
@@ -214,6 +239,11 @@ const docTemplate = `{
},
"/instances/{name}/logs": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
@@ -258,6 +288,11 @@ const docTemplate = `{
},
"/instances/{name}/proxy": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
@@ -297,6 +332,11 @@ const docTemplate = `{
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
@@ -338,6 +378,11 @@ const docTemplate = `{
},
"/instances/{name}/restart": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Restarts a specific instance by name",
"tags": [
"instances"
@@ -376,6 +421,11 @@ const docTemplate = `{
},
"/instances/{name}/start": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Starts a specific instance by name",
"tags": [
"instances"
@@ -414,6 +464,11 @@ const docTemplate = `{
},
"/instances/{name}/stop": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops a specific instance by name",
"tags": [
"instances"
@@ -452,6 +507,11 @@ const docTemplate = `{
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
@@ -475,6 +535,11 @@ const docTemplate = `{
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
@@ -498,6 +563,11 @@ const docTemplate = `{
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
@@ -521,7 +591,12 @@ const docTemplate = `{
},
"/v1/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the ` + "`" + `Authorization` + "`" + ` header.",
"consumes": [
"application/json"
],
@@ -550,6 +625,11 @@ const docTemplate = `{
},
"/v1/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of instances in a format compatible with OpenAI API",
"tags": [
"openai"

View File

@@ -14,6 +14,11 @@
"paths": {
"/instances": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of all instances managed by the server",
"tags": [
"instances"
@@ -40,6 +45,11 @@
},
"/instances/{name}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the details of a specific instance by name",
"tags": [
"instances"
@@ -76,6 +86,11 @@
}
},
"put": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Updates the configuration of a specific instance by name",
"consumes": [
"application/json"
@@ -124,6 +139,11 @@
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
@@ -172,6 +192,11 @@
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
@@ -207,6 +232,11 @@
},
"/instances/{name}/logs": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
@@ -251,6 +281,11 @@
},
"/instances/{name}/proxy": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
@@ -290,6 +325,11 @@
}
},
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
@@ -331,6 +371,11 @@
},
"/instances/{name}/restart": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Restarts a specific instance by name",
"tags": [
"instances"
@@ -369,6 +414,11 @@
},
"/instances/{name}/start": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Starts a specific instance by name",
"tags": [
"instances"
@@ -407,6 +457,11 @@
},
"/instances/{name}/stop": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Stops a specific instance by name",
"tags": [
"instances"
@@ -445,6 +500,11 @@
},
"/server/devices": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of available devices for the llama server",
"tags": [
"server"
@@ -468,6 +528,11 @@
},
"/server/help": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the help text for the llama server command",
"tags": [
"server"
@@ -491,6 +556,11 @@
},
"/server/version": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns the version of the llama server command",
"tags": [
"server"
@@ -514,7 +584,12 @@
},
"/v1/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.",
"consumes": [
"application/json"
],
@@ -543,6 +618,11 @@
},
"/v1/models": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a list of instances in a format compatible with OpenAI API",
"tags": [
"openai"

View File

@@ -399,6 +399,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List all instances
tags:
- instances
@@ -422,6 +424,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Delete an instance
tags:
- instances
@@ -446,6 +450,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get details of a specific instance
tags:
- instances
@@ -478,6 +484,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Create and start a new instance
tags:
- instances
@@ -510,6 +518,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Update an instance's configuration
tags:
- instances
@@ -540,6 +550,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get logs from a specific instance
tags:
- instances
@@ -568,6 +580,8 @@ paths:
description: Instance is not running
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
tags:
- instances
@@ -595,6 +609,8 @@ paths:
description: Instance is not running
schema:
type: string
security:
- ApiKeyAuth: []
summary: Proxy requests to a specific instance
tags:
- instances
@@ -620,6 +636,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Restart a running instance
tags:
- instances
@@ -645,6 +663,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Start a stopped instance
tags:
- instances
@@ -670,6 +690,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Stop a running instance
tags:
- instances
@@ -685,6 +707,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List available devices for llama server
tags:
- server
@@ -700,6 +724,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get help for llama server
tags:
- server
@@ -715,6 +741,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: Get version of llama server
tags:
- server
@@ -723,7 +751,8 @@ paths:
consumes:
- application/json
description: Handles all POST requests to /v1/*, routing to the appropriate
instance based on the request body. Requires API key authentication via the
`Authorization` header.
responses:
"200":
description: OpenAI response
@@ -735,6 +764,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: OpenAI-compatible proxy endpoint
tags:
- openai
@@ -751,6 +782,8 @@ paths:
description: Internal Server Error
schema:
type: string
security:
- ApiKeyAuth: []
summary: List instances in OpenAI-compatible format
tags:
- openai

View File

@@ -14,6 +14,7 @@ import (
type Config struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
Auth AuthConfig `yaml:"auth"`
}
// ServerConfig contains HTTP server configuration
@@ -26,6 +27,9 @@ type ServerConfig struct {
// Allowed origins for CORS (e.g., "http://localhost:3000")
AllowedOrigins []string `yaml:"allowed_origins"`
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
}
// InstancesConfig contains instance management configuration
@@ -52,6 +56,22 @@ type InstancesConfig struct {
DefaultRestartDelay int `yaml:"default_restart_delay"`
}
// AuthConfig contains authentication settings
type AuthConfig struct {
// Require authentication for OpenAI compatible inference endpoints
RequireInferenceAuth bool `yaml:"require_inference_auth"`
// List of keys for OpenAI compatible inference endpoints
InferenceKeys []string `yaml:"inference_keys"`
// Require authentication for management endpoints
RequireManagementAuth bool `yaml:"require_management_auth"`
// List of keys for management endpoints
ManagementKeys []string `yaml:"management_keys"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
@@ -63,6 +83,7 @@ func LoadConfig(configPath string) (Config, error) {
Host: "0.0.0.0",
Port: 8080,
AllowedOrigins: []string{"*"}, // Default to allow all origins
EnableSwagger: false,
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
@@ -73,6 +94,12 @@ func LoadConfig(configPath string) (Config, error) {
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
Auth: AuthConfig{
RequireInferenceAuth: true,
InferenceKeys: []string{},
RequireManagementAuth: true,
ManagementKeys: []string{},
},
}
// 2. Load from config file
@@ -121,6 +148,14 @@ func loadEnvVars(cfg *Config) {
cfg.Server.Port = p
}
}
if allowedOrigins := os.Getenv("LLAMACTL_ALLOWED_ORIGINS"); allowedOrigins != "" {
cfg.Server.AllowedOrigins = strings.Split(allowedOrigins, ",")
}
if enableSwagger := os.Getenv("LLAMACTL_ENABLE_SWAGGER"); enableSwagger != "" {
if b, err := strconv.ParseBool(enableSwagger); err == nil {
cfg.Server.EnableSwagger = b
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
@@ -154,6 +189,23 @@ func loadEnvVars(cfg *Config) {
cfg.Instances.DefaultRestartDelay = seconds
}
}
// Auth config
if requireInferenceAuth := os.Getenv("LLAMACTL_REQUIRE_INFERENCE_AUTH"); requireInferenceAuth != "" {
if b, err := strconv.ParseBool(requireInferenceAuth); err == nil {
cfg.Auth.RequireInferenceAuth = b
}
}
if inferenceKeys := os.Getenv("LLAMACTL_INFERENCE_KEYS"); inferenceKeys != "" {
cfg.Auth.InferenceKeys = strings.Split(inferenceKeys, ",")
}
if requireManagementAuth := os.Getenv("LLAMACTL_REQUIRE_MANAGEMENT_AUTH"); requireManagementAuth != "" {
if b, err := strconv.ParseBool(requireManagementAuth); err == nil {
cfg.Auth.RequireManagementAuth = b
}
}
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
}
}
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"

View File

@@ -29,6 +29,7 @@ func NewHandler(im InstanceManager, config Config) *Handler {
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
@@ -50,6 +51,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc {
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
@@ -71,6 +73,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc {
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
// @Security ApiKeyAuth
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
@@ -92,6 +95,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc {
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {array} Instance "List of instances"
// @Failure 500 {string} string "Internal Server Error"
@@ -116,6 +120,7 @@ func (h *Handler) ListInstances() http.HandlerFunc {
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
@@ -157,6 +162,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc {
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Instance details"
@@ -189,6 +195,7 @@ func (h *Handler) GetInstance() http.HandlerFunc {
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
@@ -229,6 +236,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc {
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Started instance details"
@@ -261,6 +269,7 @@ func (h *Handler) StartInstance() http.HandlerFunc {
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Stopped instance details"
@@ -293,6 +302,7 @@ func (h *Handler) StopInstance() http.HandlerFunc {
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Restarted instance details"
@@ -325,6 +335,7 @@ func (h *Handler) RestartInstance() http.HandlerFunc {
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
@@ -351,6 +362,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc {
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
@@ -398,6 +410,7 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Security ApiKeyAuth
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
@@ -462,6 +475,7 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
// @Summary List instances in OpenAI-compatible format
// @Description Returns a list of instances in a format compatible with OpenAI API
// @Tags openai
// @Security ApiKeyAuth
// @Produces json
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
// @Failure 500 {string} string "Internal Server Error"
@@ -499,8 +513,9 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
// OpenAIProxy godoc
// @Summary OpenAI-compatible proxy endpoint
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
// @Tags openai
// @Security ApiKeyAuth
// @Accept json
// @Produces json
// @Success 200 "OpenAI response"

pkg/middleware.go (new file, 188 lines)
View File

@@ -0,0 +1,188 @@
package llamactl
import (
"crypto/rand"
"crypto/subtle"
"encoding/hex"
"fmt"
"log"
"net/http"
"os"
"strings"
)
type KeyType int
const (
KeyTypeInference KeyType = iota
KeyTypeManagement
)
type APIAuthMiddleware struct {
requireInferenceAuth bool
inferenceKeys map[string]bool
requireManagementAuth bool
managementKeys map[string]bool
}
// NewAPIAuthMiddleware creates a new APIAuthMiddleware with the given configuration
func NewAPIAuthMiddleware(config AuthConfig) *APIAuthMiddleware {
var generated bool = false
inferenceAPIKeys := make(map[string]bool)
managementAPIKeys := make(map[string]bool)
const banner = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
if config.RequireManagementAuth && len(config.ManagementKeys) == 0 {
key := generateAPIKey(KeyTypeManagement)
managementAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ MANAGEMENT AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Management API Key:\n\n %s\n\n", key)
}
for _, key := range config.ManagementKeys {
managementAPIKeys[key] = true
}
if config.RequireInferenceAuth && len(config.InferenceKeys) == 0 {
key := generateAPIKey(KeyTypeInference)
inferenceAPIKeys[key] = true
generated = true
fmt.Printf("%s\n⚠ INFERENCE AUTHENTICATION REQUIRED\n%s\n", banner, banner)
fmt.Printf("🔑 Generated Inference API Key:\n\n %s\n\n", key)
}
for _, key := range config.InferenceKeys {
inferenceAPIKeys[key] = true
}
if generated {
fmt.Printf("%s\n⚠ IMPORTANT\n%s\n", banner, banner)
fmt.Println("• These keys are auto-generated and will change on restart")
fmt.Println("• For production, add explicit keys to your configuration")
fmt.Println("• Copy these keys before they disappear from the terminal")
fmt.Println(banner)
}
return &APIAuthMiddleware{
requireInferenceAuth: config.RequireInferenceAuth,
inferenceKeys: inferenceAPIKeys,
requireManagementAuth: config.RequireManagementAuth,
managementKeys: managementAPIKeys,
}
}
// generateAPIKey creates a cryptographically secure API key
func generateAPIKey(keyType KeyType) string {
// Generate 32 random bytes (256 bits)
randomBytes := make([]byte, 32)
var prefix string
switch keyType {
case KeyTypeInference:
prefix = "sk-inference"
case KeyTypeManagement:
prefix = "sk-management"
default:
prefix = "sk-unknown"
}
if _, err := rand.Read(randomBytes); err != nil {
log.Printf("Warning: Failed to generate secure random key, using fallback")
// Fallback to a less secure method if crypto/rand fails
return fmt.Sprintf("%s-fallback-%d", prefix, os.Getpid())
}
// Convert to hex and add prefix
return fmt.Sprintf("%s-%s", prefix, hex.EncodeToString(randomBytes))
}
// AuthMiddleware returns a middleware that checks API keys for the given key type
func (a *APIAuthMiddleware) AuthMiddleware(keyType KeyType) func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
next.ServeHTTP(w, r)
return
}
apiKey := a.extractAPIKey(r)
if apiKey == "" {
a.unauthorized(w, "Missing API key")
return
}
var isValid bool
switch keyType {
case KeyTypeInference:
// Management keys also work for OpenAI endpoints (higher privilege)
isValid = a.isValidKey(apiKey, KeyTypeInference) || a.isValidKey(apiKey, KeyTypeManagement)
case KeyTypeManagement:
isValid = a.isValidKey(apiKey, KeyTypeManagement)
default:
isValid = false
}
if !isValid {
a.unauthorized(w, "Invalid API key")
return
}
next.ServeHTTP(w, r)
})
}
}
// extractAPIKey extracts the API key from the request
func (a *APIAuthMiddleware) extractAPIKey(r *http.Request) string {
// Check Authorization header: "Bearer sk-..."
if auth := r.Header.Get("Authorization"); auth != "" {
if after, ok := strings.CutPrefix(auth, "Bearer "); ok {
return after
}
}
// Check X-API-Key header
if apiKey := r.Header.Get("X-API-Key"); apiKey != "" {
return apiKey
}
// Check query parameter
if apiKey := r.URL.Query().Get("api_key"); apiKey != "" {
return apiKey
}
return ""
}
// isValidKey checks if the provided API key is valid for the given key type
func (a *APIAuthMiddleware) isValidKey(providedKey string, keyType KeyType) bool {
var validKeys map[string]bool
switch keyType {
case KeyTypeInference:
validKeys = a.inferenceKeys
case KeyTypeManagement:
validKeys = a.managementKeys
default:
return false
}
for validKey := range validKeys {
if len(providedKey) == len(validKey) &&
subtle.ConstantTimeCompare([]byte(providedKey), []byte(validKey)) == 1 {
return true
}
}
return false
}
// unauthorized sends an unauthorized response
func (a *APIAuthMiddleware) unauthorized(w http.ResponseWriter, message string) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusUnauthorized)
response := fmt.Sprintf(`{"error": {"message": "%s", "type": "authentication_error"}}`, message)
w.Write([]byte(response))
}

pkg/middleware_test.go (new file, 354 lines)
View File

@@ -0,0 +1,354 @@
package llamactl_test
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
llamactl "llamactl/pkg"
)
func TestAuthMiddleware(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
inferenceKeys []string
managementKeys []string
requestKey string
method string
expectedStatus int
}{
// Valid key tests
{
name: "valid inference key for inference",
keyType: llamactl.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-valid123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for inference", // Management keys work for inference
keyType: llamactl.KeyTypeInference,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
{
name: "valid management key for management",
keyType: llamactl.KeyTypeManagement,
managementKeys: []string{"sk-management-admin123"},
requestKey: "sk-management-admin123",
method: "GET",
expectedStatus: http.StatusOK,
},
// Invalid key tests
{
name: "inference key for management should fail",
keyType: llamactl.KeyTypeManagement,
inferenceKeys: []string{"sk-inference-user123"},
requestKey: "sk-inference-user123",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid inference key",
keyType: llamactl.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "sk-inference-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing inference key",
keyType: llamactl.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "invalid management key",
keyType: llamactl.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "sk-management-invalid",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
{
name: "missing management key",
keyType: llamactl.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "GET",
expectedStatus: http.StatusUnauthorized,
},
// OPTIONS requests should always pass
{
name: "OPTIONS request bypasses inference auth",
keyType: llamactl.KeyTypeInference,
inferenceKeys: []string{"sk-inference-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
{
name: "OPTIONS request bypasses management auth",
keyType: llamactl.KeyTypeManagement,
managementKeys: []string{"sk-management-valid123"},
requestKey: "",
method: "OPTIONS",
expectedStatus: http.StatusOK,
},
// Cross-key-type validation
{
name: "management key works for inference endpoint",
keyType: llamactl.KeyTypeInference,
inferenceKeys: []string{},
managementKeys: []string{"sk-management-admin"},
requestKey: "sk-management-admin",
method: "POST",
expectedStatus: http.StatusOK,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
InferenceKeys: tt.inferenceKeys,
ManagementKeys: tt.managementKeys,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
// Create test request
req := httptest.NewRequest(tt.method, "/test", nil)
if tt.requestKey != "" {
req.Header.Set("Authorization", "Bearer "+tt.requestKey)
}
// Create test handler using the appropriate middleware
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
// Execute request
recorder := httptest.NewRecorder()
handler.ServeHTTP(recorder, req)
if recorder.Code != tt.expectedStatus {
t.Errorf("AuthMiddleware() status = %v, expected %v", recorder.Code, tt.expectedStatus)
}
// Check that unauthorized responses have proper format
if recorder.Code == http.StatusUnauthorized {
contentType := recorder.Header().Get("Content-Type")
if contentType != "application/json" {
t.Errorf("Unauthorized response Content-Type = %v, expected application/json", contentType)
}
body := recorder.Body.String()
if !strings.Contains(body, `"type": "authentication_error"`) {
t.Errorf("Unauthorized response missing proper error type: %v", body)
}
}
})
}
}
func TestGenerateAPIKey(t *testing.T) {
tests := []struct {
name string
keyType llamactl.KeyType
}{
{"inference key generation", llamactl.KeyTypeInference},
{"management key generation", llamactl.KeyTypeManagement},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test auto-generation by creating config that will trigger it
var config llamactl.AuthConfig
if tt.keyType == llamactl.KeyTypeInference {
config.RequireInferenceAuth = true
config.InferenceKeys = []string{} // Empty to trigger generation
} else {
config.RequireManagementAuth = true
config.ManagementKeys = []string{} // Empty to trigger generation
}
// Create middleware - this should trigger key generation
middleware := llamactl.NewAPIAuthMiddleware(config)
// Test that auth is required (meaning a key was generated)
req := httptest.NewRequest("GET", "/", nil)
recorder := httptest.NewRecorder()
var handler http.Handler
if tt.keyType == llamactl.KeyTypeInference {
handler = middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
} else {
handler = middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
handler.ServeHTTP(recorder, req)
// Should be unauthorized without a key (proving that a key was generated and auth is working)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized without key, got status %v", recorder.Code)
}
// Test uniqueness by creating another middleware instance
middleware2 := llamactl.NewAPIAuthMiddleware(config)
req2 := httptest.NewRequest("GET", "/", nil)
recorder2 := httptest.NewRecorder()
if tt.keyType == llamactl.KeyTypeInference {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
} else {
handler2 := middleware2.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler2.ServeHTTP(recorder2, req2)
}
// Both should require auth (proving keys were generated for both instances)
if recorder2.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for second middleware without key, got status %v", recorder2.Code)
}
})
}
}
func TestAutoGeneration(t *testing.T) {
tests := []struct {
name string
requireInference bool
requireManagement bool
providedInference []string
providedManagement []string
shouldGenerateInf bool // Whether inference key should be generated
shouldGenerateMgmt bool // Whether management key should be generated
}{
{
name: "inference auth required, keys provided - no generation",
requireInference: true,
requireManagement: false,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "inference auth required, no keys - should auto-generate",
requireInference: true,
requireManagement: false,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: false,
},
{
name: "management auth required, keys provided - no generation",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "management auth required, no keys - should auto-generate",
requireInference: false,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: false,
shouldGenerateMgmt: true,
},
{
name: "both required, both provided - no generation",
requireInference: true,
requireManagement: true,
providedInference: []string{"sk-inference-provided"},
providedManagement: []string{"sk-management-provided"},
shouldGenerateInf: false,
shouldGenerateMgmt: false,
},
{
name: "both required, none provided - should auto-generate both",
requireInference: true,
requireManagement: true,
providedInference: []string{},
providedManagement: []string{},
shouldGenerateInf: true,
shouldGenerateMgmt: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := llamactl.AuthConfig{
RequireInferenceAuth: tt.requireInference,
RequireManagementAuth: tt.requireManagement,
InferenceKeys: tt.providedInference,
ManagementKeys: tt.providedManagement,
}
middleware := llamactl.NewAPIAuthMiddleware(config)
// Test inference behavior if inference auth is required
if tt.requireInference {
req := httptest.NewRequest("GET", "/v1/models", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeInference)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for inference without key, got status %v", recorder.Code)
}
}
// Test management behavior if management auth is required
if tt.requireManagement {
req := httptest.NewRequest("GET", "/api/v1/instances", nil)
recorder := httptest.NewRecorder()
handler := middleware.AuthMiddleware(llamactl.KeyTypeManagement)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
handler.ServeHTTP(recorder, req)
// Should always be unauthorized without a key (since middleware assumes auth is required)
if recorder.Code != http.StatusUnauthorized {
t.Errorf("Expected unauthorized for management without key, got status %v", recorder.Code)
}
}
})
}
}

View File

@@ -26,12 +26,22 @@ func SetupRouter(handler *Handler) *chi.Mux {
MaxAge: 300,
}))
// Add API authentication middleware
authMiddleware := NewAPIAuthMiddleware(handler.config.Auth)
if handler.config.Server.EnableSwagger {
r.Get("/swagger/*", httpSwagger.Handler(
httpSwagger.URL("/swagger/doc.json"),
))
}
// Define routes
r.Route("/api/v1", func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireManagementAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement))
}
r.Route("/server", func(r chi.Router) {
r.Get("/help", handler.HelpHandler())
r.Get("/version", handler.VersionHandler())
@@ -61,17 +71,25 @@ func SetupRouter(handler *Handler) *chi.Mux {
})
})
r.Route(("/v1"), func(r chi.Router) {
if authMiddleware != nil && handler.config.Auth.RequireInferenceAuth {
r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
}
r.Get(("/models"), handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
// OpenAI-compatible proxy endpoint
// Handles all POST requests to /v1/*, including:
// - /v1/completions
// - /v1/chat/completions
// - /v1/embeddings
// - /v1/rerank
// - /v1/reranking
// The instance/model to use is determined by the request body.
r.Post("/*", handler.OpenAIProxy())
})
// Serve WebUI files
if err := webui.SetupWebUI(r); err != nil {