Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-12-22 17:14:22 +00:00)

Compare commits: 00cd8c8877 ... v0.13.0 (7 commits)

| SHA1 |
|---|
| c776785f30 |
| 1cfbd42eda |
| 8fee27054d |
| fd33837026 |
| 3c4ebf7403 |
| b7a0f7e3d8 |
| d5b68a900f |
@@ -183,7 +183,7 @@ data_dir: ~/.local/share/llamactl # Main data directory (database, instances, l

instances:
  port_range: [8000, 9000] # Port range for instances
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent)
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent) [deprecated]
  logs_dir: ~/.local/share/llamactl/logs # Logs directory (platform dependent)
  auto_create_dirs: true # Auto-create data/config/logs dirs if missing
  max_instances: -1 # Max instances (-1 = unlimited)
@@ -203,8 +203,7 @@ database:
  connection_max_lifetime: 5m # Connection max lifetime

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_inference_auth: true # Require auth for inference endpoints, API keys are created in web UI
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
```
@@ -13,6 +13,7 @@ import (

// migrateFromJSON migrates instances from JSON files to SQLite database
// This is a one-time migration that runs on first startup with existing JSON files.
// Migrated files are moved to a migrated subdirectory to avoid re-importing.
func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
	instancesDir := cfg.Instances.InstancesDir
	if instancesDir == "" {

@@ -24,16 +25,6 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
		return nil // No instances directory, nothing to migrate
	}

	// Check if database is empty (no instances)
	existing, err := db.LoadAll()
	if err != nil {
		return fmt.Errorf("failed to check existing instances: %w", err)
	}

	if len(existing) > 0 {
		return nil // Database already has instances, skip migration
	}

	// Find all JSON files
	files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
	if err != nil {

@@ -46,6 +37,12 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {

	log.Printf("Migrating %d instances from JSON to SQLite...", len(files))

	// Create migrated directory
	migratedDir := filepath.Join(instancesDir, "migrated")
	if err := os.MkdirAll(migratedDir, 0755); err != nil {
		return fmt.Errorf("failed to create migrated directory: %w", err)
	}

	// Migrate each JSON file
	var migrated int
	for _, file := range files {

@@ -53,6 +50,14 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
			log.Printf("Failed to migrate %s: %v", file, err)
			continue
		}

		// Move the file to the migrated directory
		destPath := filepath.Join(migratedDir, filepath.Base(file))
		if err := os.Rename(file, destPath); err != nil {
			log.Printf("Warning: Failed to move %s to migrated directory: %v", file, err)
			// Don't fail the migration if we can't move the file
		}

		migrated++
	}
@@ -74,7 +74,6 @@ database:

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
@@ -266,17 +265,33 @@ database:

### Authentication Configuration

llamactl supports two types of authentication:

- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.

```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```

**Managing Inference API Keys:**

Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:

1. Open the web UI and log in with a management API key
2. Navigate to **Settings → API Keys**
3. Click **Create API Key**
4. Configure the key:
   - **Name**: A descriptive name for the key
   - **Expiration**: Optional expiration date
   - **Permissions**: Grant access to all instances or specific instances only
5. Copy the generated key - it won't be shown again

**Environment Variables:**

- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
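
As a quick illustration, the same settings can be passed through the environment when launching the server. A minimal sketch, assuming `llamactl` is on your `PATH` (the key values are placeholders, not real keys):

```python
import os
import subprocess

# Start llamactl with auth configured via the LLAMACTL_* variables listed above.
env = os.environ.copy()
env["LLAMACTL_REQUIRE_MANAGEMENT_AUTH"] = "true"
env["LLAMACTL_MANAGEMENT_KEYS"] = "sk-management-key-1,sk-management-key-2"  # comma-separated
env["LLAMACTL_REQUIRE_INFERENCE_AUTH"] = "true"

subprocess.run(["llamactl"], env=env)
```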
docs/docs.go (37 changes)
@@ -2063,20 +2063,19 @@ const docTemplate = `{
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
                "expiresAt": {
                    "type": "integer",
                    "format": "int64"
                "expires_at": {
                    "type": "integer"
                },
                "instancePermissions": {
                "instance_ids": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/server.InstancePermission"
                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
                "permissionMode": {
                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }

@@ -2087,9 +2086,6 @@ const docTemplate = `{
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },

@@ -2116,29 +2112,9 @@ const docTemplate = `{
                }
            }
        },
        "server.InstancePermission": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                }
            }
        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                },

@@ -2153,9 +2129,6 @@ const docTemplate = `{
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },
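
Taken together, these schema changes replace the camelCase fields and the nested `instancePermissions` array with snake_case fields and a flat list of instance IDs. A hypothetical payload in the new `server.CreateKeyRequest` shape (the values and the `permission_mode` string are illustrative assumptions, not taken from this diff):

```python
import json
import time

# Sketch of a create-key payload matching the new schema:
# expires_at (integer), instance_ids (array of integers),
# name (string), permission_mode (auth.PermissionMode).
payload = {
    "name": "ci-key",
    "expires_at": int(time.time()) + 30 * 24 * 3600,  # 30 days out; the schema only says integer
    "instance_ids": [1, 2],    # flat IDs replace the old instancePermissions objects
    "permission_mode": "all",  # assumed value; PermissionMode variants aren't shown in this diff
}
print(json.dumps(payload, indent=2))
```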
@@ -17,10 +17,10 @@ Before you start, let's clarify a few key terms:

Llamactl uses two types of API keys:

- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.

By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.

## Start Llamactl
@@ -38,24 +38,17 @@ llamactl

sk-management-...

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ INFERENCE AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Inference API Key:

sk-inference-...

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• These keys are auto-generated and will change on restart
• For production, add explicit keys to your configuration
• Copy these keys before they disappear from the terminal
• This key is auto-generated and will change on restart
• For production, add explicit management_keys to your configuration
• Copy this key before it disappears from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Llamactl server listening on 0.0.0.0:8080
```

Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
Copy the **Management API Key** from the terminal - you'll need it to access the web UI.

By default, Llamactl will start on `http://localhost:8080`.
@@ -82,7 +75,7 @@ You should see the Llamactl web interface.
- **Additional Options**: Backend-specific parameters

!!! tip "Auto-Assignment"
    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.

!!! note "Remote Node Deployment"
    If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
@@ -98,6 +91,24 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed

## Create an Inference API Key

To make inference requests to your instances, you'll need an inference API key:

1. In the web UI, click the **Settings** icon (gear icon in the top-right)
2. Navigate to the **API Keys** tab
3. Click **Create API Key**
4. Configure your key:
   - **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
   - **Expiration**: Optionally set an expiration date for the key
   - **Permissions**: Choose whether the key can access all instances or only specific ones
5. Click **Create**
6. **Copy the generated key** - it will only be shown once!

The key will look like: `llamactl-...`

You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
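
To sanity-check a new key, you can call an instance's OpenAI-compatible endpoint with it. A minimal sketch using `requests` (the model name is a placeholder for one of your running instances):

```python
import requests

API_KEY = "llamactl-..."  # the inference key you copied above

# Send a chat completion through llamactl's OpenAI-compatible endpoint,
# authenticating with the inference key as a Bearer token.
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "model": "my-instance",  # placeholder: use your instance's model name
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```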

## Example Configurations

Here are basic example configurations for each backend:
@@ -246,7 +257,7 @@ print(response.choices[0].message.content)
```

!!! note "API Key"
    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).

### List Available Models
@@ -2056,20 +2056,19 @@
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
                "expiresAt": {
                    "type": "integer",
                    "format": "int64"
                "expires_at": {
                    "type": "integer"
                },
                "instancePermissions": {
                "instance_ids": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/server.InstancePermission"
                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
                "permissionMode": {
                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }

@@ -2080,9 +2079,6 @@
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },

@@ -2109,29 +2105,9 @@
                }
            }
        },
        "server.InstancePermission": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                }
            }
        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                },

@@ -2146,9 +2122,6 @@
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },
@@ -232,24 +232,21 @@ definitions:
    type: object
  server.CreateKeyRequest:
    properties:
      expiresAt:
        format: int64
      expires_at:
        type: integer
      instancePermissions:
      instance_ids:
        items:
          $ref: '#/definitions/server.InstancePermission'
          type: integer
        type: array
      name:
        type: string
      permissionMode:
      permission_mode:
        $ref: '#/definitions/auth.PermissionMode'
    type: object
  server.CreateKeyResponse:
    properties:
      created_at:
        type: integer
      enabled:
        type: boolean
      expires_at:
        type: integer
      id:

@@ -267,21 +264,8 @@ definitions:
      user_id:
        type: string
    type: object
  server.InstancePermission:
    properties:
      can_infer:
        type: boolean
      can_view_logs:
        type: boolean
      instance_id:
        type: integer
    type: object
  server.KeyPermissionResponse:
    properties:
      can_infer:
        type: boolean
      can_view_logs:
        type: boolean
      instance_id:
        type: integer
      instance_name:

@@ -291,8 +275,6 @@ definitions:
    properties:
      created_at:
        type: integer
      enabled:
        type: boolean
      expires_at:
        type: integer
      id:
@@ -115,15 +115,15 @@ vllm serve microsoft/DialoGPT-medium --port 8081
      require_inference_auth: false
   ```

2. **Configure API keys:**
2. **Configure management API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
     inference_keys:
       - "your-inference-key"
   ```

   For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key.

3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
test_client.py (new file, 136 lines)

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
Simple Python script to interact with local LLM server's OpenAI-compatible API
"""

import requests
import json
import sys

# Local LLM server configuration
BASE_URL = "http://localhost:8080"
API_KEY = None
MODEL_NAME = None

def get_models():
    """Fetch available models from /v1/models endpoint"""
    headers = {}
    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    try:
        response = requests.get(f"{BASE_URL}/v1/models", headers=headers, timeout=10)
        response.raise_for_status()
        return response.json()["data"]
    except Exception as e:
        print(f"Error fetching models: {e}")
        return []

def send_message(message):
    """
    Send a message to local LLM server API

    Args:
        message (str): The message to send

    Returns:
        str: The AI response or error message
    """

    headers = {
        "Content-Type": "application/json",
    }

    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    data = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": message
            }
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
        "stream": False,
    }

    # Wrap the request so failures come back as a string, as the docstring promises
    try:
        response = requests.post(f"{BASE_URL}/v1/chat/completions", headers=headers, json=data, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {e}"

def interactive_mode():
    """Run in interactive mode for continuous conversation"""
    global BASE_URL, API_KEY, MODEL_NAME

    # Get base URL
    url_input = input(f"Base URL [{BASE_URL}]: ").strip()
    if url_input:
        BASE_URL = url_input

    # Get API key (optional)
    key_input = input("API key (optional): ").strip()
    if key_input:
        API_KEY = key_input

    # Fetch and select model
    models = get_models()
    if not models:
        print("No models available. Exiting.")
        return

    print("\nAvailable models:")
    for i, m in enumerate(models, 1):
        print(f"{i}. {m['id']}")

    while True:
        try:
            selection = int(input("\nSelect model: "))
            if 1 <= selection <= len(models):
                MODEL_NAME = models[selection - 1]["id"]
                break
            print(f"Please enter a number between 1 and {len(models)}")
        except ValueError:
            print("Please enter a valid number")

    print(f"\nUsing model: {MODEL_NAME}")
    print("Type 'quit' or 'exit' to stop")
    print("-" * 40)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                print("Goodbye!")
                break

            if not user_input:
                continue

            print("AI: ", end="", flush=True)
            response = send_message(user_input)
            print(response)

        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except EOFError:
            print("\nGoodbye!")
            break

def main():
    """Main function"""
    if len(sys.argv) > 1:
        # Single message mode
        message = " ".join(sys.argv[1:])
        response = send_message(message)
        print(response)
    else:
        # Interactive mode
        interactive_mode()

if __name__ == "__main__":
    main()
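
Usage follows from `main()`: run `python test_client.py` with no arguments for interactive mode (it prompts for the base URL, an optional API key, and a model), or pass a message, e.g. `python test_client.py "Hello there"`, for a single reply. Note that single-message mode uses the defaults at the top of the file, so set `API_KEY` and `MODEL_NAME` there if your server requires them.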