Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-12-22 17:14:22 +00:00)

Compare commits: 00cd8c8877 ... v0.13.0 (7 commits)

| SHA1 |
|---|
| c776785f30 |
| 1cfbd42eda |
| 8fee27054d |
| fd33837026 |
| 3c4ebf7403 |
| b7a0f7e3d8 |
| d5b68a900f |
@@ -183,7 +183,7 @@ data_dir: ~/.local/share/llamactl # Main data directory (database, instances, l

instances:
  port_range: [8000, 9000] # Port range for instances
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent)
  configs_dir: ~/.local/share/llamactl/instances # Instance configs directory (platform dependent) [deprecated]
  logs_dir: ~/.local/share/llamactl/logs # Logs directory (platform dependent)
  auto_create_dirs: true # Auto-create data/config/logs dirs if missing
  max_instances: -1 # Max instances (-1 = unlimited)
@@ -203,8 +203,7 @@ database:
  connection_max_lifetime: 5m # Connection max lifetime

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_inference_auth: true # Require auth for inference endpoints, API keys are created in web UI
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
```
@@ -13,6 +13,7 @@ import (

// migrateFromJSON migrates instances from JSON files to SQLite database
// This is a one-time migration that runs on first startup with existing JSON files.
// Migrated files are moved to a migrated subdirectory to avoid re-importing.
func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
	instancesDir := cfg.Instances.InstancesDir
	if instancesDir == "" {

@@ -24,16 +25,6 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
		return nil // No instances directory, nothing to migrate
	}

	// Check if database is empty (no instances)
	existing, err := db.LoadAll()
	if err != nil {
		return fmt.Errorf("failed to check existing instances: %w", err)
	}

	if len(existing) > 0 {
		return nil // Database already has instances, skip migration
	}

	// Find all JSON files
	files, err := filepath.Glob(filepath.Join(instancesDir, "*.json"))
	if err != nil {

@@ -46,6 +37,12 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {

	log.Printf("Migrating %d instances from JSON to SQLite...", len(files))

	// Create migrated directory
	migratedDir := filepath.Join(instancesDir, "migrated")
	if err := os.MkdirAll(migratedDir, 0755); err != nil {
		return fmt.Errorf("failed to create migrated directory: %w", err)
	}

	// Migrate each JSON file
	var migrated int
	for _, file := range files {

@@ -53,6 +50,14 @@ func migrateFromJSON(cfg *config.AppConfig, db database.InstanceStore) error {
			log.Printf("Failed to migrate %s: %v", file, err)
			continue
		}

		// Move the file to the migrated directory
		destPath := filepath.Join(migratedDir, filepath.Base(file))
		if err := os.Rename(file, destPath); err != nil {
			log.Printf("Warning: Failed to move %s to migrated directory: %v", file, err)
			// Don't fail the migration if we can't move the file
		}

		migrated++
	}
@@ -74,7 +74,6 @@ database:

auth:
  require_inference_auth: true # Require auth for inference endpoints
  inference_keys: [] # Keys for inference endpoints
  require_management_auth: true # Require auth for management endpoints
  management_keys: [] # Keys for management endpoints
@@ -266,17 +265,33 @@ database:

### Authentication Configuration

llamactl supports two types of authentication:

- **Management API Keys**: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables.
- **Inference API Keys**: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database.

```yaml
auth:
  require_inference_auth: true # Require API key for OpenAI endpoints (default: true)
  inference_keys: [] # List of valid inference API keys
  require_management_auth: true # Require API key for management endpoints (default: true)
  management_keys: [] # List of valid management API keys
```

**Managing Inference API Keys:**

Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys:

1. Open the web UI and log in with a management API key
2. Navigate to **Settings → API Keys**
3. Click **Create API Key**
4. Configure the key:
   - **Name**: A descriptive name for the key
   - **Expiration**: Optional expiration date
   - **Permissions**: Grant access to all instances or specific instances only
5. Copy the generated key - it won't be shown again

**Environment Variables:**

- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
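
As a quick illustration, the same settings can be passed through the environment when launching the server. A minimal sketch, assuming `llamactl` is on your `PATH` (the key values are placeholders, not real keys):

```python
import os
import subprocess

# Start llamactl with auth configured via the LLAMACTL_* variables listed above.
env = os.environ.copy()
env["LLAMACTL_REQUIRE_MANAGEMENT_AUTH"] = "true"
env["LLAMACTL_MANAGEMENT_KEYS"] = "sk-management-key-1,sk-management-key-2"  # comma-separated
env["LLAMACTL_REQUIRE_INFERENCE_AUTH"] = "true"

subprocess.run(["llamactl"], env=env)
```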
docs/docs.go (37 changes)
@@ -2063,20 +2063,19 @@ const docTemplate = `{
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
                "expiresAt": {
                    "type": "integer",
                    "format": "int64"
                "expires_at": {
                    "type": "integer"
                },
                "instancePermissions": {
                "instance_ids": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/server.InstancePermission"
                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
                "permissionMode": {
                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }

@@ -2087,9 +2086,6 @@ const docTemplate = `{
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },

@@ -2116,29 +2112,9 @@ const docTemplate = `{
                }
            }
        },
        "server.InstancePermission": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                }
            }
        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                },

@@ -2153,9 +2129,6 @@ const docTemplate = `{
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },
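
Taken together, these schema changes replace the camelCase fields and the nested `instancePermissions` array with snake_case fields and a flat list of instance IDs. A hypothetical payload in the new `server.CreateKeyRequest` shape (the values and the `permission_mode` string are illustrative assumptions, not taken from this diff):

```python
import json
import time

# Sketch of a create-key payload matching the new schema:
# expires_at (integer), instance_ids (array of integers),
# name (string), permission_mode (auth.PermissionMode).
payload = {
    "name": "ci-key",
    "expires_at": int(time.time()) + 30 * 24 * 3600,  # 30 days out; the schema only says integer
    "instance_ids": [1, 2],    # flat IDs replace the old instancePermissions objects
    "permission_mode": "all",  # assumed value; PermissionMode variants aren't shown in this diff
}
print(json.dumps(payload, indent=2))
```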
@@ -17,10 +17,10 @@ Before you start, let's clarify a few key terms:

Llamactl uses two types of API keys:

- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
- **Management API Key**: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal.
- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). These are created and managed via the web UI.

By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the [Configuration](configuration.md) guide.

## Start Llamactl
@@ -38,24 +38,17 @@ llamactl

sk-management-...

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ INFERENCE AUTHENTICATION REQUIRED
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🔑 Generated Inference API Key:

sk-inference-...

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
⚠️ IMPORTANT
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• These keys are auto-generated and will change on restart
• For production, add explicit keys to your configuration
• Copy these keys before they disappear from the terminal
• This key is auto-generated and will change on restart
• For production, add explicit management_keys to your configuration
• Copy this key before it disappears from the terminal
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Llamactl server listening on 0.0.0.0:8080
```

Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
Copy the **Management API Key** from the terminal - you'll need it to access the web UI.

By default, Llamactl will start on `http://localhost:8080`.
@@ -82,7 +75,7 @@ You should see the Llamactl web interface.
- **Additional Options**: Backend-specific parameters

!!! tip "Auto-Assignment"
    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values.

!!! note "Remote Node Deployment"
    If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
@@ -98,6 +91,24 @@ Once created, you can:
- **View logs** by clicking the logs button
- **Stop** the instance when needed

## Create an Inference API Key

To make inference requests to your instances, you'll need an inference API key:

1. In the web UI, click the **Settings** icon (gear icon in the top-right)
2. Navigate to the **API Keys** tab
3. Click **Create API Key**
4. Configure your key:
   - **Name**: Give it a descriptive name (e.g., "Production Key", "Development Key")
   - **Expiration**: Optionally set an expiration date for the key
   - **Permissions**: Choose whether the key can access all instances or only specific ones
5. Click **Create**
6. **Copy the generated key** - it will only be shown once!

The key will look like: `llamactl-...`

You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances).
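
To sanity-check a new key, you can call an instance's OpenAI-compatible endpoint with it. A minimal sketch using `requests` (the model name is a placeholder for one of your running instances):

```python
import requests

API_KEY = "llamactl-..."  # the inference key you copied above

# Send a chat completion through llamactl's OpenAI-compatible endpoint,
# authenticating with the inference key as a Bearer token.
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "model": "my-instance",  # placeholder: use your instance's model name
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```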

## Example Configurations

Here are basic example configurations for each backend:
@@ -246,7 +257,7 @@ print(response.choices[0].message.content)
```

!!! note "API Key"
    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key you created via the web UI (Settings → API Keys).

### List Available Models
@@ -2056,20 +2056,19 @@
        "server.CreateKeyRequest": {
            "type": "object",
            "properties": {
                "expiresAt": {
                    "type": "integer",
                    "format": "int64"
                "expires_at": {
                    "type": "integer"
                },
                "instancePermissions": {
                "instance_ids": {
                    "type": "array",
                    "items": {
                        "$ref": "#/definitions/server.InstancePermission"
                        "type": "integer"
                    }
                },
                "name": {
                    "type": "string"
                },
                "permissionMode": {
                "permission_mode": {
                    "$ref": "#/definitions/auth.PermissionMode"
                }
            }

@@ -2080,9 +2079,6 @@
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },

@@ -2109,29 +2105,9 @@
                }
            }
        },
        "server.InstancePermission": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                }
            }
        },
        "server.KeyPermissionResponse": {
            "type": "object",
            "properties": {
                "can_infer": {
                    "type": "boolean"
                },
                "can_view_logs": {
                    "type": "boolean"
                },
                "instance_id": {
                    "type": "integer"
                },

@@ -2146,9 +2122,6 @@
            "created_at": {
                "type": "integer"
            },
            "enabled": {
                "type": "boolean"
            },
            "expires_at": {
                "type": "integer"
            },
@@ -232,24 +232,21 @@ definitions:
    type: object
  server.CreateKeyRequest:
    properties:
      expiresAt:
        format: int64
      expires_at:
        type: integer
      instancePermissions:
      instance_ids:
        items:
          $ref: '#/definitions/server.InstancePermission'
          type: integer
        type: array
      name:
        type: string
      permissionMode:
      permission_mode:
        $ref: '#/definitions/auth.PermissionMode'
    type: object
  server.CreateKeyResponse:
    properties:
      created_at:
        type: integer
      enabled:
        type: boolean
      expires_at:
        type: integer
      id:

@@ -267,21 +264,8 @@ definitions:
      user_id:
        type: string
    type: object
  server.InstancePermission:
    properties:
      can_infer:
        type: boolean
      can_view_logs:
        type: boolean
      instance_id:
        type: integer
    type: object
  server.KeyPermissionResponse:
    properties:
      can_infer:
        type: boolean
      can_view_logs:
        type: boolean
      instance_id:
        type: integer
      instance_name:

@@ -291,8 +275,6 @@ definitions:
    properties:
      created_at:
        type: integer
      enabled:
        type: boolean
      expires_at:
        type: integer
      id:
@@ -115,15 +115,15 @@ vllm serve microsoft/DialoGPT-medium --port 8081
      require_inference_auth: false
   ```

2. **Configure API keys:**
2. **Configure management API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
     inference_keys:
       - "your-inference-key"
   ```

   For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key.

3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
test_client.py (new file, 136 lines)

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
Simple Python script to interact with local LLM server's OpenAI-compatible API
"""

import requests
import json
import sys

# Local LLM server configuration
BASE_URL = "http://localhost:8080"
API_KEY = None
MODEL_NAME = None

def get_models():
    """Fetch available models from /v1/models endpoint"""
    headers = {}
    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    try:
        response = requests.get(f"{BASE_URL}/v1/models", headers=headers, timeout=10)
        response.raise_for_status()
        return response.json()["data"]
    except Exception as e:
        print(f"Error fetching models: {e}")
        return []

def send_message(message):
    """
    Send a message to local LLM server API

    Args:
        message (str): The message to send

    Returns:
        str: The AI response or error message
    """

    headers = {
        "Content-Type": "application/json",
    }

    if API_KEY:
        headers["Authorization"] = f"Bearer {API_KEY}"

    data = {
        "model": MODEL_NAME,
        "messages": [
            {
                "role": "user",
                "content": message
            }
        ],
        "temperature": 0.7,
        "max_tokens": 1000,
        "stream": False,
    }

    # Wrap the request so failures come back as a string, as the docstring promises
    try:
        response = requests.post(f"{BASE_URL}/v1/chat/completions", headers=headers, json=data, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {e}"

def interactive_mode():
    """Run in interactive mode for continuous conversation"""
    global BASE_URL, API_KEY, MODEL_NAME

    # Get base URL
    url_input = input(f"Base URL [{BASE_URL}]: ").strip()
    if url_input:
        BASE_URL = url_input

    # Get API key (optional)
    key_input = input("API key (optional): ").strip()
    if key_input:
        API_KEY = key_input

    # Fetch and select model
    models = get_models()
    if not models:
        print("No models available. Exiting.")
        return

    print("\nAvailable models:")
    for i, m in enumerate(models, 1):
        print(f"{i}. {m['id']}")

    while True:
        try:
            selection = int(input("\nSelect model: "))
            if 1 <= selection <= len(models):
                MODEL_NAME = models[selection - 1]["id"]
                break
            print(f"Please enter a number between 1 and {len(models)}")
        except ValueError:
            print("Please enter a valid number")

    print(f"\nUsing model: {MODEL_NAME}")
    print("Type 'quit' or 'exit' to stop")
    print("-" * 40)

    while True:
        try:
            user_input = input("\nYou: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                print("Goodbye!")
                break

            if not user_input:
                continue

            print("AI: ", end="", flush=True)
            response = send_message(user_input)
            print(response)

        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except EOFError:
            print("\nGoodbye!")
            break

def main():
    """Main function"""
    if len(sys.argv) > 1:
        # Single message mode
        message = " ".join(sys.argv[1:])
        response = send_message(message)
        print(response)
    else:
        # Interactive mode
        interactive_mode()

if __name__ == "__main__":
    main()
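
Usage follows from `main()`: run `python test_client.py` with no arguments for interactive mode (it prompts for the base URL, an optional API key, and a model), or pass a message, e.g. `python test_client.py "Hello there"`, for a single reply. Note that single-message mode uses the defaults at the top of the file, so set `API_KEY` and `MODEL_NAME` there if your server requires them.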