Merge pull request #35 from lordmathis/chore/docs-update

chore: Update docs
2025-09-22 23:24:12 +02:00
committed by GitHub
10 changed files with 81 additions and 22 deletions

.gitignore vendored
View File

@@ -32,4 +32,6 @@ go.work.sum
 # .vscode/
 node_modules/
 dist/
+__pycache__/

View File

@@ -2,7 +2,7 @@
 ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
-**Unified management and routing for llama.cpp and MLX models with web dashboard.**
+**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
 ## Features
@@ -12,7 +12,7 @@
 - **State Persistence**: Ensure instances remain intact across server restarts
 ### 🔗 Universal Compatibility
-- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 ### 🌐 User-Friendly Interface

View File

@@ -762,7 +762,7 @@ const docTemplate = `{
"description": "OpenAI response" "description": "OpenAI response"
}, },
"400": { "400": {
"description": "Invalid request body or model name", "description": "Invalid request body or instance name",
"schema": { "schema": {
"type": "string" "type": "string"
} }

View File

@@ -755,7 +755,7 @@
"description": "OpenAI response" "description": "OpenAI response"
}, },
"400": { "400": {
"description": "Invalid request body or model name", "description": "Invalid request body or instance name",
"schema": { "schema": {
"type": "string" "type": "string"
} }

View File

@@ -556,7 +556,7 @@ paths:
"200": "200":
description: OpenAI response description: OpenAI response
"400": "400":
description: Invalid request body or model name description: Invalid request body or instance name
schema: schema:
type: string type: string
"500": "500":

View File

@@ -1,24 +1,16 @@
 # Llamactl Documentation
-Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**
+Welcome to the Llamactl documentation!
 ![Dashboard Screenshot](images/dashboard.png)
 ## What is Llamactl?
-Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.
+**{{HEADLINE}}**
 ## Features
-🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
-🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
-🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
-🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
-🔐 **API Key Authentication**: Separate keys for management vs inference access
-📊 **Instance Monitoring**: Health checks, auto-restart, log management
-**Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
-💾 **State Persistence**: Ensure instances remain intact across server restarts
+{{FEATURES}}
 ## Quick Links

docs/readme_sync.py Normal file
View File

@@ -0,0 +1,62 @@
"""
MkDocs hook to sync content from README.md to docs/index.md
"""
import re
import os
def on_page_markdown(markdown, page, config, **kwargs):
"""Process markdown content before rendering"""
# Only process the index.md file
if page.file.src_path != 'index.md':
return markdown
# Get the path to README.md (relative to mkdocs.yml)
readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
if not os.path.exists(readme_path):
print(f"Warning: README.md not found at {readme_path}")
return markdown
try:
with open(readme_path, 'r', encoding='utf-8') as f:
readme_content = f.read()
except Exception as e:
print(f"Error reading README.md: {e}")
return markdown
# Extract headline (the text in bold after the title)
headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
# Extract features section - everything between ## Features and the next ## heading
features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
if features_match:
features_content = features_match.group(1).strip()
# Just add line breaks at the end of each line for proper MkDocs rendering
features_with_breaks = add_line_breaks(features_content)
else:
features_with_breaks = "Features content not found in README.md"
# Replace placeholders in the markdown
markdown = markdown.replace('{{HEADLINE}}', headline)
markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
# Fix image paths: convert docs/images/ to images/ for MkDocs
markdown = re.sub(r'docs/images/', 'images/', markdown)
return markdown
def add_line_breaks(content):
"""Add two spaces at the end of each line for proper MkDocs line breaks"""
lines = content.split('\n')
processed_lines = []
for line in lines:
if line.strip(): # Only add spaces to non-empty lines
processed_lines.append(line.rstrip() + ' ')
else:
processed_lines.append(line)
return '\n'.join(processed_lines)
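The hook above is what fills the `{{HEADLINE}}` and `{{FEATURES}}` placeholders in docs/index.md. As a minimal, hypothetical smoke test (not part of the commit; it assumes the script is run from the repo root and that the sample strings below stand in for real README content), the helper can be exercised directly:

```python
# Hypothetical check of add_line_breaks(); paths and sample text are assumptions.
import sys
sys.path.insert(0, "docs")  # make docs/readme_sync.py importable from the repo root
from readme_sync import add_line_breaks

sample = (
    "🚀 **Multiple Model Serving**: Run different models simultaneously\n"
    "\n"
    "💾 **State Persistence**: Ensure instances remain intact across server restarts"
)
result = add_line_breaks(sample)

# Non-empty lines gain two trailing spaces so MkDocs renders hard line breaks;
# blank lines are passed through untouched.
assert result.splitlines()[0].endswith("  ")
assert result.splitlines()[1] == ""
print(result)
```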

View File

@@ -310,7 +310,7 @@ POST /v1/reranking
 The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
 **Error Responses:**
-- `400 Bad Request`: Invalid request body or missing model name
+- `400 Bad Request`: Invalid request body or missing instance name
 - `503 Service Unavailable`: Instance is not running and on-demand start is disabled
 - `409 Conflict`: Cannot start instance due to maximum instances limit
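For illustration, the routing behavior described in that api-reference.md hunk can be exercised with a plain HTTP request. This is a sketch, not part of the commit: the host, port, endpoint path, API key, and instance name are all placeholder assumptions.

```python
# Hypothetical client call showing routing by instance name via the "model" field.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",  # placeholder llamactl address
    data=json.dumps({
        # "model" carries the llamactl instance name; the proxy looks the instance up by this value.
        "model": "my-llama-instance",
        "messages": [{"role": "user", "content": "Hello"}],
    }).encode("utf-8"),
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer <inference-api-key>",  # placeholder inference key
    },
)
with urllib.request.urlopen(req) as resp:
    print(resp.status, resp.read().decode())
```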

View File

@@ -67,6 +67,9 @@ plugins:
 javascript_dir: js
 canonical_version: null
+hooks:
+  - docs/readme_sync.py
 extra:
 version:
 provider: mike

View File

@@ -551,7 +551,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Success 200 "OpenAI response"
-// @Failure 400 {string} string "Invalid request body or model name"
+// @Failure 400 {string} string "Invalid request body or instance name"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
@@ -564,7 +564,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 }
 r.Body.Close()
-// Parse the body to extract model name
+// Parse the body to extract instance name
 var requestBody map[string]any
 if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
 http.Error(w, "Invalid request body", http.StatusBadRequest)
@@ -573,11 +573,11 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 modelName, ok := requestBody["model"].(string)
 if !ok || modelName == "" {
-http.Error(w, "Model name is required", http.StatusBadRequest)
+http.Error(w, "Instance name is required", http.StatusBadRequest)
 return
 }
-// Route to the appropriate inst based on model name
+// Route to the appropriate inst based on instance name
 inst, err := h.InstanceManager.GetInstance(modelName)
 if err != nil {
 http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)