Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-06 00:54:23 +00:00)
Update documentation and add README synchronization
.gitignore (vendored, 2 changes)
@@ -33,3 +33,5 @@ go.work.sum
 
 node_modules/
 dist/
+
+__pycache__/
README.md
@@ -2,7 +2,7 @@
 
 
 
-**Unified management and routing for llama.cpp and MLX models with web dashboard.**
+**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
 
 ## Features
 
@@ -12,7 +12,7 @@
 - **State Persistence**: Ensure instances remain intact across server restarts
 
 ### 🔗 Universal Compatibility
-- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 
 ### 🌐 User-Friendly Interface
docs/index.md
@@ -1,24 +1,16 @@
 # Llamactl Documentation
 
-Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**
+Welcome to the Llamactl documentation!
 
 
 
 ## What is Llamactl?
 
-Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.
+**{{HEADLINE}}**
 
 ## Features
 
-🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
-🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
-🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
-🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
-🔐 **API Key Authentication**: Separate keys for management vs inference access
-📊 **Instance Monitoring**: Health checks, auto-restart, log management
-⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
-💾 **State Persistence**: Ensure instances remain intact across server restarts
+{{FEATURES}}
 
 ## Quick Links
 
docs/readme_sync.py (new file, 59 lines)
@@ -0,0 +1,59 @@
+"""
+MkDocs hook to sync content from README.md to docs/index.md
+"""
+import re
+import os
+
+
+def on_page_markdown(markdown, page, config, **kwargs):
+    """Process markdown content before rendering"""
+    # Only process the index.md file
+    if page.file.src_path != 'index.md':
+        return markdown
+
+    # Get the path to README.md (relative to mkdocs.yml)
+    readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
+
+    if not os.path.exists(readme_path):
+        print(f"Warning: README.md not found at {readme_path}")
+        return markdown
+
+    try:
+        with open(readme_path, 'r', encoding='utf-8') as f:
+            readme_content = f.read()
+    except Exception as e:
+        print(f"Error reading README.md: {e}")
+        return markdown
+
+    # Extract headline (the text in bold after the title)
+    headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
+    headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
+
+    # Extract features section - everything between ## Features and the next ## heading
+    features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
+    if features_match:
+        features_content = features_match.group(1).strip()
+        # Just add line breaks at the end of each line for proper MkDocs rendering
+        features_with_breaks = add_line_breaks(features_content)
+    else:
+        features_with_breaks = "Features content not found in README.md"
+
+    # Replace placeholders in the markdown
+    markdown = markdown.replace('{{HEADLINE}}', headline)
+    markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
+
+    return markdown
+
+
+def add_line_breaks(content):
+    """Add two spaces at the end of each line for proper MkDocs line breaks"""
+    lines = content.split('\n')
+    processed_lines = []
+
+    for line in lines:
+        if line.strip():  # Only add spaces to non-empty lines
+            processed_lines.append(line.rstrip() + '  ')
+        else:
+            processed_lines.append(line)
+
+    return '\n'.join(processed_lines)
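The extraction above is plain `re` work, so it can be sanity-checked outside MkDocs. A minimal standalone sketch, using a made-up README-style string (not the project's actual README) to exercise the same two regexes:

```python
import re

# Hypothetical README-like input, only to exercise the hook's regexes.
sample_readme = """# llamactl

**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**

## Features

### Easy Model Management
- **Web Dashboard**: Modern React UI

## Installation
"""

# Headline: first bold span in the file.
headline_match = re.search(r'\*\*(.*?)\*\*', sample_readme)
headline = headline_match.group(1) if headline_match else ''

# Features: everything between "## Features" and the next "## " heading.
features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', sample_readme, re.DOTALL)
features = features_match.group(1).strip() if features_match else ''

print(headline)   # Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.
print(features)   # the "### Easy Model Management" block, i.e. what would replace {{FEATURES}}
```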
@@ -310,7 +310,7 @@ POST /v1/reranking
 The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
 
 **Error Responses:**
-- `400 Bad Request`: Invalid request body or missing model name
+- `400 Bad Request`: Invalid request body or missing instance name
 - `503 Service Unavailable`: Instance is not running and on-demand start is disabled
 - `409 Conflict`: Cannot start instance due to maximum instances limit
 
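Because routing keys on the `model` field, an OpenAI-style client only needs to set `model` to the llamactl instance name. A rough sketch with `requests`; the base URL, inference API key, and instance name `my-model` are placeholder assumptions, not values taken from this commit:

```python
import requests

BASE_URL = "http://localhost:8080"        # assumed local llamactl address
API_KEY = "sk-inference-xxxxxxxx"         # placeholder inference API key
INSTANCE_NAME = "my-model"                # hypothetical instance name

resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "model": INSTANCE_NAME,  # llamactl routes the request on this field
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```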
mkdocs.yml
@@ -67,6 +67,9 @@ plugins:
       javascript_dir: js
       canonical_version: null
 
+hooks:
+  - docs/readme_sync.py
+
 extra:
   version:
     provider: mike
@@ -551,7 +551,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Success 200 "OpenAI response"
-// @Failure 400 {string} string "Invalid request body or model name"
+// @Failure 400 {string} string "Invalid request body or instance name"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
@@ -564,7 +564,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 		r.Body.Close()
 
-		// Parse the body to extract model name
+		// Parse the body to extract instance name
 		var requestBody map[string]any
 		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
@@ -573,11 +573,11 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 
 		modelName, ok := requestBody["model"].(string)
 		if !ok || modelName == "" {
-			http.Error(w, "Model name is required", http.StatusBadRequest)
+			http.Error(w, "Instance name is required", http.StatusBadRequest)
 			return
 		}
 
-		// Route to the appropriate inst based on model name
+		// Route to the appropriate inst based on instance name
 		inst, err := h.InstanceManager.GetInstance(modelName)
 		if err != nil {
 			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
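The renamed messages above are what a client now sees when the `model` field is missing. A small sketch of that failure path, again assuming a local llamactl at port 8080 and a placeholder inference key:

```python
import requests

# Omitting "model" from the body should trigger the 400 path,
# now worded "Instance name is required" instead of "Model name is required".
resp = requests.post(
    "http://localhost:8080/v1/chat/completions",                # assumed address
    headers={"Authorization": "Bearer sk-inference-xxxxxxxx"},  # placeholder key
    json={"messages": [{"role": "user", "content": "Hi"}]},
)
print(resp.status_code, resp.text)  # expected: 400 and the new error message
```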