diff --git a/.gitignore b/.gitignore
index 160028f..fda26ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,4 +32,6 @@ go.work.sum
 # .vscode/
 
 node_modules/
-dist/
\ No newline at end of file
+dist/
+
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 31c827c..99eb77e 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 ![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
 
-**Unified management and routing for llama.cpp and MLX models with web dashboard.**
+**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
 
 ## Features
 
@@ -12,7 +12,7 @@
 - **State Persistence**: Ensure instances remain intact across server restarts
 
 ### 🔗 Universal Compatibility
-- **OpenAI API Compatible**: Drop-in replacement - route requests by model name
+- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 
 ### 🌐 User-Friendly Interface
diff --git a/docs/index.md b/docs/index.md
index 585363c..501d426 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,24 +1,16 @@
 # Llamactl Documentation
 
-Welcome to the Llamactl documentation! **Management server and proxy for multiple llama.cpp and MLX instances with OpenAI-compatible API routing.**
+Welcome to the Llamactl documentation!
 
 ![Dashboard Screenshot](images/dashboard.png)
 
 ## What is Llamactl?
 
-Llamactl is designed to simplify the deployment and management of llama-server and MLX instances. It provides a modern solution for running multiple large language models with centralized management and multi-backend support.
+**{{HEADLINE}}**
 
 ## Features
 
-🚀 **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
-🔗 **OpenAI API Compatible**: Drop-in replacement - route requests by model name
-🍎 **Multi-Backend Support**: Native support for both llama.cpp and MLX (Apple Silicon optimized)
-🌐 **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
-🔐 **API Key Authentication**: Separate keys for management vs inference access
-📊 **Instance Monitoring**: Health checks, auto-restart, log management
-⚡ **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-💡 **On-Demand Instance Start**: Automatically launch instances upon receiving OpenAI-compatible API requests
-💾 **State Persistence**: Ensure instances remain intact across server restarts
+{{FEATURES}}
 
 ## Quick Links
 
diff --git a/docs/readme_sync.py b/docs/readme_sync.py
new file mode 100644
index 0000000..1325cdc
--- /dev/null
+++ b/docs/readme_sync.py
@@ -0,0 +1,59 @@
+"""
+MkDocs hook to sync content from README.md to docs/index.md
+"""
+import re
+import os
+
+
+def on_page_markdown(markdown, page, config, **kwargs):
+    """Process markdown content before rendering"""
+    # Only process the index.md file
+    if page.file.src_path != 'index.md':
+        return markdown
+
+    # Get the path to README.md (relative to mkdocs.yml)
+    readme_path = os.path.join(os.path.dirname(config['config_file_path']), 'README.md')
+
+    if not os.path.exists(readme_path):
+        print(f"Warning: README.md not found at {readme_path}")
+        return markdown
+
+    try:
+        with open(readme_path, 'r', encoding='utf-8') as f:
+            readme_content = f.read()
+    except Exception as e:
+        print(f"Error reading README.md: {e}")
+        return markdown
+
+    # Extract headline (the text in bold after the title)
+    headline_match = re.search(r'\*\*(.*?)\*\*', readme_content)
+    headline = headline_match.group(1) if headline_match else 'Management server for llama.cpp and MLX instances'
+
+    # Extract features section - everything between ## Features and the next ## heading
+    features_match = re.search(r'## Features\n(.*?)(?=\n## |\Z)', readme_content, re.DOTALL)
+    if features_match:
+        features_content = features_match.group(1).strip()
+        # Just add line breaks at the end of each line for proper MkDocs rendering
+        features_with_breaks = add_line_breaks(features_content)
+    else:
+        features_with_breaks = "Features content not found in README.md"
+
+    # Replace placeholders in the markdown
+    markdown = markdown.replace('{{HEADLINE}}', headline)
+    markdown = markdown.replace('{{FEATURES}}', features_with_breaks)
+
+    return markdown
+
+
+def add_line_breaks(content):
+    """Add two spaces at the end of each line for proper MkDocs line breaks"""
+    lines = content.split('\n')
+    processed_lines = []
+
+    for line in lines:
+        if line.strip():  # Only add spaces to non-empty lines
+            processed_lines.append(line.rstrip() + '  ')
+        else:
+            processed_lines.append(line)
+
+    return '\n'.join(processed_lines)
\ No newline at end of file
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
index 27189e3..348c1c0 100644
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -310,7 +310,7 @@ POST /v1/reranking
 The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
 
 **Error Responses:**
-- `400 Bad Request`: Invalid request body or missing model name
+- `400 Bad Request`: Invalid request body or missing instance name
 - `503 Service Unavailable`: Instance is not running and on-demand start is disabled
 - `409 Conflict`: Cannot start instance due to maximum instances limit
 
diff --git a/mkdocs.yml b/mkdocs.yml
index cc69245..a4f51ea 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -67,6 +67,9 @@ plugins:
       javascript_dir: js
       canonical_version: null
 
+hooks:
+  - docs/readme_sync.py
+
 extra:
   version:
     provider: mike
diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go
index 0d74851..d1c4d08 100644
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -551,7 +551,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 // @Accept json
 // @Produces json
 // @Success 200 "OpenAI response"
-// @Failure 400 {string} string "Invalid request body or model name"
+// @Failure 400 {string} string "Invalid request body or instance name"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
@@ -564,7 +564,7 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 		}
 		r.Body.Close()
 
-		// Parse the body to extract model name
+		// Parse the body to extract instance name
 		var requestBody map[string]any
 		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
@@ -573,11 +573,11 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
 
 		modelName, ok := requestBody["model"].(string)
 		if !ok || modelName == "" {
-			http.Error(w, "Model name is required", http.StatusBadRequest)
+			http.Error(w, "Instance name is required", http.StatusBadRequest)
 			return
 		}
 
-		// Route to the appropriate inst based on model name
+		// Route to the appropriate inst based on instance name
 		inst, err := h.InstanceManager.GetInstance(modelName)
 		if err != nil {
 			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)