diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f4a50e..9ffc0a3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ go install github.com/swaggo/swag/cmd/swag@latest # Update Swagger comments in pkg/server/handlers.go # Then regenerate docs -swag init -g cmd/server/main.go -o apidocs +swag init -g cmd/server/main.go ``` ## Pull Request Guidelines diff --git a/README.md b/README.md index d9fea15..9f2039b 100644 --- a/README.md +++ b/README.md @@ -4,133 +4,32 @@ **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.** -## Features - -### 🚀 Easy Model Management -- **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality) -- **On-Demand Instance Start**: Automatically launch instances upon receiving API requests -- **State Persistence**: Ensure instances remain intact across server restarts - -### 🔗 Universal Compatibility -- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name -- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM -- **Docker Support**: Run backends in containers - -### 🌐 User-Friendly Interface -- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) -- **API Key Authentication**: Separate keys for management vs inference access - -### ⚡ Smart Operations -- **Instance Monitoring**: Health checks, auto-restart, log management -- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits -- **Environment Variables**: Set custom environment variables per instance for advanced configuration - -### 🔗 Remote Instance Deployment -- **Remote Node Support**: Deploy instances on remote hosts -- **Central Management**: Manage remote instances from a single dashboard -- **Seamless Routing**: Automatic request routing to remote instances +📚 **[Full Documentation →](https://llamactl.org)** ![Dashboard Screenshot](docs/images/dashboard.png) +## Features + +**🚀 Easy Model Management** +- **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality) +- **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits +- **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs + +**🔗 Flexible Integration** +- **OpenAI API Compatible**: Drop-in replacement - route requests to different models by instance name +- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM +- **Docker Ready**: Run backends in containers with full GPU support + +**🌐 Distributed Deployment** +- **Remote Instances**: Deploy instances on remote hosts +- **Central Management**: Manage everything from a single dashboard with automatic routing + ## Quick Start -```bash -# 1. Install backend (one-time setup) -# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start -# For MLX on macOS: pip install mlx-lm -# For vLLM: pip install vllm -# Or use Docker - no local installation required - -# 2. Download and run llamactl -LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') -curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz -sudo mv llamactl /usr/local/bin/ - -# 3. 
Start the server -llamactl -# Access dashboard at http://localhost:8080 -``` - -## Usage - -### Create and manage instances via web dashboard: -1. Open http://localhost:8080 -2. Click "Create Instance" -3. Choose backend type (llama.cpp, MLX, or vLLM) -4. Set model path and backend-specific options -5. Configure environment variables if needed (optional) -6. Start or stop the instance - -### Or use the REST API: -```bash -# Create llama.cpp instance -curl -X POST localhost:8080/api/v1/instances/my-7b-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}' - -# Create MLX instance (macOS) -curl -X POST localhost:8080/api/v1/instances/my-mlx-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}' - -# Create vLLM instance with environment variables -curl -X POST localhost:8080/api/v1/instances/my-vllm-model \ - -H "Authorization: Bearer your-key" \ - -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}' - -# Use with OpenAI SDK -curl -X POST localhost:8080/v1/chat/completions \ - -H "Authorization: Bearer your-key" \ - -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}' -``` - -## Installation - -### Option 1: Download Binary (Recommended) - -```bash -# Linux/macOS - Get latest version and download -LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') -curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz -sudo mv llamactl /usr/local/bin/ - -# Or download manually from the releases page: -# https://github.com/lordmathis/llamactl/releases/latest - -# Windows - Download from releases page -``` - -### Option 2: Docker (No local backend installation required) - -```bash -# Clone repository and build Docker images -git clone https://github.com/lordmathis/llamactl.git -cd llamactl -mkdir -p data/llamacpp data/vllm models - -# Build and start llamactl with llama.cpp CUDA backend -docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d - -# Build and start llamactl with vLLM CUDA backend -docker-compose -f docker/docker-compose.yml up llamactl-vllm -d - -# Build from source using multi-stage build -docker build -f docker/Dockerfile.source -t llamactl:source . -``` - -**Features:** CUDA support, automatic latest release installation, no backend dependencies. -**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.). - -For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md). - -### Option 3: Build from Source -Requires Go 1.24+ and Node.js 22+ -```bash -git clone https://github.com/lordmathis/llamactl.git -cd llamactl -cd webui && npm ci && npm run build && cd .. -go build -o llamactl ./cmd/server -``` +1. Install a backend (llama.cpp, MLX, or vLLM) - see [Prerequisites](#prerequisites) below +2. [Download llamactl](#installation) for your platform +3. Run `llamactl` and open http://localhost:8080 +4. Create an instance and start inferencing! 
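
Once an instance is running, any OpenAI-compatible client can talk to it through llamactl's `/v1` endpoint, with requests routed by instance name. A minimal sketch based on the usage examples above (the instance name `my-7b-model` and the `your-key` API key are placeholders for your own values):

```bash
# Chat completion routed to the "my-7b-model" instance via the OpenAI-compatible API
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
```
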
## Prerequisites @@ -175,9 +74,9 @@ pip install vllm # Or use Docker - no local installation required ``` -## Backend Docker Support +### Docker Support -llamactl can run backends in Docker containers: +llamactl can run backends in Docker containers, eliminating the need for local backend installation: ```yaml backends: @@ -189,9 +88,58 @@ backends: enabled: true ``` -**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support. +## Installation -For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md). +### Option 1: Download Binary (Recommended) + +```bash +# Linux/macOS - Get latest version and download +LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') +curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz +sudo mv llamactl /usr/local/bin/ + +# Or download manually from the releases page: +# https://github.com/lordmathis/llamactl/releases/latest + +# Windows - Download from releases page +``` + +### Option 2: Docker (No local backend installation required) + +```bash +# Clone repository and build Docker images +git clone https://github.com/lordmathis/llamactl.git +cd llamactl +mkdir -p data/llamacpp data/vllm models + +# Build and start llamactl with llama.cpp CUDA backend +docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d + +# Build and start llamactl with vLLM CUDA backend +docker-compose -f docker/docker-compose.yml up llamactl-vllm -d + +# Build from source using multi-stage build +docker build -f docker/Dockerfile.source -t llamactl:source . +``` + +**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.). + +### Option 3: Build from Source +Requires Go 1.24+ and Node.js 22+ +```bash +git clone https://github.com/lordmathis/llamactl.git +cd llamactl +cd webui && npm ci && npm run build && cd .. +go build -o llamactl ./cmd/server +``` + +## Usage + +1. Open http://localhost:8080 +2. Click "Create Instance" +3. Choose backend type (llama.cpp, MLX, or vLLM) +4. Configure your model and options (ports and API keys are auto-assigned) +5. 
Start the instance and use it with any OpenAI-compatible client ## Configuration @@ -213,7 +161,7 @@ backends: docker: enabled: false image: "ghcr.io/ggml-org/llama.cpp:server" - args: ["run", "--rm", "--network", "host", "--gpus", "all"] + args: ["run", "--rm", "--network", "host", "--gpus", "all", "-v", "~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp"] environment: {} # Environment variables for the container vllm: @@ -223,7 +171,7 @@ backends: docker: enabled: false image: "vllm/vllm-openai:latest" - args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", "-v", "~/.local/share/llamactl/huggingface:/root/.cache/huggingface"] environment: {} # Environment variables for the container mlx: diff --git a/cmd/server/main.go b/cmd/server/main.go index 2cba231..dee87ae 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -22,6 +22,9 @@ var buildTime string = "unknown" // @license.name MIT License // @license.url https://opensource.org/license/mit/ // @basePath /api/v1 +// @securityDefinitions.apikey ApiKeyAuth +// @in header +// @name X-API-Key func main() { // --version flag to print the version diff --git a/docker/Dockerfile.source b/docker/Dockerfile.source index 2b4482f..9cc610a 100644 --- a/docker/Dockerfile.source +++ b/docker/Dockerfile.source @@ -33,7 +33,7 @@ RUN go mod download # Copy source code COPY cmd/ ./cmd/ COPY pkg/ ./pkg/ -COPY apidocs/ ./apidocs/ +COPY docs/ ./docs/ COPY webui/webui.go ./webui/ # Copy built webui from webui-builder diff --git a/docs-requirements.txt b/docs-requirements.txt index 632c6e1..05b03c0 100644 --- a/docs-requirements.txt +++ b/docs-requirements.txt @@ -1,5 +1,6 @@ -mkdocs-material==9.5.3 -mkdocs==1.5.3 -pymdown-extensions==10.7 -mkdocs-git-revision-date-localized-plugin==1.2.4 -mike==2.0.0 +mkdocs-material==9.6.22 +mkdocs==1.6.1 +pymdown-extensions==10.16.1 +mkdocs-git-revision-date-localized-plugin==1.4.7 +mike==2.1.3 +neoteroi-mkdocs==1.1.3 \ No newline at end of file diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..339c609 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1 @@ +[OAD(swagger.yaml)] \ No newline at end of file diff --git a/docs/getting-started/configuration.md b/docs/configuration.md similarity index 99% rename from docs/getting-started/configuration.md rename to docs/configuration.md index 6f9ee98..c271f29 100644 --- a/docs/getting-started/configuration.md +++ b/docs/configuration.md @@ -80,7 +80,7 @@ nodes: # Node configuration for multi-node deployment ### Configuration File Locations -Configuration files are searched in the following locations (in order of precedence): +Configuration files are searched in the following locations (in order of precedence, first found is used): **Linux:** - `./llamactl.yaml` or `./config.yaml` (current directory) diff --git a/docs/css/css-v1.1.3.css b/docs/css/css-v1.1.3.css new file mode 100644 index 0000000..e9daefe --- /dev/null +++ b/docs/css/css-v1.1.3.css @@ -0,0 +1,1814 @@ +/** + * All CSS for the neoteroi-mkdocs extensions. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --nt-color-0: #CD853F; + --nt-color-1: #B22222; + --nt-color-2: #000080; + --nt-color-3: #4B0082; + --nt-color-4: #3CB371; + --nt-color-5: #D2B48C; + --nt-color-6: #FF00FF; + --nt-color-7: #98FB98; + --nt-color-8: #FFEBCD; + --nt-color-9: #2E8B57; + --nt-color-10: #6A5ACD; + --nt-color-11: #48D1CC; + --nt-color-12: #FFA500; + --nt-color-13: #F4A460; + --nt-color-14: #A52A2A; + --nt-color-15: #FFE4C4; + --nt-color-16: #FF4500; + --nt-color-17: #AFEEEE; + --nt-color-18: #FA8072; + --nt-color-19: #2F4F4F; + --nt-color-20: #FFDAB9; + --nt-color-21: #BC8F8F; + --nt-color-22: #FFC0CB; + --nt-color-23: #00FA9A; + --nt-color-24: #F0FFF0; + --nt-color-25: #FFFACD; + --nt-color-26: #F5F5F5; + --nt-color-27: #FF6347; + --nt-color-28: #FFFFF0; + --nt-color-29: #7FFFD4; + --nt-color-30: #E9967A; + --nt-color-31: #7B68EE; + --nt-color-32: #FFF8DC; + --nt-color-33: #0000CD; + --nt-color-34: #D2691E; + --nt-color-35: #708090; + --nt-color-36: #5F9EA0; + --nt-color-37: #008080; + --nt-color-38: #008000; + --nt-color-39: #FFE4E1; + --nt-color-40: #FFFF00; + --nt-color-41: #FFFAF0; + --nt-color-42: #DCDCDC; + --nt-color-43: #ADFF2F; + --nt-color-44: #ADD8E6; + --nt-color-45: #8B008B; + --nt-color-46: #7FFF00; + --nt-color-47: #800000; + --nt-color-48: #20B2AA; + --nt-color-49: #556B2F; + --nt-color-50: #778899; + --nt-color-51: #E6E6FA; + --nt-color-52: #FFFAFA; + --nt-color-53: #FF7F50; + --nt-color-54: #FF0000; + --nt-color-55: #F5DEB3; + --nt-color-56: #008B8B; + --nt-color-57: #66CDAA; + --nt-color-58: #808000; + --nt-color-59: #FAF0E6; + --nt-color-60: #00BFFF; + --nt-color-61: #C71585; + --nt-color-62: #00FFFF; + --nt-color-63: #8B4513; + --nt-color-64: #F0F8FF; + --nt-color-65: #FAEBD7; + --nt-color-66: #8B0000; + --nt-color-67: #4682B4; + --nt-color-68: #F0E68C; + --nt-color-69: #BDB76B; + --nt-color-70: #A0522D; + --nt-color-71: #FAFAD2; + --nt-color-72: #FFD700; + --nt-color-73: #DEB887; + --nt-color-74: #E0FFFF; + --nt-color-75: #8A2BE2; + --nt-color-76: #32CD32; + --nt-color-77: #87CEFA; + --nt-color-78: #00CED1; + --nt-color-79: #696969; + --nt-color-80: #DDA0DD; + --nt-color-81: #EE82EE; + --nt-color-82: #FFB6C1; + --nt-color-83: #8FBC8F; + --nt-color-84: #D8BFD8; + --nt-color-85: #9400D3; + --nt-color-86: #A9A9A9; + --nt-color-87: #FFFFE0; + --nt-color-88: #FFF5EE; + --nt-color-89: #FFF0F5; + --nt-color-90: #FFDEAD; + --nt-color-91: #800080; + --nt-color-92: #B0E0E6; + --nt-color-93: #9932CC; + --nt-color-94: #DAA520; + --nt-color-95: #F0FFFF; + --nt-color-96: #40E0D0; + --nt-color-97: #00FF7F; + --nt-color-98: #006400; + --nt-color-99: #808080; + --nt-color-100: #87CEEB; + --nt-color-101: #0000FF; + --nt-color-102: #6495ED; + --nt-color-103: #FDF5E6; + --nt-color-104: #B8860B; + --nt-color-105: #BA55D3; + --nt-color-106: #C0C0C0; + --nt-color-107: #000000; + --nt-color-108: #F08080; + --nt-color-109: #B0C4DE; + --nt-color-110: #00008B; + --nt-color-111: #6B8E23; + --nt-color-112: #FFE4B5; + --nt-color-113: #FFA07A; + --nt-color-114: #9ACD32; + --nt-color-115: #FFFFFF; + --nt-color-116: #F5F5DC; + --nt-color-117: #90EE90; + --nt-color-118: #1E90FF; + --nt-color-119: #7CFC00; + --nt-color-120: #FF69B4; + --nt-color-121: #F8F8FF; + --nt-color-122: #F5FFFA; + --nt-color-123: #00FF00; + --nt-color-124: #D3D3D3; + --nt-color-125: #DB7093; + --nt-color-126: #DA70D6; + --nt-color-127: #FF1493; + --nt-color-128: #228B22; + --nt-color-129: #FFEFD5; + --nt-color-130: #4169E1; + --nt-color-131: #191970; + --nt-color-132: #9370DB; + 
--nt-color-133: #483D8B; + --nt-color-134: #FF8C00; + --nt-color-135: #EEE8AA; + --nt-color-136: #CD5C5C; + --nt-color-137: #DC143C; +} + +:root { + --nt-group-0-main: #000000; + --nt-group-0-dark: #FFFFFF; + --nt-group-0-light: #000000; + --nt-group-0-main-bg: #F44336; + --nt-group-0-dark-bg: #BA000D; + --nt-group-0-light-bg: #FF7961; + --nt-group-1-main: #000000; + --nt-group-1-dark: #FFFFFF; + --nt-group-1-light: #000000; + --nt-group-1-main-bg: #E91E63; + --nt-group-1-dark-bg: #B0003A; + --nt-group-1-light-bg: #FF6090; + --nt-group-2-main: #FFFFFF; + --nt-group-2-dark: #FFFFFF; + --nt-group-2-light: #000000; + --nt-group-2-main-bg: #9C27B0; + --nt-group-2-dark-bg: #6A0080; + --nt-group-2-light-bg: #D05CE3; + --nt-group-3-main: #FFFFFF; + --nt-group-3-dark: #FFFFFF; + --nt-group-3-light: #000000; + --nt-group-3-main-bg: #673AB7; + --nt-group-3-dark-bg: #320B86; + --nt-group-3-light-bg: #9A67EA; + --nt-group-4-main: #FFFFFF; + --nt-group-4-dark: #FFFFFF; + --nt-group-4-light: #000000; + --nt-group-4-main-bg: #3F51B5; + --nt-group-4-dark-bg: #002984; + --nt-group-4-light-bg: #757DE8; + --nt-group-5-main: #000000; + --nt-group-5-dark: #FFFFFF; + --nt-group-5-light: #000000; + --nt-group-5-main-bg: #2196F3; + --nt-group-5-dark-bg: #0069C0; + --nt-group-5-light-bg: #6EC6FF; + --nt-group-6-main: #000000; + --nt-group-6-dark: #FFFFFF; + --nt-group-6-light: #000000; + --nt-group-6-main-bg: #03A9F4; + --nt-group-6-dark-bg: #007AC1; + --nt-group-6-light-bg: #67DAFF; + --nt-group-7-main: #000000; + --nt-group-7-dark: #000000; + --nt-group-7-light: #000000; + --nt-group-7-main-bg: #00BCD4; + --nt-group-7-dark-bg: #008BA3; + --nt-group-7-light-bg: #62EFFF; + --nt-group-8-main: #000000; + --nt-group-8-dark: #FFFFFF; + --nt-group-8-light: #000000; + --nt-group-8-main-bg: #009688; + --nt-group-8-dark-bg: #00675B; + --nt-group-8-light-bg: #52C7B8; + --nt-group-9-main: #000000; + --nt-group-9-dark: #FFFFFF; + --nt-group-9-light: #000000; + --nt-group-9-main-bg: #4CAF50; + --nt-group-9-dark-bg: #087F23; + --nt-group-9-light-bg: #80E27E; + --nt-group-10-main: #000000; + --nt-group-10-dark: #000000; + --nt-group-10-light: #000000; + --nt-group-10-main-bg: #8BC34A; + --nt-group-10-dark-bg: #5A9216; + --nt-group-10-light-bg: #BEF67A; + --nt-group-11-main: #000000; + --nt-group-11-dark: #000000; + --nt-group-11-light: #000000; + --nt-group-11-main-bg: #CDDC39; + --nt-group-11-dark-bg: #99AA00; + --nt-group-11-light-bg: #FFFF6E; + --nt-group-12-main: #000000; + --nt-group-12-dark: #000000; + --nt-group-12-light: #000000; + --nt-group-12-main-bg: #FFEB3B; + --nt-group-12-dark-bg: #C8B900; + --nt-group-12-light-bg: #FFFF72; + --nt-group-13-main: #000000; + --nt-group-13-dark: #000000; + --nt-group-13-light: #000000; + --nt-group-13-main-bg: #FFC107; + --nt-group-13-dark-bg: #C79100; + --nt-group-13-light-bg: #FFF350; + --nt-group-14-main: #000000; + --nt-group-14-dark: #000000; + --nt-group-14-light: #000000; + --nt-group-14-main-bg: #FF9800; + --nt-group-14-dark-bg: #C66900; + --nt-group-14-light-bg: #FFC947; + --nt-group-15-main: #000000; + --nt-group-15-dark: #FFFFFF; + --nt-group-15-light: #000000; + --nt-group-15-main-bg: #FF5722; + --nt-group-15-dark-bg: #C41C00; + --nt-group-15-light-bg: #FF8A50; + --nt-group-16-main: #FFFFFF; + --nt-group-16-dark: #FFFFFF; + --nt-group-16-light: #000000; + --nt-group-16-main-bg: #795548; + --nt-group-16-dark-bg: #4B2C20; + --nt-group-16-light-bg: #A98274; + --nt-group-17-main: #000000; + --nt-group-17-dark: #FFFFFF; + --nt-group-17-light: #000000; + --nt-group-17-main-bg: 
#9E9E9E; + --nt-group-17-dark-bg: #707070; + --nt-group-17-light-bg: #CFCFCF; + --nt-group-18-main: #000000; + --nt-group-18-dark: #FFFFFF; + --nt-group-18-light: #000000; + --nt-group-18-main-bg: #607D8B; + --nt-group-18-dark-bg: #34515E; + --nt-group-18-light-bg: #8EACBB; +} + +.nt-pastello { + --nt-group-0-main: #000000; + --nt-group-0-dark: #000000; + --nt-group-0-light: #000000; + --nt-group-0-main-bg: #EF9A9A; + --nt-group-0-dark-bg: #BA6B6C; + --nt-group-0-light-bg: #FFCCCB; + --nt-group-1-main: #000000; + --nt-group-1-dark: #000000; + --nt-group-1-light: #000000; + --nt-group-1-main-bg: #F48FB1; + --nt-group-1-dark-bg: #BF5F82; + --nt-group-1-light-bg: #FFC1E3; + --nt-group-2-main: #000000; + --nt-group-2-dark: #000000; + --nt-group-2-light: #000000; + --nt-group-2-main-bg: #CE93D8; + --nt-group-2-dark-bg: #9C64A6; + --nt-group-2-light-bg: #FFC4FF; + --nt-group-3-main: #000000; + --nt-group-3-dark: #000000; + --nt-group-3-light: #000000; + --nt-group-3-main-bg: #B39DDB; + --nt-group-3-dark-bg: #836FA9; + --nt-group-3-light-bg: #E6CEFF; + --nt-group-4-main: #000000; + --nt-group-4-dark: #000000; + --nt-group-4-light: #000000; + --nt-group-4-main-bg: #9FA8DA; + --nt-group-4-dark-bg: #6F79A8; + --nt-group-4-light-bg: #D1D9FF; + --nt-group-5-main: #000000; + --nt-group-5-dark: #000000; + --nt-group-5-light: #000000; + --nt-group-5-main-bg: #90CAF9; + --nt-group-5-dark-bg: #5D99C6; + --nt-group-5-light-bg: #C3FDFF; + --nt-group-6-main: #000000; + --nt-group-6-dark: #000000; + --nt-group-6-light: #000000; + --nt-group-6-main-bg: #81D4FA; + --nt-group-6-dark-bg: #4BA3C7; + --nt-group-6-light-bg: #B6FFFF; + --nt-group-7-main: #000000; + --nt-group-7-dark: #000000; + --nt-group-7-light: #000000; + --nt-group-7-main-bg: #80DEEA; + --nt-group-7-dark-bg: #4BACB8; + --nt-group-7-light-bg: #B4FFFF; + --nt-group-8-main: #000000; + --nt-group-8-dark: #000000; + --nt-group-8-light: #000000; + --nt-group-8-main-bg: #80CBC4; + --nt-group-8-dark-bg: #4F9A94; + --nt-group-8-light-bg: #B2FEF7; + --nt-group-9-main: #000000; + --nt-group-9-dark: #000000; + --nt-group-9-light: #000000; + --nt-group-9-main-bg: #A5D6A7; + --nt-group-9-dark-bg: #75A478; + --nt-group-9-light-bg: #D7FFD9; + --nt-group-10-main: #000000; + --nt-group-10-dark: #000000; + --nt-group-10-light: #000000; + --nt-group-10-main-bg: #C5E1A5; + --nt-group-10-dark-bg: #94AF76; + --nt-group-10-light-bg: #F8FFD7; + --nt-group-11-main: #000000; + --nt-group-11-dark: #000000; + --nt-group-11-light: #000000; + --nt-group-11-main-bg: #E6EE9C; + --nt-group-11-dark-bg: #B3BC6D; + --nt-group-11-light-bg: #FFFFCE; + --nt-group-12-main: #000000; + --nt-group-12-dark: #000000; + --nt-group-12-light: #000000; + --nt-group-12-main-bg: #FFF59D; + --nt-group-12-dark-bg: #CBC26D; + --nt-group-12-light-bg: #FFFFCF; + --nt-group-13-main: #000000; + --nt-group-13-dark: #000000; + --nt-group-13-light: #000000; + --nt-group-13-main-bg: #FFE082; + --nt-group-13-dark-bg: #CAAE53; + --nt-group-13-light-bg: #FFFFB3; + --nt-group-14-main: #000000; + --nt-group-14-dark: #000000; + --nt-group-14-light: #000000; + --nt-group-14-main-bg: #FFCC80; + --nt-group-14-dark-bg: #CA9B52; + --nt-group-14-light-bg: #FFFFB0; + --nt-group-15-main: #000000; + --nt-group-15-dark: #000000; + --nt-group-15-light: #000000; + --nt-group-15-main-bg: #FFAB91; + --nt-group-15-dark-bg: #C97B63; + --nt-group-15-light-bg: #FFDDC1; + --nt-group-16-main: #000000; + --nt-group-16-dark: #000000; + --nt-group-16-light: #000000; + --nt-group-16-main-bg: #BCAAA4; + --nt-group-16-dark-bg: #8C7B75; + 
--nt-group-16-light-bg: #EFDCD5; + --nt-group-17-main: #000000; + --nt-group-17-dark: #000000; + --nt-group-17-light: #000000; + --nt-group-17-main-bg: #EEEEEE; + --nt-group-17-dark-bg: #BCBCBC; + --nt-group-17-light-bg: #FFFFFF; + --nt-group-18-main: #000000; + --nt-group-18-dark: #000000; + --nt-group-18-light: #000000; + --nt-group-18-main-bg: #B0BEC5; + --nt-group-18-dark-bg: #808E95; + --nt-group-18-light-bg: #E2F1F8; +} + +.nt-group-0 .nt-plan-group-summary, +.nt-group-0 .nt-timeline-dot { + color: var(--nt-group-0-dark); + background-color: var(--nt-group-0-dark-bg); +} +.nt-group-0 .period { + color: var(--nt-group-0-main); + background-color: var(--nt-group-0-main-bg); +} + +.nt-group-1 .nt-plan-group-summary, +.nt-group-1 .nt-timeline-dot { + color: var(--nt-group-1-dark); + background-color: var(--nt-group-1-dark-bg); +} +.nt-group-1 .period { + color: var(--nt-group-1-main); + background-color: var(--nt-group-1-main-bg); +} + +.nt-group-2 .nt-plan-group-summary, +.nt-group-2 .nt-timeline-dot { + color: var(--nt-group-2-dark); + background-color: var(--nt-group-2-dark-bg); +} +.nt-group-2 .period { + color: var(--nt-group-2-main); + background-color: var(--nt-group-2-main-bg); +} + +.nt-group-3 .nt-plan-group-summary, +.nt-group-3 .nt-timeline-dot { + color: var(--nt-group-3-dark); + background-color: var(--nt-group-3-dark-bg); +} +.nt-group-3 .period { + color: var(--nt-group-3-main); + background-color: var(--nt-group-3-main-bg); +} + +.nt-group-4 .nt-plan-group-summary, +.nt-group-4 .nt-timeline-dot { + color: var(--nt-group-4-dark); + background-color: var(--nt-group-4-dark-bg); +} +.nt-group-4 .period { + color: var(--nt-group-4-main); + background-color: var(--nt-group-4-main-bg); +} + +.nt-group-5 .nt-plan-group-summary, +.nt-group-5 .nt-timeline-dot { + color: var(--nt-group-5-dark); + background-color: var(--nt-group-5-dark-bg); +} +.nt-group-5 .period { + color: var(--nt-group-5-main); + background-color: var(--nt-group-5-main-bg); +} + +.nt-group-6 .nt-plan-group-summary, +.nt-group-6 .nt-timeline-dot { + color: var(--nt-group-6-dark); + background-color: var(--nt-group-6-dark-bg); +} +.nt-group-6 .period { + color: var(--nt-group-6-main); + background-color: var(--nt-group-6-main-bg); +} + +.nt-group-7 .nt-plan-group-summary, +.nt-group-7 .nt-timeline-dot { + color: var(--nt-group-7-dark); + background-color: var(--nt-group-7-dark-bg); +} +.nt-group-7 .period { + color: var(--nt-group-7-main); + background-color: var(--nt-group-7-main-bg); +} + +.nt-group-8 .nt-plan-group-summary, +.nt-group-8 .nt-timeline-dot { + color: var(--nt-group-8-dark); + background-color: var(--nt-group-8-dark-bg); +} +.nt-group-8 .period { + color: var(--nt-group-8-main); + background-color: var(--nt-group-8-main-bg); +} + +.nt-group-9 .nt-plan-group-summary, +.nt-group-9 .nt-timeline-dot { + color: var(--nt-group-9-dark); + background-color: var(--nt-group-9-dark-bg); +} +.nt-group-9 .period { + color: var(--nt-group-9-main); + background-color: var(--nt-group-9-main-bg); +} + +.nt-group-10 .nt-plan-group-summary, +.nt-group-10 .nt-timeline-dot { + color: var(--nt-group-10-dark); + background-color: var(--nt-group-10-dark-bg); +} +.nt-group-10 .period { + color: var(--nt-group-10-main); + background-color: var(--nt-group-10-main-bg); +} + +.nt-group-11 .nt-plan-group-summary, +.nt-group-11 .nt-timeline-dot { + color: var(--nt-group-11-dark); + background-color: var(--nt-group-11-dark-bg); +} +.nt-group-11 .period { + color: var(--nt-group-11-main); + background-color: 
var(--nt-group-11-main-bg); +} + +.nt-group-12 .nt-plan-group-summary, +.nt-group-12 .nt-timeline-dot { + color: var(--nt-group-12-dark); + background-color: var(--nt-group-12-dark-bg); +} +.nt-group-12 .period { + color: var(--nt-group-12-main); + background-color: var(--nt-group-12-main-bg); +} + +.nt-group-13 .nt-plan-group-summary, +.nt-group-13 .nt-timeline-dot { + color: var(--nt-group-13-dark); + background-color: var(--nt-group-13-dark-bg); +} +.nt-group-13 .period { + color: var(--nt-group-13-main); + background-color: var(--nt-group-13-main-bg); +} + +.nt-group-14 .nt-plan-group-summary, +.nt-group-14 .nt-timeline-dot { + color: var(--nt-group-14-dark); + background-color: var(--nt-group-14-dark-bg); +} +.nt-group-14 .period { + color: var(--nt-group-14-main); + background-color: var(--nt-group-14-main-bg); +} + +.nt-group-15 .nt-plan-group-summary, +.nt-group-15 .nt-timeline-dot { + color: var(--nt-group-15-dark); + background-color: var(--nt-group-15-dark-bg); +} +.nt-group-15 .period { + color: var(--nt-group-15-main); + background-color: var(--nt-group-15-main-bg); +} + +.nt-group-16 .nt-plan-group-summary, +.nt-group-16 .nt-timeline-dot { + color: var(--nt-group-16-dark); + background-color: var(--nt-group-16-dark-bg); +} +.nt-group-16 .period { + color: var(--nt-group-16-main); + background-color: var(--nt-group-16-main-bg); +} + +.nt-group-17 .nt-plan-group-summary, +.nt-group-17 .nt-timeline-dot { + color: var(--nt-group-17-dark); + background-color: var(--nt-group-17-dark-bg); +} +.nt-group-17 .period { + color: var(--nt-group-17-main); + background-color: var(--nt-group-17-main-bg); +} + +.nt-group-18 .nt-plan-group-summary, +.nt-group-18 .nt-timeline-dot { + color: var(--nt-group-18-dark); + background-color: var(--nt-group-18-dark-bg); +} +.nt-group-18 .period { + color: var(--nt-group-18-main); + background-color: var(--nt-group-18-main-bg); +} + +/** + * Extra CSS file for MkDocs and the neoteroi.timeline extension. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +.nt-error { + border: 2px dashed darkred; + padding: 0 1rem; + background: #faf9ba; + color: darkred; +} + +.nt-timeline { + margin-top: 30px; +} +.nt-timeline .nt-timeline-title { + font-size: 1.1rem; + margin-top: 0; +} +.nt-timeline .nt-timeline-sub-title { + margin-top: 0; +} +.nt-timeline .nt-timeline-content { + font-size: 0.8rem; + border-bottom: 2px dashed #ccc; + padding-bottom: 1.2rem; +} +.nt-timeline.horizontal .nt-timeline-items { + flex-direction: row; + overflow-x: scroll; +} +.nt-timeline.horizontal .nt-timeline-items > div { + min-width: 400px; + margin-right: 50px; +} +.nt-timeline.horizontal.reverse .nt-timeline-items { + flex-direction: row-reverse; +} +.nt-timeline.horizontal.center .nt-timeline-before { + background-image: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal.center .nt-timeline-after { + background-image: linear-gradient(180deg, rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal.center .nt-timeline-items { + background-image: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%); + background-repeat: no-repeat; + background-size: 100% 2px; + background-position: 0 center; +} +.nt-timeline.horizontal .nt-timeline-dot { + left: 50%; +} +.nt-timeline.horizontal .nt-timeline-dot:not(.bigger) { + top: calc(50% - 4px); +} +.nt-timeline.horizontal .nt-timeline-dot.bigger { + top: calc(50% - 15px); +} +.nt-timeline.vertical .nt-timeline-items { + flex-direction: column; +} +.nt-timeline.vertical.reverse .nt-timeline-items { + flex-direction: column-reverse; +} +.nt-timeline.vertical.center .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat center/2px 100%; +} +.nt-timeline.vertical.center .nt-timeline-dot { + left: calc(50% - 10px); +} +.nt-timeline.vertical.center .nt-timeline-dot:not(.bigger) { + top: 10px; +} +.nt-timeline.vertical.center .nt-timeline-dot.bigger { + left: calc(50% - 20px); +} +.nt-timeline.vertical.left { + padding-left: 100px; +} +.nt-timeline.vertical.left .nt-timeline-item { + padding-left: 70px; +} +.nt-timeline.vertical.left .nt-timeline-sub-title { + left: -100px; + width: 100px; +} +.nt-timeline.vertical.left .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat 30px/2px 100%; +} +.nt-timeline.vertical.left .nt-timeline-dot { + left: 21px; + top: 8px; +} +.nt-timeline.vertical.left .nt-timeline-dot.bigger { + top: 0px; + left: 10px; +} +.nt-timeline.vertical.right { + padding-right: 100px; +} +.nt-timeline.vertical.right .nt-timeline-sub-title { + right: -100px; 
+ text-align: left; + width: 100px; +} +.nt-timeline.vertical.right .nt-timeline-item { + padding-right: 70px; +} +.nt-timeline.vertical.right .nt-timeline-before { + background: linear-gradient(rgba(252, 70, 107, 0) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-after { + background: linear-gradient(rgb(252, 70, 107) 0%, rgba(252, 70, 107, 0) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-items { + background: radial-gradient(circle, rgb(63, 94, 251) 0%, rgb(252, 70, 107) 100%) no-repeat calc(100% - 30px)/2px 100%; +} +.nt-timeline.vertical.right .nt-timeline-dot { + right: 21px; + top: 8px; +} +.nt-timeline.vertical.right .nt-timeline-dot.bigger { + top: 10px; + right: 10px; +} + +.nt-timeline-items { + display: flex; + position: relative; +} +.nt-timeline-items > div { + min-height: 100px; + padding-top: 2px; + padding-bottom: 20px; +} + +.nt-timeline-before { + content: ""; + height: 15px; +} + +.nt-timeline-after { + content: ""; + height: 60px; + margin-bottom: 20px; +} + +.nt-timeline-sub-title { + position: absolute; + width: 50%; + top: 4px; + font-size: 18px; + color: var(--nt-color-50); +} + +[data-md-color-scheme=slate] .nt-timeline-sub-title { + color: var(--nt-color-51); +} + +.nt-timeline-item { + position: relative; +} + +.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item { + padding-left: calc(50% + 40px); +} +.nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title { + left: 0; + padding-right: 40px; + text-align: right; +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) { + padding-left: calc(50% + 40px); +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title { + left: 0; + padding-right: 40px; + text-align: right; +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) { + text-align: right; + padding-right: calc(50% + 40px); +} +.nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title { + right: 0; + padding-left: 40px; + text-align: left; +} + +.nt-timeline-dot { + position: relative; + width: 20px; + height: 20px; + border-radius: 100%; + background-color: #fc5b5b; + position: absolute; + top: 0px; + z-index: 2; + display: flex; + justify-content: center; + align-items: center; + box-shadow: 0 2px 1px -1px rgba(0, 0, 0, 0.2), 0 1px 1px 0 rgba(0, 0, 0, 0.14), 0 1px 3px 0 rgba(0, 0, 0, 0.12); + border: 3px solid white; +} +.nt-timeline-dot:not(.bigger) .icon { + font-size: 10px; +} +.nt-timeline-dot.bigger { + width: 40px; + height: 40px; + padding: 3px; +} +.nt-timeline-dot .icon { + color: white; + position: relative; + top: 1px; +} + +/* Fix for webkit (Chrome, Safari) */ +@supports not (-moz-appearance: none) { + /* + This fix is necessary, for some reason, to render the timeline properly + inside `details` elements used by pymdownx. Firefox doesn't need this fix, + it renders elements properly. 
+ */ + details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(odd) .nt-timeline-sub-title, +details .nt-timeline.vertical.center:not(.alternate) .nt-timeline-item .nt-timeline-sub-title { + left: -40px; + } + details .nt-timeline.vertical.center.alternate .nt-timeline-item:nth-child(even) .nt-timeline-sub-title { + right: -40px; + } + details .nt-timeline.vertical.center .nt-timeline-dot { + left: calc(50% - 12px); + } + details .nt-timeline-dot.bigger { + font-size: 1rem !important; + } +} +/* default colors */ +.nt-timeline-item:nth-child(0) .nt-timeline-dot { + background-color: var(--nt-color-0); +} + +.nt-timeline-item:nth-child(1) .nt-timeline-dot { + background-color: var(--nt-color-1); +} + +.nt-timeline-item:nth-child(2) .nt-timeline-dot { + background-color: var(--nt-color-2); +} + +.nt-timeline-item:nth-child(3) .nt-timeline-dot { + background-color: var(--nt-color-3); +} + +.nt-timeline-item:nth-child(4) .nt-timeline-dot { + background-color: var(--nt-color-4); +} + +.nt-timeline-item:nth-child(5) .nt-timeline-dot { + background-color: var(--nt-color-5); +} + +.nt-timeline-item:nth-child(6) .nt-timeline-dot { + background-color: var(--nt-color-6); +} + +.nt-timeline-item:nth-child(7) .nt-timeline-dot { + background-color: var(--nt-color-7); +} + +.nt-timeline-item:nth-child(8) .nt-timeline-dot { + background-color: var(--nt-color-8); +} + +.nt-timeline-item:nth-child(9) .nt-timeline-dot { + background-color: var(--nt-color-9); +} + +.nt-timeline-item:nth-child(10) .nt-timeline-dot { + background-color: var(--nt-color-10); +} + +.nt-timeline-item:nth-child(11) .nt-timeline-dot { + background-color: var(--nt-color-11); +} + +.nt-timeline-item:nth-child(12) .nt-timeline-dot { + background-color: var(--nt-color-12); +} + +.nt-timeline-item:nth-child(13) .nt-timeline-dot { + background-color: var(--nt-color-13); +} + +.nt-timeline-item:nth-child(14) .nt-timeline-dot { + background-color: var(--nt-color-14); +} + +.nt-timeline-item:nth-child(15) .nt-timeline-dot { + background-color: var(--nt-color-15); +} + +.nt-timeline-item:nth-child(16) .nt-timeline-dot { + background-color: var(--nt-color-16); +} + +.nt-timeline-item:nth-child(17) .nt-timeline-dot { + background-color: var(--nt-color-17); +} + +.nt-timeline-item:nth-child(18) .nt-timeline-dot { + background-color: var(--nt-color-18); +} + +.nt-timeline-item:nth-child(19) .nt-timeline-dot { + background-color: var(--nt-color-19); +} + +.nt-timeline-item:nth-child(20) .nt-timeline-dot { + background-color: var(--nt-color-20); +} + +/** + * Extra CSS for the neoteroi.projects.gantt extension. 
+ * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --nt-scrollbar-color: #2751b0; + --nt-plan-actions-height: 24px; + --nt-units-background: #ff9800; + --nt-months-background: #2751b0; + --nt-plan-vertical-line-color: #a3a3a3ad; +} + +.nt-pastello { + --nt-scrollbar-color: #9fb8f4; + --nt-units-background: #f5dc82; + --nt-months-background: #5b7fd1; +} + +[data-md-color-scheme=slate] { + --nt-units-background: #003773; +} +[data-md-color-scheme=slate] .nt-pastello { + --nt-units-background: #3f4997; +} + +.nt-plan-root { + min-height: 200px; + scrollbar-width: 20px; + scrollbar-color: var(--nt-scrollbar-color); + display: flex; +} +.nt-plan-root ::-webkit-scrollbar { + width: 20px; +} +.nt-plan-root ::-webkit-scrollbar-track { + box-shadow: inset 0 0 5px grey; + border-radius: 10px; +} +.nt-plan-root ::-webkit-scrollbar-thumb { + background: var(--nt-scrollbar-color); + border-radius: 10px; +} +.nt-plan-root .nt-plan { + flex: 80%; +} +.nt-plan-root.no-groups .nt-plan-periods { + padding-left: 0; +} +.nt-plan-root.no-groups .nt-plan-group-summary { + display: none; +} +.nt-plan-root .nt-timeline-dot.bigger { + top: -10px; +} +.nt-plan-root .nt-timeline-dot.bigger[title] { + cursor: help; +} + +.nt-plan { + white-space: nowrap; + overflow-x: auto; + display: flex; +} +.nt-plan .ug-timeline-dot { + left: 368px; + top: -8px; + cursor: help; +} + +.months { + display: flex; +} + +.month { + flex: auto; + display: inline-block; + box-shadow: rgba(0, 0, 0, 0.2) 0px 3px 1px -2px, rgba(0, 0, 0, 0.14) 0px 2px 2px 0px, rgba(0, 0, 0, 0.12) 0px 1px 5px 0px inset; + background-color: var(--nt-months-background); + color: white; + text-transform: uppercase; + font-family: Roboto, Helvetica, Arial, sans-serif; + padding: 2px 5px; + font-size: 12px; + border: 1px solid #000; + width: 150px; + border-radius: 8px; +} + +.nt-plan-group-activities { + flex: auto; + position: relative; +} + +.nt-vline { + border-left: 1px dashed var(--nt-plan-vertical-line-color); + height: 100%; + left: 0; + position: absolute; + margin-left: -0.5px; + top: 0; + -webkit-transition: all 0.5s linear !important; + -moz-transition: all 0.5s linear !important; + -ms-transition: all 0.5s linear !important; + -o-transition: all 0.5s linear !important; + transition: all 0.5s linear !important; + z-index: -2; +} + +.nt-plan-activity { + display: flex; + margin: 2px 0; + background-color: rgba(187, 187, 187, 0.2509803922); +} + +.actions { + height: var(--nt-plan-actions-height); +} + +.actions { + position: relative; +} + +.period { + display: inline-block; + height: var(--nt-plan-actions-height); + width: 120px; + position: absolute; + left: 0px; + background: #1da1f2; + border-radius: 5px; + transition: all 0.5s; + cursor: help; + -webkit-transition: width 1s ease-in-out; + -moz-transition: width 1s ease-in-out; + -o-transition: width 1s ease-in-out; + transition: width 1s ease-in-out; +} +.period .nt-tooltip { + display: none; + top: 30px; + position: relative; + padding: 1rem; + text-align: center; + font-size: 12px; +} +.period:hover .nt-tooltip { + display: inline-block; +} + +.period-0 { + left: 340px; + visibility: visible; + background-color: rgb(69, 97, 101); +} + +.period-1 { + left: 40px; + visibility: visible; + background-color: green; +} + +.period-2 { + left: 120px; + visibility: visible; + background-color: pink; + width: 80px; +} + +.period-3 { + left: 190px; + visibility: visible; + background-color: darkred; + width: 150px; +} + +.weeks > span, +.days > span { + height: 25px; +} + +.weeks > span { + 
display: inline-block; + margin: 0; + padding: 0; + font-weight: bold; +} +.weeks > span .week-text { + font-size: 10px; + position: absolute; + display: inline-block; + padding: 3px 4px; +} + +.days { + z-index: -2; + position: relative; +} + +.day-text { + font-size: 10px; + position: absolute; + display: inline-block; + padding: 3px 4px; +} + +.period span { + font-size: 12px; + vertical-align: top; + margin-left: 4px; + color: black; + background: rgba(255, 255, 255, 0.6588235294); + border-radius: 6px; + padding: 0 4px; +} + +.weeks, +.days { + height: 20px; + display: flex; + box-sizing: content-box; +} + +.months { + display: flex; +} + +.week, +.day { + height: 20px; + position: relative; + border: 1; + flex: auto; + border: 2px solid white; + border-radius: 4px; + background-color: var(--nt-units-background); + cursor: help; +} + +.years { + display: flex; +} + +.year { + text-align: center; + border-right: 1px solid var(--nt-plan-vertical-line-color); + font-weight: bold; +} +.year:first-child { + border-left: 1px solid var(--nt-plan-vertical-line-color); +} +.year:first-child:last-child { + width: 100%; +} + +.quarters { + display: flex; +} + +.quarter { + width: 12.5%; + text-align: center; + border-right: 1px solid var(--nt-plan-vertical-line-color); + font-weight: bold; +} +.quarter:first-child { + border-left: 1px solid var(--nt-plan-vertical-line-color); +} + +.nt-plan-group { + margin: 20px 0; + position: relative; +} + +.nt-plan-group { + display: flex; +} + +.nt-plan-group-summary { + background: #2751b0; + width: 150px; + white-space: normal; + padding: 0.1rem 0.5rem; + border-radius: 5px; + color: #fff; + z-index: 3; +} +.nt-plan-group-summary p { + margin: 0; + padding: 0; + font-size: 0.6rem; + color: #fff; +} + +.nt-plan-group-summary, +.month, +.period, +.week, +.day, +.nt-tooltip { + border: 3px solid white; + box-shadow: 0 2px 3px -1px rgba(0, 0, 0, 0.2), 0 3px 3px 0 rgba(0, 0, 0, 0.14), 0 1px 5px 0 rgba(0, 0, 0, 0.12); +} + +.nt-plan-periods { + padding-left: 150px; +} + +.months { + z-index: 2; + position: relative; +} + +.weeks { + position: relative; + top: -2px; + z-index: 0; +} + +.month, +.quarter, +.year, +.week, +.day, +.nt-tooltip { + font-family: Roboto, Helvetica, Arial, sans-serif; + box-sizing: border-box; +} + +.nt-cards.nt-grid { + display: grid; + grid-auto-columns: 1fr; + gap: 0.5rem; + max-width: 100vw; + overflow-x: auto; + padding: 1px; +} +.nt-cards.nt-grid.cols-1 { + grid-template-columns: repeat(1, 1fr); +} +.nt-cards.nt-grid.cols-2 { + grid-template-columns: repeat(2, 1fr); +} +.nt-cards.nt-grid.cols-3 { + grid-template-columns: repeat(3, 1fr); +} +.nt-cards.nt-grid.cols-4 { + grid-template-columns: repeat(4, 1fr); +} +.nt-cards.nt-grid.cols-5 { + grid-template-columns: repeat(5, 1fr); +} +.nt-cards.nt-grid.cols-6 { + grid-template-columns: repeat(6, 1fr); +} + +@media only screen and (max-width: 400px) { + .nt-cards.nt-grid { + grid-template-columns: repeat(1, 1fr) !important; + } +} +.nt-card { + box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14), 0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12); +} +.nt-card:hover { + box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.24), 0 3px 1px -2px rgba(0, 0, 0, 0.3), 0 1px 5px 0 rgba(0, 0, 0, 0.22); +} + +[data-md-color-scheme=slate] .nt-card { + box-shadow: 0 2px 2px 0 rgba(4, 40, 33, 0.14), 0 3px 1px -2px rgba(40, 86, 94, 0.47), 0 1px 5px 0 rgba(139, 252, 255, 0.64); +} +[data-md-color-scheme=slate] .nt-card:hover { + box-shadow: 0 2px 2px 0 rgba(0, 255, 206, 0.14), 0 3px 1px -2px rgba(33, 
156, 177, 0.47), 0 1px 5px 0 rgba(96, 251, 255, 0.64); +} + +.nt-card > a { + color: var(--md-default-fg-color); +} + +.nt-card > a > div { + cursor: pointer; +} + +.nt-card { + padding: 5px; + margin-bottom: 0.5rem; +} + +.nt-card-title { + font-size: 1rem; + font-weight: bold; + margin: 4px 0 8px 0; + line-height: 22px; +} + +.nt-card-content { + padding: 0.4rem 0.8rem 0.8rem 0.8rem; +} + +.nt-card-text { + font-size: 14px; + padding: 0; + margin: 0; +} + +.nt-card .nt-card-image { + text-align: center; + border-radius: 2px; + background-position: center center; + background-size: cover; + background-repeat: no-repeat; + min-height: 120px; +} + +.nt-card .nt-card-image.tags img { + margin-top: 12px; +} + +.nt-card .nt-card-image img { + height: 105px; + margin-top: 5px; +} + +.nt-card .nt-card-icon { + text-align: center; + padding-top: 12px; + min-height: 120px; +} + +.nt-card .nt-card-icon .icon { + font-size: 95px; + line-height: 1; +} + +.nt-card a:hover, +.nt-card a:focus { + color: var(--md-accent-fg-color); +} + +.nt-card h2 { + margin: 0; +} + +/** + * Extra CSS file recommended for MkDocs and neoteroi.spantable extension. + * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +.span-table-wrapper table { + border-collapse: collapse; + margin-bottom: 2rem; + border-radius: 0.1rem; +} + +.span-table td, +.span-table th { + padding: 0.2rem; + background-color: var(--md-default-bg-color); + font-size: 0.64rem; + max-width: 100%; + overflow: auto; + touch-action: auto; + border-top: 0.05rem solid var(--md-typeset-table-color); + padding: 0.9375em 1.25em; + vertical-align: top; +} + +.span-table tr:first-child td { + font-weight: 700; + min-width: 5rem; + padding: 0.9375em 1.25em; + vertical-align: top; +} + +.span-table td:first-child { + border-left: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table td:last-child { + border-right: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table tr:last-child { + border-bottom: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table [colspan], +.span-table [rowspan] { + font-weight: bold; + border: 0.05rem solid var(--md-typeset-table-color); +} + +.span-table tr:not(:first-child):hover td:not([colspan]):not([rowspan]), +.span-table td[colspan]:hover, +.span-table td[rowspan]:hover { + background-color: rgba(0, 0, 0, 0.035); + box-shadow: 0 0.05rem 0 var(--md-default-bg-color) inset; + transition: background-color 125ms; +} + +.nt-contribs { + margin-top: 2rem; + font-size: small; + border-top: 1px dotted lightgray; + padding-top: 0.5rem; +} +.nt-contribs .nt-contributors { + padding-top: 0.5rem; + display: flex; + flex-wrap: wrap; +} +.nt-contribs .nt-contributor { + background: lightgrey; + background-size: cover; + width: 40px; + height: 40px; + border-radius: 100%; + margin: 0 6px 6px 0; + cursor: help; + opacity: 0.7; +} +.nt-contribs .nt-contributor:hover { + opacity: 1; +} +.nt-contribs .nt-contributors-title { + font-style: italic; + margin-bottom: 0; +} +.nt-contribs .nt-initials { + text-transform: uppercase; + font-size: 20px; + text-align: center; + width: 40px; + height: 40px; + display: inline-block; + vertical-align: middle; + position: relative; + top: 4px; + color: inherit; + font-weight: bold; +} +.nt-contribs .nt-group-0 { + background-color: var(--nt-color-0); +} +.nt-contribs .nt-group-1 { + background-color: var(--nt-color-1); +} +.nt-contribs .nt-group-2 { + background-color: var(--nt-color-2); +} +.nt-contribs .nt-group-3 { + background-color: var(--nt-color-3); +} +.nt-contribs .nt-group-4 
{ + background-color: var(--nt-color-4); +} +.nt-contribs .nt-group-5 { + background-color: var(--nt-color-5); +} +.nt-contribs .nt-group-6 { + background-color: var(--nt-color-6); +} +.nt-contribs .nt-group-7 { + color: #000; + background-color: var(--nt-color-7); +} +.nt-contribs .nt-group-8 { + color: #000; + background-color: var(--nt-color-8); +} +.nt-contribs .nt-group-9 { + background-color: var(--nt-color-9); +} +.nt-contribs .nt-group-10 { + background-color: var(--nt-color-10); +} +.nt-contribs .nt-group-11 { + background-color: var(--nt-color-11); +} +.nt-contribs .nt-group-12 { + background-color: var(--nt-color-12); +} +.nt-contribs .nt-group-13 { + background-color: var(--nt-color-13); +} +.nt-contribs .nt-group-14 { + background-color: var(--nt-color-14); +} +.nt-contribs .nt-group-15 { + color: #000; + background-color: var(--nt-color-15); +} +.nt-contribs .nt-group-16 { + background-color: var(--nt-color-16); +} +.nt-contribs .nt-group-17 { + color: #000; + background-color: var(--nt-color-17); +} +.nt-contribs .nt-group-18 { + background-color: var(--nt-color-18); +} +.nt-contribs .nt-group-19 { + background-color: var(--nt-color-19); +} +.nt-contribs .nt-group-20 { + color: #000; + background-color: var(--nt-color-20); +} +.nt-contribs .nt-group-21 { + color: #000; + background-color: var(--nt-color-21); +} +.nt-contribs .nt-group-22 { + color: #000; + background-color: var(--nt-color-22); +} +.nt-contribs .nt-group-23 { + color: #000; + background-color: var(--nt-color-23); +} +.nt-contribs .nt-group-24 { + color: #000; + background-color: var(--nt-color-24); +} +.nt-contribs .nt-group-25 { + color: #000; + background-color: var(--nt-color-25); +} +.nt-contribs .nt-group-26 { + color: #000; + background-color: var(--nt-color-26); +} +.nt-contribs .nt-group-27 { + background-color: var(--nt-color-27); +} +.nt-contribs .nt-group-28 { + color: #000; + background-color: var(--nt-color-28); +} +.nt-contribs .nt-group-29 { + color: #000; + background-color: var(--nt-color-29); +} +.nt-contribs .nt-group-30 { + background-color: var(--nt-color-30); +} +.nt-contribs .nt-group-31 { + background-color: var(--nt-color-31); +} +.nt-contribs .nt-group-32 { + color: #000; + background-color: var(--nt-color-32); +} +.nt-contribs .nt-group-33 { + background-color: var(--nt-color-33); +} +.nt-contribs .nt-group-34 { + background-color: var(--nt-color-34); +} +.nt-contribs .nt-group-35 { + background-color: var(--nt-color-35); +} +.nt-contribs .nt-group-36 { + background-color: var(--nt-color-36); +} +.nt-contribs .nt-group-37 { + background-color: var(--nt-color-37); +} +.nt-contribs .nt-group-38 { + background-color: var(--nt-color-38); +} +.nt-contribs .nt-group-39 { + color: #000; + background-color: var(--nt-color-39); +} +.nt-contribs .nt-group-40 { + color: #000; + background-color: var(--nt-color-40); +} +.nt-contribs .nt-group-41 { + color: #000; + background-color: var(--nt-color-41); +} +.nt-contribs .nt-group-42 { + color: #000; + background-color: var(--nt-color-42); +} +.nt-contribs .nt-group-43 { + color: #000; + background-color: var(--nt-color-43); +} +.nt-contribs .nt-group-44 { + color: #000; + background-color: var(--nt-color-44); +} +.nt-contribs .nt-group-45 { + background-color: var(--nt-color-45); +} +.nt-contribs .nt-group-46 { + color: #000; + background-color: var(--nt-color-46); +} +.nt-contribs .nt-group-47 { + background-color: var(--nt-color-47); +} +.nt-contribs .nt-group-48 { + background-color: var(--nt-color-48); +} +.nt-contribs .nt-group-49 { + 
background-color: var(--nt-color-49); +} + +/** + * CSS for OpenAPI HTML generated with PyMdown Extensions option. + * + * This CSS file works when using the OAD plugin with pymdownx. + * See here how to use it: + * https://www.neoteroi.dev/mkdocs-plugins/web/oad/ + * + * https://github.com/Neoteroi/mkdocs-plugins +**/ +:root { + --http-get-color: green; + --http-delete-color: #dc0101; + --http-head-color: slateblue; + --http-options-color: steelblue; + --http-patch-color: darkorange; + --http-post-color: darkblue; + --http-put-color: darkmagenta; + --http-trace-color: darkcyan; + --http-route-param-color: rgb(51, 128, 210); + --oad-operation-separator-border-color: gray; + --oad-block-border-color: #00bfa5; + --oad-small-note-color: #666; + --oad-indent-border-color: #c5c5c5; +} + +@media screen { + /* Slate theme, i.e. dark mode */ + [data-md-color-scheme=slate] { + --http-get-color: #2ea82e; + --http-post-color: #0093c0; + --http-put-color: #c333c3; + --oad-small-note-color: #afafaf; + } +} +.api-tag { + font-weight: bold; +} + +span[class^=http-] { + font-weight: bold; + color: #fff; + padding: 4px 1rem; + border-radius: 2px; + margin-right: 0.5rem; +} + +.http-get { + background-color: var(--http-get-color); +} + +.http-delete { + background-color: var(--http-delete-color); +} + +.http-post { + background-color: var(--http-post-color); +} + +.http-patch { + background-color: var(--http-patch-color); +} + +.http-trace { + background-color: var(--http-trace-color); +} + +.http-put { + background-color: var(--http-put-color); +} + +.http-head { + background-color: var(--http-head-color); +} + +.http-options { + background-color: var(--http-options-color); +} + +.route-param { + color: var(--http-route-param-color); +} + +.operation-separator + h3[id^=get] .route-param { + color: var(--http-get-color); +} + +.operation-separator + h3[id^=delete] .route-param { + color: var(--http-delete-color); +} + +.operation-separator + h3[id^=post] .route-param { + color: var(--http-post-color); +} + +.operation-separator + h3[id^=patch] .route-param { + color: var(--http-patch-color); +} + +.operation-separator + h3[id^=trace] .route-param { + color: var(--http-trace-color); +} + +.operation-separator + h3[id^=put] .route-param { + color: var(--http-put-color); +} + +.operation-separator + h3[id^=head] .route-param { + color: var(--http-head-color); +} + +.operation-separator + h3[id^=options] .route-param { + color: var(--http-options-color); +} + +.api-version { + font-size: 1.2rem; +} + +.operation-separator { + margin: 0 !important; + border-bottom: 2px dotted var(--oad-operation-separator-border-color) !important; + padding-top: 0.5rem; +} + +.operation-separator + h3 { + margin-top: 1rem; +} + +.string-type { + color: var(--md-code-hl-string-color); +} + +.integer-type, .number-type { + color: var(--md-code-hl-number-color); +} + +.boolean-type { + color: var(--md-code-hl-keyword-color); +} + +.format { + color: var(--md-code-hl-name-color); +} + +.null-type { + color: var(--md-code-hl-keyword-color); +} + +a.ref-link { + color: var(--md-code-hl-special-color); +} + +.request-block + div { + padding-left: 1rem; + border-left: 2px dashed var(--oad-block-border-color); +} + +.small-note { + font-size: 14px; + color: var(--oad-small-note-color); +} + +.request-body-title { + margin-bottom: 4px; +} + +.request-body-title + .tabbed-set, +.response-title + .tabbed-set, +.message-separator + .tabbed-set, +.common-response, +.response-section { + margin-top: 2px; + padding-left: 1rem; + border-left: 
2px dotted var(--oad-indent-border-color); +} + +.info-data { + font-size: 0.6rem; +} + +.message-separator { + visibility: hidden; +} + +.sub-section-title { + font-style: italic; + font-size: 14px; +} diff --git a/apidocs/docs.go b/docs/docs.go similarity index 53% rename from apidocs/docs.go rename to docs/docs.go index 4b521b1..f46ac36 100644 --- a/apidocs/docs.go +++ b/docs/docs.go @@ -1,5 +1,5 @@ -// Package apidocs Code generated by swaggo/swag. DO NOT EDIT -package apidocs +// Package docs Code generated by swaggo/swag. DO NOT EDIT +package docs import "github.com/swaggo/swag" @@ -19,7 +19,7 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { - "/backends/llama-cpp/devices": { + "/api/v1/backends/llama-cpp/devices": { "get": { "security": [ { @@ -28,7 +28,7 @@ const docTemplate = `{ ], "description": "Returns a list of available devices for the llama server", "tags": [ - "backends" + "Backends" ], "summary": "List available devices for llama server", "responses": { @@ -47,7 +47,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/help": { + "/api/v1/backends/llama-cpp/help": { "get": { "security": [ { @@ -56,7 +56,7 @@ const docTemplate = `{ ], "description": "Returns the help text for the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get help for llama server", "responses": { @@ -75,7 +75,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/parse-command": { + "/api/v1/backends/llama-cpp/parse-command": { "post": { "security": [ { @@ -90,7 +90,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse llama-server command", "parameters": [ @@ -108,7 +108,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -132,7 +132,7 @@ const docTemplate = `{ } } }, - "/backends/llama-cpp/version": { + "/api/v1/backends/llama-cpp/version": { "get": { "security": [ { @@ -141,7 +141,7 @@ const docTemplate = `{ ], "description": "Returns the version of the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get version of llama server", "responses": { @@ -160,7 +160,7 @@ const docTemplate = `{ } } }, - "/backends/mlx/parse-command": { + "/api/v1/backends/mlx/parse-command": { "post": { "security": [ { @@ -175,7 +175,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse mlx_lm.server command", "parameters": [ @@ -193,7 +193,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -208,7 +208,7 @@ const docTemplate = `{ } } }, - "/backends/vllm/parse-command": { + "/api/v1/backends/vllm/parse-command": { "post": { "security": [ { @@ -223,7 +223,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse vllm serve command", "parameters": [ @@ -241,7 +241,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -256,7 +256,7 @@ const docTemplate = `{ } } }, - "/instances": { + "/api/v1/instances": { "get": { "security": [ { @@ -265,7 +265,7 @@ const docTemplate = `{ ], "description": "Returns a list of all instances managed by the server", 
"tags": [ - "instances" + "Instances" ], "summary": "List all instances", "responses": { @@ -274,7 +274,7 @@ const docTemplate = `{ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } } }, @@ -287,7 +287,7 @@ const docTemplate = `{ } } }, - "/instances/{name}": { + "/api/v1/instances/{name}": { "get": { "security": [ { @@ -296,7 +296,7 @@ const docTemplate = `{ ], "description": "Returns the details of a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Get details of a specific instance", "parameters": [ @@ -312,7 +312,7 @@ const docTemplate = `{ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -340,7 +340,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Update an instance's configuration", "parameters": [ @@ -357,7 +357,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -365,7 +365,7 @@ const docTemplate = `{ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -393,7 +393,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Create and start a new instance", "parameters": [ @@ -410,7 +410,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -418,7 +418,7 @@ const docTemplate = `{ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -443,7 +443,7 @@ const docTemplate = `{ ], "description": "Stops and removes a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Delete an instance", "parameters": [ @@ -474,7 +474,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/logs": { + "/api/v1/instances/{name}/logs": { "get": { "security": [ { @@ -483,7 +483,7 @@ const docTemplate = `{ ], "description": "Returns the logs from a specific instance by name with optional line limit", "tags": [ - "instances" + "Instances" ], "summary": "Get logs from a specific instance", "parameters": [ @@ -523,7 +523,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/proxy": { + "/api/v1/instances/{name}/proxy": { "get": { "security": [ { @@ -532,9 +532,9 @@ const docTemplate = `{ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -576,9 +576,9 @@ const docTemplate = `{ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -613,7 +613,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/restart": { + "/api/v1/instances/{name}/restart": { 
"post": { "security": [ { @@ -622,7 +622,7 @@ const docTemplate = `{ ], "description": "Restarts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Restart a running instance", "parameters": [ @@ -638,7 +638,7 @@ const docTemplate = `{ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -656,7 +656,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/start": { + "/api/v1/instances/{name}/start": { "post": { "security": [ { @@ -665,7 +665,7 @@ const docTemplate = `{ ], "description": "Starts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Start a stopped instance", "parameters": [ @@ -681,7 +681,7 @@ const docTemplate = `{ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -699,7 +699,7 @@ const docTemplate = `{ } } }, - "/instances/{name}/stop": { + "/api/v1/instances/{name}/stop": { "post": { "security": [ { @@ -708,7 +708,7 @@ const docTemplate = `{ ], "description": "Stops a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Stop a running instance", "parameters": [ @@ -724,7 +724,7 @@ const docTemplate = `{ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -742,6 +742,675 @@ const docTemplate = `{ } } }, + "/api/v1/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "Nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "Nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/version": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the version of the llamactl command", + "tags": [ + "System" + ], + "summary": "Get llamactl version", + "responses": { + "200": { + "description": "Version information", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + 
"Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": 
{ + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + 
"ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -754,7 +1423,7 @@ const docTemplate = `{ "application/json" ], "tags": [ - "openai" + "OpenAI" ], "summary": "OpenAI-compatible proxy endpoint", "responses": { @@ -785,7 +1454,7 @@ const docTemplate = `{ ], "description": "Returns a list of instances in a format compatible with OpenAI API", "tags": [ - "openai" + "OpenAI" ], "summary": "List instances in OpenAI-compatible format", "responses": { @@ -803,63 +1472,34 @@ const docTemplate = `{ } } } - }, - "/version": { - "get": { - "security": [ - { - "ApiKeyAuth": [] - } - ], - "description": "Returns the version of the llamactl command", - "tags": [ - "version" - ], - "summary": "Get llamactl version", - "responses": { - "200": { - "description": "Version information", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } - } } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", 
+ "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -878,36 +1518,11 @@ const docTemplate = `{ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + "server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, @@ -950,6 +1565,13 @@ const docTemplate = `{ } } } + }, + "securityDefinitions": { + "ApiKeyAuth": { + "type": "apiKey", + "name": "X-API-Key", + "in": "header" + } } }` diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md deleted file mode 100644 index b6846e3..0000000 --- a/docs/getting-started/quick-start.md +++ /dev/null @@ -1,190 +0,0 @@ -# Quick Start - -This guide will help you get Llamactl up and running in just a few minutes. - -## Step 1: Start Llamactl - -Start the Llamactl server: - -```bash -llamactl -``` - -By default, Llamactl will start on `http://localhost:8080`. - -## Step 2: Access the Web UI - -Open your web browser and navigate to: - -``` -http://localhost:8080 -``` - -Login with the management API key. By default it is generated during server startup. Copy it from the terminal output. - -You should see the Llamactl web interface. - -## Step 3: Create Your First Instance - -1. Click the "Add Instance" button -2. Fill in the instance configuration: - - **Name**: Give your instance a descriptive name - - **Backend Type**: Choose from llama.cpp, MLX, or vLLM - - **Model**: Model path or identifier for your chosen backend - - **Additional Options**: Backend-specific parameters - -3. Click "Create Instance" - -## Step 4: Start Your Instance - -Once created, you can: - -- **Start** the instance by clicking the start button -- **Monitor** its status in real-time -- **View logs** by clicking the logs button -- **Stop** the instance when needed - -## Example Configurations - -Here are basic example configurations for each backend: - -**llama.cpp backend:** -```json -{ - "name": "llama2-7b", - "backend_type": "llama_cpp", - "backend_options": { - "model": "/path/to/llama-2-7b-chat.gguf", - "threads": 4, - "ctx_size": 2048, - "gpu_layers": 32 - } -} -``` - -**MLX backend (macOS only):** -```json -{ - "name": "mistral-mlx", - "backend_type": "mlx_lm", - "backend_options": { - "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", - "temp": 0.7, - "max_tokens": 2048 - } -} -``` - -**vLLM backend:** -```json -{ - "name": "dialogpt-vllm", - "backend_type": "vllm", - "backend_options": { - "model": "microsoft/DialoGPT-medium", - "tensor_parallel_size": 2, - "gpu_memory_utilization": 0.9 - } -} -``` - -## Docker Support - -Llamactl can run backends in Docker containers. 
To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below: - -```yaml -backends: - vllm: - command: "vllm" - args: ["serve"] - docker: - enabled: true - image: "vllm/vllm-openai:latest" - args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] -``` - -## Using the API - -You can also manage instances via the REST API: - -```bash -# List all instances -curl http://localhost:8080/api/instances - -# Create a new llama.cpp instance -curl -X POST http://localhost:8080/api/instances/my-model \ - -H "Content-Type: application/json" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/path/to/model.gguf" - } - }' - -# Start an instance -curl -X POST http://localhost:8080/api/instances/my-model/start -``` - -## OpenAI Compatible API - -Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools. - -### Chat Completions - -Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint: - -```bash -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "my-model", - "messages": [ - { - "role": "user", - "content": "Hello! Can you help me write a Python function?" - } - ], - "max_tokens": 150, - "temperature": 0.7 - }' -``` - -### Using with Python OpenAI Client - -You can also use the official OpenAI Python client: - -```python -from openai import OpenAI - -# Point the client to your Llamactl server -client = OpenAI( - base_url="http://localhost:8080/v1", - api_key="not-needed" # Llamactl doesn't require API keys by default -) - -# Create a chat completion -response = client.chat.completions.create( - model="my-model", # Use the name of your instance - messages=[ - {"role": "user", "content": "Explain quantum computing in simple terms"} - ], - max_tokens=200, - temperature=0.7 -) - -print(response.choices[0].message.content) -``` - -### List Available Models - -Get a list of running instances (models) in OpenAI-compatible format: - -```bash -curl http://localhost:8080/v1/models -``` - -## Next Steps - -- Manage instances [Managing Instances](../user-guide/managing-instances.md) -- Explore the [API Reference](../user-guide/api-reference.md) -- Configure advanced settings in the [Configuration](configuration.md) guide diff --git a/docs/images/create_instance.png b/docs/images/create_instance.png index c1ce856..9972f40 100644 Binary files a/docs/images/create_instance.png and b/docs/images/create_instance.png differ diff --git a/docs/images/dashboard.png b/docs/images/dashboard.png index 393f374..55fe728 100644 Binary files a/docs/images/dashboard.png and b/docs/images/dashboard.png differ diff --git a/docs/index.md b/docs/index.md index 501d426..e81fed3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,20 +14,20 @@ Welcome to the Llamactl documentation! 
## Quick Links -- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running -- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options -- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl -- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management -- [API Reference](user-guide/api-reference.md) - Complete API documentation +- [Installation Guide](installation.md) - Get Llamactl up and running +- [Configuration Guide](configuration.md) - Detailed configuration options +- [Quick Start](quick-start.md) - Your first steps with Llamactl +- [Managing Instances](managing-instances.md) - Instance lifecycle management +- [API Reference](api-reference.md) - Complete API documentation ## Getting Help If you need help or have questions: -- Check the [Troubleshooting](user-guide/troubleshooting.md) guide +- Check the [Troubleshooting](troubleshooting.md) guide - Visit the [GitHub repository](https://github.com/lordmathis/llamactl) -- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings +- Review the [Configuration Guide](configuration.md) for advanced settings ## License diff --git a/docs/getting-started/installation.md b/docs/installation.md similarity index 93% rename from docs/getting-started/installation.md rename to docs/installation.md index 413e1fc..1e4f4ae 100644 --- a/docs/getting-started/installation.md +++ b/docs/installation.md @@ -42,15 +42,10 @@ Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc vLLM provides high-throughput distributed serving for LLMs. Install vLLM: ```bash -# Install via pip (requires Python 3.8+, GPU required) -pip install vllm - -# Or in a virtual environment (recommended) +# Install in a virtual environment python -m venv vllm-env source vllm-env/bin/activate pip install vllm - -# For production deployments, consider container-based installation ``` ## Installation Methods @@ -82,7 +77,7 @@ llamactl provides Dockerfiles for creating Docker images with backends pre-insta **Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html). -#### Using Docker Compose +**Using Docker Compose** ```bash # Clone the repository @@ -103,9 +98,9 @@ Access the dashboard at: - llamactl with llama.cpp: http://localhost:8080 - llamactl with vLLM: http://localhost:8081 -#### Using Docker Build and Run +**Using Docker Build and Run** -**llamactl with llama.cpp CUDA:** +1. llamactl with llama.cpp CUDA: ```bash docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda . docker run -d \ @@ -116,7 +111,7 @@ docker run -d \ llamactl:llamacpp-cuda ``` -**llamactl with vLLM CUDA:** +2. llamactl with vLLM CUDA: ```bash docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda . docker run -d \ @@ -127,7 +122,7 @@ docker run -d \ llamactl:vllm-cuda ``` -**llamactl built from source:** +3. llamactl built from source: ```bash docker build -f docker/Dockerfile.source -t llamactl:source . 
docker run -d \ diff --git a/docs/user-guide/managing-instances.md b/docs/managing-instances.md similarity index 65% rename from docs/user-guide/managing-instances.md rename to docs/managing-instances.md index b02de2d..d67c0c8 100644 --- a/docs/user-guide/managing-instances.md +++ b/docs/managing-instances.md @@ -9,13 +9,17 @@ Llamactl provides two ways to manage instances: - **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard - **REST API**: Programmatic access for automation and integration -![Dashboard Screenshot](../images/dashboard.png) +![Dashboard Screenshot](images/dashboard.png) ### Authentication -If authentication is enabled: +Llamactl uses a **Management API Key** to authenticate requests to the management API (creating, starting, stopping instances). All curl examples below use `<token>` as a placeholder - replace this with your actual Management API Key. + +By default, authentication is required. If you don't configure a management API key in your configuration file, llamactl will auto-generate one and print it to the terminal on startup. See the [Configuration](configuration.md) guide for details. + +For Web UI access: 1. Navigate to the web UI -2. Enter your credentials +2. Enter your Management API Key 3. Bearer token is stored for the session ### Theme Support @@ -33,9 +37,9 @@ Each instance is displayed as a card showing: ## Create Instance -### Via Web UI +**Via Web UI** -![Create Instance Screenshot](../images/create_instance.png) +![Create Instance Screenshot](images/create_instance.png) 1. Click the **"Create Instance"** button on the dashboard 2. Enter a unique **Name** for your instance (only required field) @@ -59,14 +63,19 @@ Each instance is displayed as a card showing: - **llama.cpp**: Threads, context size, GPU layers, port, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc. + +!!! tip "Auto-Assignment" + Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. + 8.
Click **"Create"** to save the instance -### Via API +**Via API** ```bash # Create llama.cpp instance with local model file -curl -X POST http://localhost:8080/api/instances/my-llama-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { @@ -74,12 +83,14 @@ curl -X POST http://localhost:8080/api/instances/my-llama-instance \ "threads": 8, "ctx_size": 4096, "gpu_layers": 32 - } + }, + "nodes": ["main"] }' # Create MLX instance (macOS only) -curl -X POST http://localhost:8080/api/instances/my-mlx-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "mlx_lm", "backend_options": { @@ -89,12 +100,14 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \ "max_tokens": 2048 }, "auto_restart": true, - "max_restarts": 3 + "max_restarts": 3, + "nodes": ["main"] }' # Create vLLM instance -curl -X POST http://localhost:8080/api/instances/my-vllm-instance \ +curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "vllm", "backend_options": { @@ -108,24 +121,28 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \ "CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO", "PYTHONPATH": "/custom/path" - } + }, + "nodes": ["main"] }' # Create llama.cpp instance with HuggingFace model -curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ +curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { "hf_repo": "unsloth/gemma-3-27b-it-GGUF", "hf_file": "gemma-3-27b-it-GGUF.gguf", "gpu_layers": 32 - } + }, + "nodes": ["main"] }' # Create instance on specific remote node -curl -X POST http://localhost:8080/api/instances/remote-llama \ +curl -X POST http://localhost:8080/api/v1/instances/remote-llama \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_type": "llama_cpp", "backend_options": { @@ -134,46 +151,62 @@ curl -X POST http://localhost:8080/api/instances/remote-llama \ }, "nodes": ["worker1"] }' + +# Create instance on multiple nodes for high availability +curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/models/llama-7b.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1", "worker2", "worker3"] + }' ``` ## Start Instance -### Via Web UI +**Via Web UI** 1. Click the **"Start"** button on an instance card 2. Watch the status change to "Unknown" 3. Monitor progress in the logs 4. Instance status changes to "Ready" when ready -### Via API +**Via API** ```bash -curl -X POST http://localhost:8080/api/instances/{name}/start +curl -X POST http://localhost:8080/api/v1/instances/{name}/start \ + -H "Authorization: Bearer <token>" ``` ## Stop Instance -### Via Web UI +**Via Web UI** 1. Click the **"Stop"** button on an instance card 2.
Instance gracefully shuts down -### Via API +**Via API** ```bash -curl -X POST http://localhost:8080/api/instances/{name}/stop +curl -X POST http://localhost:8080/api/v1/instances/{name}/stop \ + -H "Authorization: Bearer <token>" ``` ## Edit Instance -### Via Web UI +**Via Web UI** 1. Click the **"Edit"** button on an instance card 2. Modify settings in the configuration dialog 3. Changes require instance restart to take effect 4. Click **"Update & Restart"** to apply changes -### Via API +**Via API** Modify instance settings: ```bash -curl -X PUT http://localhost:8080/api/instances/{name} \ +curl -X PUT http://localhost:8080/api/v1/instances/{name} \ -H "Content-Type: application/json" \ + -H "Authorization: Bearer <token>" \ -d '{ "backend_options": { "threads": 8, @@ -188,29 +221,31 @@ curl -X PUT http://localhost:8080/api/instances/{name} \ ## View Logs -### Via Web UI +**Via Web UI** 1. Click the **"Logs"** button on any instance card 2. Real-time log viewer opens -### Via API +**Via API** Check instance status in real-time: ```bash -# Get instance details -curl http://localhost:8080/api/instances/{name}/logs +# Get instance logs +curl http://localhost:8080/api/v1/instances/{name}/logs \ + -H "Authorization: Bearer <token>" ``` ## Delete Instance -### Via Web UI +**Via Web UI** 1. Click the **"Delete"** button on an instance card 2. Only stopped instances can be deleted 3. Confirm deletion in the dialog -### Via API +**Via API** ```bash -curl -X DELETE http://localhost:8080/api/instances/{name} +curl -X DELETE http://localhost:8080/api/v1/instances/{name} \ + -H "Authorization: Bearer <token>" ``` ## Instance Proxy @@ -218,8 +253,9 @@ curl -X DELETE http://localhost:8080/api/instances/{name} Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM). ```bash -# Get instance details -curl http://localhost:8080/api/instances/{name}/proxy/ +# Proxy requests to the instance +curl http://localhost:8080/api/v1/instances/{name}/proxy/ \ + -H "Authorization: Bearer <token>" ``` All backends provide OpenAI-compatible endpoints. Check the respective documentation: @@ -229,15 +265,16 @@ All backends provide OpenAI-compatible endpoints. Check the respective documenta ### Instance Health -#### Via Web UI +**Via Web UI** 1. The health status badge is displayed on each instance card -#### Via API +**Via API** Check the health status of your instances: ```bash -curl http://localhost:8080/api/instances/{name}/proxy/health +curl http://localhost:8080/api/v1/instances/{name}/proxy/health \ + -H "Authorization: Bearer <token>" ``` diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000..f65f9b2 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,263 @@ +# Quick Start + +This guide will help you get Llamactl up and running in just a few minutes. + +**Before you begin:** Ensure you have at least one backend installed (llama.cpp, MLX, or vLLM). See the [Installation Guide](installation.md#prerequisites) for backend setup. + +## Core Concepts + +Before you start, let's clarify a few key terms: + +- **Instance**: A running backend server that serves a specific model. Each instance has a unique name and runs independently. +- **Backend**: The inference engine that actually runs the model (llama.cpp, MLX, or vLLM). You need at least one backend installed before creating instances. +- **Node**: In multi-machine setups, a node represents one machine. Most users will just use the default "main" node for single-machine deployments.
+- **Proxy Architecture**: Llamactl acts as a proxy in front of your instances. You make requests to llamactl (e.g., `http://localhost:8080/v1/chat/completions`), and it routes them to the appropriate backend instance. This means you don't need to track individual instance ports or endpoints. + +## Authentication + +Llamactl uses two types of API keys: + +- **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances). +- **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.). + +By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide. + +## Start Llamactl + +Start the Llamactl server: + +```bash +llamactl +``` + +``` +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ MANAGEMENT AUTHENTICATION REQUIRED +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +🔑 Generated Management API Key: + + sk-management-... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ INFERENCE AUTHENTICATION REQUIRED +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +🔑 Generated Inference API Key: + + sk-inference-... + +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +⚠️ IMPORTANT +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +• These keys are auto-generated and will change on restart +• For production, add explicit keys to your configuration +• Copy these keys before they disappear from the terminal +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Llamactl server listening on 0.0.0.0:8080 +``` + +Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests. + +By default, Llamactl will start on `http://localhost:8080`. + +## Access the Web UI + +Open your web browser and navigate to: + +``` +http://localhost:8080 +``` + +Login with the management API key from the terminal output. + +You should see the Llamactl web interface. + +## Create Your First Instance + +1. Click the "Add Instance" button +2. Fill in the instance configuration: + - **Name**: Give your instance a descriptive name + - **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups) + - **Backend Type**: Choose from llama.cpp, MLX, or vLLM + - **Model**: Model path or huggingface repo + - **Additional Options**: Backend-specific parameters + + !!! tip "Auto-Assignment" + Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. + + !!! note "Remote Node Deployment" + If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes. + +3. 
Click "Create Instance" + +## Start Your Instance + +Once created, you can: + +- **Start** the instance by clicking the start button +- **Monitor** its status in real-time +- **View logs** by clicking the logs button +- **Stop** the instance when needed + +## Example Configurations + +Here are basic example configurations for each backend: + +**llama.cpp backend:** +```json +{ + "name": "llama2-7b", + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/llama-2-7b-chat.gguf", + "threads": 4, + "ctx_size": 2048, + "gpu_layers": 32 + }, + "nodes": ["main"] +} +``` + +**MLX backend (macOS only):** +```json +{ + "name": "mistral-mlx", + "backend_type": "mlx_lm", + "backend_options": { + "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", + "temp": 0.7, + "max_tokens": 2048 + }, + "nodes": ["main"] +} +``` + +**vLLM backend:** +```json +{ + "name": "dialogpt-vllm", + "backend_type": "vllm", + "backend_options": { + "model": "microsoft/DialoGPT-medium", + "tensor_parallel_size": 2, + "gpu_memory_utilization": 0.9 + }, + "nodes": ["main"] +} +``` + +**Remote node deployment example:** +```json +{ + "name": "distributed-model", + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/model.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1"] +} +``` + +## Docker Support + +Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below: + +```yaml +backends: + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: true + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] +``` + +## Using the API + +You can also manage instances via the REST API: + +```bash +# List all instances +curl http://localhost:8080/api/v1/instances + +# Create a new llama.cpp instance +curl -X POST http://localhost:8080/api/v1/instances/my-model \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/path/to/model.gguf" + } + }' + +# Start an instance +curl -X POST http://localhost:8080/api/v1/instances/my-model/start +``` + +## OpenAI Compatible API + +Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools. + +### Chat Completions + +Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint: + +```bash +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "my-model", + "messages": [ + { + "role": "user", + "content": "Hello! Can you help me write a Python function?" + } + ], + "max_tokens": 150, + "temperature": 0.7 + }' +``` + +### Using with Python OpenAI Client + +You can also use the official OpenAI Python client: + +```python +from openai import OpenAI + +# Point the client to your Llamactl server +client = OpenAI( + base_url="http://localhost:8080/v1", + api_key="your-inference-api-key" # Use the inference API key from terminal or config +) + +# Create a chat completion +response = client.chat.completions.create( + model="my-model", # Use the name of your instance + messages=[ + {"role": "user", "content": "Explain quantum computing in simple terms"} + ], + max_tokens=200, + temperature=0.7 +) + +print(response.choices[0].message.content) +``` + +!!! 
note "API Key" + If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup. + +### List Available Models + +Get a list of running instances (models) in OpenAI-compatible format: + +```bash +curl http://localhost:8080/v1/models +``` + +## Next Steps + +- Manage instances [Managing Instances](managing-instances.md) +- Explore the [API Reference](api-reference.md) +- Configure advanced settings in the [Configuration](configuration.md) guide diff --git a/apidocs/swagger.json b/docs/swagger.json similarity index 53% rename from apidocs/swagger.json rename to docs/swagger.json index 71471e6..26f9662 100644 --- a/apidocs/swagger.json +++ b/docs/swagger.json @@ -12,7 +12,7 @@ }, "basePath": "/api/v1", "paths": { - "/backends/llama-cpp/devices": { + "/api/v1/backends/llama-cpp/devices": { "get": { "security": [ { @@ -21,7 +21,7 @@ ], "description": "Returns a list of available devices for the llama server", "tags": [ - "backends" + "Backends" ], "summary": "List available devices for llama server", "responses": { @@ -40,7 +40,7 @@ } } }, - "/backends/llama-cpp/help": { + "/api/v1/backends/llama-cpp/help": { "get": { "security": [ { @@ -49,7 +49,7 @@ ], "description": "Returns the help text for the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get help for llama server", "responses": { @@ -68,7 +68,7 @@ } } }, - "/backends/llama-cpp/parse-command": { + "/api/v1/backends/llama-cpp/parse-command": { "post": { "security": [ { @@ -83,7 +83,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse llama-server command", "parameters": [ @@ -101,7 +101,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -125,7 +125,7 @@ } } }, - "/backends/llama-cpp/version": { + "/api/v1/backends/llama-cpp/version": { "get": { "security": [ { @@ -134,7 +134,7 @@ ], "description": "Returns the version of the llama server command", "tags": [ - "backends" + "Backends" ], "summary": "Get version of llama server", "responses": { @@ -153,7 +153,7 @@ } } }, - "/backends/mlx/parse-command": { + "/api/v1/backends/mlx/parse-command": { "post": { "security": [ { @@ -168,7 +168,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse mlx_lm.server command", "parameters": [ @@ -186,7 +186,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -201,7 +201,7 @@ } } }, - "/backends/vllm/parse-command": { + "/api/v1/backends/vllm/parse-command": { "post": { "security": [ { @@ -216,7 +216,7 @@ "application/json" ], "tags": [ - "backends" + "Backends" ], "summary": "Parse vllm serve command", "parameters": [ @@ -234,7 +234,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -249,7 +249,7 @@ } } }, - "/instances": { + "/api/v1/instances": { "get": { "security": [ { @@ -258,7 +258,7 @@ ], "description": "Returns a list of all instances managed by the server", "tags": [ - "instances" + "Instances" ], "summary": "List all instances", "responses": { @@ -267,7 +267,7 @@ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": 
"#/definitions/instance.Instance" } } }, @@ -280,7 +280,7 @@ } } }, - "/instances/{name}": { + "/api/v1/instances/{name}": { "get": { "security": [ { @@ -289,7 +289,7 @@ ], "description": "Returns the details of a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Get details of a specific instance", "parameters": [ @@ -305,7 +305,7 @@ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -333,7 +333,7 @@ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Update an instance's configuration", "parameters": [ @@ -350,7 +350,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -358,7 +358,7 @@ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -386,7 +386,7 @@ "application/json" ], "tags": [ - "instances" + "Instances" ], "summary": "Create and start a new instance", "parameters": [ @@ -403,7 +403,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -411,7 +411,7 @@ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -436,7 +436,7 @@ ], "description": "Stops and removes a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Delete an instance", "parameters": [ @@ -467,7 +467,7 @@ } } }, - "/instances/{name}/logs": { + "/api/v1/instances/{name}/logs": { "get": { "security": [ { @@ -476,7 +476,7 @@ ], "description": "Returns the logs from a specific instance by name with optional line limit", "tags": [ - "instances" + "Instances" ], "summary": "Get logs from a specific instance", "parameters": [ @@ -516,7 +516,7 @@ } } }, - "/instances/{name}/proxy": { + "/api/v1/instances/{name}/proxy": { "get": { "security": [ { @@ -525,9 +525,9 @@ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -569,9 +569,9 @@ ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ - "instances" + "Instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -606,7 +606,7 @@ } } }, - "/instances/{name}/restart": { + "/api/v1/instances/{name}/restart": { "post": { "security": [ { @@ -615,7 +615,7 @@ ], "description": "Restarts a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Restart a running instance", "parameters": [ @@ -631,7 +631,7 @@ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -649,7 +649,7 @@ } } }, - "/instances/{name}/start": { + "/api/v1/instances/{name}/start": { "post": { "security": [ { @@ -658,7 +658,7 @@ ], "description": "Starts a specific instance by name", "tags": [ 
- "instances" + "Instances" ], "summary": "Start a stopped instance", "parameters": [ @@ -674,7 +674,7 @@ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -692,7 +692,7 @@ } } }, - "/instances/{name}/stop": { + "/api/v1/instances/{name}/stop": { "post": { "security": [ { @@ -701,7 +701,7 @@ ], "description": "Stops a specific instance by name", "tags": [ - "instances" + "Instances" ], "summary": "Stop a running instance", "parameters": [ @@ -717,7 +717,7 @@ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -735,6 +735,675 @@ } } }, + "/api/v1/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "Nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "Nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/api/v1/version": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the version of the llamactl command", + "tags": [ + "System" + ], + "summary": "Get llamactl version", + "responses": { + "200": { + "description": "Version information", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + 
"Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied 
response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { 
+ "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "Llama.cpp" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -747,7 +1416,7 @@ "application/json" ], "tags": [ - "openai" + "OpenAI" ], "summary": "OpenAI-compatible proxy endpoint", "responses": { @@ -778,7 +1447,7 @@ ], "description": "Returns a list of instances in a format compatible with OpenAI API", "tags": [ - "openai" + "OpenAI" ], "summary": "List instances in OpenAI-compatible format", "responses": { @@ -796,63 +1465,34 @@ } } } - }, - "/version": { - "get": { - "security": [ - { - "ApiKeyAuth": [] - } - ], - "description": "Returns the version of the llamactl command", - "tags": [ - "version" - ], - "summary": "Get llamactl version", - "responses": { - "200": { - "description": "Version information", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } - } } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", + "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -871,36 +1511,11 @@ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + 
"server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, @@ -943,5 +1558,12 @@ } } } + }, + "securityDefinitions": { + "ApiKeyAuth": { + "type": "apiKey", + "name": "X-API-Key", + "in": "header" + } } } \ No newline at end of file diff --git a/apidocs/swagger.yaml b/docs/swagger.yaml similarity index 50% rename from apidocs/swagger.yaml rename to docs/swagger.yaml index a5db184..7506036 100644 --- a/apidocs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,25 +1,23 @@ basePath: /api/v1 definitions: - backends.BackendType: - enum: - - llama_cpp - - mlx_lm - - vllm - type: string - x-enum-varnames: - - BackendTypeLlamaCpp - - BackendTypeMlxLm - - BackendTypeVllm - instance.CreateInstanceOptions: + instance.Instance: + properties: + created: + description: Unix timestamp when the instance was created + type: integer + name: + type: string + type: object + instance.Options: properties: auto_restart: description: Auto restart type: boolean - backend_options: - additionalProperties: {} + environment: + additionalProperties: + type: string + description: Environment variables type: object - backend_type: - $ref: '#/definitions/backends.BackendType' idle_timeout: description: Idle timeout type: integer @@ -32,27 +30,10 @@ definitions: description: seconds type: integer type: object - instance.InstanceStatus: - enum: - - 0 - - 1 - - 2 - type: integer - x-enum-varnames: - - Stopped - - Running - - Failed - instance.Process: + server.NodeResponse: properties: - created: - description: Creation time - type: integer - name: + address: type: string - status: - allOf: - - $ref: '#/definitions/instance.InstanceStatus' - description: Status type: object server.OpenAIInstance: properties: @@ -88,7 +69,7 @@ info: title: llamactl API version: "1.0" paths: - /backends/llama-cpp/devices: + /api/v1/backends/llama-cpp/devices: get: description: Returns a list of available devices for the llama server responses: @@ -104,8 +85,8 @@ paths: - ApiKeyAuth: [] summary: List available devices for llama server tags: - - backends - /backends/llama-cpp/help: + - Backends + /api/v1/backends/llama-cpp/help: get: description: Returns the help text for the llama server command responses: @@ -121,8 +102,8 @@ paths: - ApiKeyAuth: [] summary: Get help for llama server tags: - - backends - /backends/llama-cpp/parse-command: + - Backends + /api/v1/backends/llama-cpp/parse-command: post: consumes: - application/json @@ -140,7 +121,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -157,8 +138,8 @@ paths: - ApiKeyAuth: [] summary: Parse llama-server command tags: - - backends - /backends/llama-cpp/version: + - Backends + /api/v1/backends/llama-cpp/version: get: description: Returns the version of the llama server command responses: @@ -174,8 +155,8 @@ paths: - ApiKeyAuth: [] summary: Get version of llama server tags: - - backends - /backends/mlx/parse-command: + - Backends + /api/v1/backends/mlx/parse-command: post: consumes: - application/json @@ -193,7 +174,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request 
or command schema: @@ -204,8 +185,8 @@ paths: - ApiKeyAuth: [] summary: Parse mlx_lm.server command tags: - - backends - /backends/vllm/parse-command: + - Backends + /api/v1/backends/vllm/parse-command: post: consumes: - application/json @@ -223,7 +204,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -234,8 +215,8 @@ paths: - ApiKeyAuth: [] summary: Parse vllm serve command tags: - - backends - /instances: + - Backends + /api/v1/instances: get: description: Returns a list of all instances managed by the server responses: @@ -243,7 +224,7 @@ paths: description: List of instances schema: items: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' type: array "500": description: Internal Server Error @@ -253,8 +234,8 @@ paths: - ApiKeyAuth: [] summary: List all instances tags: - - instances - /instances/{name}: + - Instances + /api/v1/instances/{name}: delete: description: Stops and removes a specific instance by name parameters: @@ -278,7 +259,7 @@ paths: - ApiKeyAuth: [] summary: Delete an instance tags: - - instances + - Instances get: description: Returns the details of a specific instance by name parameters: @@ -291,7 +272,7 @@ paths: "200": description: Instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -304,7 +285,7 @@ paths: - ApiKeyAuth: [] summary: Get details of a specific instance tags: - - instances + - Instances post: consumes: - application/json @@ -320,12 +301,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "201": description: Created instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid request body schema: @@ -338,7 +319,7 @@ paths: - ApiKeyAuth: [] summary: Create and start a new instance tags: - - instances + - Instances put: consumes: - application/json @@ -354,12 +335,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "200": description: Updated instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -372,8 +353,8 @@ paths: - ApiKeyAuth: [] summary: Update an instance's configuration tags: - - instances - /instances/{name}/logs: + - Instances + /api/v1/instances/{name}/logs: get: description: Returns the logs from a specific instance by name with optional line limit @@ -404,8 +385,8 @@ paths: - ApiKeyAuth: [] summary: Get logs from a specific instance tags: - - instances - /instances/{name}/proxy: + - Instances + /api/v1/instances/{name}/proxy: get: description: Forwards HTTP requests to the llama-server instance running on a specific port @@ -432,9 +413,10 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - - instances + - Instances post: description: Forwards HTTP requests to the llama-server instance running on a specific port @@ -461,10 +443,11 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific 
instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - - instances - /instances/{name}/restart: + - Instances + /api/v1/instances/{name}/restart: post: description: Restarts a specific instance by name parameters: @@ -477,7 +460,7 @@ paths: "200": description: Restarted instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -490,8 +473,8 @@ paths: - ApiKeyAuth: [] summary: Restart a running instance tags: - - instances - /instances/{name}/start: + - Instances + /api/v1/instances/{name}/start: post: description: Starts a specific instance by name parameters: @@ -504,7 +487,7 @@ paths: "200": description: Started instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -517,8 +500,8 @@ paths: - ApiKeyAuth: [] summary: Start a stopped instance tags: - - instances - /instances/{name}/stop: + - Instances + /api/v1/instances/{name}/stop: post: description: Stops a specific instance by name parameters: @@ -531,7 +514,7 @@ paths: "200": description: Stopped instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -544,7 +527,444 @@ paths: - ApiKeyAuth: [] summary: Stop a running instance tags: - - instances + - Instances + /api/v1/nodes: + get: + description: Returns a map of all nodes configured in the server (node name + -> node config) + responses: + "200": + description: Map of nodes + schema: + additionalProperties: + $ref: '#/definitions/server.NodeResponse' + type: object + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: List all configured nodes + tags: + - Nodes + /api/v1/nodes/{name}: + get: + description: Returns the details of a specific node by name + parameters: + - description: Node Name + in: path + name: name + required: true + type: string + responses: + "200": + description: Node details + schema: + $ref: '#/definitions/server.NodeResponse' + "400": + description: Invalid name format + schema: + type: string + "404": + description: Node not found + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get details of a specific node + tags: + - Nodes + /api/v1/version: + get: + description: Returns the version of the llamactl command + responses: + "200": + description: Version information + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get llamactl version + tags: + - System + /llama-cpp/{name}/: + get: + description: Proxies requests to the llama.cpp UI for the specified instance + parameters: + - description: Instance Name + in: query + name: name + required: true + type: string + produces: + - text/html + responses: + "200": + description: Proxied HTML response + schema: + type: string + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp UI for the instance + tags: + - Llama.cpp + /llama-cpp/{name}/apply-template: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + 
parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/completion: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/detokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/embeddings: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/infill: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/metrics: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + 
schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/props: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/reranking: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/slots: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp + /llama-cpp/{name}/tokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - Llama.cpp /v1/: post: consumes: @@ -567,7 +987,7 @@ paths: - ApiKeyAuth: [] summary: OpenAI-compatible proxy endpoint tags: - - openai + - OpenAI /v1/models: get: description: Returns a list of instances in a format compatible with OpenAI @@ -585,22 +1005,10 @@ paths: - 
ApiKeyAuth: [] summary: List instances in OpenAI-compatible format tags: - - openai - /version: - get: - description: Returns the version of the llamactl command - responses: - "200": - description: Version information - schema: - type: string - "500": - description: Internal Server Error - schema: - type: string - security: - - ApiKeyAuth: [] - summary: Get llamactl version - tags: - - version + - OpenAI +securityDefinitions: + ApiKeyAuth: + in: header + name: X-API-Key + type: apiKey swagger: "2.0" diff --git a/docs/user-guide/troubleshooting.md b/docs/troubleshooting.md similarity index 54% rename from docs/user-guide/troubleshooting.md rename to docs/troubleshooting.md index ca2b4df..1f33c0a 100644 --- a/docs/user-guide/troubleshooting.md +++ b/docs/troubleshooting.md @@ -26,62 +26,67 @@ Issues specific to Llamactl deployment and operation. ## Instance Management Issues -### Model Loading Failures +### Instance Fails to Start -**Problem:** Instance fails to start with model loading errors - -**Common Solutions:** -- **llama-server not found:** Ensure `llama-server` binary is in PATH -- **Wrong model format:** Ensure model is in GGUF format -- **Insufficient memory:** Use smaller model or reduce context size -- **Path issues:** Use absolute paths to model files - -### Memory Issues - -**Problem:** Out of memory errors or system becomes unresponsive +**Problem:** Instance fails to start or immediately stops **Solutions:** -1. **Reduce context size:** - ```json - { - "n_ctx": 1024 - } + +1. **Check instance logs** to see the actual error: + ```bash + curl http://localhost:8080/api/v1/instances/{name}/logs + # Or check log files directly + tail -f ~/.local/share/llamactl/logs/{instance-name}.log ``` -2. **Use quantized models:** - - Try Q4_K_M instead of higher precision models - - Use smaller model variants (7B instead of 13B) +2. **Verify backend is installed:** + - **llama.cpp**: Ensure `llama-server` is in PATH + - **MLX**: Ensure `mlx-lm` Python package is installed + - **vLLM**: Ensure `vllm` Python package is installed -### GPU Configuration +3. **Check model path and format:** + - Use absolute paths to model files + - Verify model format matches backend (GGUF for llama.cpp, etc.) -**Problem:** GPU not being used effectively +4. **Verify backend command configuration:** + - Check that the backend `command` is correctly configured in the global config + - For virtual environments, specify the full path to the command (e.g., `/path/to/venv/bin/mlx_lm.server`) + - See the [Configuration Guide](configuration.md) for backend configuration details + - Test the backend directly (see [Backend-Specific Issues](#backend-specific-issues) below) -**Solutions:** -1. **Configure GPU layers:** - ```json - { - "n_gpu_layers": 35 - } - ``` +### Backend-Specific Issues -### Advanced Instance Issues +**Problem:** Model loading, memory, GPU, or performance issues -**Problem:** Complex model loading, performance, or compatibility issues +Most model-specific issues (memory, GPU configuration, performance tuning) are backend-specific and should be resolved by consulting the respective backend documentation: -Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. 
For advanced troubleshooting: +**llama.cpp:** +- [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp) +- [llama-server README](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) -**Resources:** -- **llama.cpp Documentation:** [https://github.com/ggml/llama.cpp](https://github.com/ggml/llama.cpp) -- **llama.cpp Issues:** [https://github.com/ggml/llama.cpp/issues](https://github.com/ggml/llama.cpp/issues) -- **llama.cpp Discussions:** [https://github.com/ggml/llama.cpp/discussions](https://github.com/ggml/llama.cpp/discussions) +**MLX:** +- [MLX-LM GitHub](https://github.com/ml-explore/mlx-lm) +- [MLX-LM Server Guide](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md) + +**vLLM:** +- [vLLM Documentation](https://docs.vllm.ai/en/stable/) +- [OpenAI Compatible Server](https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html) +- [vllm serve Command](https://docs.vllm.ai/en/stable/cli/serve.html#vllm-serve) + +**Testing backends directly:** + +Testing your model and configuration directly with the backend helps determine if the issue is with llamactl or the backend itself: -**Testing directly with llama-server:** ```bash -# Test your model and parameters directly with llama-server -llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35 -``` +# llama.cpp +llama-server --model /path/to/model.gguf --port 8081 -This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server. +# MLX +mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081 + +# vLLM +vllm serve microsoft/DialoGPT-medium --port 8081 +``` ## API and Network Issues diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md deleted file mode 100644 index 472cd0b..0000000 --- a/docs/user-guide/api-reference.md +++ /dev/null @@ -1,560 +0,0 @@ -# API Reference - -Complete reference for the Llamactl REST API. - -## Base URL - -All API endpoints are relative to the base URL: - -``` -http://localhost:8080/api/v1 -``` - -## Authentication - -Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header: - -```bash -curl -H "Authorization: Bearer " \ - http://localhost:8080/api/v1/instances -``` - -The server supports two types of API keys: -- **Management API Keys**: Required for instance management operations (CRUD operations on instances) -- **Inference API Keys**: Required for OpenAI-compatible inference endpoints - -## System Endpoints - -### Get Llamactl Version - -Get the version information of the llamactl server. - -```http -GET /api/v1/version -``` - -**Response:** -``` -Version: 1.0.0 -Commit: abc123 -Build Time: 2024-01-15T10:00:00Z -``` - -### Get Llama Server Help - -Get help text for the llama-server command. - -```http -GET /api/v1/server/help -``` - -**Response:** Plain text help output from `llama-server --help` - -### Get Llama Server Version - -Get version information of the llama-server binary. - -```http -GET /api/v1/server/version -``` - -**Response:** Plain text version output from `llama-server --version` - -### List Available Devices - -List available devices for llama-server. - -```http -GET /api/v1/server/devices -``` - -**Response:** Plain text device list from `llama-server --list-devices` - -## Instances - -### List All Instances - -Get a list of all instances. 
- -```http -GET /api/v1/instances -``` - -**Response:** -```json -[ - { - "name": "llama2-7b", - "status": "running", - "created": 1705312200 - } -] -``` - -### Get Instance Details - -Get detailed information about a specific instance. - -```http -GET /api/v1/instances/{name} -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Create Instance - -Create and start a new instance. - -```http -POST /api/v1/instances/{name} -``` - -**Request Body:** JSON object with instance configuration. Common fields include: - -- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`) -- `backend_options`: Backend-specific configuration -- `auto_restart`: Enable automatic restart on failure -- `max_restarts`: Maximum restart attempts -- `restart_delay`: Delay between restarts in seconds -- `on_demand_start`: Start instance when receiving requests -- `idle_timeout`: Idle timeout in minutes -- `environment`: Environment variables as key-value pairs -- `nodes`: Array with single node name to deploy the instance to (for remote deployments) - -See [Managing Instances](managing-instances.md) for complete configuration options. - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Update Instance - -Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options. - -```http -PUT /api/v1/instances/{name} -``` - -**Request Body:** JSON object with configuration fields to update. - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Delete Instance - -Stop and remove an instance. - -```http -DELETE /api/v1/instances/{name} -``` - -**Response:** `204 No Content` - -## Instance Operations - -### Start Instance - -Start a stopped instance. - -```http -POST /api/v1/instances/{name}/start -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -**Error Responses:** -- `409 Conflict`: Maximum number of running instances reached -- `500 Internal Server Error`: Failed to start instance - -### Stop Instance - -Stop a running instance. - -```http -POST /api/v1/instances/{name}/stop -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "stopped", - "created": 1705312200 -} -``` - -### Restart Instance - -Restart an instance (stop then start). - -```http -POST /api/v1/instances/{name}/restart -``` - -**Response:** -```json -{ - "name": "llama2-7b", - "status": "running", - "created": 1705312200 -} -``` - -### Get Instance Logs - -Retrieve instance logs. - -```http -GET /api/v1/instances/{name}/logs -``` - -**Query Parameters:** -- `lines`: Number of lines to return (default: all lines, use -1 for all) - -**Response:** Plain text log output - -**Example:** -```bash -curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100" -``` - -### Proxy to Instance - -Proxy HTTP requests directly to the llama-server instance. - -```http -GET /api/v1/instances/{name}/proxy/* -POST /api/v1/instances/{name}/proxy/* -``` - -This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance. 
- -**Example - Check Instance Health:** -```bash -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model/proxy/health -``` - -This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance. - -**Error Responses:** -- `503 Service Unavailable`: Instance is not running - -## OpenAI-Compatible API - -Llamactl provides OpenAI-compatible endpoints for inference operations. - -### List Models - -List all instances in OpenAI-compatible format. - -```http -GET /v1/models -``` - -**Response:** -```json -{ - "object": "list", - "data": [ - { - "id": "llama2-7b", - "object": "model", - "created": 1705312200, - "owned_by": "llamactl" - } - ] -} -``` - -### Chat Completions, Completions, Embeddings - -All OpenAI-compatible inference endpoints are available: - -```http -POST /v1/chat/completions -POST /v1/completions -POST /v1/embeddings -POST /v1/rerank -POST /v1/reranking -``` - -**Request Body:** Standard OpenAI format with `model` field specifying the instance name - -**Example:** -```json -{ - "model": "llama2-7b", - "messages": [ - { - "role": "user", - "content": "Hello, how are you?" - } - ] -} -``` - -The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md). - -**Error Responses:** -- `400 Bad Request`: Invalid request body or missing instance name -- `503 Service Unavailable`: Instance is not running and on-demand start is disabled -- `409 Conflict`: Cannot start instance due to maximum instances limit - -## Instance Status Values - -Instances can have the following status values: -- `stopped`: Instance is not running -- `running`: Instance is running and ready to accept requests -- `failed`: Instance failed to start or crashed - -## Error Responses - -All endpoints may return error responses in the following format: - -```json -{ - "error": "Error message description" -} -``` - -### Common HTTP Status Codes - -- `200`: Success -- `201`: Created -- `204`: No Content (successful deletion) -- `400`: Bad Request (invalid parameters or request body) -- `401`: Unauthorized (missing or invalid API key) -- `403`: Forbidden (insufficient permissions) -- `404`: Not Found (instance not found) -- `409`: Conflict (instance already exists, max instances reached) -- `500`: Internal Server Error -- `503`: Service Unavailable (instance not running) - -## Examples - -### Complete Instance Lifecycle - -```bash -# Create and start instance -curl -X POST http://localhost:8080/api/v1/instances/my-model \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-2-7b.gguf", - "gpu_layers": 32 - }, - "environment": { - "CUDA_VISIBLE_DEVICES": "0", - "OMP_NUM_THREADS": "8" - } - }' - -# Check instance status -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model - -# Get instance logs -curl -H "Authorization: Bearer your-api-key" \ - "http://localhost:8080/api/v1/instances/my-model/logs?lines=50" - -# Use OpenAI-compatible chat completions -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-inference-api-key" \ - -d '{ - "model": "my-model", - "messages": [ - {"role": "user", "content": 
"Hello!"} - ], - "max_tokens": 100 - }' - -# Stop instance -curl -X POST -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model/stop - -# Delete instance -curl -X DELETE -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/my-model -``` - -### Remote Node Instance Example - -```bash -# Create instance on specific remote node -curl -X POST http://localhost:8080/api/v1/instances/remote-model \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-2-7b.gguf", - "gpu_layers": 32 - }, - "nodes": ["worker1"] - }' - -# Check status of remote instance -curl -H "Authorization: Bearer your-api-key" \ - http://localhost:8080/api/v1/instances/remote-model - -# Use remote instance with OpenAI-compatible API -curl -X POST http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-inference-api-key" \ - -d '{ - "model": "remote-model", - "messages": [ - {"role": "user", "content": "Hello from remote node!"} - ] - }' -``` - -### Using the Proxy Endpoint - -You can also directly proxy requests to the llama-server instance: - -```bash -# Direct proxy to instance (bypasses OpenAI compatibility layer) -curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer your-api-key" \ - -d '{ - "prompt": "Hello, world!", - "n_predict": 50 - }' -``` - -## Backend-Specific Endpoints - -### Parse Commands - -Llamactl provides endpoints to parse command strings from different backends into instance configuration options. - -#### Parse Llama.cpp Command - -Parse a llama-server command string into instance options. - -```http -POST /api/v1/backends/llama-cpp/parse-command -``` - -**Request Body:** -```json -{ - "command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "llama_cpp", - "llama_server_options": { - "model": "/path/to/model.gguf", - "ctx_size": 2048, - "port": 8080 - } -} -``` - -#### Parse MLX-LM Command - -Parse an MLX-LM server command string into instance options. - -```http -POST /api/v1/backends/mlx/parse-command -``` - -**Request Body:** -```json -{ - "command": "mlx_lm.server --model /path/to/model --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "mlx_lm", - "mlx_server_options": { - "model": "/path/to/model", - "port": 8080 - } -} -``` - -#### Parse vLLM Command - -Parse a vLLM serve command string into instance options. - -```http -POST /api/v1/backends/vllm/parse-command -``` - -**Request Body:** -```json -{ - "command": "vllm serve /path/to/model --port 8080" -} -``` - -**Response:** -```json -{ - "backend_type": "vllm", - "vllm_server_options": { - "model": "/path/to/model", - "port": 8080 - } -} -``` - -**Error Responses for Parse Commands:** -- `400 Bad Request`: Invalid request body, empty command, or parse error -- `500 Internal Server Error`: Encoding error - -## Auto-Generated Documentation - -The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation: - -1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest` -2. 
Generate docs: `swag init -g cmd/server/main.go -o apidocs` - -## Swagger Documentation - -If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at: - -``` -http://localhost:8080/swagger/ -``` - -This provides a complete interactive interface for testing all API endpoints. diff --git a/mkdocs.yml b/mkdocs.yml index 70cbef3..df2ded7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,8 +25,8 @@ theme: name: Switch to light mode features: - navigation.tabs - - navigation.sections - - navigation.expand + - navigation.tabs.sticky + - toc.integrate - navigation.top - search.highlight - search.share @@ -49,14 +49,12 @@ markdown_extensions: nav: - Home: index.md - - Getting Started: - - Installation: getting-started/installation.md - - Quick Start: getting-started/quick-start.md - - Configuration: getting-started/configuration.md - - User Guide: - - Managing Instances: user-guide/managing-instances.md - - API Reference: user-guide/api-reference.md - - Troubleshooting: user-guide/troubleshooting.md + - Installation: installation.md + - Quick Start: quick-start.md + - Configuration: configuration.md + - Managing Instances: managing-instances.md + - API Reference: api-reference.md + - Troubleshooting: troubleshooting.md plugins: - search @@ -66,6 +64,8 @@ plugins: css_dir: css javascript_dir: js canonical_version: null + - neoteroi.mkdocsoad: + use_pymdownx: true hooks: - docs/readme_sync.py @@ -78,3 +78,6 @@ extra: social: - icon: fontawesome/brands/github link: https://github.com/lordmathis/llamactl + +extra_css: +- css/css-v1.1.3.css diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go index 47ef02d..390ecb0 100644 --- a/pkg/server/handlers_backends.go +++ b/pkg/server/handlers_backends.go @@ -44,7 +44,7 @@ func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) { // LlamaCppUIProxy godoc // @Summary Proxy requests to llama.cpp UI for the instance // @Description Proxies requests to the llama.cpp UI for the specified instance -// @Tags backends +// @Tags Llama.cpp // @Security ApiKeyAuth // @Produce html // @Param name query string true "Instance Name" @@ -83,14 +83,24 @@ func (h *Handler) LlamaCppUIProxy() http.HandlerFunc { // LlamaCppProxy godoc // @Summary Proxy requests to llama.cpp server instance // @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured -// @Tags backends +// @Tags Llama.cpp // @Security ApiKeyAuth // @Produce json -// @Param name query string true "Instance Name" +// @Param name path string true "Instance Name" // @Success 200 {object} map[string]any "Proxied response" // @Failure 400 {string} string "Invalid instance" // @Failure 500 {string} string "Internal Server Error" -// @Router /llama-cpp/{name}/* [post] +// @Router /llama-cpp/{name}/props [get] +// @Router /llama-cpp/{name}/slots [get] +// @Router /llama-cpp/{name}/apply-template [post] +// @Router /llama-cpp/{name}/completion [post] +// @Router /llama-cpp/{name}/detokenize [post] +// @Router /llama-cpp/{name}/embeddings [post] +// @Router /llama-cpp/{name}/infill [post] +// @Router /llama-cpp/{name}/metrics [post] +// @Router /llama-cpp/{name}/props [post] +// @Router /llama-cpp/{name}/reranking [post] +// @Router /llama-cpp/{name}/tokenize [post] func (h *Handler) LlamaCppProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { @@ -150,7 +160,7 @@ func parseHelper(w http.ResponseWriter, r *http.Request, backend interface { // 
ParseLlamaCommand godoc // @Summary Parse llama-server command // @Description Parses a llama-server command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json @@ -158,7 +168,7 @@ func parseHelper(w http.ResponseWriter, r *http.Request, backend interface { // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" // @Failure 500 {object} map[string]string "Internal Server Error" -// @Router /backends/llama-cpp/parse-command [post] +// @Router /api/v1/backends/llama-cpp/parse-command [post] func (h *Handler) ParseLlamaCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.LlamaServerOptions{}) @@ -180,14 +190,14 @@ func (h *Handler) ParseLlamaCommand() http.HandlerFunc { // ParseMlxCommand godoc // @Summary Parse mlx_lm.server command // @Description Parses MLX-LM server command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json // @Param request body ParseCommandRequest true "Command to parse" // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" -// @Router /backends/mlx/parse-command [post] +// @Router /api/v1/backends/mlx/parse-command [post] func (h *Handler) ParseMlxCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.MlxServerOptions{}) @@ -209,14 +219,14 @@ func (h *Handler) ParseMlxCommand() http.HandlerFunc { // ParseVllmCommand godoc // @Summary Parse vllm serve command // @Description Parses a vLLM serve command string into instance options -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Accept json // @Produce json // @Param request body ParseCommandRequest true "Command to parse" // @Success 200 {object} instance.Options "Parsed options" // @Failure 400 {object} map[string]string "Invalid request or command" -// @Router /backends/vllm/parse-command [post] +// @Router /api/v1/backends/vllm/parse-command [post] func (h *Handler) ParseVllmCommand() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { parsedOptions, ok := parseHelper(w, r, &backends.VllmServerOptions{}) @@ -251,12 +261,12 @@ func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerF // LlamaServerHelpHandler godoc // @Summary Get help for llama server // @Description Returns the help text for the llama server command -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Help text" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/help [get] +// @Router /api/v1/backends/llama-cpp/help [get] func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--help", "Failed to get help") } @@ -264,12 +274,12 @@ func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { // LlamaServerVersionHandler godoc // @Summary Get version of llama server // @Description Returns the version of the llama server command -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Version information" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/version [get] +// @Router 
/api/v1/backends/llama-cpp/version [get] func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--version", "Failed to get version") } @@ -277,12 +287,12 @@ func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { // LlamaServerListDevicesHandler godoc // @Summary List available devices for llama server // @Description Returns a list of available devices for the llama server -// @Tags backends +// @Tags Backends // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "List of devices" // @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/devices [get] +// @Router /api/v1/backends/llama-cpp/devices [get] func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc { return h.executeLlamaServerCommand("--list-devices", "Failed to list devices") } diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go index 24fe3e7..0480f22 100644 --- a/pkg/server/handlers_instances.go +++ b/pkg/server/handlers_instances.go @@ -16,12 +16,12 @@ import ( // ListInstances godoc // @Summary List all instances // @Description Returns a list of all instances managed by the server -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json -// @Success 200 {array} instance.Process "List of instances" +// @Success 200 {array} instance.Instance "List of instances" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances [get] +// @Router /api/v1/instances [get] func (h *Handler) ListInstances() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { instances, err := h.InstanceManager.ListInstances() @@ -37,16 +37,16 @@ func (h *Handler) ListInstances() http.HandlerFunc { // CreateInstance godoc // @Summary Create and start a new instance // @Description Creates a new instance with the provided configuration options -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Accept json // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 201 {object} instance.Process "Created instance details" +// @Success 201 {object} instance.Instance "Created instance details" // @Failure 400 {string} string "Invalid request body" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [post] +// @Router /api/v1/instances/{name} [post] func (h *Handler) CreateInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -75,14 +75,14 @@ func (h *Handler) CreateInstance() http.HandlerFunc { // GetInstance godoc // @Summary Get details of a specific instance // @Description Returns the details of a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Instance details" +// @Success 200 {object} instance.Instance "Instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [get] +// @Router /api/v1/instances/{name} [get] func (h *Handler) GetInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -105,16 +105,16 @@ func (h *Handler) GetInstance() http.HandlerFunc { // UpdateInstance godoc // @Summary Update an instance's configuration 
// @Description Updates the configuration of a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Accept json // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 200 {object} instance.Process "Updated instance details" +// @Success 200 {object} instance.Instance "Updated instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [put] +// @Router /api/v1/instances/{name} [put] func (h *Handler) UpdateInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -143,14 +143,14 @@ func (h *Handler) UpdateInstance() http.HandlerFunc { // StartInstance godoc // @Summary Start a stopped instance // @Description Starts a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Started instance details" +// @Success 200 {object} instance.Instance "Started instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/start [post] +// @Router /api/v1/instances/{name}/start [post] func (h *Handler) StartInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -179,14 +179,14 @@ func (h *Handler) StartInstance() http.HandlerFunc { // StopInstance godoc // @Summary Stop a running instance // @Description Stops a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Stopped instance details" +// @Success 200 {object} instance.Instance "Stopped instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/stop [post] +// @Router /api/v1/instances/{name}/stop [post] func (h *Handler) StopInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -209,14 +209,14 @@ func (h *Handler) StopInstance() http.HandlerFunc { // RestartInstance godoc // @Summary Restart a running instance // @Description Restarts a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Restarted instance details" +// @Success 200 {object} instance.Instance "Restarted instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/restart [post] +// @Router /api/v1/instances/{name}/restart [post] func (h *Handler) RestartInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -239,13 +239,13 @@ func (h *Handler) RestartInstance() http.HandlerFunc { // DeleteInstance godoc // @Summary Delete an instance // @Description Stops and removes a specific instance by name -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Success 204 "No Content" // @Failure 400 {string} string "Invalid name 
format" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [delete] +// @Router /api/v1/instances/{name} [delete] func (h *Handler) DeleteInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -267,7 +267,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { // GetInstanceLogs godoc // @Summary Get logs from a specific instance // @Description Returns the logs from a specific instance by name with optional line limit -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Param lines query string false "Number of lines to retrieve (default: all lines)" @@ -275,7 +275,7 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { // @Success 200 {string} string "Instance logs" // @Failure 400 {string} string "Invalid name format or lines parameter" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/logs [get] +// @Router /api/v1/instances/{name}/logs [get] func (h *Handler) GetInstanceLogs() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -310,15 +310,15 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc { // InstanceProxy godoc // @Summary Proxy requests to a specific instance, does not autostart instance if stopped // @Description Forwards HTTP requests to the llama-server instance running on a specific port -// @Tags instances +// @Tags Instances // @Security ApiKeyAuth // @Param name path string true "Instance Name" // @Success 200 "Request successfully proxied to instance" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Failure 503 {string} string "Instance is not running" -// @Router /instances/{name}/proxy [get] -// @Router /instances/{name}/proxy [post] +// @Router /api/v1/instances/{name}/proxy [get] +// @Router /api/v1/instances/{name}/proxy [post] func (h *Handler) InstanceProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { inst, err := h.getInstance(r) diff --git a/pkg/server/handlers_nodes.go b/pkg/server/handlers_nodes.go index 7c84b0a..4aef53a 100644 --- a/pkg/server/handlers_nodes.go +++ b/pkg/server/handlers_nodes.go @@ -14,12 +14,12 @@ type NodeResponse struct { // ListNodes godoc // @Summary List all configured nodes // @Description Returns a map of all nodes configured in the server (node name -> node config) -// @Tags nodes +// @Tags Nodes // @Security ApiKeyAuth // @Produces json // @Success 200 {object} map[string]NodeResponse "Map of nodes" // @Failure 500 {string} string "Internal Server Error" -// @Router /nodes [get] +// @Router /api/v1/nodes [get] func (h *Handler) ListNodes() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { // Convert to sanitized response format (map of name -> NodeResponse) @@ -37,7 +37,7 @@ func (h *Handler) ListNodes() http.HandlerFunc { // GetNode godoc // @Summary Get details of a specific node // @Description Returns the details of a specific node by name -// @Tags nodes +// @Tags Nodes // @Security ApiKeyAuth // @Produces json // @Param name path string true "Node Name" @@ -45,7 +45,7 @@ func (h *Handler) ListNodes() http.HandlerFunc { // @Failure 400 {string} string "Invalid name format" // @Failure 404 {string} string "Node not found" // @Failure 500 {string} string "Internal Server Error" -// @Router /nodes/{name} [get] +// @Router /api/v1/nodes/{name} [get] func 
(h *Handler) GetNode() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") diff --git a/pkg/server/handlers_openai.go b/pkg/server/handlers_openai.go index 35ac746..d221200 100644 --- a/pkg/server/handlers_openai.go +++ b/pkg/server/handlers_openai.go @@ -25,7 +25,7 @@ type OpenAIInstance struct { // OpenAIListInstances godoc // @Summary List instances in OpenAI-compatible format // @Description Returns a list of instances in a format compatible with OpenAI API -// @Tags openai +// @Tags OpenAI // @Security ApiKeyAuth // @Produces json // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances" @@ -61,7 +61,7 @@ func (h *Handler) OpenAIListInstances() http.HandlerFunc { // OpenAIProxy godoc // @Summary OpenAI-compatible proxy endpoint // @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header. -// @Tags openai +// @Tags OpenAI // @Security ApiKeyAuth // @Accept json // @Produces json diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go index 2e61288..46410f3 100644 --- a/pkg/server/handlers_system.go +++ b/pkg/server/handlers_system.go @@ -8,12 +8,12 @@ import ( // VersionHandler godoc // @Summary Get llamactl version // @Description Returns the version of the llamactl command -// @Tags version +// @Tags System // @Security ApiKeyAuth // @Produces text/plain // @Success 200 {string} string "Version information" // @Failure 500 {string} string "Internal Server Error" -// @Router /version [get] +// @Router /api/v1/version [get] func (h *Handler) VersionHandler() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { versionInfo := fmt.Sprintf("Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime) diff --git a/pkg/server/routes.go b/pkg/server/routes.go index ffe89ec..6587601 100644 --- a/pkg/server/routes.go +++ b/pkg/server/routes.go @@ -8,7 +8,7 @@ import ( "github.com/go-chi/cors" httpSwagger "github.com/swaggo/http-swagger" - _ "llamactl/apidocs" + _ "llamactl/docs" "llamactl/webui" )