From b940b38e46180afe6b1b3cc91dd783b8220b6c17 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 17:51:16 +0200
Subject: [PATCH] Initial support for docker

---
 .dockerignore                        | 45 +++++++++++++++++++
 Dockerfile.llamacpp                  | 22 +++++++++
 Dockerfile.vllm                      | 22 +++++++++
 README.md                            | 25 +++++++++--
 docker-compose.yml                   | 60 +++++++++++++++++++++++++
 docs/getting-started/installation.md | 67 +++++++++++++++++++++++++++-
 6 files changed, 237 insertions(+), 4 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile.llamacpp
 create mode 100644 Dockerfile.vllm
 create mode 100644 docker-compose.yml

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..bb48f8b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,45 @@
+# Git and version control
+.git/
+.gitignore
+
+# Documentation
+*.md
+docs/
+
+# Development files
+.vscode/
+.idea/
+
+# Build artifacts
+webui/node_modules/
+webui/dist/
+webui/.next/
+*.log
+*.tmp
+
+# Data directories
+data/
+models/
+logs/
+
+# Test files
+*_test.go
+**/*_test.go
+
+# CI/CD
+.github/
+
+# Local configuration
+llamactl.yaml
+config.yaml
+.env
+.env.local
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Backup files
+*.bak
+*.backup
+*~
\ No newline at end of file
diff --git a/Dockerfile.llamacpp b/Dockerfile.llamacpp
new file mode 100644
index 0000000..511e759
--- /dev/null
+++ b/Dockerfile.llamacpp
@@ -0,0 +1,22 @@
+FROM ghcr.io/ggml-org/llama.cpp:server
+
+# Install curl for downloading llamactl
+RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+
+# Download and install the latest llamactl release
+RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
+    curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
+    mv llamactl /usr/local/bin/ && \
+    chmod +x /usr/local/bin/llamactl
+
+# Create data directory for llamactl
+RUN mkdir -p /data
+
+# Set working directory
+WORKDIR /data
+
+# Expose the default llamactl port
+EXPOSE 8080
+
+# Set llamactl as the entrypoint
+ENTRYPOINT ["llamactl"]
\ No newline at end of file
diff --git a/Dockerfile.vllm b/Dockerfile.vllm
new file mode 100644
index 0000000..9c05912
--- /dev/null
+++ b/Dockerfile.vllm
@@ -0,0 +1,22 @@
+FROM vllm/vllm-openai:latest
+
+# Install curl for downloading llamactl
+RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+
+# Download and install the latest llamactl release
+RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
+    curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
+    mv llamactl /usr/local/bin/ && \
+    chmod +x /usr/local/bin/llamactl
+
+# Create data directory for llamactl
+RUN mkdir -p /data
+
+# Set working directory
+WORKDIR /data
+
+# Expose the default llamactl port
+EXPOSE 8080
+
+# Set llamactl as the entrypoint
+ENTRYPOINT ["llamactl"]
\ No newline at end of file
diff --git a/README.md b/README.md
index dc68e4f..f11d7f3 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,26 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```
 
-### Option 2: Build from Source
+### Option 2: Docker (No local backend installation required)
+
+```bash
+# Clone repository and build Docker images
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+mkdir -p data/llamacpp data/vllm models
+
+# Build and start llamactl with llama.cpp CUDA backend
+docker-compose up llamactl-llamacpp -d
+
+# Build and start llamactl with vLLM CUDA backend
+docker-compose up llamactl-vllm -d
+```
+
+**Features:** Full CUDA support, automatic installation of the latest llamactl release, and no local backend dependencies.
+
+For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
+
+### Option 3: Build from Source
 Requires Go 1.24+ and Node.js 22+
 ```bash
 git clone https://github.com/lordmathis/llamactl.git
@@ -147,9 +166,9 @@ pip install vllm
 # Or use Docker - no local installation required
 ```
 
-## Docker Support
+## Backend Docker Support
 
-llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
+llamactl can run backends in Docker containers:
 
 ```yaml
 backends:
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2190c9e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,60 @@
+version: '3.8'
+
+services:
+  llamactl-llamacpp:
+    build:
+      context: .
+      dockerfile: Dockerfile.llamacpp
+    image: llamactl:llamacpp-cuda
+    container_name: llamactl-llamacpp
+    ports:
+      - "8080:8080"
+    volumes:
+      - ./data/llamacpp:/data
+      - ./models:/models # Mount models directory
+    environment:
+      # Configure llamactl to use llama-server from the base image
+      - LLAMACTL_LLAMACPP_COMMAND=llama-server
+      # llama-server ships with this image, so llamactl's Docker mode stays disabled
+      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+
+  llamactl-vllm:
+    build:
+      context: .
+      dockerfile: Dockerfile.vllm
+    image: llamactl:vllm-cuda
+    container_name: llamactl-vllm
+    ports:
+      - "8081:8080" # Use different port to avoid conflicts
+    volumes:
+      - ./data/vllm:/data
+      - ./models:/models # Mount models directory
+      - ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
+    environment:
+      # Configure llamactl to use vllm from the base image
+      - LLAMACTL_VLLM_COMMAND=vllm
+      - LLAMACTL_VLLM_ARGS=serve
+      # vLLM ships with this image, so llamactl's Docker mode stays disabled
+      - LLAMACTL_VLLM_DOCKER_ENABLED=false
+      # Make all GPUs visible inside the container (NVIDIA runtime variable)
+      - NVIDIA_VISIBLE_DEVICES=all
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+
+networks:
+  default:
+    name: llamactl-network
\ No newline at end of file
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index 6f52fff..f4d4b3d 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```
 
-### Option 2: Build from Source
+### Option 2: Docker
+
+llamactl provides Dockerfiles for creating Docker images with CUDA support for llama.cpp and vLLM backends. The resulting images include the latest llamactl release with the respective backend pre-installed.
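+
+Both Dockerfiles build CUDA-enabled images, so the host needs a working NVIDIA driver and the NVIDIA Container Toolkit for `--gpus all` and the Compose GPU reservations to take effect. A quick way to confirm GPU passthrough before starting llamactl (the CUDA image tag below is only an example; any CUDA-capable image you already have will do):
+
+```bash
+# Should list the same GPUs that nvidia-smi reports on the host
+docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+```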
+
+**Available Dockerfiles:**
+- **llamactl with llama.cpp CUDA**: `Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server`)
+- **llamactl with vLLM CUDA**: `Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
+
+#### Using Docker Compose
+
+```bash
+# Clone the repository
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+
+# Create directories for data and models
+mkdir -p data/llamacpp data/vllm models
+
+# Start llamactl with llama.cpp backend
+docker-compose up llamactl-llamacpp -d
+
+# Or start llamactl with vLLM backend
+docker-compose up llamactl-vllm -d
+```
+
+Access the dashboard at:
+- llamactl with llama.cpp: http://localhost:8080
+- llamactl with vLLM: http://localhost:8081
+
+#### Using Docker Build and Run
+
+**llamactl with llama.cpp CUDA:**
+```bash
+docker build -f Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
+docker run -d \
+  --name llamactl-llamacpp \
+  --gpus all \
+  -p 8080:8080 \
+  -v $(pwd)/data/llamacpp:/data \
+  -v $(pwd)/models:/models \
+  -e LLAMACTL_LLAMACPP_COMMAND=llama-server \
+  llamactl:llamacpp-cuda
+```
+
+**llamactl with vLLM CUDA:**
+```bash
+docker build -f Dockerfile.vllm -t llamactl:vllm-cuda .
+docker run -d \
+  --name llamactl-vllm \
+  --gpus all \
+  -p 8080:8080 \
+  -v $(pwd)/data/vllm:/data \
+  -v $(pwd)/models:/models \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
+  -e LLAMACTL_VLLM_COMMAND=vllm \
+  -e LLAMACTL_VLLM_ARGS=serve \
+  llamactl:vllm-cuda
+```
+
+**Docker-Specific Configuration:**
+- Set `LLAMACTL_LLAMACPP_COMMAND=llama-server` to use the pre-installed llama-server
+- Set `LLAMACTL_VLLM_COMMAND=vllm` to use the pre-installed vLLM
+- Volume mount `/data` for llamactl data and `/models` for your model files
+- Use `--gpus all` for GPU access
+
+### Option 3: Build from Source
 
 Requirements:
 - Go 1.24 or later