Initial support for docker

2025-12-22 17:14:22 +00:00 · 2025-09-28 17:51:16 +02:00
parent 92cb57e816
commit b940b38e46
6 changed files with 237 additions and 4 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,45 @@
 # Build-context exclusions for the llamactl Docker images.
 # Patterns are matched relative to the build-context root: a bare `*.log`
 # only matches at the top level. Files that can appear in any directory
 # therefore use a `**/` prefix, which matches at every depth (root included).
 # Git and version control
 .git/
 .gitignore
 # Documentation
 *.md
 docs/
 # Development files
 .vscode/
 .idea/
 # Build artifacts
 webui/node_modules/
 webui/dist/
 webui/.next/
 **/*.log
 **/*.tmp
 # Data directories
 data/
 models/
 logs/
 # Test files (`**/` also covers files directly in the context root)
 **/*_test.go
 # CI/CD
 .github/
 # Local configuration (env files may hold secrets, so exclude them at any depth)
 llamactl.yaml
 config.yaml
 **/.env
 **/.env.local
 # OS files
 **/.DS_Store
 **/Thumbs.db
 # Backup files
 **/*.bak
 **/*.backup
 **/*~
--- a/Dockerfile.llamacpp
+++ b/Dockerfile.llamacpp
@@ -0,0 +1,22 @@
 # llamactl layered on top of the upstream llama.cpp server image.
 # NOTE(review): the compose file and docs describe this image as CUDA-enabled,
 # but the `server` tag may be the CPU build (upstream also publishes
 # `server-cuda`) - confirm which tag is intended.
 FROM ghcr.io/ggml-org/llama.cpp:server
 # Run RUN steps under bash with pipefail so that a failing command on the
 # left-hand side of a pipe (e.g. curl) fails the build instead of being masked.
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # Optional release tag to install (e.g. --build-arg LLAMACTL_VERSION=<tag>).
 # Left empty, the latest GitHub release is resolved at build time.
 ARG LLAMACTL_VERSION=""
 # Install curl (plus CA certificates for HTTPS) for downloading llamactl
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ca-certificates curl && \
     rm -rf /var/lib/apt/lists/*
 # Download and install llamactl (linux-amd64 release tarball).
 # `curl -f` turns HTTP errors (404, API rate limiting) into build failures, and
 # the explicit `test -n` guards against an empty tag producing a bogus URL.
 RUN VERSION="${LLAMACTL_VERSION}" && \
     if [ -z "${VERSION}" ]; then \
         VERSION="$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')"; \
     fi && \
     test -n "${VERSION}" && \
     curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${VERSION}/llamactl-${VERSION}-linux-amd64.tar.gz" | tar -xz && \
     mv llamactl /usr/local/bin/ && \
     chmod +x /usr/local/bin/llamactl
 # Data directory for llamactl; WORKDIR creates it if it does not exist
 WORKDIR /data
 # Expose the default llamactl port
 EXPOSE 8080
 # Run llamactl as the container's main process (replaces any entrypoint
 # inherited from the base image)
 ENTRYPOINT ["llamactl"]
--- a/Dockerfile.vllm
+++ b/Dockerfile.vllm
@@ -0,0 +1,22 @@
 # llamactl layered on top of the upstream vLLM OpenAI-server image.
 # NOTE(review): `:latest` is unpinned, so rebuilds may pick up a different
 # vLLM version - consider pinning a specific tag or digest.
 FROM vllm/vllm-openai:latest
 # Run RUN steps under bash with pipefail so that a failing command on the
 # left-hand side of a pipe (e.g. curl) fails the build instead of being masked.
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # Optional release tag to install (e.g. --build-arg LLAMACTL_VERSION=<tag>).
 # Left empty, the latest GitHub release is resolved at build time.
 ARG LLAMACTL_VERSION=""
 # Install curl (plus CA certificates for HTTPS) for downloading llamactl
 RUN apt-get update && \
     apt-get install -y --no-install-recommends ca-certificates curl && \
     rm -rf /var/lib/apt/lists/*
 # Download and install llamactl (linux-amd64 release tarball).
 # `curl -f` turns HTTP errors (404, API rate limiting) into build failures, and
 # the explicit `test -n` guards against an empty tag producing a bogus URL.
 RUN VERSION="${LLAMACTL_VERSION}" && \
     if [ -z "${VERSION}" ]; then \
         VERSION="$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')"; \
     fi && \
     test -n "${VERSION}" && \
     curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${VERSION}/llamactl-${VERSION}-linux-amd64.tar.gz" | tar -xz && \
     mv llamactl /usr/local/bin/ && \
     chmod +x /usr/local/bin/llamactl
 # Data directory for llamactl; WORKDIR creates it if it does not exist
 WORKDIR /data
 # Expose the default llamactl port
 EXPOSE 8080
 # Run llamactl as the container's main process (replaces any entrypoint
 # inherited from the base image)
 ENTRYPOINT ["llamactl"]
--- a/README.md
+++ b/README.md
@@ -95,7 +95,26 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```
-### Option 2: Build from Source
+### Option 2: Docker (No local backend installation required)
 ```bash
 # Clone repository and build Docker images
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 mkdir -p data/llamacpp data/vllm models
 # Build and start llamactl with llama.cpp CUDA backend
 docker-compose up -d llamactl-llamacpp
 # Build and start llamactl with vLLM CUDA backend
 docker-compose up -d llamactl-vllm
 ```
 **Features:** CUDA support, automatic installation of the latest llamactl release, and no backend installation required on the host.
 For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
 ### Option 3: Build from Source
 Requires Go 1.24+ and Node.js 22+
 ```bash
 git clone https://github.com/lordmathis/llamactl.git
@@ -147,9 +166,9 @@ pip install vllm
 # Or use Docker - no local installation required
 ```
-## Docker Support
+## Backend Docker Support
-llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
+llamactl can run backends in Docker containers:
 ```yaml
 backends:
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,60 @@
 # Compose definitions for running llamactl bundled with a backend:
 #   llamactl-llamacpp -> Dockerfile.llamacpp, published on host port 8080
 #   llamactl-vllm     -> Dockerfile.vllm,     published on host port 8081
 # Both services reserve all NVIDIA GPUs and persist state under ./data.
 version: '3.8'
 services:
  llamactl-llamacpp:
    build:
      context: .
      dockerfile: Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
    environment:
      # Configure llamactl to use llama-server from the base image
      - LLAMACTL_LLAMACPP_COMMAND=llama-server
      # Docker mode (launching the backend in a nested container) stays off:
      # the backend binary is already present in this image
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
  llamactl-vllm:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Configure llamactl to use vllm from the base image
      - LLAMACTL_VLLM_COMMAND=vllm
      - LLAMACTL_VLLM_ARGS=serve
      # Docker mode (launching the backend in a nested container) stays off:
      # the backend is already present in this image
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose every GPU to the container via the NVIDIA container runtime.
      # (CUDA_VISIBLE_DEVICES only accepts device indices or UUIDs; the value
      # "all" is not valid there and can hide every GPU from CUDA.)
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
 networks:
  default:
    name: llamactl-network
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```
-### Option 2: Build from Source
+### Option 2: Docker
 llamactl provides Dockerfiles for creating Docker images with CUDA support for llama.cpp and vLLM backends. The resulting images include the latest llamactl release with the respective backend pre-installed.
 **Available Dockerfiles:**
 - **llamactl with llama.cpp CUDA**: `Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server`)
 - **llamactl with vLLM CUDA**: `Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
 #### Using Docker Compose
 ```bash
 # Clone the repository
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 # Create directories for data and models
 mkdir -p data/llamacpp data/vllm models
 # Start llamactl with llama.cpp backend
 docker-compose up -d llamactl-llamacpp
 # Or start llamactl with vLLM backend
 docker-compose up -d llamactl-vllm
 ```
 Access the dashboard at:
 - llamactl with llama.cpp: http://localhost:8080
 - llamactl with vLLM: http://localhost:8081
 #### Using Docker Build and Run
 **llamactl with llama.cpp CUDA:**
 ```bash
 docker build -f Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
 docker run -d \
  --name llamactl-llamacpp \
  --gpus all \
  -p 8080:8080 \
  -v $(pwd)/data/llamacpp:/data \
  -v $(pwd)/models:/models \
  -e LLAMACTL_LLAMACPP_COMMAND=llama-server \
  llamactl:llamacpp-cuda
 ```
 **llamactl with vLLM CUDA:**
 ```bash
 docker build -f Dockerfile.vllm -t llamactl:vllm-cuda .
 docker run -d \
  --name llamactl-vllm \
  --gpus all \
  -p 8080:8080 \
  -v $(pwd)/data/vllm:/data \
  -v $(pwd)/models:/models \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  -e LLAMACTL_VLLM_COMMAND=vllm \
  -e LLAMACTL_VLLM_ARGS=serve \
  llamactl:vllm-cuda
 ```
 **Docker-Specific Configuration:**
 - Set `LLAMACTL_LLAMACPP_COMMAND=llama-server` to use the pre-installed llama-server
 - Set `LLAMACTL_VLLM_COMMAND=vllm` to use the pre-installed vLLM
 - Mount a volume at `/data` for llamactl data and at `/models` for your model files
 - Use `--gpus all` for GPU access
 ### Option 3: Build from Source
 Requirements:
 - Go 1.24 or later