Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-07 01:24:27 +00:00)
Initial support for docker
docker-compose.yml (new file, 60 lines)
@@ -0,0 +1,60 @@
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: .
      dockerfile: Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
    environment:
      # Configure llamactl to use llama-server from the base image
      - LLAMACTL_LLAMACPP_COMMAND=llama-server
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Configure llamactl to use vllm from the base image
      - LLAMACTL_VLLM_COMMAND=vllm
      - LLAMACTL_VLLM_ARGS=serve
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # vLLM specific environment variables
      - CUDA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

networks:
  default:
    name: llamactl-network
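Both services reserve every visible NVIDIA GPU (`count: all`). On a shared host it can make sense to pin each service to a specific device instead; the Compose specification's `device_ids` field replaces `count` for that purpose. A minimal sketch of the edit, with the device index purely illustrative:

    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]   # pin to one GPU; "0" is an example index
              capabilities: [gpu]

`count` and `device_ids` are mutually exclusive in the Compose spec, so the sketch drops `count: all` rather than adding alongside it.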
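To try the commit locally, the standard Compose workflow applies, e.g. `docker compose up -d --build llamactl-llamacpp` to build and start only the llama.cpp backend. The vLLM service publishes host port 8081 (container port 8080) precisely so both backends can run side by side. Note that the build contexts assume `Dockerfile.llamacpp` and `Dockerfile.vllm` exist at the repository root; they are presumably added alongside this file but are not shown in this diff.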