Initial support for Docker

2025-09-28 17:51:16 +02:00
parent 92cb57e816
commit b940b38e46
6 changed files with 237 additions and 4 deletions

docker-compose.yml (new file)

@@ -0,0 +1,60 @@
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: .
      dockerfile: Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
    environment:
      # Configure llamactl to use llama-server from the base image
      - LLAMACTL_LLAMACPP_COMMAND=llama-server
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
  llamactl-vllm:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use a different host port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Configure llamactl to use vllm from the base image
      - LLAMACTL_VLLM_COMMAND=vllm
      - LLAMACTL_VLLM_ARGS=serve
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose all GPUs via the NVIDIA container runtime
      # (CUDA_VISIBLE_DEVICES does not accept "all")
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

networks:
  default:
    name: llamactl-network
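
With this file in place, each stack can be built and started independently. A minimal usage sketch, assuming Docker Compose v2 and the NVIDIA Container Toolkit on the host (service names are the ones defined above):

docker compose build llamactl-llamacpp
docker compose up -d llamactl-llamacpp
docker compose logs -f llamactl-llamacpp
docker compose exec llamactl-llamacpp nvidia-smi   # verify GPU access, if the image ships nvidia-smi

The llama.cpp backend is then reachable on http://localhost:8080, and the vLLM backend, once started the same way, on http://localhost:8081.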