# llamactl/docker/docker-compose.yml
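#
# Usage sketch (assumes this file lives in llamactl/docker/ and is run from
# that directory, so the build context ".." resolves to the repository root):
#
#   docker compose build                      # build both backend images
#   docker compose up -d llamactl-llamacpp    # llama.cpp backend on host port 8080
#   docker compose up -d llamactl-vllm        # vLLM backend on host port 8081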
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp # llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080" # Use a different host port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # GPU visibility: "all" is valid for NVIDIA_VISIBLE_DEVICES (NVIDIA
      # Container Toolkit) but not for CUDA_VISIBLE_DEVICES, which only
      # accepts device indices or UUIDs
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
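
# Note: the GPU reservations under deploy.resources require the NVIDIA
# Container Toolkit on the host and a Compose version that supports device
# reservations (Docker Compose v2, or docker-compose 1.28+).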