# Mirror of https://github.com/lordmathis/llamactl.git
# Synced 2025-11-06 17:14:28 +00:00 (57 lines, 1.6 KiB, YAML)
# Compose file format version; kept quoted so it is read as a string, not a float.
version: "3.8"
# Two GPU-backed llamactl containers, one per inference backend. Both listen on
# container port 8080 and are published on different host ports.
services:
  # llama.cpp backend, published on host port 8080.
  llamactl-llamacpp:
    build:
      # Relative paths are resolved from the directory holding this file, so
      # the build context is its parent directory.
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
    environment:
      # Set data directory for persistence (matches the /data volume above)
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  # vLLM backend, published on host port 8081.
  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Set data directory for persistence (matches the /data volume above)
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose all GPUs through the NVIDIA container runtime. The keyword
      # "all" belongs to NVIDIA_VISIBLE_DEVICES; CUDA_VISIBLE_DEVICES only
      # accepts device indices or UUIDs, and an invalid entry such as "all"
      # leaves CUDA with no visible devices.
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this container.
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped