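# Docker Compose configuration for running llamactl with CUDA-enabled
# llama.cpp and vLLM backends. The GPU reservations below assume the
# NVIDIA Container Toolkit is installed on the host.
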
version: '3.8'

services:
  llamactl-llamacpp:
    build:
      context: .
      dockerfile: Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
    environment:
      # Configure llamactl to use llama-server from the base image
      - LLAMACTL_LLAMACPP_COMMAND=llama-server
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

  llamactl-vllm:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Configure llamactl to use vllm from the base image
      - LLAMACTL_VLLM_COMMAND=vllm
      - LLAMACTL_VLLM_ARGS=serve
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # vLLM specific environment variables
      - CUDA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped

networks:
  default:
    name: llamactl-network
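
# Example usage (a sketch; assumes this file is saved as docker-compose.yml):
#   docker compose up -d llamactl-llamacpp    # llamactl with the llama.cpp backend on port 8080
#   docker compose up -d llamactl-vllm        # llamactl with the vLLM backend on port 8081
#   docker compose logs -f llamactl-llamacpp  # follow backend logs
#   docker compose down                       # stop and remove both services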