# syntax=docker/dockerfile:1
# Runtime image: llama.cpp CUDA server base with the llamactl process manager
# installed on top. llamactl drives /app/llama-server from the base image.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Optional build-time pin for reproducible builds, e.g.
#   docker build --build-arg LLAMACTL_VERSION=v0.5.0 .
# Empty (the default) keeps the original behavior of resolving the latest release.
ARG LLAMACTL_VERSION=""

# curl (+ CA bundle) is needed to fetch the llamactl release tarball.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/* \

# Make pipeline failures fatal (hadolint DL4006): without pipefail a failed
# curl would be silently masked by the grep/sed or tar stage downstream.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Download and install llamactl. -f makes curl fail on HTTP errors instead of
# piping an error page into tar; -sSL stays quiet but still reports failures
# and follows GitHub's release redirects.
RUN VERSION="${LLAMACTL_VERSION:-$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest \
        | grep '"tag_name":' \
        | sed -E 's/.*"([^"]+)".*/\1/')}" \
    && curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${VERSION}/llamactl-${VERSION}-linux-amd64.tar.gz" \
        | tar -xz \
    && mv llamactl /usr/local/bin/ \
    && chmod +x /usr/local/bin/llamactl

# Runtime working directory; WORKDIR creates it, so no separate mkdir needed.
WORKDIR /data

# Documentation only (does not publish): llamactl's default listen port.
EXPOSE 8080

# Point llamactl at the llama-server binary shipped in the base image, and keep
# the base image's /app libraries on the loader path so llama-server resolves
# its bundled CUDA/ggml shared objects.
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server \
    LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"

# NOTE(review): no USER directive — the container runs as root. The base image
# may require root for GPU device access; confirm before adding a non-root user.

# Exec form: llamactl runs as PID 1 and receives SIGTERM from `docker stop`.
ENTRYPOINT ["llamactl"]