# syntax=docker/dockerfile:1
# Runtime image: llama.cpp CUDA server base with the llamactl process manager
# installed on top. llamactl drives /app/llama-server from the base image.
FROM ghcr.io/ggml-org/llama.cpp:server-cuda

# Optional build-time pin for reproducible builds, e.g.
#   docker build --build-arg LLAMACTL_VERSION=v0.5.0 .
# Empty (the default) keeps the original behavior of resolving the latest release.
ARG LLAMACTL_VERSION=""

# curl (+ CA bundle) is needed to fetch the llamactl release tarball.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/* \

# Make pipeline failures fatal (hadolint DL4006): without pipefail a failed
# curl would be silently masked by the grep/sed or tar stage downstream.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Download and install llamactl. -f makes curl fail on HTTP errors instead of
# piping an error page into tar; -sSL stays quiet but still reports failures
# and follows GitHub's release redirects.
RUN VERSION="${LLAMACTL_VERSION:-$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest \
        | grep '"tag_name":' \
        | sed -E 's/.*"([^"]+)".*/\1/')}" \
    && curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${VERSION}/llamactl-${VERSION}-linux-amd64.tar.gz" \
        | tar -xz \
    && mv llamactl /usr/local/bin/ \
    && chmod +x /usr/local/bin/llamactl

# Runtime working directory; WORKDIR creates it, so no separate mkdir needed.
WORKDIR /data

# Documentation only (does not publish): llamactl's default listen port.
EXPOSE 8080

# Point llamactl at the llama-server binary shipped in the base image, and keep
# the base image's /app libraries on the loader path so llama-server resolves
# its bundled CUDA/ggml shared objects.
ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server \
    LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"

# NOTE(review): no USER directive — the container runs as root. The base image
# may require root for GPU device access; confirm before adding a non-root user.

# Exec form: llamactl runs as PID 1 and receives SIGTERM from `docker stop`.
ENTRYPOINT ["llamactl"]