# Builds aphrodite-engine from source on top of the CUDA 12.4.1 development
# image and runs it through docker/entrypoint.sh as UID 1000.
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# The checkout lives in $HOME, which is also the working directory for every
# following instruction.
ENV HOME=/app/aphrodite-engine

WORKDIR $HOME

# Upgrade OS Packages + Prepare Python Environment.
# DEBIAN_FRONTEND is exported only inside this RUN so it does not leak into
# the runtime environment; the apt lists are removed in the same layer.
RUN set -eux; \
    export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y \
        bzip2 \
        g++ \
        git \
        make \
        python3-pip \
        tzdata \
    && rm -fr /var/lib/apt/lists/*

# Alias python3 to python
RUN ln -s /usr/bin/python3 /usr/bin/python

RUN python3 -m pip install --no-cache-dir --upgrade pip

# Fetch the sources into a scratch directory and move them into $HOME.
# The `*` glob does not match dotfiles, so .git is never kept; a shallow
# clone therefore yields the same working tree with less to download.
RUN git clone --depth 1 https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \
    && mv /tmp/aphrodite-engine/* . \
    && rm -fr /tmp/aphrodite-engine \
    && chmod +x docker/entrypoint.sh

# Allow build servers to limit ninja build jobs. For reference
# see https://github.com/PygmalionAI/aphrodite-engine/wiki/1.-Installation#build-from-source
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}

# Export the CUDA_HOME variable correctly
ENV CUDA_HOME=/usr/local/cuda

# HF_HOME must be set in its own instruction before NUMBA_CACHE_DIR, because
# variables defined in an ENV instruction are only substituted in later ones.
ENV HF_HOME=/tmp
ENV NUMBA_CACHE_DIR=$HF_HOME/numba_cache

# CUDA compute capabilities the source build below is asked to target.
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"

# Editable install of the checkout in $HOME.
RUN python3 -m pip install --no-cache-dir -e .

# Workaround to properly install flash-attn. For reference
# see: https://github.com/Dao-AILab/flash-attention/issues/453
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
RUN python3 -m pip install --no-cache-dir 'flash-attn>=2.5.8' --no-build-isolation

# Entrypoint exec form doesn't do variable substitution automatically ($HOME),
# so the path is spelled out in full.
ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"]

EXPOSE 7860

# Service UID needs write access to $HOME to create temporary folders, see #458
RUN chown 1000:1000 ${HOME}

# Run as UID 1000 with GID 0.
USER 1000:0

VOLUME ["/tmp"]