# Dockerfile: builds Aphrodite Engine from source on top of the CUDA 12.4.1
# development image (Ubuntu 22.04) and runs it as an unprivileged user.

FROM nvidia/cuda:12.4.1-devel-ubuntu22.04

# $HOME doubles as the application directory; every relative path used by the
# RUN steps below resolves against it.
ENV HOME=/app/aphrodite-engine
WORKDIR $HOME

# Upgrade OS packages + prepare the Python environment.
# DEBIAN_FRONTEND is exported inside this RUN only, so it is not baked into the
# runtime environment. The apt lists are removed in the same layer that
# downloads them so they never reach the image.
# NOTE(review): --no-install-recommends is deliberately not used here; the
# source build below presumably relies on packages that python3-pip only
# recommends (e.g. Python headers) - confirm before slimming this step.
RUN set -eux; \
    export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y \
        bzip2 \
        g++ \
        git \
        make \
        python3-pip \
        tzdata \
    && rm -fr /var/lib/apt/lists/*

# Alias python3 to python. -f keeps the step from failing when the link
# already exists.
RUN ln -sf /usr/bin/python3 /usr/bin/python

RUN python3 -m pip install --no-cache-dir --upgrade pip

# Fetch the sources into a scratch directory and move them into $HOME.
# The `*` glob skips dot-entries such as .git, and the scratch clone is
# deleted afterwards, so repository history never reaches $HOME; a shallow
# clone therefore yields the same tree with far less to download.
RUN git clone --depth 1 https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \
    && mv /tmp/aphrodite-engine/* . \
    && rm -fr /tmp/aphrodite-engine \
    && chmod +x docker/entrypoint.sh

# Allow build servers to limit ninja build jobs. For reference
# see https://github.com/PygmalionAI/aphrodite-engine/wiki/1.-Installation#build-from-source
# When the build argument is not supplied, MAX_JOBS is set to an empty string.
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}

# Export the CUDA_HOME variable correctly
ENV CUDA_HOME=/usr/local/cuda

# NUMBA_CACHE_DIR is derived from HF_HOME, so it has to be set in a later ENV
# instruction than HF_HOME itself.
ENV HF_HOME=/tmp
ENV NUMBA_CACHE_DIR=$HF_HOME/numba_cache

# CUDA compute capabilities to build for.
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX"

# Editable install of the engine from the sources placed in $HOME above.
RUN python3 -m pip install --no-cache-dir -e .

# Workaround to properly install flash-attn. For reference
# see: https://github.com/Dao-AILab/flash-attention/issues/453
# --no-cache-dir matches the other pip invocations and keeps pip's download
# cache out of this layer.
RUN python3 -m pip install --no-cache-dir 'flash-attn>=2.5.8' --no-build-isolation

# Entrypoint exec form doesn't do variable substitution automatically ($HOME)
ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"]

EXPOSE 7860

# Service UID needs write access to $HOME to create temporary folders, see #458
# Only the directory itself changes owner (no -R); its contents keep their
# existing ownership.
RUN chown 1000:1000 ${HOME}

# Run as UID 1000 with GID 0 from here on.
USER 1000:0

# /tmp is also HF_HOME and the parent of NUMBA_CACHE_DIR (set above). It is
# declared last, after every build step that could write to it.
VOLUME ["/tmp"]