Dockerfile.aarch64

# The Aphrodite Dockerfile is used to construct an Aphrodite image that can be used
# directly to run the OpenAI-compatible server.
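# Example build invocation -- a sketch only: the image tag is illustrative, BuildKit
# is assumed (the cache mounts below require it), and the build context is assumed
# to be the repository root so the COPY paths resolve:
#   DOCKER_BUILDKIT=1 docker build -f Dockerfile.aarch64 -t aphrodite-aarch64 .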
ARG CUDA_VERSION=12.4.1
#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3
ENV DEBIAN_FRONTEND=noninteractive
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
    && add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version
RUN apt-get update -y \
    && apt-get install -y python3-pip git curl libibverbs-dev
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
WORKDIR /workspace
# install build and runtime dependencies
COPY requirements-common.txt requirements-common.txt
RUN pip install packaging wheel
RUN pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu124
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install nvidia-ml-py==12.555.43 protobuf==3.20.2 ninja msgspec coloredlogs portalocker pytimeparse -r requirements-common.txt
# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
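# The arch list above covers a broad range of GPUs; narrowing it shortens compile
# time. A sketch of overriding it at build time (the single Hopper arch shown is
# only an example, not a requirement of this file):
#   docker build -f Dockerfile.aarch64 \
#       --build-arg torch_cuda_arch_list='9.0+PTX' -t aphrodite-aarch64 .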
#################### BASE BUILD IMAGE ####################
#################### WHEEL BUILD IMAGE ####################
FROM base AS build
ARG PYTHON_VERSION=3
# install compiler cache to speed up compilation leveraging local or remote caching
RUN apt-get update -y && apt-get install -y ccache
# files and directories needed to build the wheels
COPY kernels kernels
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY pyproject.toml pyproject.toml
COPY aphrodite aphrodite
# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}
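# MAX_JOBS and NVCC_THREADS trade build speed against memory use. A sketch of
# overriding both at build time (values are illustrative; size them to the build
# host's RAM and core count):
#   docker build -f Dockerfile.aarch64 \
#       --build-arg max_jobs=8 --build-arg nvcc_threads=2 -t aphrodite-aarch64 .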
ENV CCACHE_DIR=/root/.cache/ccache
RUN python3 -m pip install "cmake==3.26"
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
#################### WHEEL BUILD IMAGE ####################
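# To produce only the wheel without assembling the runtime image, the build can be
# stopped at this stage and its filesystem exported. A sketch using BuildKit's local
# output (the out/ destination is illustrative; the wheel lands under
# out/workspace/dist/):
#   docker buildx build -f Dockerfile.aarch64 --target build \
#       --output type=local,dest=out .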
#################### DEV IMAGE ####################
FROM base AS dev
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
#################### DEV IMAGE ####################
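# The dev image is not part of the default build path; it can be selected
# explicitly when needed (tag name is illustrative):
#   docker build -f Dockerfile.aarch64 --target dev -t aphrodite-dev-aarch64 .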
#################### Aphrodite installation IMAGE ####################
# image with Aphrodite installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base
ARG CUDA_VERSION=12.4.1
WORKDIR /aphrodite-workspace
RUN apt-get update -y \
    && apt-get install -y python3-pip git vim
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# install the Aphrodite wheel first, so that torch etc. will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install dist/*.whl --verbose
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/PygmalionAI/aphrodite-engine/releases/download/v0.6.5/flashinfer-0.1.4-cp311-cp311-linux_aarch64.whl
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/PygmalionAI/aphrodite-engine/releases/download/v0.6.5/xformers-0.0.28+1fc661fe.d20241223-cp38-abi3-linux_aarch64.whl
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/PygmalionAI/aphrodite-engine/releases/download/v0.6.5/aphrodite_flash_attn-2.6.1.post2-cp38-abi3-linux_aarch64.whl
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/PygmalionAI/aphrodite-engine/releases/download/v0.6.5/triton-3.2.0+gitf27f6a72-cp38-abi3-linux_aarch64.whl
#################### Aphrodite installation IMAGE ####################
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM aphrodite-base AS aphrodite-aarch64
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'
ENV NUMBA_CACHE_DIR=$HOME/.numba_cache
ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
#################### OPENAI API SERVER ####################
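# Example run invocation -- a sketch only: the image tag, model id, and port are
# illustrative, and anything after the image name is forwarded to the api_server
# entrypoint above:
#   docker run --gpus all --ipc=host -p 2242:2242 aphrodite-aarch64 \
#       --model <huggingface-model-id> --port 2242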