# syntax=docker/dockerfile:1
  1. # The Aphrodite Dockerfile is used to construct Aphrodite image that can be directly used
  2. # to run the OpenAI compatible server.
  3. ARG CUDA_VERSION=12.4.1
  4. #################### BASE BUILD IMAGE ####################
  5. # prepare basic build environment
  6. FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
  7. ARG CUDA_VERSION=12.4.1
  8. ARG PYTHON_VERSION=3
  9. ENV DEBIAN_FRONTEND=noninteractive
  10. RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
  11. && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
  12. && apt-get update -y \
  13. && apt-get install -y ccache software-properties-common \
  14. && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
  15. && add-apt-repository ppa:deadsnakes/ppa \
  16. && apt-get update -y \
  17. && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
  18. && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
  19. && python3 --version \
  20. && python3 -m pip --version
  21. RUN apt-get update -y \
  22. && apt-get install -y python3-pip git curl libibverbs-dev
  23. # Workaround for https://github.com/openai/triton/issues/2507 and
  24. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  25. # this won't be needed for future versions of this docker image
  26. # or future versions of triton.
  27. RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
  28. WORKDIR /workspace
  29. # install build and runtime dependencies
  30. COPY requirements-common.txt requirements-common.txt
  31. COPY requirements-cuda.txt requirements-cuda.txt
  32. RUN pip install packaging wheel
  33. RUN --mount=type=cache,target=/root/.cache/pip \
  34. python3 -m pip install -r requirements-cuda.txt
  35. # cuda arch list used by torch
  36. # can be useful for both `dev` and `test`
  37. # explicitly set the list to avoid issues with torch 2.2
  38. # see https://github.com/pytorch/pytorch/pull/123243
  39. ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
  40. ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
  41. #################### BASE BUILD IMAGE ####################
  42. #################### WHEEL BUILD IMAGE ####################
  43. FROM base AS build
  44. ARG PYTHON_VERSION=3
  45. # install build dependencies
  46. COPY requirements-build.txt requirements-build.txt
  47. RUN --mount=type=cache,target=/root/.cache/pip \
  48. python3 -m pip install -r requirements-build.txt
  49. # install compiler cache to speed up compilation leveraging local or remote caching
  50. RUN apt-get update -y && apt-get install -y ccache
  51. # files and directories related to build wheels
  52. COPY kernels kernels
  53. COPY setup.py setup.py
  54. COPY cmake cmake
  55. COPY CMakeLists.txt CMakeLists.txt
  56. COPY requirements-common.txt requirements-common.txt
  57. COPY requirements-cuda.txt requirements-cuda.txt
  58. COPY pyproject.toml pyproject.toml
  59. COPY aphrodite aphrodite
  60. # max jobs used by Ninja to build extensions
  61. ARG max_jobs=2
  62. ENV MAX_JOBS=${max_jobs}
  63. # number of threads used by nvcc
  64. ARG nvcc_threads=8
  65. ENV NVCC_THREADS=${nvcc_threads}
  66. ENV CCACHE_DIR=/root/.cache/ccache
  67. RUN --mount=type=cache,target=/root/.cache/ccache \
  68. --mount=type=cache,target=/root/.cache/pip \
  69. python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
  70. #################### EXTENSION Build IMAGE ####################
  71. #################### DEV IMAGE ####################
  72. FROM base as dev
  73. COPY requirements-dev.txt requirements-dev.txt
  74. RUN --mount=type=cache,target=/root/.cache/pip \
  75. python3 -m pip install -r requirements-dev.txt
  76. #################### DEV IMAGE ####################
  77. #################### Aphrodite installation IMAGE ####################
  78. # image with Aphrodite installed
  79. FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base
  80. ARG CUDA_VERSION=12.4.1
  81. WORKDIR /aphrodite-workspace
  82. RUN apt-get update -y \
  83. && apt-get install -y python3-pip git vim
  84. # Workaround for https://github.com/openai/triton/issues/2507 and
  85. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  86. # this won't be needed for future versions of this docker image
  87. # or future versions of triton.
  88. RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
  89. # install aphrodite wheel first, so that torch etc will be installed
  90. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
  91. --mount=type=cache,target=/root/.cache/pip \
  92. python3 -m pip install dist/*.whl --verbose
  93. RUN --mount=type=cache,target=/root/.cache/pip \
  94. python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.4/flashinfer-0.1.4+cu121torch2.4-cp310-cp310-linux_x86_64.whl
  95. #################### Aphrodite installation IMAGE ####################
  96. #################### OPENAI API SERVER ####################
  97. # openai api server alternative
  98. FROM aphrodite-base AS aphrodite-openai
  99. # install additional dependencies for openai api server
  100. RUN --mount=type=cache,target=/root/.cache/pip \
  101. python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'
  102. ENV NUMBA_CACHE_DIR=$HOME/.numba_cache
  103. ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
  104. #################### OPENAI API SERVER ####################