# syntax=docker/dockerfile:1
  1. # The Aphrodite Dockerfile is used to construct Aphrodite image that can be directly used
  2. # to run the OpenAI compatible server.
  3. ARG CUDA_VERSION=12.4.1
  4. #################### BASE BUILD IMAGE ####################
  5. # prepare basic build environment
  6. FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
  7. ARG CUDA_VERSION=12.4.1
  8. ARG PYTHON_VERSION=3
  9. ENV DEBIAN_FRONTEND=noninteractive
  10. RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
  11. && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
  12. && apt-get update -y \
  13. && apt-get install -y ccache software-properties-common \
  14. && add-apt-repository ppa:deadsnakes/ppa \
  15. && apt-get update -y \
  16. && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
  17. && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
  18. && python3 --version \
  19. && python3 -m pip --version
  20. RUN apt-get update -y \
  21. && apt-get install -y python3-pip git curl libibverbs-dev
  22. # Workaround for https://github.com/openai/triton/issues/2507 and
  23. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  24. # this won't be needed for future versions of this docker image
  25. # or future versions of triton.
  26. RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
  27. WORKDIR /workspace
  28. # install build and runtime dependencies
  29. COPY requirements-common.txt requirements-common.txt
  30. COPY requirements-cuda.txt requirements-cuda.txt
  31. RUN pip install packaging wheel
  32. RUN --mount=type=cache,target=/root/.cache/pip \
  33. python3 -m pip install -r requirements-cuda.txt
  34. # cuda arch list used by torch
  35. # can be useful for both `dev` and `test`
  36. # explicitly set the list to avoid issues with torch 2.2
  37. # see https://github.com/pytorch/pytorch/pull/123243
  38. ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
  39. ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
  40. #################### BASE BUILD IMAGE ####################
  41. #################### WHEEL BUILD IMAGE ####################
  42. FROM base AS build
  43. ARG PYTHON_VERSION=3
  44. # install build dependencies
  45. COPY requirements-build.txt requirements-build.txt
  46. RUN --mount=type=cache,target=/root/.cache/pip \
  47. python3 -m pip install -r requirements-build.txt
  48. # install compiler cache to speed up compilation leveraging local or remote caching
  49. RUN apt-get update -y && apt-get install -y ccache
  50. # files and directories related to build wheels
  51. COPY kernels kernels
  52. COPY setup.py setup.py
  53. COPY cmake cmake
  54. COPY CMakeLists.txt CMakeLists.txt
  55. COPY requirements-common.txt requirements-common.txt
  56. COPY requirements-cuda.txt requirements-cuda.txt
  57. COPY pyproject.toml pyproject.toml
  58. COPY aphrodite aphrodite
  59. # max jobs used by Ninja to build extensions
  60. ARG max_jobs=2
  61. ENV MAX_JOBS=${max_jobs}
  62. # number of threads used by nvcc
  63. ARG nvcc_threads=8
  64. ENV NVCC_THREADS=${nvcc_threads}
  65. # make sure punica kernels are built (for LoRA)
  66. ENV APHRODITE_INSTALL_PUNICA_KERNELS=1
  67. ENV CCACHE_DIR=/root/.cache/ccache
  68. RUN --mount=type=cache,target=/root/.cache/ccache \
  69. --mount=type=cache,target=/root/.cache/pip \
  70. python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
  71. #################### EXTENSION Build IMAGE ####################
  72. #################### DEV IMAGE ####################
  73. FROM base as dev
  74. COPY requirements-dev.txt requirements-dev.txt
  75. RUN --mount=type=cache,target=/root/.cache/pip \
  76. python3 -m pip install -r requirements-dev.txt
  77. #################### DEV IMAGE ####################
  78. #################### Aphrodite installation IMAGE ####################
  79. # image with Aphrodite installed
  80. FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base
  81. ARG CUDA_VERSION=12.4.1
  82. WORKDIR /aphrodite-workspace
  83. RUN apt-get update -y \
  84. && apt-get install -y python3-pip git vim
  85. # Workaround for https://github.com/openai/triton/issues/2507 and
  86. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  87. # this won't be needed for future versions of this docker image
  88. # or future versions of triton.
  89. RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
  90. # install aphrodite wheel first, so that torch etc will be installed
  91. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
  92. --mount=type=cache,target=/root/.cache/pip \
  93. python3 -m pip install dist/*.whl --verbose
  94. RUN --mount=type=cache,target=/root/.cache/pip \
  95. python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp310-cp310-linux_x86_64.whl
  96. #################### Aphrodite installation IMAGE ####################
  97. #################### OPENAI API SERVER ####################
  98. # openai api server alternative
  99. FROM aphrodite-base AS aphrodite-openai
  100. # install additional dependencies for openai api server
  101. RUN --mount=type=cache,target=/root/.cache/pip \
  102. python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'
  103. ENV NUMBA_CACHE_DIR=$HOME/.numba_cache
  104. ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
  105. #################### OPENAI API SERVER ####################