# syntax=docker/dockerfile:1
  1. # The Aphrodite Dockerfile is used to construct Aphrodite image that can be directly used
  2. # to run the OpenAI compatible server.
  3. #################### BASE BUILD IMAGE ####################
  4. # prepare basic build environment
  5. FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
  6. RUN apt-get update -y \
  7. && apt-get install -y python3-pip git
  8. # Workaround for https://github.com/openai/triton/issues/2507 and
  9. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  10. # this won't be needed for future versions of this docker image
  11. # or future versions of triton.
  12. RUN ldconfig /usr/local/cuda-12.4/compat/
  13. WORKDIR /workspace
  14. # install build and runtime dependencies
  15. COPY requirements-common.txt requirements-common.txt
  16. COPY requirements-cuda.txt requirements-cuda.txt
  17. RUN --mount=type=cache,target=/root/.cache/pip \
  18. pip install -r requirements-cuda.txt
  19. # install development dependencies
  20. COPY requirements-dev.txt requirements-dev.txt
  21. RUN --mount=type=cache,target=/root/.cache/pip \
  22. pip install -r requirements-dev.txt
  23. # cuda arch list used by torch
  24. # can be useful for both `dev` and `test`
  25. # explicitly set the list to avoid issues with torch 2.2
  26. # see https://github.com/pytorch/pytorch/pull/123243
  27. ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
  28. ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
  29. #################### BASE BUILD IMAGE ####################
  30. #################### WHEEL BUILD IMAGE ####################
  31. FROM dev AS build
  32. # install build dependencies
  33. COPY requirements-build.txt requirements-build.txt
  34. RUN --mount=type=cache,target=/root/.cache/pip \
  35. pip install -r requirements-build.txt
  36. # install compiler cache to speed up compilation leveraging local or remote caching
  37. RUN apt-get update -y && apt-get install -y ccache
  38. # files and directories related to build wheels
  39. COPY kernels kernels
  40. COPY setup.py setup.py
  41. COPY cmake cmake
  42. COPY CMakeLists.txt CMakeLists.txt
  43. COPY requirements-common.txt requirements-common.txt
  44. COPY requirements-cuda.txt requirements-cuda.txt
  45. COPY pyproject.toml pyproject.toml
  46. COPY aphrodite aphrodite
  47. # max jobs used by Ninja to build extensions
  48. ARG max_jobs=2
  49. ENV MAX_JOBS=${max_jobs}
  50. # number of threads used by nvcc
  51. ARG nvcc_threads=8
  52. ENV NVCC_THREADS=${nvcc_threads}
  53. # make sure punica kernels are built (for LoRA)
  54. ENV APHRODITE_INSTALL_PUNICA_KERNELS=1
  55. # make sure quant kernels are built
  56. ENV APHRODITE_INSTALL_QUANT_KERNELS=1
  57. # make sure hadamard kernels are build
  58. ENV APHRODITE_INSTALL_HADAMARD_KERNELS=1
  59. ENV CCACHE_DIR=/root/.cache/ccache
  60. RUN --mount=type=cache,target=/root/.cache/ccache \
  61. --mount=type=cache,target=/root/.cache/pip \
  62. python3 setup.py bdist_wheel --dist-dir=dist
  63. #################### EXTENSION Build IMAGE ####################
  64. #################### Aphrodite installation IMAGE ####################
  65. # image with Aphrodite installed
  66. FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS aphrodite-base
  67. WORKDIR /aphrodite-workspace
  68. RUN apt-get update -y \
  69. && apt-get install -y python3-pip git vim
  70. # Workaround for https://github.com/openai/triton/issues/2507 and
  71. # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
  72. # this won't be needed for future versions of this docker image
  73. # or future versions of triton.
  74. RUN ldconfig /usr/local/cuda-12.4/compat/
  75. # install vllm wheel first, so that torch etc will be installed
  76. RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
  77. --mount=type=cache,target=/root/.cache/pip \
  78. pip install dist/*.whl --verbose
  79. #################### Aphrodite installation IMAGE ####################
  80. #################### OPENAI API SERVER ####################
  81. # openai api server alternative
  82. FROM aphrodite-base AS aphrodite-openai
  83. # install additional dependencies for openai api server
  84. RUN --mount=type=cache,target=/root/.cache/pip \
  85. pip install accelerate hf_transfer modelscope
  86. ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
  87. #################### OPENAI API SERVER ####################