# The Aphrodite Dockerfile builds the Aphrodite image that can be used directly
# to run the OpenAI-compatible API server.
#
# Stages:
#   base             - CUDA devel image with Python and the CUDA requirements
#   build            - base plus build requirements; compiles the wheel
#   dev              - base plus development requirements
#   aphrodite-base   - CUDA `base` image with the built wheel installed
#   aphrodite-openai - aphrodite-base plus extras; runs the OpenAI API server
#
# The `RUN --mount=...` instructions below require BuildKit.

# Declared before the first FROM so it can be used in FROM lines; stages that
# need the value inside RUN instructions redeclare it.
ARG CUDA_VERSION=12.4.1

#################### BASE BUILD IMAGE ####################
# prepare basic build environment
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base

ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3

ENV DEBIAN_FRONTEND=noninteractive

# Preseed tzdata so its configuration never prompts, add the deadsnakes PPA for
# alternative Python versions, then install the toolchain in a single layer.
# The apt package lists are removed in the same layer that created them so they
# do not persist in the image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        curl \
        git \
        libibverbs-dev \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

WORKDIR /workspace

# install build and runtime dependencies
COPY requirements-common.txt requirements-adag.txt requirements-cuda.txt ./

# Use `python3 -m pip` so packages always land in the interpreter selected
# above, even when PYTHON_VERSION is overridden.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install packaging wheel

RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-cuda.txt

# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
#################### BASE BUILD IMAGE ####################


#################### WHEEL BUILD IMAGE ####################
FROM base AS build

ARG PYTHON_VERSION=3

# install build dependencies
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-build.txt

# ccache (compiler cache used to speed up compilation) is already installed in
# the `base` stage, so it is not installed again here.

# files and directories related to build wheels
COPY kernels kernels
COPY cmake cmake
COPY aphrodite aphrodite
COPY setup.py CMakeLists.txt pyproject.toml ./
COPY requirements-common.txt requirements-adag.txt requirements-cuda.txt ./

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}

# Point ccache at the cache mount used by the build step below.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
#################### WHEEL BUILD IMAGE ####################


#################### DEV IMAGE ####################
FROM base AS dev

COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
#################### DEV IMAGE ####################


#################### Aphrodite installation IMAGE ####################
# image with Aphrodite installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base

ARG CUDA_VERSION=12.4.1

WORKDIR /aphrodite-workspace

RUN apt-get update -y \
    && apt-get install -y \
        git \
        python3-pip \
        vim \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

# install aphrodite wheel first, so that torch etc will be installed
# (the wheel is bind-mounted from the `build` stage rather than copied, so the
# .whl file itself is not stored in an image layer)
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install dist/*.whl --verbose

# NOTE: this wheel is pinned to CPython 3.10 / torch 2.4 / cu121 (see the
# filename); it must be updated together with the Python or torch version.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp310-cp310-linux_x86_64.whl
#################### Aphrodite installation IMAGE ####################


#################### OPENAI API SERVER ####################
# openai api server alternative
FROM aphrodite-base AS aphrodite-openai

# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'

# Dockerfile ENV substitution only sees variables declared with ARG/ENV, and
# HOME is not declared anywhere in this file, so `$HOME/.numba_cache` would
# expand to `/.numba_cache`. Spell out root's home directory explicitly.
ENV NUMBA_CACHE_DIR=/root/.numba_cache

ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
#################### OPENAI API SERVER ####################