# The Aphrodite Dockerfile is used to construct an Aphrodite image that can be
# used directly to run the OpenAI-compatible server.
ARG CUDA_VERSION=12.4.1

#################### BASE BUILD IMAGE ####################
# Shared build environment: CUDA devel image plus Python, torch and the common
# Python requirements. Other stages in this file start from it via `FROM base`.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
# An ARG declared before FROM is only visible to FROM lines, so it is
# redeclared here for the ldconfig step below.
ARG CUDA_VERSION=12.4.1
ARG PYTHON_VERSION=3
ENV DEBIAN_FRONTEND=noninteractive

# Preseed the tzdata answers so the install never prompts, then install the
# system packages and the requested Python from the deadsnakes PPA.
# The apt package index is removed in the same layer that created it.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python3-pip \
        curl \
        git \
        libibverbs-dev \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

WORKDIR /workspace

# install build and runtime dependencies
# torch is installed before requirements-common.txt is copied so that editing
# the requirements file does not invalidate the torch layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install packaging wheel
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu124
COPY requirements-common.txt requirements-common.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install nvidia-ml-py==12.555.43 protobuf==3.20.2 ninja msgspec coloredlogs portalocker pytimeparse -r requirements-common.txt

# cuda arch list used by torch
# can be useful for both `dev` and `test`
# explicitly set the list to avoid issues with torch 2.2
# see https://github.com/pytorch/pytorch/pull/123243
ARG torch_cuda_arch_list='6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
#################### BASE BUILD IMAGE ####################
#################### WHEEL BUILD IMAGE ####################
# Builds the Aphrodite wheel into ./dist, relative to the working directory
# inherited from the `base` stage.
FROM base AS build
ARG PYTHON_VERSION=3

# ccache (compiler cache, used through CCACHE_DIR below) is already installed
# by the `base` stage, so it is not installed a second time here.

# Install the pinned CMake before any sources are copied so that source edits
# do not invalidate this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "cmake==3.26"

# files and directories related to build wheels
COPY kernels kernels
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY pyproject.toml pyproject.toml
COPY aphrodite aphrodite

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=${nvcc_threads}

# Point ccache at the directory backed by the cache mount below so compiled
# objects are reused across builds.
ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
#################### WHEEL BUILD IMAGE ####################
#################### DEV IMAGE ####################
# Development image: the `base` build environment plus the packages listed in
# requirements-dev.txt.
FROM base AS dev

COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install -r requirements-dev.txt
#################### DEV IMAGE ####################
#################### Aphrodite installation IMAGE ####################
# image with Aphrodite installed
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base
# Redeclared inside the stage so the ldconfig step below can read it.
ARG CUDA_VERSION=12.4.1
WORKDIR /aphrodite-workspace

# The apt package index is removed in the same layer so it does not end up in
# the runtime image.
RUN apt-get update -y \
    && apt-get install -y python3-pip git vim \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig "/usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/"

# install aphrodite wheel first, so that torch etc will be installed
# The wheel is bind-mounted from the `build` stage instead of being copied, so
# the wheel file itself is not stored in an image layer.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install dist/*.whl --verbose

# Location of the prebuilt aarch64 wheels installed below. Declared this late
# so that overriding it does not invalidate the layers above.
ARG APHRODITE_WHEEL_URL=https://github.com/PygmalionAI/aphrodite-engine/releases/download/v0.6.5

# NOTE(review): the flashinfer wheel is tagged cp311, while this stage installs
# the distribution's default python3 through apt -- confirm that interpreter is
# CPython 3.11, otherwise pip will reject the wheel.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "${APHRODITE_WHEEL_URL}/flashinfer-0.1.4-cp311-cp311-linux_aarch64.whl"
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "${APHRODITE_WHEEL_URL}/xformers-0.0.28+1fc661fe.d20241223-cp38-abi3-linux_aarch64.whl"
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "${APHRODITE_WHEEL_URL}/aphrodite_flash_attn-2.6.1.post2-cp38-abi3-linux_aarch64.whl"
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install "${APHRODITE_WHEEL_URL}/triton-3.2.0+gitf27f6a72-cp38-abi3-linux_aarch64.whl"
#################### Aphrodite installation IMAGE ####################
#################### OPENAI API SERVER ####################
# openai api server alternative
FROM aphrodite-base AS aphrodite-aarch64

# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install accelerate hf_transfer 'modelscope!=1.15.0'

# Dockerfile ENV expansion only substitutes variables declared with ENV/ARG.
# HOME is not declared anywhere in this file, so `$HOME` would expand to an
# empty string unless a base image declares it. The path is spelled out
# instead; /root matches the home directory used by the pip cache mounts.
ENV NUMBA_CACHE_DIR=/root/.numba_cache

ENTRYPOINT ["python3", "-m", "aphrodite.endpoints.openai.api_server"]
#################### OPENAI API SERVER ####################