|
@@ -1,9 +1,26 @@
|
|
|
# The Aphrodite Dockerfile is used to construct Aphrodite image that can be directly used
|
|
|
# to run the OpenAI compatible server.
|
|
|
|
|
|
+ARG CUDA_VERSION=12.4.1
|
|
|
#################### BASE BUILD IMAGE ####################
|
|
|
# prepare basic build environment
|
|
|
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
|
|
|
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base
|
|
|
+
|
|
|
+ARG CUDA_VERSION=12.4.1
|
|
|
+ARG PYTHON_VERSION=3
|
|
|
+
|
|
|
+ENV DEBIAN_FRONTEND=noninteractive
|
|
|
+
|
|
|
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
|
|
|
+ && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
|
|
|
+ && apt-get update -y \
|
|
|
+ && apt-get install -y ccache software-properties-common \
|
|
|
+ && add-apt-repository -y ppa:deadsnakes/ppa \
|
|
|
+ && apt-get update -y \
|
|
|
+ && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
|
|
|
+ && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
|
|
|
+ && python3 --version \
|
|
|
+ && python3 -m pip --version
|
|
|
|
|
|
RUN apt-get update -y \
|
|
|
&& apt-get install -y python3-pip git
|
|
@@ -12,7 +29,7 @@ RUN apt-get update -y \
|
|
|
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
|
|
|
# this won't be needed for future versions of this docker image
|
|
|
# or future versions of triton.
|
|
|
-RUN ldconfig /usr/local/cuda-12.4/compat/
|
|
|
+RUN ldconfig /usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/
|
|
|
|
|
|
WORKDIR /workspace
|
|
|
|
|
@@ -20,12 +37,7 @@ WORKDIR /workspace
|
|
|
COPY requirements-common.txt requirements-common.txt
|
|
|
COPY requirements-cuda.txt requirements-cuda.txt
|
|
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
|
- pip install -r requirements-cuda.txt
|
|
|
-
|
|
|
-# install development dependencies
|
|
|
-COPY requirements-dev.txt requirements-dev.txt
|
|
|
-RUN --mount=type=cache,target=/root/.cache/pip \
|
|
|
- pip install -r requirements-dev.txt
|
|
|
+ python3 -m pip install -r requirements-cuda.txt
|
|
|
|
|
|
# cuda arch list used by torch
|
|
|
# can be useful for both `dev` and `test`
|
|
@@ -37,12 +49,14 @@ ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
|
|
|
|
|
|
|
|
#################### WHEEL BUILD IMAGE ####################
|
|
|
-FROM dev AS build
|
|
|
+FROM base AS build
|
|
|
+
|
|
|
+ARG PYTHON_VERSION=3
|
|
|
|
|
|
# install build dependencies
|
|
|
COPY requirements-build.txt requirements-build.txt
|
|
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
|
- pip install -r requirements-build.txt
|
|
|
+ python3 -m pip install -r requirements-build.txt
|
|
|
|
|
|
# install compiler cache to speed up compilation leveraging local or remote caching
|
|
|
RUN apt-get update -y && apt-get install -y ccache
|
|
@@ -73,9 +87,19 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
|
|
|
|
|
|
#################### EXTENSION Build IMAGE ####################
|
|
|
|
|
|
+#################### DEV IMAGE ####################
|
|
|
+FROM base AS dev
|
|
|
+
|
|
|
+COPY requirements-dev.txt requirements-dev.txt
|
|
|
+RUN --mount=type=cache,target=/root/.cache/pip \
|
|
|
+ python3 -m pip install -r requirements-dev.txt
|
|
|
+
|
|
|
+#################### DEV IMAGE ####################
|
|
|
+
|
|
|
#################### Aphrodite installation IMAGE ####################
|
|
|
# image with Aphrodite installed
|
|
|
-FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS aphrodite-base
|
|
|
+FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS aphrodite-base
|
|
|
+ARG CUDA_VERSION=12.4.1
|
|
|
WORKDIR /aphrodite-workspace
|
|
|
|
|
|
RUN apt-get update -y \
|
|
@@ -85,12 +109,12 @@ RUN apt-get update -y \
|
|
|
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
|
|
|
# this won't be needed for future versions of this docker image
|
|
|
# or future versions of triton.
|
|
|
-RUN ldconfig /usr/local/cuda-12.4/compat/
|
|
|
+RUN ldconfig /usr/local/cuda-$(echo "$CUDA_VERSION" | cut -d. -f1,2)/compat/
|
|
|
|
|
|
# install aphrodite wheel first, so that torch etc will be installed
|
|
|
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/aphrodite-workspace/dist \
|
|
|
--mount=type=cache,target=/root/.cache/pip \
|
|
|
- pip install dist/*.whl --verbose
|
|
|
+ python3 -m pip install dist/*.whl --verbose
|
|
|
#################### Aphrodite installation IMAGE ####################
|
|
|
|
|
|
|
|
@@ -100,7 +124,7 @@ FROM aphrodite-base AS aphrodite-openai
|
|
|
|
|
|
# install additional dependencies for openai api server
|
|
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
|
- pip install accelerate hf_transfer modelscope
|
|
|
+ python3 -m pip install accelerate hf_transfer modelscope
|
|
|
|
|
|
ENV NUMBA_CACHE_DIR=$HOME/.numba_cache
|
|
|
|