Browse Source

fix+feat: docker compose (#264)

* Add docker-compose.yml and corresponding .env

* Remove the now unnecessary entrypoint

* Remove the now unnecessary entrypoint

* Add example .env for mixtral-instruct-awq

* improved rework

* quick entrypoint fix

* quick Dockerfile fix

* another Dockerfile fix

* only build-essential's essentials

* add build context (commented out)

* Fix Dockerfile

Entrypoint exec form doesn't do variable substitution automatically ($HOME)

* Fix Dockerfile

Make entrypoint executable

* Update .env

Changed SSL path to work for non-root user

* Update docker-compose.yml

Changed SSL path to work for non-root user

* Update entrypoint.sh

Changed SSL path to work for non-root user

* Fix OpenAI endpoint in entrypoint.sh

Make it work when ENDPOINT is undefined

* make entrypoint script executable

---------

Co-authored-by: Stefan Schwarz <s.schwarz@mps-solutions.de>
Co-authored-by: AlpinDale <alpindale@gmail.com>
Stefan Daniel Schwarz 1 year ago
parent
commit
810ca83066
4 changed files with 93 additions and 66 deletions
  1. 22 0
      docker/.env
  2. 23 19
      docker/Dockerfile
  3. 23 23
      docker/docker-compose.yml
  4. 25 24
      docker/entrypoint.sh

+ 22 - 0
docker/.env

@@ -0,0 +1,22 @@
+#HUGGING_FACE_HUB_TOKEN=<secret>
+
+#HF_CACHE=~/.cache/huggingface
+#UID=1000
+#GID=0
+#TZ=UTC
+
+#ENDPOINT=openai
+#PORT=5000
+#API_KEY=sk-example # ENDPOINT != kobold
+#SSL_KEYFILE=~/ssl/server.key # ENDPOINT != kobold
+#SSL_CERTFILE=~/ssl/server.crt # ENDPOINT != kobold
+#MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.2
+#REVISION=main
+#DATATYPE=half # FP16. Recommended for AWQ quantization.
+#KVCACHE=fp8_e5m2 # Reduces the GPU memory footprint and boosts performance, but may cause a slight accuracy drop.
+#CONTEXT_LENGTH=32768 # If unspecified, will be automatically derived from the model.
+#NUM_GPUS=1
+#GPU_MEMORY_UTILIZATION=0.8 # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
+#QUANTIZATION=awq
+#ENFORCE_EAGER=true # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
+#CMD_ADDITIONAL_ARGUMENTS="--seed 0"

+ 23 - 19
docker/Dockerfile

@@ -1,35 +1,39 @@
 FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
 
-WORKDIR /workspace/aphrodite-engine
+ENV HOME=/app/aphrodite-engine
 
-# Upgrade OS Packages
+WORKDIR $HOME
+
+# Upgrade OS Packages + Prepare Python Environment
 RUN set -eux; \
-    apt-get update \
+    export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
     && apt-get upgrade -y \
-    && rm -rf /var/lib/apt/lists/*
-
-# Preparing Conda Environment
-RUN apt-get update \
-    && apt-get install -y git build-essential \
-    && apt-get install python3 python3-pip -y \
+    && apt-get install -y bzip2 g++ git make python3-pip tzdata \
     && rm -fr /var/lib/apt/lists/*
 
-COPY entrypoint.sh /workspace/aphrodite-engine
-
-ENV PATH /opt/conda/envs/aphrodite-engine/bin:$PATH
-
-# alias python3 to python
+# Alias python3 to python
 RUN ln -s /usr/bin/python3 /usr/bin/python
 
-RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install --no-cache-dir --upgrade pip
 
-RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine
-RUN mv /tmp/aphrodite-engine/* /workspace/aphrodite-engine/
-RUN rm -rf /tmp/aphrodite-engine
+RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \
+    && mv /tmp/aphrodite-engine/* . \
+    && rm -fr /tmp/aphrodite-engine \
+    && chmod +x docker/entrypoint.sh
 
 # Export the CUDA_HOME variable correctly
 ENV CUDA_HOME=/usr/local/cuda
 
+ENV HF_HOME=/tmp
+
 RUN python3 -m pip install --no-cache-dir -e .
 
-ENTRYPOINT [ "/app/aphrodite-engine/entrypoint.sh" ]
+# Entrypoint exec form doesn't do variable substitution automatically ($HOME)
+ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"]
+
+EXPOSE 5000
+
+USER 1000:0
+
+VOLUME ["/tmp"]

+ 23 - 23
docker/docker-compose.yml

@@ -1,29 +1,29 @@
+version: "3.7"
+
 services:
   aphrodite-engine:
-    build:
-      context: .
-    restart: on-failure:5
-    environment:
-      - NUM_GPUS=${NUM_GPUS}
-      - MODEL_NAME=${MODEL_NAME}
-      - REVISION=${REVISION}
-      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
-      - QUANTIZATION=${QUANTIZATION}
-      - KVCACHE=${KVCACHE}
-      - API_KEY=${API_KEY}
-      - CONTEXT_LENGTH=${CONTEXT_LENGTH}
-      - GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION}
-      - ENFORCE_EAGER=${ENFORCE_EAGER}
-    volumes:
-      - ${HOME}/.cache:/root/.cache/
-    shm_size: 10g
-    ports:
-      - "7860:7860"
-      - "2242:2242"
+    #build:
+    #  context: .
+    container_name: aphrodite-engine
     deploy:
       resources:
         reservations:
           devices:
-          - driver: nvidia
-            count: all
-            capabilities: [gpu]
+            - capabilities: [gpu]
+              count: all
+              driver: nvidia
+    env_file: .env
+    hostname: aphrodite-engine
+    image: alpindale/aphrodite-engine
+    ipc: host
+    ports:
+      - "${PORT:-5000}:5000"
+    restart: on-failure:5
+    user: "${UID:-1000}:${GID:-0}"
+    volumes:
+      - ${HF_CACHE:-hf-cache}:/tmp
+      - ${SSL_CERTFILE:-/dev/null}:/app/aphrodite-engine/server.crt:ro
+      - ${SSL_KEYFILE:-/dev/null}:/app/aphrodite-engine/server.key:ro
+
+volumes:
+  hf-cache:

+ 25 - 24
docker/entrypoint.sh

@@ -1,29 +1,30 @@
-#!/bin/bash
+#!/bin/bash -e
 
-set -xe
-
-cd /app/aphrodite-engine
 echo 'Starting Aphrodite Engine API server...'
-CMD="python3 -m aphrodite.endpoints.openai.api_server \
-             --host 0.0.0.0 \
-             --port 7860 \
-             --model $MODEL_NAME \
-             --tensor-parallel-size $NUM_GPUS \
-             --dtype $DATATYPE \
-             --max-model-len $CONTEXT_LENGTH \
-             --gmu $GPU_MEMORY_UTILIZATION"
 
-if [ -n "$QUANTIZATION" ]; then
-    CMD="$CMD --quantization $QUANTIZATION --dtype half"
-fi
-if [ -n "$API_KEY" ]; then
-    CMD="$CMD --api-keys $API_KEY"
-fi
-if [ -n "$ENFORCE_EAGER" ]; then
-    CMD="$CMD --enforce-eager"
-fi
-if [ -n "$KVCACHE" ]; then
-    CMD="$CMD --kv-cache-dtype $KVCACHE"
+CMD="python3 -m aphrodite.endpoints.${ENDPOINT:-openai}.api_server
+             --host 0.0.0.0
+             --port 5000
+             --download-dir ${HF_HOME:?}/hub
+             ${MODEL_NAME:+--model $MODEL_NAME}
+             ${REVISION:+--revision $REVISION}
+             ${DATATYPE:+--dtype $DATATYPE}
+             ${KVCACHE:+--kv-cache-dtype $KVCACHE}
+             ${CONTEXT_LENGTH:+--max-model-len $CONTEXT_LENGTH}
+             ${NUM_GPUS:+--tensor-parallel-size $NUM_GPUS}
+             ${GPU_MEMORY_UTILIZATION:+--gpu-memory-utilization $GPU_MEMORY_UTILIZATION}
+             ${QUANTIZATION:+--quantization $QUANTIZATION}
+             ${ENFORCE_EAGER:+--enforce-eager}
+             ${CMD_ADDITIONAL_ARGUMENTS}"
+
+# Only the 'openai' endpoint currently supports api-keys and ssl
+if [ "${ENDPOINT:-openai}" = "openai" ]; then
+  CMD+=" ${API_KEY:+--api-keys "$API_KEY"} ${SSL_KEYFILE:+--ssl-keyfile server.key} ${SSL_CERTFILE:+--ssl-certfile server.crt}"
 fi
 
-exec $CMD
+# set umask to ensure group read / write at runtime
+umask 002
+
+set -x
+
+exec $CMD