#!/usr/bin/env bash
#
# launch_tgi.sh — run a Hugging Face Text Generation Inference (TGI) server
# in Docker, serving MODEL_ID on localhost:8000.
#
# Usage:
#   launch_tgi.sh MODEL_ID MAX_BATCH_TOTAL_TOKENS
#
#   MODEL_ID                Hugging Face model id (e.g. mistralai/Mistral-7B-v0.1)
#   MAX_BATCH_TOTAL_TOKENS  value for TGI's --max-batch-total-tokens (positive int)
#
# Requires: docker with the NVIDIA container runtime (--gpus all).

set -euo pipefail

readonly PORT=8000
readonly IMAGE="ghcr.io/huggingface/text-generation-inference:1.4.0"

usage() {
  printf 'Usage: %s MODEL_ID MAX_BATCH_TOTAL_TOKENS\n' "${0##*/}" >&2
  exit 2
}

# Both positional arguments are required; previously empty args were passed
# straight through to docker, producing an opaque TGI launcher error.
[[ $# -eq 2 ]] || usage

MODEL=$1
TOKENS=$2

# The token budget must be a positive integer.
[[ "$TOKENS" =~ ^[1-9][0-9]*$ ]] || usage

# --shm-size 1g : TGI needs more shared memory than Docker's 64 MB default.
# -v $PWD/data  : persist downloaded model weights across container restarts.
# -p PORT:80    : TGI listens on 80 inside the container.
docker run --gpus all --shm-size 1g -p "$PORT":80 \
  -v "$PWD/data:/data" \
  "$IMAGE" \
  --model-id "$MODEL" \
  --sharded false \
  --max-input-length 1024 \
  --max-total-tokens 2048 \
  --max-best-of 5 \
  --max-concurrent-requests 5000 \
  --max-batch-total-tokens "$TOKENS"