# Example environment variables — uncomment a line to override its default:
- #HUGGING_FACE_HUB_TOKEN=<secret>
- #HF_CACHE=~/.cache/huggingface
- #UID=1000
- #GID=0
- #TZ=UTC
- #ENDPOINT=openai
- #PORT=5000
- #API_KEY=sk-example # ENDPOINT != kobold
- #SSL_KEYFILE=~/ssl/server.key # ENDPOINT != kobold
- #SSL_CERTFILE=~/ssl/server.crt # ENDPOINT != kobold
- #MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.2
- #REVISION=main
- #DATATYPE=half # FP16. Recommended for AWQ quantization.
- #KVCACHE=fp8_e5m2 # Reduces the GPU memory footprint and boosts performance, but may cause a slight accuracy drop.
- #CONTEXT_LENGTH=32768 # If unspecified, will be automatically derived from the model.
- #NUM_GPUS=1
- #GPU_MEMORY_UTILIZATION=0.8 # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
- #QUANTIZATION=awq
- #ENFORCE_EAGER=true # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
- #CMD_ADDITIONAL_ARGUMENTS="--seed 0"