12345678910111213141516171819202122 |
- import os
- MAX_MODEL_LEN = 1024
- MODEL_NAME = os.environ.get(
- "MODEL_NAME", "robertgshaw2/zephyr-7b-beta-channelwise-gptq"
- )
- REVISION = os.environ.get("REVISION", "main")
- QUANTIZATION = os.environ.get("QUANTIZATION", "gptq_marlin")
- def test_weight_loading(aphrodite_runner):
- with aphrodite_runner(
- model_name=MODEL_NAME,
- revision=REVISION,
- dtype="auto",
- quantization=QUANTIZATION,
- max_model_len=MAX_MODEL_LEN,
- tensor_parallel_size=2,
- ) as model:
- output = model.generate_greedy("Hello world!", max_tokens=20)
- print(output)
- assert output
|