test_qwen.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. from typing import Type
  2. import pytest
  3. from ..conftest import AphroditeRunner, HfRunner
  4. from .utils import check_logprobs_close
  5. models = ["qwen/qwen-vl"]
  6. @pytest.mark.parametrize("dtype", ["half"])
  7. @pytest.mark.parametrize("max_tokens", [32])
  8. @pytest.mark.parametrize("num_logprobs", [5])
  9. @pytest.mark.parametrize("model", models)
  10. def test_text_only_qwen_model(
  11. hf_runner: Type[HfRunner],
  12. aphrodite_runner: Type[AphroditeRunner],
  13. example_prompts,
  14. model: str,
  15. *,
  16. dtype: str,
  17. max_tokens: int,
  18. num_logprobs: int,
  19. ):
  20. # This test checks language inputs only, since the visual component
  21. # for qwen-vl is still unsupported in Aphrodite. In the near-future, the
  22. # implementation and this test will be extended to consider
  23. # visual inputs as well.
  24. with hf_runner(model, dtype=dtype, is_vision_model=False) as hf_model:
  25. hf_outputs = hf_model.generate_greedy_logprobs_limit(
  26. example_prompts,
  27. max_tokens,
  28. num_logprobs=num_logprobs,
  29. )
  30. with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
  31. aphrodite_outputs = aphrodite_model.generate_greedy_logprobs(
  32. example_prompts,
  33. max_tokens,
  34. num_logprobs=num_logprobs,
  35. )
  36. check_logprobs_close(
  37. outputs_0_lst=hf_outputs,
  38. outputs_1_lst=aphrodite_outputs,
  39. name_0="hf",
  40. name_1="aphrodite",
  41. )