test_mistral.py

  1. """Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
  2. Run `pytest tests/models/test_mistral.py`.
  3. """
import pytest

from .utils import check_logprobs_close

MODELS = [
    "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.3",
]


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(
    hf_runner,
    aphrodite_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
) -> None:
    # TODO: Sliding window should be tested separately.
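    # Greedy-decode with HF Transformers first to produce the reference
    # completions and their top-`num_logprobs` token logprobs.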
    with hf_runner(model, dtype=dtype) as hf_model:
        hf_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)
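
    # Run the same prompts through Aphrodite with the same dtype and
    # greedy sampling settings.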
    with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
        aphrodite_outputs = aphrodite_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)
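
    # The two backends need not match token-for-token: closeness here roughly
    # means that each token one backend picks appears among the other
    # backend's top-`num_logprobs` candidates at that step.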
    check_logprobs_close(
        outputs_0_lst=hf_outputs,
        outputs_1_lst=aphrodite_outputs,
        name_0="hf",
        name_1="aphrodite",
    )