# test_multimodal_broadcast.py
  1. """Compare the outputs of HF and distributed Aphrodite when using greedy
  2. sampling.
  3. Run:
  4. ```sh
  5. pytest -s -v test_multimodal_broadcast.py
  6. ```
  7. """
  8. import pytest
  9. from aphrodite.common.utils import cuda_device_count_stateless
  10. from ..utils import fork_new_process_for_each_test
  11. @pytest.mark.skipif(cuda_device_count_stateless() < 2,
  12. reason="Need at least 2 GPUs to run the test.")
  13. @pytest.mark.parametrize("model, distributed_executor_backend", [
  14. ("llava-hf/llava-1.5-7b-hf", "ray"),
  15. ("llava-hf/llava-v1.6-mistral-7b-hf", "ray"),
  16. ("facebook/chameleon-7b", "ray"),
  17. ("llava-hf/llava-1.5-7b-hf", "mp"),
  18. ("llava-hf/llava-v1.6-mistral-7b-hf", "mp"),
  19. ("facebook/chameleon-7b", "mp"),
  20. ])
  21. @fork_new_process_for_each_test
  22. def test_models(hf_runner, aphrodite_runner, image_assets, model: str,
  23. distributed_executor_backend: str) -> None:
  24. dtype = "half"
  25. max_tokens = 5
  26. num_logprobs = 5
  27. tensor_parallel_size = 2
  28. if model.startswith("llava-hf/llava-1.5"):
  29. from ..models.test_llava import models, run_test
  30. elif model.startswith("llava-hf/llava-v1.6"):
  31. from ..models.test_llava_next import models, run_test
  32. elif model.startswith("facebook/chameleon"):
  33. from ..models.test_chameleon import models, run_test
  34. else:
  35. raise NotImplementedError(f"Unsupported model: {model}")
  36. run_test(
  37. hf_runner,
  38. aphrodite_runner,
  39. image_assets,
  40. model=models[0],
  41. # So that LLaVA-NeXT processor may return nested list
  42. size_factors=[0.25, 0.5, 1.0],
  43. dtype=dtype,
  44. max_tokens=max_tokens,
  45. num_logprobs=num_logprobs,
  46. tensor_parallel_size=tensor_parallel_size,
  47. distributed_executor_backend=distributed_executor_backend,
  48. )