test_broadcast.py

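"""Run the single-GPU vision-language model tests (LLaVA, LLaVA-NeXT,
Chameleon) with tensor_parallel_size=2 on both the Ray and multiprocessing
distributed executor backends, which exercises broadcasting multimodal
inputs between workers.
"""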
import pytest

from ....utils import multi_gpu_test


@multi_gpu_test(num_gpus=2)
@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
@pytest.mark.parametrize("model", [
    "llava-hf/llava-1.5-7b-hf",
    "llava-hf/llava-v1.6-mistral-7b-hf",
    "facebook/chameleon-7b",
])
def test_models(hf_runner, vllm_runner, image_assets,
                distributed_executor_backend, model) -> None:
    dtype = "half"
    max_tokens = 5
    num_logprobs = 5
    tensor_parallel_size = 2

    if model.startswith("llava-hf/llava-1.5"):
        from .test_llava import models, run_test
    elif model.startswith("llava-hf/llava-v1.6"):
        from .test_llava_next import models, run_test  # type: ignore[no-redef]
    elif model.startswith("facebook/chameleon"):
        from .test_chameleon import models, run_test  # type: ignore[no-redef]
    else:
        raise NotImplementedError(f"Unsupported model: {model}")

    run_test(
        hf_runner,
        vllm_runner,
        image_assets,
        model=models[0],
        # So that LLaVA-NeXT processor may return nested list
        size_factors=[0.25, 0.5, 1.0],
        dtype=dtype,
        max_tokens=max_tokens,
        num_logprobs=num_logprobs,
        tensor_parallel_size=tensor_parallel_size,
        distributed_executor_backend=distributed_executor_backend,
    )