test_mapper.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. from contextlib import nullcontext
  2. import numpy as np
  3. import pytest
  4. from transformers import CLIPImageProcessor, LlavaNextImageProcessor
  5. from aphrodite.common.config import ModelConfig
  6. from aphrodite.multimodal import MultiModalRegistry
  7. from aphrodite.multimodal.utils import rescale_image_size
  8. @pytest.fixture
  9. def mm_registry():
  10. return MultiModalRegistry()
  11. @pytest.mark.parametrize("dtype", ["half", "float"])
  12. @pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
  13. def test_clip_image_processor(image_assets, mm_registry, dtype, size_factor):
  14. MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
  15. hf_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
  16. assert isinstance(hf_processor, CLIPImageProcessor)
  17. model_config = ModelConfig(
  18. model=MODEL_NAME,
  19. tokenizer=MODEL_NAME,
  20. tokenizer_mode="auto",
  21. trust_remote_code=False,
  22. seed=0,
  23. dtype=dtype,
  24. revision=None,
  25. limit_mm_per_prompt={"image": 1},
  26. )
  27. mm_registry.init_mm_limits_per_prompt(model_config)
  28. for asset in image_assets:
  29. image = rescale_image_size(asset.pil_image, size_factor)
  30. hf_result = hf_processor.preprocess(
  31. image,
  32. return_tensors="pt",
  33. )
  34. aphrodite_result = mm_registry.map_input(
  35. model_config,
  36. {"image": image},
  37. )
  38. assert hf_result.keys() == aphrodite_result.keys()
  39. for key, hf_tensor in hf_result.items():
  40. hf_arr: np.ndarray = hf_tensor.numpy()
  41. aphrodite_arr: np.ndarray = aphrodite_result[key].numpy()
  42. assert hf_arr.shape == aphrodite_arr.shape, f"Failed for key={key}"
  43. assert np.allclose(hf_arr, aphrodite_arr), f"Failed for key={key}"
  44. @pytest.mark.parametrize("dtype", ["half", "float"])
  45. @pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
  46. def test_llava_next_image_processor(image_assets, mm_registry, dtype,
  47. size_factor):
  48. MODEL_NAME = "llava-hf/llava-v1.6-vicuna-7b-hf"
  49. hf_processor = LlavaNextImageProcessor.from_pretrained(MODEL_NAME)
  50. assert isinstance(hf_processor, LlavaNextImageProcessor)
  51. model_config = ModelConfig(
  52. model=MODEL_NAME,
  53. tokenizer=MODEL_NAME,
  54. tokenizer_mode="auto",
  55. trust_remote_code=False,
  56. seed=0,
  57. dtype=dtype,
  58. revision=None,
  59. limit_mm_per_prompt={"image": 1},
  60. )
  61. mm_registry.init_mm_limits_per_prompt(model_config)
  62. for asset in image_assets:
  63. image = rescale_image_size(asset.pil_image, size_factor)
  64. hf_result = hf_processor.preprocess(
  65. image,
  66. return_tensors="pt",
  67. )
  68. aphrodite_result = mm_registry.map_input(
  69. model_config,
  70. {"image": image},
  71. )
  72. assert hf_result.keys() == aphrodite_result.keys()
  73. for key, hf_tensor in hf_result.items():
  74. hf_arr: np.ndarray = hf_tensor.numpy()
  75. aphrodite_arr: np.ndarray = aphrodite_result[key].numpy()
  76. assert hf_arr.shape == aphrodite_arr.shape, f"Failed for key={key}"
  77. assert np.allclose(hf_arr, aphrodite_arr), f"Failed for key={key}"
  78. @pytest.mark.parametrize(
  79. ("num_images", "limit", "is_valid"),
  80. [(0, 0, True), (0, 1, True), (1, 0, False), (1, 1, True), (1, 2, True),
  81. (2, 1, False), (2, 2, True)],
  82. )
  83. def test_mm_limits(image_assets, mm_registry, num_images, limit, is_valid):
  84. MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
  85. model_config = ModelConfig(
  86. model=MODEL_NAME,
  87. tokenizer=MODEL_NAME,
  88. tokenizer_mode="auto",
  89. trust_remote_code=False,
  90. seed=0,
  91. dtype="half",
  92. revision=None,
  93. limit_mm_per_prompt={"image": limit},
  94. )
  95. mm_registry.init_mm_limits_per_prompt(model_config)
  96. image = image_assets[0].pil_image
  97. if num_images == 0:
  98. mm_inputs = {}
  99. elif num_images == 1:
  100. mm_inputs = {"image": image}
  101. else:
  102. mm_inputs = {"image": [image] * num_images}
  103. with nullcontext() if is_valid else pytest.raises(ValueError):
  104. mm_registry.map_input(model_config, mm_inputs)
  105. # NOTE: We don't test zero images since the HF processor doesn't support it
  106. @pytest.mark.parametrize("num_images", [1, 2])
  107. def test_image_mapper_multi(image_assets, mm_registry, num_images):
  108. MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
  109. model_config = ModelConfig(
  110. model=MODEL_NAME,
  111. tokenizer=MODEL_NAME,
  112. tokenizer_mode="auto",
  113. trust_remote_code=False,
  114. seed=0,
  115. dtype="half",
  116. revision=None,
  117. limit_mm_per_prompt={"image": num_images},
  118. )
  119. mm_registry.init_mm_limits_per_prompt(model_config)
  120. image = image_assets[0].pil_image
  121. mm_inputs = {"image": [image] * num_images}
  122. mapped_inputs = mm_registry.map_input(model_config, mm_inputs)
  123. assert len(mapped_inputs["pixel_values"]) == num_images