test_intern_vit.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. from typing import Optional
  2. import pytest
  3. import torch
  4. import torch.nn as nn
  5. from huggingface_hub import snapshot_download
  6. from transformers import AutoConfig, AutoModel, CLIPImageProcessor
  7. from ....conftest import _ImageAssets, cleanup
  8. # we use snapshot_download to prevent conflicts between
  9. # dynamic_module and trust_remote_code for hf_runner
  10. DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
  11. models = [
  12. snapshot_download("OpenGVLab/InternViT-300M-448px",
  13. allow_patterns=DOWNLOAD_PATTERN),
  14. snapshot_download("OpenGVLab/InternViT-6B-448px-V1-5",
  15. allow_patterns=DOWNLOAD_PATTERN),
  16. ]
  17. def run_intern_vit_test(
  18. image_assets: _ImageAssets,
  19. model: str,
  20. *,
  21. dtype: str,
  22. distributed_executor_backend: Optional[str] = None,
  23. ):
  24. img_processor = CLIPImageProcessor.from_pretrained(model)
  25. images = [asset.pil_image for asset in image_assets]
  26. pixel_values = [
  27. img_processor(images, return_tensors='pt').pixel_values.to(dtype)
  28. for images in images
  29. ]
  30. config = AutoConfig.from_pretrained(model, trust_remote_code=True)
  31. if not getattr(config, "norm_type", None):
  32. config.norm_type = "rms_norm"
  33. hf_model = AutoModel.from_pretrained(model,
  34. torch_dtype=dtype,
  35. trust_remote_code=True).to("cuda")
  36. hf_outputs_per_image = [
  37. hf_model(pixel_value.to("cuda")).last_hidden_state
  38. for pixel_value in pixel_values
  39. ]
  40. from aphrodite.modeling.models.intern_vit import InternVisionModel
  41. aphrodite_model = InternVisionModel(config)
  42. aphrodite_model.load_weights(hf_model.state_dict().items())
  43. del hf_model
  44. cleanup()
  45. aphrodite_model = aphrodite_model.to("cuda", dtype)
  46. aphrodite_outputs_per_image = [
  47. aphrodite_model(pixel_values=pixel_value.to("cuda"))
  48. for pixel_value in pixel_values
  49. ]
  50. del aphrodite_model
  51. cleanup()
  52. cos_similar = nn.CosineSimilarity(dim=-1)
  53. for aphrodite_output, hf_output in zip(aphrodite_outputs_per_image,
  54. hf_outputs_per_image):
  55. assert cos_similar(aphrodite_output, hf_output).mean() > 0.99
  56. @pytest.mark.parametrize("model", models)
  57. @pytest.mark.parametrize("dtype", [torch.half])
  58. @torch.inference_mode()
  59. def test_models(dist_init, image_assets, model, dtype: str) -> None:
  60. run_intern_vit_test(
  61. image_assets,
  62. model,
  63. dtype=dtype,
  64. )