# test_embedding.py
  1. """Compare the outputs of HF and Aphrodite for Mistral models using greedy
  2. sampling.
  3. Run `pytest tests/models/embedding/language/test_embedding.py`.
  4. """
  5. import pytest
  6. import torch
  7. import torch.nn.functional as F
  8. MODELS = [
  9. "intfloat/e5-mistral-7b-instruct",
  10. ]
  11. def compare_embeddings(embeddings1, embeddings2):
  12. similarities = [
  13. F.cosine_similarity(torch.tensor(e1), torch.tensor(e2), dim=0)
  14. for e1, e2 in zip(embeddings1, embeddings2)
  15. ]
  16. return similarities
  17. @pytest.mark.parametrize("model", MODELS)
  18. @pytest.mark.parametrize("dtype", ["half"])
  19. def test_models(
  20. hf_runner,
  21. aphrodite_runner,
  22. example_prompts,
  23. model: str,
  24. dtype: str,
  25. ) -> None:
  26. with hf_runner(model, dtype=dtype, is_embedding_model=True) as hf_model:
  27. hf_outputs = hf_model.encode(example_prompts)
  28. with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
  29. aphrodite_outputs = aphrodite_model.encode(example_prompts)
  30. similarities = compare_embeddings(hf_outputs, aphrodite_outputs)
  31. all_similarities = torch.stack(similarities)
  32. tolerance = 1e-2
  33. assert torch.all((all_similarities <= 1.0 + tolerance)
  34. & (all_similarities >= 1.0 - tolerance)
  35. ), f"Not all values are within {tolerance} of 1.0"