test_embedding.py

  1. """Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
  2. Run `pytest tests/models/test_llama_embedding.py`.
  3. """
  4. import pytest
  5. import torch
  6. import torch.nn.functional as F

MODELS = [
    "intfloat/e5-mistral-7b-instruct",
]


def compare_embeddings(embeddings1, embeddings2):
    """Return the pairwise cosine similarity between two lists of embeddings."""
    similarities = [
        F.cosine_similarity(torch.tensor(e1), torch.tensor(e2), dim=0)
        for e1, e2 in zip(embeddings1, embeddings2)
    ]
    return similarities
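
# For reference, a minimal sketch of what `compare_embeddings` returns.
# The vectors below are invented for illustration, not real model output:
#
#     >>> a = [[1.0, 0.0], [0.0, 1.0]]
#     >>> b = [[1.0, 0.0], [1.0, 0.0]]
#     >>> [s.item() for s in compare_embeddings(a, b)]
#     [1.0, 0.0]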


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_models(
    hf_runner,
    aphrodite_runner,
    example_prompts,
    model: str,
    dtype: str,
) -> None:
    with hf_runner(model, dtype=dtype, is_embedding_model=True) as hf_model:
        hf_outputs = hf_model.encode(example_prompts)

    with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
        aphrodite_outputs = aphrodite_model.encode(example_prompts)

    similarities = compare_embeddings(hf_outputs, aphrodite_outputs)
    all_similarities = torch.stack(similarities)
    tolerance = 1e-2
    # Each HF/Aphrodite embedding pair should be near-identical,
    # i.e. its cosine similarity should sit within `tolerance` of 1.0.
    assert torch.all((all_similarities <= 1.0 + tolerance)
                     & (all_similarities >= 1.0 - tolerance)
                     ), f"Not all values are within {tolerance} of 1.0"
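
# Note: given torch.isclose's documented check |a - b| <= atol + rtol * |b|,
# the inclusive bounds check above could equivalently be written as:
#
#     torch.isclose(all_similarities,
#                   torch.ones_like(all_similarities),
#                   atol=tolerance, rtol=0).all()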