test_regression.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  """Contains tests that check for regressions in Aphrodite's behavior.
  It should include tests for issues reported by users, to make sure they
  never happen again.
  """
  5. import gc
  6. import torch
  7. from aphrodite import LLM, SamplingParams
  def test_duplicated_ignored_sequence_group():
      """Check that generation returns exactly one output per prompt.

      The batch pairs a short prompt with a very long one (a sentence
      repeated 1000 times) on an engine limited to 4096 batched tokens.
      NOTE(review): presumably the long prompt is ignored by the engine and
      was once reported more than once -- confirm against the original
      user report.
      """
      params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=256)
      engine = LLM(
          model="facebook/opt-125m",
          max_num_batched_tokens=4096,
          tensor_parallel_size=1,
      )

      short_prompt = "This is a short prompt"
      long_prompt = "This is a very long prompt " * 1000
      batch = [short_prompt, long_prompt]

      results = engine.generate(batch, sampling_params=params)
      assert len(batch) == len(results)
  def test_max_tokens_none():
      """Check that generation works when ``max_tokens`` is ``None``.

      A single prompt is sent with ``max_tokens=None`` in the sampling
      parameters; the test passes when one output per prompt comes back.
      """
      params = SamplingParams(temperature=0.01, top_p=0.1, max_tokens=None)
      engine = LLM(
          model="facebook/opt-125m",
          max_num_batched_tokens=4096,
          tensor_parallel_size=1,
      )

      batch = ["Just say hello!"]
      results = engine.generate(batch, sampling_params=params)
      assert len(batch) == len(results)
  def test_gc():
      """Check that dropping an ``LLM`` instance releases its CUDA memory.

      An engine is built with ``enforce_eager=True`` and deleted right away.
      After a garbage-collection pass and emptying the CUDA cache, the
      memory reported by ``torch.cuda.memory_allocated()`` must be below
      50 * 1024 * 1024 bytes.
      """
      engine = LLM("facebook/opt-125m", enforce_eager=True)
      del engine
      gc.collect()
      torch.cuda.empty_cache()

      # By now the memory held for the model and the KV cache should have
      # been released. What PyTorch and others keep allocated is usually
      # around 10MB, so it should stay below 50MB.
      limit_bytes = 50 * 1024 * 1024
      leftover_bytes = torch.cuda.memory_allocated()
      assert leftover_bytes < limit_bytes
  def test_model_from_modelscope(monkeypatch):
      """Check generation with ``APHRODITE_USE_MODELSCOPE`` set to "True".

      The environment variable is set through ``monkeypatch`` before the
      engine is built, and removed again once generation has finished or
      failed. The test passes when every prompt yields an output.

      Args:
          monkeypatch: pytest's ``monkeypatch`` fixture, used to set and
              remove the environment variable.
      """
      # model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
      model_name = "qwen/Qwen1.5-0.5B-Chat"
      monkeypatch.setenv("APHRODITE_USE_MODELSCOPE", "True")
      try:
          llm = LLM(model=model_name)
          prompts = [
              "Hello, my name is",
              "The president of the United States is",
              "The capital of France is",
              "The future of AI is",
          ]
          sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
          outputs = llm.generate(prompts, sampling_params)
          # Compare against the prompt list instead of a literal count so the
          # check stays correct when prompts are added or removed.
          assert len(outputs) == len(prompts)
      finally:
          # Drop the variable as soon as the engine work is done, even when
          # the body above raised.
          monkeypatch.delenv("APHRODITE_USE_MODELSCOPE", raising=False)
  if __name__ == "__main__":
      import pytest

      # Run this file's tests when it is executed as a script, and propagate
      # pytest's exit code so a failing run does not exit with status 0.
      raise SystemExit(pytest.main([__file__]))