- """Compare the outputs of HF and Aphrodite when using greedy sampling.
- This test only tests small models. Big models such as 7B should be tested from
- test_big_models.py because it could use a larger instance to run tests.
- Run `pytest tests/models/test_models.py`.
- """
- import pytest
- from .utils import check_outputs_equal
MODELS = [
    "facebook/opt-125m",
    "gpt2",
    "bigcode/tiny_starcoder_py",
    "EleutherAI/pythia-70m",
    "bigscience/bloom-560m",  # Testing alibi slopes.
    "microsoft/phi-2",
    "stabilityai/stablelm-3b-4e1t",
    # "allenai/OLMo-1B",  # Broken
    "bigcode/starcoder2-3b",
    "google/gemma-1.1-2b-it",
]


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
@pytest.mark.parametrize("max_tokens", [96])
def test_models(
    hf_runner,
    aphrodite_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    # To pass the small model tests, we need full precision.
    assert dtype == "float"

    with hf_runner(model, dtype=dtype) as hf_model:
        hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)

    with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
        aphrodite_outputs = aphrodite_model.generate_greedy(
            example_prompts, max_tokens)

    check_outputs_equal(
        outputs_0_lst=hf_outputs,
        outputs_1_lst=aphrodite_outputs,
        name_0="hf",
        name_1="aphrodite",
    )
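
# Note on the comparison above: greedy (argmax) decoding is deterministic, so
# with identical prompts and full (float) precision the HF and Aphrodite runs
# are expected to match token for token; any mismatch reported by
# `check_outputs_equal` therefore points at an implementation difference
# rather than sampling noise. (The exact structure of the compared outputs is
# defined in `.utils.check_outputs_equal`, not here.)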


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_model_print(
    aphrodite_runner,
    model: str,
    dtype: str,
) -> None:
    with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
        # This test verifies that the model's extra_repr can be
        # printed correctly.
        print(aphrodite_model.model.llm_engine.model_executor.driver_worker.
              model_runner.model)
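
# Example invocations (assuming the repository's usual test layout; pytest's
# `-k` option filters on substrings of the parametrized test IDs):
#   pytest tests/models/test_models.py -k opt
#   pytest tests/models/test_models.py::test_model_print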