- """Compare the outputs of a AQLM model between Aphrodite and HF Transformers
- Run `pytest tests/models/test_aqlm.py`.
- """
- import pytest
- from tests.quantization.utils import is_quant_method_supported
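
# The prompts and the expected generations are hardcoded below, so the test
# does not need HF Transformers to produce reference outputs at run time.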
example_prompts = [
    'Aphrodite is a high-throughput and memory-efficient inference and serving '
    'engine for LLMs.\n',
    'Briefly describe the major milestones in the development of artificial '
    'intelligence from 1950 to 2020.\n',
    'Compare and contrast artificial intelligence with human intelligence in '
    'terms of processing information.\n',
    'Describe the basic components of a neural network and how it can be '
    'trained.\n',
    'Write a short story about a robot that dreams for the first time.\n',
    'Analyze the impact of the COVID-19 pandemic on global economic structures '
    'and future business models.\n',
    'Explain the cultural significance of the Mona Lisa painting, and how its '
    'perception might vary in Western versus Eastern societies.\n',
    "Translate the following English sentence into Japanese, French, and "
    "Swahili: 'The early bird catches the worm.'\n"
]

ground_truth_generations = [
    '\n### Features\n\n- **High-throughput**: v',
    'The major milestones in the development of artificial intelligence from '
    '195',
    'Compare and contrast artificial intelligence with human intelligence in '
    'terms of processing information. The',
    'Explain the difference between supervised and unsupervised learning.'
    '\nExplain',
    'Write a short story about a robot that dreams for the first time. The',
    'Analyze the impact of the COVID-19 pandemic on global economic',
    'The Mona Lisa is a painting by Leonardo da Vinci, and it',
    'The early bird catches the worm.\nThe early bird catches the'
]
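
# Each ground-truth string corresponds positionally to the prompt at the same
# index in example_prompts; the test compares them one-to-one below.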


@pytest.mark.skipif(not is_quant_method_supported("aqlm"),
                    reason="AQLM is not supported on this GPU type.")
@pytest.mark.parametrize("model", ["ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf"])
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [16])
@pytest.mark.parametrize("num_logprobs", [1])
def test_models(
    aphrodite_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
) -> None:
    with aphrodite_runner(model, dtype=dtype) as aphrodite_model:
        aphrodite_outputs = aphrodite_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)

    # Compare each Aphrodite generation against its hardcoded ground truth.
    for prompt_idx in range(len(example_prompts)):
        aphrodite_output_ids, aphrodite_output_str, aphrodite_logprobs = (
            aphrodite_outputs[prompt_idx])
        print("Prompt:          ", repr(example_prompts[prompt_idx]))
        print("Reference output:", repr(ground_truth_generations[prompt_idx]))
        print("Generated output:", repr(aphrodite_output_str))
        assert aphrodite_output_str == ground_truth_generations[prompt_idx]
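
# A sketch of how the hardcoded ground truths above could be regenerated,
# assuming the same aphrodite_runner fixture semantics (hypothetical helper
# session, not part of the test):
#
#     with aphrodite_runner("ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf",
#                           dtype="half") as m:
#         outputs = m.generate_greedy_logprobs(example_prompts, 16, 1)
#         for _ids, text, _logprobs in outputs:
#             print(repr(text))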