from typing import Dict, List

import openai
import pytest

from aphrodite.multimodal.utils import encode_image_base64, fetch_image

from ...utils import APHRODITE_PATH, RemoteOpenAIServer

MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
LLAVA_CHAT_TEMPLATE = APHRODITE_PATH / "examples/chat_templates/llava.jinja"
assert LLAVA_CHAT_TEMPLATE.exists()

# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
    "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
]


@pytest.fixture(scope="module")
def server():
    args = [
        "--dtype",
        "bfloat16",
        "--max-model-len",
        "4096",
        "--enforce-eager",
        "--chat-template",
        str(LLAVA_CHAT_TEMPLATE),
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def client(server):
    return server.get_async_client()


@pytest.fixture(scope="session")
def base64_encoded_image() -> Dict[str, str]:
    # Fetch and base64-encode each test image once per session.
    return {
        image_url: encode_image_base64(fetch_image(image_url))
        for image_url in TEST_IMAGE_URLS
    }


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image(client: openai.AsyncOpenAI,
                                         model_name: str, image_url: str):
    messages = [{
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            },
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]

    # test single completion
    chat_completion = await client.chat.completions.create(model=model_name,
                                                           messages=messages,
                                                           max_tokens=10,
                                                           logprobs=True,
                                                           top_logprobs=5)
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    assert chat_completion.usage == openai.types.CompletionUsage(
        completion_tokens=10, prompt_tokens=596, total_tokens=606)

    message = choice.message
    assert message.content is not None and len(message.content) >= 10
    assert message.role == "assistant"
    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=10,
    )
    message = chat_completion.choices[0].message
    assert message.content is not None and len(message.content) >= 0


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_single_chat_session_image_base64encoded(
        client: openai.AsyncOpenAI, model_name: str, image_url: str,
        base64_encoded_image: Dict[str, str]):
    messages = [{
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url":
                    f"data:image/jpeg;base64,{base64_encoded_image[image_url]}"
                }
            },
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]

    # test single completion
    chat_completion = await client.chat.completions.create(model=model_name,
                                                           messages=messages,
                                                           max_tokens=10,
                                                           logprobs=True,
                                                           top_logprobs=5)
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    assert chat_completion.usage == openai.types.CompletionUsage(
        completion_tokens=10, prompt_tokens=596, total_tokens=606)

    message = choice.message
    assert message.content is not None and len(message.content) >= 10
    assert message.role == "assistant"
    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=10,
    )
    message = chat_completion.choices[0].message
    assert message.content is not None and len(message.content) >= 0


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_chat_streaming_image(client: openai.AsyncOpenAI,
                                    model_name: str, image_url: str):
    messages = [{
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            },
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]

    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=10,
        temperature=0.0,
    )
    output = chat_completion.choices[0].message.content
    stop_reason = chat_completion.choices[0].finish_reason

    # test streaming
    stream = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=10,
        temperature=0.0,
        stream=True,
    )
    chunks: List[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.role:
            assert delta.role == "assistant"
        if delta.content:
            chunks.append(delta.content)
        if chunk.choices[0].finish_reason is not None:
            finish_reason_count += 1
    # finish reason should only return in the last block
    assert finish_reason_count == 1
    assert chunk.choices[0].finish_reason == stop_reason
    assert delta.content
    # the streamed chunks should reassemble into the non-streamed output
    assert "".join(chunks) == output


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
                                 image_url: str):
    messages = [{
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": image_url
                }
            },
            {
                "type": "text",
                "text": "What's in this image?"
            },
        ],
    }]

    # test multi-image input: the server is expected to reject more than
    # one image per prompt
    with pytest.raises(openai.BadRequestError):
        await client.chat.completions.create(
            model=model_name,
            messages=messages,
            max_tokens=10,
            temperature=0.0,
        )

    # the server should still work afterwards
    completion = await client.completions.create(
        model=model_name,
        prompt=[0, 0, 0, 0, 0],
        max_tokens=5,
        temperature=0.0,
    )
    completion = completion.choices[0].text
    assert completion is not None and len(completion) >= 0