from http import HTTPStatus import openai import pytest import requests from aphrodite.version import __version__ as APHRODITE_VERSION from ...utils import RemoteOpenAIServer MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" @pytest.fixture(scope="module") def server(): args = [ # use half precision for speed and memory savings in CI environment "--dtype", "bfloat16", "--max-model-len", "8192", "--enforce-eager", "--max-num-seqs", "128", ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: yield remote_server @pytest.fixture(scope="module") def client(server): return server.get_async_client() @pytest.mark.asyncio async def test_show_version(client: openai.AsyncOpenAI): base_url = str(client.base_url)[:-3].strip("/") response = requests.get(base_url + "/version") response.raise_for_status() assert response.json() == {"version": APHRODITE_VERSION} @pytest.mark.asyncio async def test_check_health(client: openai.AsyncOpenAI): base_url = str(client.base_url)[:-3].strip("/") response = requests.get(base_url + "/health") assert response.status_code == HTTPStatus.OK