# test_openapi_server.py

import openai  # use the official client for correctness check
import pytest

from ..utils import APHRODITE_PATH, RemoteOpenAIServer

# any model with a chat template should work here
MODEL_NAME = "facebook/opt-125m"
chatml_jinja_path = APHRODITE_PATH / "examples/chat_templates/chatml.jinja"
assert chatml_jinja_path.exists()


@pytest.fixture(scope="module")
def server():
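    # NOTE: RemoteOpenAIServer (from tests/utils) is assumed to launch the
    # OpenAI-compatible API server in a subprocess with these CLI args and
    # block until it accepts connections; module scope means one server is
    # shared by every test in this file.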
    args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "float16",
        "--max-model-len",
        "2048",
        "--enforce-eager",
        "--chat-template",
        str(chatml_jinja_path),
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def client(server):
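    # depending on the server fixture guarantees the server is up before any
    # request is made; get_async_client() is assumed to return an
    # openai.AsyncOpenAI instance pointed at the local server's base URL.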
    return server.get_async_client()


@pytest.mark.asyncio
async def test_check_models(client: openai.AsyncOpenAI):
    models = await client.models.list()
    models = models.data
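    # `root` is expected to name the underlying model that a served model id
    # points at; with a single served model, both fields equal MODEL_NAME.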
    served_model = models[0]
    assert served_model.id == MODEL_NAME
    assert all(model.root == MODEL_NAME for model in models)


@pytest.mark.asyncio
async def test_single_completion(client: openai.AsyncOpenAI):
    completion = await client.completions.create(model=MODEL_NAME,
                                                 prompt="Hello, my name is",
                                                 max_tokens=5,
                                                 temperature=0.0)

    assert completion.id is not None
    assert len(completion.choices) == 1
    assert len(completion.choices[0].text) >= 5
    assert completion.choices[0].finish_reason == "length"
    assert completion.usage == openai.types.CompletionUsage(
        completion_tokens=5, prompt_tokens=6, total_tokens=11)
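    # note: prompt_tokens=6 (presumably BOS + the 5 BPE tokens of the prompt)
    # is specific to OPT's tokenizer; swapping MODEL_NAME would break the
    # usage assertion above.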

    # test using token IDs
    completion = await client.completions.create(
        model=MODEL_NAME,
        prompt=[0, 0, 0, 0, 0],
        max_tokens=5,
        temperature=0.0,
    )
    assert len(completion.choices[0].text) >= 5
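    # (the token IDs themselves are arbitrary; this only checks that the
    # server accepts pre-tokenized prompts, not what they decode to)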


@pytest.mark.asyncio
async def test_single_chat_session(client: openai.AsyncOpenAI):
    messages = [{
        "role": "system",
        "content": "you are a helpful assistant"
    }, {
        "role": "user",
        "content": "what is 1+1?"
    }]

    # test single completion
    chat_completion = await client.chat.completions.create(model=MODEL_NAME,
                                                           messages=messages,
                                                           max_tokens=10,
                                                           logprobs=True,
                                                           top_logprobs=5)
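    # logprobs=True with top_logprobs=5 requests the log probability of each
    # sampled token plus its five most likely alternatives at every position,
    # mirroring the OpenAI chat API semantics.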
    assert chat_completion.id is not None
    assert len(chat_completion.choices) == 1

    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
    assert chat_completion.usage == openai.types.CompletionUsage(
        completion_tokens=10, prompt_tokens=55, total_tokens=65)
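    # prompt_tokens here depends on how the ChatML template renders the two
    # messages; 55 is specific to this template/tokenizer pairing.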

    message = choice.message
    assert message.content is not None and len(message.content) >= 10
    assert message.role == "assistant"
    messages.append({"role": "assistant", "content": message.content})

    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    chat_completion = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        max_tokens=10,
    )
    message = chat_completion.choices[0].message
    assert message.content is not None and len(message.content) >= 0
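    # (len(message.content) >= 0 is trivially true once content is not None;
    # the meaningful check is that the second turn produced a message at all)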