# test_openapi_server_ray.py (3.7 KB)
  1. import openai # use the official client for correctness check
  2. import pytest
  3. from ..utils import APHRODITE_PATH, RemoteOpenAIServer
  4. # any model with a chat template should work here
  5. MODEL_NAME = "facebook/opt-125m"
  6. chatml_jinja_path = APHRODITE_PATH / "examples/chat_templates/chatml.jinja"
  7. assert chatml_jinja_path.exists()
  8. @pytest.fixture(scope="module")
  9. def server():
  10. args = [
  11. # use half precision for speed and memory savings in CI environment
  12. "--dtype",
  13. "float16",
  14. "--max-model-len",
  15. "2048",
  16. "--enforce-eager",
  17. "--engine-use-ray",
  18. "--chat-template",
  19. str(chatml_jinja_path),
  20. ]
  21. # Allow `--engine-use-ray`, otherwise the launch of the server throw
  22. # an error due to try to use a deprecated feature
  23. env_dict = {"APHRODITE_ALLOW_ENGINE_USE_RAY": "1"}
  24. with RemoteOpenAIServer(MODEL_NAME, args,
  25. env_dict=env_dict) as remote_server:
  26. yield remote_server
  27. @pytest.fixture(scope="module")
  28. def client(server):
  29. return server.get_async_client()
  30. @pytest.mark.asyncio
  31. async def test_check_models(client: openai.AsyncOpenAI):
  32. models = await client.models.list()
  33. models = models.data
  34. served_model = models[0]
  35. assert served_model.id == MODEL_NAME
  36. assert all(model.root == MODEL_NAME for model in models)
  37. @pytest.mark.asyncio
  38. async def test_single_completion(client: openai.AsyncOpenAI):
  39. completion = await client.completions.create(model=MODEL_NAME,
  40. prompt="Hello, my name is",
  41. max_tokens=5,
  42. temperature=0.0)
  43. assert completion.id is not None
  44. assert len(completion.choices) == 1
  45. assert len(completion.choices[0].text) >= 5
  46. assert completion.choices[0].finish_reason == "length"
  47. assert completion.usage == openai.types.CompletionUsage(
  48. completion_tokens=5, prompt_tokens=6, total_tokens=11)
  49. # test using token IDs
  50. completion = await client.completions.create(
  51. model=MODEL_NAME,
  52. prompt=[0, 0, 0, 0, 0],
  53. max_tokens=5,
  54. temperature=0.0,
  55. )
  56. assert len(completion.choices[0].text) >= 5
  57. @pytest.mark.asyncio
  58. async def test_single_chat_session(client: openai.AsyncOpenAI):
  59. messages = [{
  60. "role": "system",
  61. "content": "you are a helpful assistant"
  62. }, {
  63. "role": "user",
  64. "content": "what is 1+1?"
  65. }]
  66. # test single completion
  67. chat_completion = await client.chat.completions.create(model=MODEL_NAME,
  68. messages=messages,
  69. max_tokens=10,
  70. logprobs=True,
  71. top_logprobs=5)
  72. assert chat_completion.id is not None
  73. assert len(chat_completion.choices) == 1
  74. choice = chat_completion.choices[0]
  75. assert choice.finish_reason == "length"
  76. assert chat_completion.usage == openai.types.CompletionUsage(
  77. completion_tokens=10, prompt_tokens=55, total_tokens=65)
  78. message = choice.message
  79. assert message.content is not None and len(message.content) >= 10
  80. assert message.role == "assistant"
  81. messages.append({"role": "assistant", "content": message.content})
  82. # test multi-turn dialogue
  83. messages.append({"role": "user", "content": "express your result in json"})
  84. chat_completion = await client.chat.completions.create(
  85. model=MODEL_NAME,
  86. messages=messages,
  87. max_tokens=10,
  88. )
  89. message = chat_completion.choices[0].message
  90. assert message.content is not None and len(message.content) >= 0