test_mp_api_server.py

import pytest

from aphrodite.common.utils import FlexibleArgumentParser
from aphrodite.endpoints.openai.api_server import build_async_engine_client
from aphrodite.endpoints.openai.args import make_arg_parser


@pytest.mark.asyncio
async def test_mp_crash_detection():

    with pytest.raises(RuntimeError) as excinfo:
        parser = FlexibleArgumentParser(
            description="Aphrodite's remote OpenAI server.")
        parser = make_arg_parser(parser)
        args = parser.parse_args([])
        # use an invalid tensor_parallel_size to trigger the
        # error in the server
        args.tensor_parallel_size = 65536

        async with build_async_engine_client(args):
            pass

    assert "The server process died before responding to the readiness probe"\
        in str(excinfo.value)


@pytest.mark.asyncio
async def test_mp_cuda_init():
    # it should not crash, when cuda is initialized
    # in the API server process
    import torch
    torch.cuda.init()

    parser = FlexibleArgumentParser(
        description="Aphrodite's remote OpenAI server.")
    parser = make_arg_parser(parser)
    args = parser.parse_args([])

    async with build_async_engine_client(args):
        pass