test_mp_api_server.py

import time

import pytest

from aphrodite.common.utils import FlexibleArgumentParser
from aphrodite.endpoints.openai.api_server import build_async_engine_client
from aphrodite.endpoints.openai.args import make_arg_parser


@pytest.mark.asyncio
async def test_mp_crash_detection():
    parser = FlexibleArgumentParser(
        description="Aphrodite's remote OpenAI server.")
    parser = make_arg_parser(parser)
    args = parser.parse_args([])
    # use an invalid tensor_parallel_size to trigger the
    # error in the server
    args.tensor_parallel_size = 65536

    start = time.perf_counter()
    async with build_async_engine_client(args):
        pass
    end = time.perf_counter()

    assert end - start < 60, ("Expected Aphrodite to gracefully shutdown in "
                              "<60s if there is an error in the startup.")


@pytest.mark.asyncio
async def test_mp_cuda_init():
    # it should not crash, when cuda is initialized
    # in the API server process
    import torch
    torch.cuda.init()

    parser = FlexibleArgumentParser(
        description="Aphrodite's remote OpenAI server.")
    parser = make_arg_parser(parser)
    args = parser.parse_args([])

    async with build_async_engine_client(args):
        pass
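
Both tests are async, so running this file presumably requires the pytest-asyncio plugin in addition to pytest itself (assumption: the project's pytest configuration enables the asyncio marker). A minimal sketch of invoking just this file through pytest's Python API, equivalent to running `pytest -q test_mp_api_server.py` from the shell:

# Minimal sketch (assumption): run only this test file via pytest's Python API.
# Requires pytest and the pytest-asyncio plugin to be installed.
import sys
import pytest

if __name__ == "__main__":
    sys.exit(pytest.main(["-q", "test_mp_api_server.py"]))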