test_shutdown.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import json
  2. import os
  3. import openai
  4. import pytest
  5. from ...utils import RemoteOpenAIServer
  # Model identifier handed to RemoteOpenAIServer by the shutdown test in this file.
  6. MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
  @pytest.mark.asyncio
  async def test_shutdown_on_engine_failure(tmp_path):
      """Verify the server process exits after its engine has crashed.

      A fake LoRA adapter directory is written under ``tmp_path`` and
      registered with the server as ``bad-adapter``. Requesting a completion
      through that adapter is expected to crash the engine, surfacing to the
      client as ``openai.APIConnectionError``; the server process must then
      terminate on its own.

      NOTE: this relies on the bad adapter crashing the engine, so the test
      will start failing once that underlying bug is fixed.
      """
      # Build an adapter directory whose contents are deliberately bogus.
      bad_adapter_dir = tmp_path / "bad_adapter"
      os.mkdir(bad_adapter_dir)
      config_file = bad_adapter_dir / "adapter_model_config.json"
      config_file.write_text(json.dumps({"not": "real"}))
      weights_file = bad_adapter_dir / "adapter_model.safetensors"
      weights_file.write_bytes(b"this is fake")

      # dtype, max-len etc. are chosen so that this can run in CI.
      server_args = [
          "--dtype", "bfloat16",
          "--max-model-len", "8192",
          "--enforce-eager",
          "--max-num-seqs", "128",
          "--enable-lora",
          "--lora-modules", f"bad-adapter={tmp_path / 'bad_adapter'}",
      ]

      with RemoteOpenAIServer(MODEL_NAME, server_args) as server:
          api_client = server.get_async_client()

          # Routing a request through the bad adapter crashes the engine.
          with pytest.raises(openai.APIConnectionError):
              await api_client.completions.create(
                  model="bad-adapter",
                  prompt="Hello, my name is",
              )

          # With the engine gone, the server process should shut down.
          # (assumes proc is a subprocess.Popen-like handle — TODO confirm)
          exit_code = server.proc.wait(timeout=1)
          assert exit_code is not None