# __init__.py (1.2 KB)
from dataclasses import dataclass
from enum import Enum, unique
from typing import Optional, Union

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.inputs import PromptInputs
from aphrodite.lora.request import LoRARequest
from aphrodite.prompt_adapter.request import PromptAdapterRequest

  8. # Success string used for RPC instructions.
  9. APHRODITE_RPC_SUCCESS_STR = "SUCCESS"
  10. # Timeouts.
  11. APHRODITE_RPC_SERVER_START_TIMEOUT_MS = 1000
  12. APHRODITE_RPC_HEALTH_TIMEOUT_MS = 10000
  13. # Minimum value of ZMQ.SOCKET_LIMIT to run mp.
  14. APHRODITE_RPC_SOCKET_LIMIT_CUTOFF = 2000
  15. # HWM is set to Infinity.
  16. APHRODITE_RPC_ZMQ_HWM = 0
  17. @dataclass
  18. class RPCGenerateRequest:
  19. inputs: PromptInputs
  20. sampling_params: SamplingParams
  21. request_id: str
  22. lora_request: Optional[LoRARequest] = None
  23. prompt_adapter_request: Optional[PromptAdapterRequest] = None
  24. @dataclass
  25. class RPCAbortRequest:
  26. request_id: str
  27. class RPCUtilityRequest(Enum):
  28. IS_SERVER_READY = 1
  29. GET_MODEL_CONFIG = 2
  30. GET_DECODING_CONFIG = 3
  31. GET_PARALLEL_CONFIG = 4
  32. GET_SCHEDULER_CONFIG = 5
  33. GET_LORA_CONFIG = 6
  34. DO_LOG_STATS = 7
  35. IS_SERVER_HEALTHY = 8
  36. RPC_REQUEST_TYPE = Union[RPCGenerateRequest, RPCAbortRequest,
  37. RPCUtilityRequest]