# __init__.py: RPC request types and related constants.
from dataclasses import dataclass
from enum import Enum, unique
from typing import Optional, Union

from aphrodite.common.sampling_params import SamplingParams
from aphrodite.inputs import PromptInputs
from aphrodite.lora.request import LoRARequest
from aphrodite.prompt_adapter.request import PromptAdapterRequest
  8. # Success string used for RPC instructions.
  9. APHRODITE_RPC_SUCCESS_STR = "SUCCESS"
  10. # Minimum value of ZMQ.SOCKET_LIMIT to run mp.
  11. APHRODITE_RPC_SOCKET_LIMIT_CUTOFF = 2000
  12. # HWM is set to Infinity.
  13. APHRODITE_RPC_ZMQ_HWM = 0
  14. @dataclass
  15. class RPCGenerateRequest:
  16. inputs: PromptInputs
  17. sampling_params: SamplingParams
  18. request_id: str
  19. lora_request: Optional[LoRARequest] = None
  20. prompt_adapter_request: Optional[PromptAdapterRequest] = None
  21. @dataclass
  22. class RPCAbortRequest:
  23. request_id: str
  24. class RPCUtilityRequest(Enum):
  25. IS_SERVER_READY = 1
  26. GET_MODEL_CONFIG = 2
  27. GET_DECODING_CONFIG = 3
  28. GET_PARALLEL_CONFIG = 4
  29. GET_SCHEDULER_CONFIG = 5
  30. GET_LORA_CONFIG = 6
  31. DO_LOG_STATS = 7
  32. IS_SERVER_HEALTHY = 8
  33. SHUTDOWN_SERVER = 9
  34. RPC_REQUEST_TYPE = Union[RPCGenerateRequest, RPCAbortRequest,
  35. RPCUtilityRequest]