protocol.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. from typing import AsyncGenerator, List, Optional, Protocol, runtime_checkable
  2. from transformers import PreTrainedTokenizer
  3. from aphrodite.common.config import DecodingConfig, ModelConfig
  4. from aphrodite.common.outputs import EmbeddingRequestOutput, RequestOutput
  5. from aphrodite.common.pooling_params import PoolingParams
  6. from aphrodite.common.sampling_params import SamplingParams
  7. from aphrodite.inputs.data import PromptInputs
  8. from aphrodite.lora.request import LoRARequest
  9. from aphrodite.modeling.layers.sampler import SamplerOutput
  10. from aphrodite.processing.scheduler import SchedulerOutputs
  11. from aphrodite.prompt_adapter.request import PromptAdapterRequest
  12. @runtime_checkable
  13. class AsyncEngineClient(Protocol):
  14. """Protocol class for Clients to AsyncAphrodite"""
  15. @property
  16. def is_running(self) -> bool:
  17. ...
  18. @property
  19. def is_stopped(self) -> bool:
  20. ...
  21. @property
  22. def errored(self) -> bool:
  23. ...
  24. @property
  25. def limit_concurrency(self) -> Optional[int]:
  26. """Maximum number of concurrently running requests."""
  27. ...
  28. def generate(
  29. self,
  30. inputs: PromptInputs,
  31. sampling_params: SamplingParams,
  32. request_id: str,
  33. lora_request: Optional[LoRARequest] = None,
  34. prompt_adapter_request: Optional[PromptAdapterRequest] = None
  35. ) -> AsyncGenerator[RequestOutput, None]:
  36. """Generates outputs for a request"""
  37. ...
  38. def encode(
  39. self,
  40. inputs: PromptInputs,
  41. pooling_params: PoolingParams,
  42. request_id: str,
  43. lora_request: Optional[LoRARequest] = None,
  44. ) -> AsyncGenerator[EmbeddingRequestOutput, None]:
  45. """Generate outputs for a request from an embedding model."""
  46. ...
  47. async def abort(self, request_id: str) -> None:
  48. """Abort a request.
  49. Args:
  50. request_id: The unique id of the request.
  51. """
  52. ...
  53. async def get_model_config(self) -> ModelConfig:
  54. """Get the model configuration of the Aphrodite engine."""
  55. ...
  56. async def get_decoding_config(self) -> DecodingConfig:
  57. """Get the decoding configuration of the Aphrodite engine."""
  58. ...
  59. async def get_tokenizer(
  60. self,
  61. lora_request: Optional[LoRARequest] = None,
  62. ) -> PreTrainedTokenizer:
  63. """Get the appropriate Tokenizer for the request"""
  64. ...
  65. async def do_log_stats(
  66. self,
  67. scheduler_outputs: Optional[SchedulerOutputs] = None,
  68. model_output: Optional[List[SamplerOutput]] = None,
  69. ) -> None:
  70. pass
  71. async def check_health(self) -> None:
  72. """Raise if unhealthy"""