interfaces.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. import enum
  2. from abc import ABC, abstractmethod
  3. from typing import List
  4. from typing import Sequence as GenericSequence
  5. from typing import Tuple
  6. from aphrodite.common.sequence import Sequence, SequenceGroup
  class AllocStatus(enum.Enum):
      """Result for BlockSpaceManager.can_allocate.

      1. OK: seq_group can be allocated now.
      2. LATER: seq_group cannot be allocated yet, although the capacity of
         the allocator is larger than what seq_group requires.
      3. NEVER: seq_group can never be allocated, because it is too large
         to be allocated in GPU.
      """

      # enum.auto() numbers the members 1, 2, 3 in declaration order.
      OK = enum.auto()
      LATER = enum.auto()
      NEVER = enum.auto()
  class BlockSpaceManager(ABC):
      """Abstract interface that every block space manager implements.

      Concrete implementations are looked up by version string through
      ``get_block_space_manager_class``. All other methods are abstract and
      must be overridden before a subclass can be instantiated.
      """

      @staticmethod
      def get_block_space_manager_class(version: str):
          """Return the block space manager class registered for *version*.

          Args:
              version: Implementation selector, compared case-insensitively.
                  Recognised values are "v1", "v2" and "embedding".

          Returns:
              The matching class (not an instance): ``BlockSpaceManagerV1``,
              ``BlockSpaceManagerV2`` or ``EmbeddingModelBlockSpaceManager``.

          Raises:
              ValueError: If *version* is not one of the recognised values.
          """
          version = version.lower()

          # Each implementation is imported inside its own branch, so only the
          # selected module is loaded.
          # NOTE(review): presumably this also avoids a circular import with
          # the implementation modules -- confirm.
          if version == "v1":
              from aphrodite.processing.block_manager_v1 import \
                  BlockSpaceManagerV1
              return BlockSpaceManagerV1

          if version == "v2":
              from aphrodite.processing.block_manager_v2 import \
                  BlockSpaceManagerV2
              return BlockSpaceManagerV2

          if version == "embedding":
              from aphrodite.processing.embedding_model_block_manager import \
                  EmbeddingModelBlockSpaceManager
              return EmbeddingModelBlockSpaceManager

          # The message reports the lower-cased value that was compared.
          raise ValueError(f"Unknown version {version=}")

      @abstractmethod
      def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
          """Report, as an AllocStatus, whether *seq_group* can be allocated."""

      @abstractmethod
      def allocate(self, seq_group: SequenceGroup) -> None:
          """Allocate space for *seq_group*."""

      @abstractmethod
      def can_append_slots(self, seq_group: SequenceGroup,
                           num_lookahead_slots: int) -> bool:
          """Report whether slots can be appended for *seq_group*.

          *num_lookahead_slots* is passed through to the implementation.
          """

      @abstractmethod
      def append_slots(
          self,
          seq: Sequence,
          num_lookahead_slots: int,
      ) -> List[Tuple[int, int]]:
          """Append slots for *seq* and return a list of ``(int, int)`` pairs.

          NOTE(review): the meaning of the returned pairs is defined by the
          implementations and is not visible here -- confirm before relying
          on it.
          """

      @abstractmethod
      def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
          """Fork *child_seq* from *parent_seq*."""

      @abstractmethod
      def can_swap_in(self, seq_group: SequenceGroup,
                      num_lookahead_slots: int) -> AllocStatus:
          """Report, as an AllocStatus, whether *seq_group* can be swapped in."""

      @abstractmethod
      def swap_in(self, seq_group: SequenceGroup,
                  num_lookahead_slots: int) -> List[Tuple[int, int]]:
          """Swap *seq_group* in and return a list of ``(int, int)`` pairs.

          NOTE(review): the pair semantics are implementation-defined --
          confirm.
          """

      @abstractmethod
      def can_swap_out(self, seq_group: SequenceGroup) -> bool:
          """Report whether *seq_group* can be swapped out."""

      @abstractmethod
      def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
          """Swap *seq_group* out and return a list of ``(int, int)`` pairs.

          NOTE(review): the pair semantics are implementation-defined --
          confirm.
          """

      @abstractmethod
      def free(self, seq: Sequence) -> None:
          """Free whatever the manager holds for *seq*."""

      @abstractmethod
      def get_block_table(self, seq: Sequence) -> List[int]:
          """Return the block table of *seq* as a list of ints."""

      @abstractmethod
      def get_num_free_gpu_blocks(self) -> int:
          """Return the number of free GPU blocks."""

      @abstractmethod
      def get_num_free_cpu_blocks(self) -> int:
          """Return the number of free CPU blocks."""

      @abstractmethod
      def access_all_blocks_in_seq(
          self,
          seq: Sequence,
          access_time: float,
      ) -> None:
          """Record an access to all blocks of *seq* at *access_time*.

          NOTE(review): the unit and clock of *access_time* are not visible
          here -- confirm against callers.
          """

      @abstractmethod
      def get_common_computed_block_ids(
              self, seqs: List[Sequence]) -> GenericSequence[int]:
          """Return the computed block ids common to *seqs*."""

      @abstractmethod
      def mark_blocks_as_computed(self, seq_group: SequenceGroup) -> None:
          """Mark the blocks of *seq_group* as computed."""