1
0

interfaces.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import enum
  2. from abc import ABC, abstractmethod
  3. from typing import Dict, List
  4. from typing import Sequence as GenericSequence
  5. from typing import Tuple
  6. from aphrodite.common.sequence import Sequence, SequenceGroup
  7. class AllocStatus(enum.Enum):
  8. """Result for BlockSpaceManager.can_allocate
  9. 1. Ok: seq_group can be allocated now.
  10. 2. Later: seq_group cannot be allocated.
  11. The capacity of allocator is larger than seq_group required.
  12. 3. Never: seq_group can never be allocated.
  13. The seq_group is too large to allocated in GPU.
  14. """
  15. OK = enum.auto()
  16. LATER = enum.auto()
  17. NEVER = enum.auto()
  18. class BlockSpaceManager(ABC):
  19. @staticmethod
  20. def get_block_space_manager_class(version: str):
  21. version = version.lower()
  22. if version == "v1":
  23. from aphrodite.processing.block_manager_v1 import \
  24. BlockSpaceManagerV1
  25. return BlockSpaceManagerV1
  26. if version == "v2":
  27. from aphrodite.processing.block_manager_v2 import \
  28. BlockSpaceManagerV2
  29. return BlockSpaceManagerV2
  30. raise ValueError(f"Unknown version {version=}")
  31. @abstractmethod
  32. def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
  33. pass
  34. @abstractmethod
  35. def allocate(self, seq_group: SequenceGroup) -> None:
  36. pass
  37. @abstractmethod
  38. def can_append_slots(self, seq_group: SequenceGroup,
  39. num_lookahead_slots: int) -> bool:
  40. pass
  41. @abstractmethod
  42. def append_slots(
  43. self,
  44. seq: Sequence,
  45. num_lookahead_slots: int,
  46. ) -> List[Tuple[int, int]]:
  47. pass
  48. @abstractmethod
  49. def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
  50. pass
  51. @abstractmethod
  52. def can_swap_in(self, seq_group: SequenceGroup,
  53. num_lookahead_slots: int) -> AllocStatus:
  54. pass
  55. @abstractmethod
  56. def swap_in(self, seq_group: SequenceGroup,
  57. num_lookahead_slots: int) -> Dict[int, int]:
  58. pass
  59. @abstractmethod
  60. def can_swap_out(self, seq_group: SequenceGroup) -> bool:
  61. pass
  62. @abstractmethod
  63. def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
  64. pass
  65. @abstractmethod
  66. def free(self, seq: Sequence) -> None:
  67. pass
  68. @abstractmethod
  69. def get_block_table(self, seq: Sequence) -> List[int]:
  70. pass
  71. @abstractmethod
  72. def get_num_free_gpu_blocks(self) -> int:
  73. pass
  74. @abstractmethod
  75. def get_num_free_cpu_blocks(self) -> int:
  76. pass
  77. @abstractmethod
  78. def access_all_blocks_in_seq(
  79. self,
  80. seq: Sequence,
  81. access_time: float,
  82. ) -> None:
  83. pass
  84. @abstractmethod
  85. def get_common_computed_block_ids(
  86. self, seqs: List[Sequence]) -> GenericSequence[int]:
  87. pass
  88. @abstractmethod
  89. def mark_blocks_as_computed(self, seq_group: SequenceGroup):
  90. pass