interfaces.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. import enum
  2. from abc import ABC, abstractmethod
  3. from typing import List
  4. from typing import Sequence as GenericSequence
  5. from typing import Tuple
  6. from aphrodite.common.sequence import Sequence, SequenceGroup
  7. class AllocStatus(enum.Enum):
  8. """Result for BlockSpaceManager.can_allocate
  9. 1. Ok: seq_group can be allocated now.
  10. 2. Later: seq_group cannot be allocated.
  11. The capacity of allocator is larger than seq_group required.
  12. 3. Never: seq_group can never be allocated.
  13. The seq_group is too large to allocated in GPU.
  14. """
  15. OK = enum.auto()
  16. LATER = enum.auto()
  17. NEVER = enum.auto()
  18. class BlockSpaceManager(ABC):
  19. @staticmethod
  20. def get_block_space_manager_class(version: str):
  21. version = version.lower()
  22. if version == "v1":
  23. from aphrodite.processing.block_manager_v1 import (
  24. BlockSpaceManagerV1)
  25. return BlockSpaceManagerV1
  26. if version == "v2":
  27. from aphrodite.processing.block_manager_v2 import (
  28. BlockSpaceManagerV2)
  29. return BlockSpaceManagerV2
  30. if version == "placeholder":
  31. from aphrodite.processing.placeholder_block_space_manager import (
  32. PlaceholderBlockSpaceManager)
  33. return PlaceholderBlockSpaceManager
  34. raise ValueError(f"Unknown version {version=}")
  35. @abstractmethod
  36. def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
  37. pass
  38. @abstractmethod
  39. def allocate(self, seq_group: SequenceGroup) -> None:
  40. pass
  41. @abstractmethod
  42. def can_append_slots(self, seq_group: SequenceGroup,
  43. num_lookahead_slots: int) -> bool:
  44. pass
  45. @abstractmethod
  46. def append_slots(
  47. self,
  48. seq: Sequence,
  49. num_lookahead_slots: int,
  50. ) -> List[Tuple[int, int]]:
  51. pass
  52. @abstractmethod
  53. def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
  54. pass
  55. @abstractmethod
  56. def can_swap_in(self, seq_group: SequenceGroup,
  57. num_lookahead_slots: int) -> AllocStatus:
  58. pass
  59. @abstractmethod
  60. def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
  61. pass
  62. @abstractmethod
  63. def can_swap_out(self, seq_group: SequenceGroup) -> bool:
  64. pass
  65. @abstractmethod
  66. def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
  67. pass
  68. @abstractmethod
  69. def free(self, seq: Sequence) -> None:
  70. pass
  71. @abstractmethod
  72. def get_block_table(self, seq: Sequence) -> List[int]:
  73. pass
  74. @abstractmethod
  75. def get_num_free_gpu_blocks(self) -> int:
  76. pass
  77. @abstractmethod
  78. def get_num_free_cpu_blocks(self) -> int:
  79. pass
  80. @abstractmethod
  81. def access_all_blocks_in_seq(
  82. self,
  83. seq: Sequence,
  84. access_time: float,
  85. ) -> None:
  86. pass
  87. @abstractmethod
  88. def get_common_computed_block_ids(
  89. self, seqs: List[Sequence]) -> GenericSequence[int]:
  90. pass
  91. @abstractmethod
  92. def mark_blocks_as_computed(self, seq_group: SequenceGroup):
  93. pass