# interfaces.py (2.9 KB)
  1. import enum
  2. from abc import ABC, abstractmethod
  3. from typing import Dict, List
  4. from typing import Sequence as GenericSequence
  5. from aphrodite.common.sequence import Sequence, SequenceGroup
  6. class AllocStatus(enum.Enum):
  7. """Result for BlockSpaceManager.can_allocate
  8. 1. Ok: seq_group can be allocated now.
  9. 2. Later: seq_group cannot be allocated.
  10. The capacity of allocator is larger than seq_group required.
  11. 3. Never: seq_group can never be allocated.
  12. The seq_group is too large to allocated in GPU.
  13. """
  14. OK = enum.auto()
  15. LATER = enum.auto()
  16. NEVER = enum.auto()
  17. class BlockSpaceManager(ABC):
  18. @staticmethod
  19. def get_block_space_manager_class(version: str):
  20. version = version.lower()
  21. if version == "v1":
  22. from aphrodite.processing.block_manager_v1 import \
  23. BlockSpaceManagerV1
  24. return BlockSpaceManagerV1
  25. if version == "v2":
  26. from aphrodite.processing.block_manager_v2 import \
  27. BlockSpaceManagerV2
  28. return BlockSpaceManagerV2
  29. raise ValueError(f"Unknown version {version=}")
  30. @abstractmethod
  31. def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
  32. pass
  33. @abstractmethod
  34. def allocate(self, seq_group: SequenceGroup) -> None:
  35. pass
  36. @abstractmethod
  37. def can_append_slots(self, seq_group: SequenceGroup,
  38. num_lookahead_slots: int) -> bool:
  39. pass
  40. @abstractmethod
  41. def append_slots(
  42. self,
  43. seq: Sequence,
  44. num_lookahead_slots: int,
  45. ) -> Dict[int, List[int]]:
  46. pass
  47. @abstractmethod
  48. def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
  49. pass
  50. @abstractmethod
  51. def can_swap_in(self, seq_group: SequenceGroup,
  52. num_lookahead_slots: int) -> AllocStatus:
  53. pass
  54. @abstractmethod
  55. def swap_in(self, seq_group: SequenceGroup,
  56. num_lookahead_slots: int) -> Dict[int, int]:
  57. pass
  58. @abstractmethod
  59. def can_swap_out(self, seq_group: SequenceGroup) -> bool:
  60. pass
  61. @abstractmethod
  62. def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
  63. pass
  64. @abstractmethod
  65. def free(self, seq: Sequence) -> None:
  66. pass
  67. @abstractmethod
  68. def get_block_table(self, seq: Sequence) -> List[int]:
  69. pass
  70. @abstractmethod
  71. def get_num_free_gpu_blocks(self) -> int:
  72. pass
  73. @abstractmethod
  74. def get_num_free_cpu_blocks(self) -> int:
  75. pass
  76. @abstractmethod
  77. def access_all_blocks_in_seq(
  78. self,
  79. seq: Sequence,
  80. access_time: float,
  81. ) -> None:
  82. pass
  83. @abstractmethod
  84. def get_common_computed_block_ids(
  85. self, seqs: List[Sequence]) -> GenericSequence[int]:
  86. pass
  87. @abstractmethod
  88. def mark_blocks_as_computed(self, seq_group: SequenceGroup):
  89. pass