interfaces.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. import enum
  2. from abc import ABC, abstractmethod
  3. from typing import Dict, List
  4. from aphrodite.common.sequence import Sequence, SequenceGroup
  5. class AllocStatus(enum.Enum):
  6. """Result for BlockSpaceManager.can_allocate
  7. 1. Ok: seq_group can be allocated now.
  8. 2. Later: seq_group cannot be allocated.
  9. The capacity of allocator is larger than seq_group required.
  10. 3. Never: seq_group can never be allocated.
  11. The seq_group is too large to allocated in GPU.
  12. """
  13. OK = enum.auto()
  14. LATER = enum.auto()
  15. NEVER = enum.auto()
  16. class BlockSpaceManager(ABC):
  17. @staticmethod
  18. def get_block_space_manager_class(version: str):
  19. version = version.lower()
  20. if version == "v1":
  21. from aphrodite.processing.block_manager_v1 import BlockSpaceManagerV1 # noqa: E501
  22. return BlockSpaceManagerV1
  23. if version == "v2":
  24. from aphrodite.processing.block_manager_v2 import BlockSpaceManagerV2 # noqa: E501
  25. return BlockSpaceManagerV2
  26. raise ValueError(f"Unknown version {version=}")
  27. @abstractmethod
  28. def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
  29. pass
  30. @abstractmethod
  31. def allocate(self, seq_group: SequenceGroup) -> None:
  32. pass
  33. @abstractmethod
  34. def can_append_slots(self, seq_group: SequenceGroup,
  35. num_lookahead_slots: int) -> bool:
  36. pass
  37. @abstractmethod
  38. def append_slots(
  39. self,
  40. seq: Sequence,
  41. num_lookahead_slots: int,
  42. ) -> Dict[int, List[int]]:
  43. pass
  44. @abstractmethod
  45. def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
  46. pass
  47. @abstractmethod
  48. def can_swap_in(self, seq_group: SequenceGroup,
  49. num_lookahead_slots: int) -> bool:
  50. pass
  51. @abstractmethod
  52. def swap_in(self, seq_group: SequenceGroup,
  53. num_lookahead_slots: int) -> Dict[int, int]:
  54. pass
  55. @abstractmethod
  56. def can_swap_out(self, seq_group: SequenceGroup) -> bool:
  57. pass
  58. @abstractmethod
  59. def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
  60. pass
  61. @abstractmethod
  62. def free(self, seq: Sequence) -> None:
  63. pass
  64. @abstractmethod
  65. def get_block_table(self, seq: Sequence) -> List[int]:
  66. pass
  67. @abstractmethod
  68. def get_num_free_gpu_blocks(self) -> int:
  69. pass
  70. @abstractmethod
  71. def get_num_free_cpu_blocks(self) -> int:
  72. pass
  73. @abstractmethod
  74. def access_all_blocks_in_seq(
  75. self,
  76. seq: Sequence,
  77. access_time: float,
  78. ) -> None:
  79. pass
  80. @abstractmethod
  81. def get_common_computed_block_ids(self, seqs: List[Sequence]) -> List[int]:
  82. pass
  83. @abstractmethod
  84. def mark_blocks_as_computed(self, seq_group: SequenceGroup):
  85. pass