# placeholder_block_space_manager.py (2.7 KB)
  1. from typing import List, Tuple
  2. from aphrodite.common.sequence import Sequence, SequenceGroup
  3. from aphrodite.common.utils import Device
  4. from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
  class PlaceholderBlockSpaceManager(BlockSpaceManager):
      """A BlockSpaceManager for environments that need no block management.

      For example: embedding models or attention-free models like Mamba.

      This class provides the same interface as BlockSpaceManager, but its
      methods either do nothing or return fixed values (``AllocStatus.OK``,
      ``True``, an empty list, ...). It is meant for scenarios where the
      overhead of block management is unnecessary, such as an embedding
      environment.

      NOTE(review): several methods return ``None`` although their
      annotations promise a list (hence the ``# type: ignore`` markers);
      presumably callers never consume those results with this manager --
      confirm before relying on them.
      """

      def __init__(self, **kwargs) -> None:
          """Accept and discard all keyword arguments; nothing is stored."""

      def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
          """Report that ``seq_group`` can be allocated.

          Returns:
              Always ``AllocStatus.OK``; ``seq_group`` is not inspected.
          """
          # Always return OK for dummy purposes
          return AllocStatus.OK

      def allocate(self, seq_group: SequenceGroup) -> None:
          """Do nothing; there is no allocation logic in this manager."""

      def can_append_slots(self, seq_group: SequenceGroup,
                           num_lookahead_slots: int) -> bool:
          """Report that slots can be appended.

          Returns:
              Always ``True``; both arguments are ignored.
          """
          return True

      def append_slots(
          self,
          seq: Sequence,
          num_lookahead_slots: int,
      ) -> List[Tuple[int, int]]:
          """Append no slots.

          Returns:
              A new empty list on every call; both arguments are ignored.
          """
          return []

      def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
          """Do nothing; both sequences are ignored."""

      def can_swap_in(self, seq_group: SequenceGroup,
                      num_lookahead_slots: int) -> AllocStatus:
          """Report that ``seq_group`` can be swapped in.

          Returns:
              Always ``AllocStatus.OK``; both arguments are ignored.
          """
          return AllocStatus.OK

      def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
          """Perform no swap-in.

          Returns:
              ``None``, not a list, despite the annotation.
          """
          return None  # type: ignore

      def can_swap_out(self, seq_group: SequenceGroup) -> bool:
          """Report that ``seq_group`` can be swapped out.

          Returns:
              Always ``True``; ``seq_group`` is not inspected.
          """
          return True

      def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
          """Perform no swap-out.

          Returns:
              ``None``, not a list, despite the annotation.
          """
          return None  # type: ignore

      def free(self, seq: Sequence) -> None:
          """Do nothing; ``seq`` is ignored."""
          # No operation on free
          return None

      def get_block_table(self, seq: Sequence) -> List[int]:
          """Return no block table.

          Returns:
              ``None``, not a list, despite the annotation.
          """
          return None  # type: ignore

      def get_num_free_gpu_blocks(self) -> int:
          """Return the constant ``1``.

          NOTE(review): presumably a non-zero dummy so callers that check
          for free GPU blocks keep going -- confirm against callers.
          """
          return 1

      def get_num_free_cpu_blocks(self) -> int:
          """Return the constant ``1``.

          NOTE(review): presumably a non-zero dummy so callers that check
          for free CPU blocks keep going -- confirm against callers.
          """
          return 1

      def access_all_blocks_in_seq(
          self,
          seq: Sequence,
          access_time: float,
      ) -> None:
          """Do nothing; both arguments are ignored."""

      def get_common_computed_block_ids(
              self, seq_group: SequenceGroup) -> List[int]:
          """Return no computed block ids.

          Returns:
              ``None``, not a list, despite the annotation.
          """
          return None  # type: ignore

      def mark_blocks_as_computed(self, seq_group: SequenceGroup,
                                  token_chunk_size: int) -> None:
          """Do nothing; both arguments are ignored."""

      def get_prefix_cache_hit_rate(self, device: Device) -> float:
          """Return the constant ``-1`` regardless of ``device``.

          NOTE(review): presumably a sentinel meaning "hit rate is not
          tracked" -- confirm against the code that consumes this value.
          """
          return -1