12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- from typing import List, Tuple
- from aphrodite.common.sequence import Sequence, SequenceGroup
- from aphrodite.common.utils import Device
- from aphrodite.processing.interfaces import AllocStatus, BlockSpaceManager
class PlaceholderBlockSpaceManager(BlockSpaceManager):
    """Stand-in BlockSpaceManager for setups that need no block management.

    Typical users are embedding models and attention-free models such as
    Mamba. The class exposes the same methods as BlockSpaceManager, but
    every method is either a no-op or returns a fixed value, so callers can
    keep using the usual interface without paying for block bookkeeping.
    """

    def __init__(self, **kwargs) -> None:
        """Accept and discard any keyword arguments; no state is kept."""

    def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
        """Report AllocStatus.OK unconditionally."""
        return AllocStatus.OK

    def allocate(self, seq_group: SequenceGroup) -> None:
        """Do nothing; there is nothing to allocate."""

    def can_append_slots(
        self,
        seq_group: SequenceGroup,
        num_lookahead_slots: int,
    ) -> bool:
        """Report True unconditionally."""
        return True

    def append_slots(
        self, seq: Sequence, num_lookahead_slots: int
    ) -> List[Tuple[int, int]]:
        """Return a new empty list on every call."""
        return []

    def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None:
        """Do nothing."""

    def can_swap_in(
        self,
        seq_group: SequenceGroup,
        num_lookahead_slots: int,
    ) -> AllocStatus:
        """Report AllocStatus.OK unconditionally."""
        return AllocStatus.OK

    def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
        """Return None despite the annotated list type (hence the ignore)."""
        return None  # type: ignore

    def can_swap_out(self, seq_group: SequenceGroup) -> bool:
        """Report True unconditionally."""
        return True

    def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
        """Return None despite the annotated list type (hence the ignore)."""
        return None  # type: ignore

    def free(self, seq: Sequence) -> None:
        """Do nothing; freeing is a no-op here."""

    def get_block_table(self, seq: Sequence) -> List[int]:
        """Return None despite the annotated list type (hence the ignore)."""
        return None  # type: ignore

    def get_num_free_gpu_blocks(self) -> int:
        """Return the constant 1."""
        return 1

    def get_num_free_cpu_blocks(self) -> int:
        """Return the constant 1."""
        return 1

    def access_all_blocks_in_seq(
        self, seq: Sequence, access_time: float
    ) -> None:
        """Do nothing; access times are not recorded."""

    def get_common_computed_block_ids(
        self, seq_group: SequenceGroup
    ) -> List[int]:
        """Return None despite the annotated list type (hence the ignore)."""
        return None  # type: ignore

    def mark_blocks_as_computed(
        self, seq_group: SequenceGroup, token_chunk_size: int
    ):
        """Do nothing."""

    def get_prefix_cache_hit_rate(self, device: Device) -> float:
        """Return the constant -1, whatever device is passed."""
        return -1
|