1
0

naive_block.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. from typing import Dict, Iterable, List, Optional, Set
  2. from aphrodite.processing.block.common import (
  3. CopyOnWriteTracker,
  4. RefCounter,
  5. get_all_blocks_recursively,
  6. )
  7. from aphrodite.processing.block.interfaces import Block, BlockAllocator
  8. BlockId = int
  9. Refcount = int
  10. class NaiveBlockAllocator(BlockAllocator):
  11. """A simple block allocator that manages blocks of memory without prefix
  12. caching.
  13. Args:
  14. create_block (Block.Factory): A factory function for creating new
  15. blocks. This is used when a NaiveBlockAllocator is composed within
  16. a prefix caching allocator -- the naive block allocator must
  17. construct prefix caching blocks (but shouldn't know anything else
  18. about them).
  19. num_blocks (int): The total number of blocks to manage.
  20. block_size (int): The size of each block in tokens.
  21. block_ids (Optional[Iterable[int]], optional): An optional iterable of
  22. block IDs. If not provided, block IDs will be assigned sequentially
  23. from 0 to num_blocks - 1.
  24. """
  25. def __init__(
  26. self,
  27. create_block: Block.Factory,
  28. num_blocks: int,
  29. block_size: int,
  30. block_ids: Optional[Iterable[int]] = None,
  31. ):
  32. if block_ids is None:
  33. block_ids = range(num_blocks)
  34. self._free_block_indices: Set[BlockId] = set(block_ids)
  35. self._all_block_indices = frozenset(block_ids)
  36. assert len(self._all_block_indices) == num_blocks
  37. self._refcounter = RefCounter(
  38. all_block_indices=self._free_block_indices)
  39. self._create_block = create_block
  40. self._block_size = block_size
  41. self._cow_tracker = CopyOnWriteTracker(
  42. refcounter=self._refcounter.as_readonly(),
  43. allocator=self,
  44. )
  45. def allocate_immutable(self, prev_block: Optional[Block],
  46. token_ids: List[int]) -> Block:
  47. """Allocates a new immutable block with the given token IDs, linked to
  48. the previous block.
  49. Args:
  50. prev_block (Optional[Block]): The previous block in the sequence. If
  51. None, then the block to be allocated is the first block in the
  52. sequence.
  53. token_ids (List[int]): The token IDs to be stored in the new block.
  54. Returns:
  55. Block: The newly allocated immutable block.
  56. """
  57. block = self.allocate_mutable(prev_block=prev_block)
  58. block.append_token_ids(token_ids)
  59. return block
  60. def allocate_mutable(self, prev_block: Optional[Block]) -> Block:
  61. """Allocates a new mutable block, linked to the previous block.
  62. Args:
  63. prev_block (Optional[Block]): The previous block in the sequence. If
  64. None, then the block to be allocated is the first block in the
  65. sequence.
  66. Returns:
  67. Block: The newly allocated mutable block.
  68. """
  69. block_id = self._allocate_new_block_id()
  70. return self._create_block(
  71. prev_block=prev_block,
  72. token_ids=[],
  73. block_id=block_id,
  74. block_size=self._block_size,
  75. allocator=self,
  76. )
  77. def free(self, block: Block) -> None:
  78. self._free_block_id(block.block_id)
  79. # Mark the block as having no allocation.
  80. block.block_id = None
  81. def fork(self, last_block: Block) -> List[Block]:
  82. """Creates a new sequence of blocks that shares the same underlying
  83. memory as the original sequence.
  84. Args:
  85. last_block (Block): The last block in the original sequence.
  86. Returns:
  87. List[Block]: The new sequence of blocks that shares the same memory
  88. as the original sequence.
  89. """
  90. source_blocks = get_all_blocks_recursively(last_block)
  91. forked_blocks = []
  92. prev_block = None
  93. for block in source_blocks:
  94. # Increment refcount for each block.
  95. refcount = self._refcounter.incr(block.block_id)
  96. assert refcount != 1, "can't fork free'd block"
  97. forked_blocks.append(
  98. self._create_block(
  99. prev_block=prev_block,
  100. token_ids=block.token_ids,
  101. block_id=block.block_id,
  102. block_size=self._block_size,
  103. allocator=self,
  104. ))
  105. prev_block = forked_blocks[-1]
  106. return forked_blocks
  107. def get_num_free_blocks(self) -> int:
  108. return len(self._free_block_indices)
  109. def _allocate_new_block_id(self) -> BlockId:
  110. if not self._free_block_indices:
  111. raise BlockAllocator.NoFreeBlocksError()
  112. block_id = next(iter(self._free_block_indices))
  113. self._refcounter.incr(block_id)
  114. self._free_block_indices.remove(block_id)
  115. return block_id
  116. def _free_block_id(self, block_id: BlockId) -> None:
  117. refcount = self._refcounter.decr(block_id)
  118. if refcount == 0:
  119. self._free_block_indices.add(block_id)
  120. @property
  121. def refcounter(self):
  122. return self._refcounter
  123. @property
  124. def all_block_ids(self):
  125. return self._all_block_indices
  126. def cow_block_if_not_appendable(self, block: Block) -> Optional[BlockId]:
  127. """Performs a copy-on-write operation on the given block if it is not
  128. appendable.
  129. Args:
  130. block (Block): The block to check for copy-on-write.
  131. Returns:
  132. Optional[BlockId]: The block index of the new block if a copy-on
  133. -write operation was performed, or the original block index if
  134. no copy-on-write was necessary.
  135. """
  136. return self._cow_tracker.cow_block_if_not_appendable(block)
  137. def clear_copy_on_writes(self) -> Dict[BlockId, List[BlockId]]:
  138. """Returns the copy-on-write source->destination mapping and clears it.
  139. Returns:
  140. Dict[BlockId, List[BlockId]]: A dictionary mapping source
  141. block indices to lists of destination block indices.
  142. """
  143. return self._cow_tracker.clear_cows()
  144. def mark_blocks_as_computed(self) -> None:
  145. """Mark blocks as computed, used in prefix caching.
  146. Since the naive allocator does not implement prefix caching, we do
  147. nothing.
  148. """
  149. pass
  150. def get_common_computed_block_ids(
  151. self, seq_block_ids: List[List[int]]) -> List[int]:
  152. """Determine blocks that can be skipped in prefill.
  153. Since the naive allocator does not support prefix caching, always return
  154. an empty list.
  155. """
  156. return []
  157. class NaiveBlock(Block):
  158. """An implementation of the Block class that does not support prefix
  159. caching.
  160. The NaiveBlock class represents a block of token IDs with a fixed size. It
  161. provides methods for appending token IDs to the block and manages copy-on
  162. -write operations when necessary.
  163. Args:
  164. prev_block (Block): The previous block in the sequence.
  165. token_ids (List[int]): The initial token IDs to be stored in the block.
  166. block_size (int): The maximum number of token IDs that can be stored in
  167. the block.
  168. allocator (BlockAllocator): The block allocator associated with this
  169. block.
  170. block_id (Optional[int], optional): The physical block index
  171. of this block. Defaults to None, which means no allocation has been
  172. made.
  173. _cow_target (Optional[Block], optional): The copy-on-write target block.
  174. If not provided, it defaults to self.
  175. """
  176. def __init__(
  177. self,
  178. prev_block: Block,
  179. token_ids: List[int],
  180. block_size: int,
  181. allocator: BlockAllocator,
  182. block_id: Optional[int] = None,
  183. _cow_target: Optional[Block] = None,
  184. ):
  185. self._token_ids = []
  186. self._block_size = block_size
  187. self._prev_block = prev_block
  188. self._block_id = block_id
  189. self._allocator = allocator
  190. self._cow_target = _cow_target if _cow_target is not None else self
  191. self._append_token_ids_no_cow(token_ids)
  192. def append_token_ids(self, token_ids: List[int]) -> None:
  193. """Appends the given token IDs to the block, instructing the allocator
  194. to perform a copy-on-write if necessary.
  195. Args:
  196. token_ids (List[int]): The token IDs to be appended to the block.
  197. """
  198. self._append_token_ids_no_cow(token_ids)
  199. if self._block_id is not None:
  200. self._block_id = self._allocator.cow_block_if_not_appendable(
  201. self._cow_target)
  202. def _append_token_ids_no_cow(self, token_ids: List[int]) -> None:
  203. assert self.num_empty_slots >= len(token_ids)
  204. self._token_ids.extend(token_ids)
  205. @property
  206. def block_id(self) -> Optional[int]:
  207. return self._block_id
  208. @block_id.setter
  209. def block_id(self, value: Optional[int]) -> None:
  210. self._block_id = value
  211. @property
  212. def is_full(self) -> bool:
  213. return self.num_empty_slots == 0
  214. @property
  215. def num_empty_slots(self) -> int:
  216. return self._block_size - len(self._token_ids)
  217. @property
  218. def token_ids(self) -> List[int]:
  219. return self._token_ids
  220. def block_size(self) -> int:
  221. return self._block_size
  222. @property
  223. def prev_block(self) -> Optional["Block"]:
  224. return self._prev_block