utils.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. """Block manager utils."""
  2. from aphrodite.common.sequence import SequenceGroup
  3. from aphrodite.common.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
  4. STR_NOT_IMPL_ENC_DEC_SWA)
  5. def _get_block_mgr_sliding_window_attr(block_mgr):
  6. '''
  7. BlockManagerV1 and BlockManagerV2 have slightly different
  8. members related to sliding window attention (SWA). This
  9. function extracts the appropriate member to use for determining
  10. whether SWA is enabled.
  11. Arguments:
  12. * block_mgr: BlockManagerV1 or BlockManagerV2 instance
  13. '''
  14. if hasattr(block_mgr, 'block_sliding_window'):
  15. return block_mgr.block_sliding_window
  16. if hasattr(block_mgr, 'max_block_sliding_window'):
  17. return block_mgr.max_block_sliding_window
  18. raise AttributeError("Block manager instance has neither " + \
  19. "block_sliding_window nor " + \
  20. "max_block_sliding_window attributes.")
  21. def check_no_caching_or_swa_for_blockmgr_encdec(
  22. block_mgr, seq_group: SequenceGroup) -> None:
  23. '''
  24. Enforce that prefix caching & sliding-window attention (SWA)
  25. are currently unsupported *specifically* for encoder/decoder models.
  26. Raises NotImplementedError if unsupported scenario is detected.
  27. Arguments:
  28. * block_mgr: BlockSpaceManager instance
  29. * seq_group: SequenceGroup passed to block_mgr
  30. '''
  31. if seq_group.is_encoder_decoder():
  32. if _get_block_mgr_sliding_window_attr(block_mgr) is not None:
  33. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA)
  34. if block_mgr.enable_caching:
  35. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE)