# utils.py
"""Block manager utils."""
from aphrodite.common.sequence import SequenceGroup
  3. # Exception strings for non-implemented block manager enc/dec scenarios
  4. STR_NOT_IMPL_ENC_DEC_SWA = \
  5. "Sliding window attention for encoder/decoder models " + \
  6. "is not currently supported."
  7. STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \
  8. "Prefix caching for encoder/decoder models " + \
  9. "is not currently supported."
  10. def _get_block_mgr_sliding_window_attr(block_mgr):
  11. '''
  12. BlockManagerV1 and BlockManagerV2 have slightly different
  13. members related to sliding window attention (SWA). This
  14. function extracts the appropriate member to use for determining
  15. whether SWA is enabled.
  16. Arguments:
  17. * block_mgr: BlockManagerV1 or BlockManagerV2 instance
  18. '''
  19. if hasattr(block_mgr, 'block_sliding_window'):
  20. return block_mgr.block_sliding_window
  21. if hasattr(block_mgr, 'max_block_sliding_window'):
  22. return block_mgr.max_block_sliding_window
  23. raise AttributeError("Block manager instance has neither " + \
  24. "block_sliding_window nor " + \
  25. "max_block_sliding_window attributes.")
  26. def check_no_caching_or_swa_for_blockmgr_encdec(
  27. block_mgr, seq_group: SequenceGroup) -> None:
  28. '''
  29. Enforce that prefix caching & sliding-window attention (SWA)
  30. are currently unsupported *specifically* for encoder/decoder models.
  31. Raises NotImplementedError if unsupported scenario is detected.
  32. Arguments:
  33. * block_mgr: BlockSpaceManager instance
  34. * seq_group: SequenceGroup passed to block_mgr
  35. '''
  36. if seq_group.is_encoder_decoder():
  37. if _get_block_mgr_sliding_window_attr(block_mgr) is not None:
  38. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA)
  39. if block_mgr.enable_caching:
  40. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE)