utils.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. '''
  2. Worker-related helper functions.
  3. '''
  4. from aphrodite.common.utils import STR_NOT_IMPL_ENC_DEC_ERR_STRS
  5. from aphrodite.worker.model_runner import GPUModelRunnerBase
  6. def assert_enc_dec_mr_supported_scenario(
  7. enc_dec_mr: GPUModelRunnerBase) -> None:
  8. '''
  9. Asserted that the provided encoder/decoder model runner instance reflects
  10. a supported scenario.
  11. '''
  12. if enc_dec_mr.cache_config.enable_prefix_caching:
  13. raise NotImplementedError(
  14. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE'])
  15. if enc_dec_mr.sliding_window is not None:
  16. raise NotImplementedError(
  17. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_SWA'])
  18. if enc_dec_mr.scheduler_config.chunked_prefill_enabled:
  19. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_ERR_STRS[
  20. 'STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL'])
  21. if getattr(enc_dec_mr.model_config.hf_config, 'attn_logit_softcapping',
  22. None) is not None:
  23. raise NotImplementedError(
  24. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP']
  25. )
  26. if enc_dec_mr.lora_config is not None:
  27. raise NotImplementedError(
  28. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_LORA'])
  29. if enc_dec_mr.parallel_config.pipeline_parallel_size > 1:
  30. raise NotImplementedError(
  31. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_PP'])
  32. if enc_dec_mr.model_config.is_multimodal_model:
  33. raise NotImplementedError(
  34. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_MM'])
  35. if enc_dec_mr.scheduler_config.num_lookahead_slots > 0:
  36. raise NotImplementedError(
  37. STR_NOT_IMPL_ENC_DEC_ERR_STRS['STR_NOT_IMPL_ENC_DEC_SPEC_DEC'])
  38. if enc_dec_mr.prompt_adapter_config is not None:
  39. raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_ERR_STRS[
  40. 'STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER'])