12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
- '''
- Worker-related helper functions.
- '''
- from aphrodite.common.utils import STR_NOT_IMPL_ENC_DEC_ERR_STRS
- from aphrodite.worker.model_runner import GPUModelRunnerBase
def assert_enc_dec_mr_supported_scenario(
        enc_dec_mr: GPUModelRunnerBase) -> None:
    '''
    Check that an encoder/decoder model runner is set up for a supported
    scenario.

    A fixed sequence of settings on the runner is inspected. The first one
    found to be enabled aborts the check with a NotImplementedError whose
    message is looked up in STR_NOT_IMPL_ENC_DEC_ERR_STRS. If none of them
    is enabled the function returns None.

    Raises:
        NotImplementedError: if any of the inspected settings is enabled.
    '''
    # Ordered (probe, error-key) pairs. Every probe is a zero-argument
    # callable so that a setting is only read once all earlier probes have
    # come back false, exactly like a chain of guard clauses.
    unsupported_probes = (
        (lambda: enc_dec_mr.cache_config.enable_prefix_caching,
         'STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE'),
        (lambda: enc_dec_mr.sliding_window is not None,
         'STR_NOT_IMPL_ENC_DEC_SWA'),
        (lambda: enc_dec_mr.scheduler_config.chunked_prefill_enabled,
         'STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL'),
        # hf_config may not define attn_logit_softcapping at all; a missing
        # attribute is treated the same as an explicit None.
        (lambda: getattr(enc_dec_mr.model_config.hf_config,
                         'attn_logit_softcapping', None) is not None,
         'STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP'),
        (lambda: enc_dec_mr.lora_config is not None,
         'STR_NOT_IMPL_ENC_DEC_LORA'),
        (lambda: enc_dec_mr.parallel_config.pipeline_parallel_size > 1,
         'STR_NOT_IMPL_ENC_DEC_PP'),
        (lambda: enc_dec_mr.model_config.is_multimodal_model,
         'STR_NOT_IMPL_ENC_DEC_MM'),
        (lambda: enc_dec_mr.scheduler_config.num_lookahead_slots > 0,
         'STR_NOT_IMPL_ENC_DEC_SPEC_DEC'),
        (lambda: enc_dec_mr.prompt_adapter_config is not None,
         'STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER'),
    )

    for is_enabled, err_key in unsupported_probes:
        if is_enabled():
            raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_ERR_STRS[err_key])
|