''' Worker-related helper functions. '''

from aphrodite.common.utils import STR_NOT_IMPL_ENC_DEC_ERR_STRS
from aphrodite.task_handler.model_runner import GPUModelRunnerBase

# Ordered table of (error-string key, predicate) pairs.  Each predicate
# receives the model runner and returns a truthy value when the runner is
# configured in a way the corresponding key reports as not implemented.
# Predicates are lambdas so that configuration attributes are only read
# when their check is actually reached; the order of the entries is the
# order in which the checks are evaluated.
_ENC_DEC_UNSUPPORTED_CHECKS = (
    # Prefix caching is switched on in the cache config.
    ('STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE',
     lambda mr: mr.cache_config.enable_prefix_caching),
    # A sliding window is set on the runner.
    ('STR_NOT_IMPL_ENC_DEC_SWA',
     lambda mr: mr.sliding_window is not None),
    # Chunked prefill is switched on in the scheduler config.
    ('STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL',
     lambda mr: mr.scheduler_config.chunked_prefill_enabled),
    # The HF config carries a non-None `attn_logit_softcapping`; the
    # attribute may be missing entirely, hence the getattr default.
    ('STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP',
     lambda mr: getattr(mr.model_config.hf_config,
                        'attn_logit_softcapping', None) is not None),
    # A LoRA config is present.
    ('STR_NOT_IMPL_ENC_DEC_LORA',
     lambda mr: mr.lora_config is not None),
    # More than one pipeline-parallel stage is configured.
    ('STR_NOT_IMPL_ENC_DEC_PP',
     lambda mr: mr.parallel_config.pipeline_parallel_size > 1),
    # A multimodal config is present.
    ('STR_NOT_IMPL_ENC_DEC_MM',
     lambda mr: mr.model_config.multimodal_config is not None),
    # Lookahead slots are requested (reported under the SPEC_DEC key).
    ('STR_NOT_IMPL_ENC_DEC_SPEC_DEC',
     lambda mr: mr.scheduler_config.num_lookahead_slots > 0),
    # `enforce_eager` is falsy (reported under the CUDA_GRAPH key).
    ('STR_NOT_IMPL_ENC_DEC_CUDA_GRAPH',
     lambda mr: not mr.model_config.enforce_eager),
    # A prompt-adapter config is present.
    ('STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER',
     lambda mr: mr.prompt_adapter_config is not None),
)


def assert_enc_dec_mr_supported_scenario(
        enc_dec_mr: GPUModelRunnerBase) -> None:
    '''
    Assert that the provided encoder/decoder model runner instance
    reflects a supported scenario.

    The runner's configuration is tested against each entry of
    `_ENC_DEC_UNSUPPORTED_CHECKS` in order.  The first entry whose
    predicate is truthy aborts the call.

    Arguments:

    * enc_dec_mr: the model runner whose configuration is inspected

    Raises:

    * NotImplementedError: carrying the message stored in
      `STR_NOT_IMPL_ENC_DEC_ERR_STRS` under the key of the first
      matching check

    Returns None when no check matches.
    '''
    for err_key, is_unsupported in _ENC_DEC_UNSUPPORTED_CHECKS:
        if is_unsupported(enc_dec_mr):
            raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_ERR_STRS[err_key])