|
@@ -55,6 +55,7 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
|
|
|
parallel_config: ParallelConfig,
|
|
|
scheduler_config: SchedulerConfig,
|
|
|
device_config: DeviceConfig,
|
|
|
+ **kwargs,
|
|
|
):
|
|
|
self.model_config = model_config
|
|
|
self.parallel_config = parallel_config
|
|
@@ -197,6 +198,7 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
|
|
|
virtual_engine: int = 0,
|
|
|
finished_requests_ids: Optional[List[str]] = None
|
|
|
) -> ModelInputForNeuron:
|
|
|
+ multi_modal_kwargs = None
|
|
|
# NOTE: We assume that all sequences in the group are all prompts or
|
|
|
# all decodes.
|
|
|
is_prompt = seq_group_metadata_list[0].is_prompt
|