|
@@ -7,7 +7,6 @@
|
|
|
|
|
|
import math
|
|
import math
|
|
import re
|
|
import re
|
|
-from array import array
|
|
|
|
from functools import partial
|
|
from functools import partial
|
|
from typing import (Any, Callable, Dict, Iterable, List, Literal, Mapping,
|
|
from typing import (Any, Callable, Dict, Iterable, List, Literal, Mapping,
|
|
Optional, Tuple, TypedDict, Union)
|
|
Optional, Tuple, TypedDict, Union)
|
|
@@ -23,8 +22,7 @@ from transformers import PretrainedConfig
|
|
|
|
|
|
from aphrodite.attention import Attention, AttentionMetadata
|
|
from aphrodite.attention import Attention, AttentionMetadata
|
|
from aphrodite.common.config import CacheConfig, MultiModalConfig
|
|
from aphrodite.common.config import CacheConfig, MultiModalConfig
|
|
-from aphrodite.common.sequence import (APHRODITE_TOKEN_ID_ARRAY_TYPE,
|
|
|
|
- IntermediateTensors, SequenceData)
|
|
|
|
|
|
+from aphrodite.common.sequence import IntermediateTensors, SequenceData
|
|
from aphrodite.common.utils import is_list_of
|
|
from aphrodite.common.utils import is_list_of
|
|
from aphrodite.distributed import (get_pp_group,
|
|
from aphrodite.distributed import (get_pp_group,
|
|
get_tensor_model_parallel_world_size)
|
|
get_tensor_model_parallel_world_size)
|
|
@@ -819,8 +817,7 @@ def dummy_data_for_qwen(
|
|
# The presence of a visual config indicates this is a multimodal model.
|
|
# The presence of a visual config indicates this is a multimodal model.
|
|
# If we don't have it, the model is considered an LLM for warmup purposes.
|
|
# If we don't have it, the model is considered an LLM for warmup purposes.
|
|
if not hasattr(hf_config, "visual"):
|
|
if not hasattr(hf_config, "visual"):
|
|
- seq_data = SequenceData(array(APHRODITE_TOKEN_ID_ARRAY_TYPE,
|
|
|
|
- [0] * seq_len))
|
|
|
|
|
|
+ seq_data = SequenceData.from_token_counts((0, seq_len))
|
|
mm_data = None
|
|
mm_data = None
|
|
return seq_data, mm_data
|
|
return seq_data, mm_data
|
|
|
|
|
|
@@ -847,11 +844,13 @@ def dummy_data_for_qwen(
|
|
if len(toks) < seq_len:
|
|
if len(toks) < seq_len:
|
|
toks += [0] * (seq_len - len(toks))
|
|
toks += [0] * (seq_len - len(toks))
|
|
|
|
|
|
|
|
+ seq_data = SequenceData.from_seqs(toks)
|
|
|
|
+
|
|
# Build the input images; width/height doesn't actually matter here since
|
|
# Build the input images; width/height doesn't actually matter here since
|
|
# the data will get resized and the # of tokens per image is constant
|
|
# the data will get resized and the # of tokens per image is constant
|
|
image = Image.new("RGB", (224, 224), color=0)
|
|
image = Image.new("RGB", (224, 224), color=0)
|
|
mm_data = {"image": image if num_images == 1 else [image] * num_images}
|
|
mm_data = {"image": image if num_images == 1 else [image] * num_images}
|
|
- return SequenceData(array(APHRODITE_TOKEN_ID_ARRAY_TYPE, toks)), mm_data
|
|
|
|
|
|
+ return seq_data, mm_data
|
|
|
|
|
|
|
|
|
|
@MULTIMODAL_REGISTRY.register_image_input_mapper(input_mapper_for_qwen)
|
|
@MULTIMODAL_REGISTRY.register_image_input_mapper(input_mapper_for_qwen)
|