1
0

registry.py 8.3 KB


  1. import functools
  2. from array import array
  3. from collections import UserDict
  4. from dataclasses import dataclass
  5. from typing import (TYPE_CHECKING, Any, Callable, Dict, Mapping, Optional,
  6. Protocol, Tuple, Type)
  7. from loguru import logger
  8. from torch import nn
  9. from transformers import PretrainedConfig
  10. from typing_extensions import TypeVar
  11. from aphrodite.constants import APHRODITE_TOKEN_ID_ARRAY_TYPE
  12. from .data import LLMInputs
  13. if TYPE_CHECKING:
  14. from aphrodite.common.config import ModelConfig
  15. from aphrodite.common.sequence import SequenceData
  16. from aphrodite.multimodal import MultiModalDataDict, MultiModalRegistry
# Type variable for HuggingFace configuration classes; it lets
# ``InputContext.get_hf_config`` return the same type it was asked to check.
C = TypeVar("C", bound=PretrainedConfig)
  18. @dataclass(frozen=True)
  19. class InputContext:
  20. """
  21. Contains information about the model which may be used to
  22. modify the inputs.
  23. """
  24. model_config: "ModelConfig"
  25. """The configuration of the model."""
  26. def get_hf_config(self, hf_config_type: Type[C] = PretrainedConfig) -> C:
  27. """
  28. Get the HuggingFace configuration
  29. (:class:`transformers.PretrainedConfig`) of the model,
  30. additionally checking its type.
  31. Raises:
  32. ValueError: If the model is not of the specified type.
  33. """
  34. hf_config = self.model_config.hf_config
  35. if not isinstance(hf_config, hf_config_type):
  36. raise TypeError("Invalid type of HuggingFace config. "
  37. f"Expected type: {hf_config_type}, but "
  38. f"found type: {type(hf_config)}")
  39. return hf_config
  40. def get_hf_image_processor_config(self) -> Dict[str, Any]:
  41. """
  42. Get the HuggingFace image processor configuration of the model.
  43. """
  44. return self.model_config.hf_image_processor_config
# Type variable for model classes (subclasses of ``nn.Module``); the
# registration decorators use it to return the decorated class unchanged.
N = TypeVar("N", bound=Type[nn.Module])
  46. class DummyDataFactory(Protocol):
  47. def __call__(
  48. self,
  49. ctx: InputContext,
  50. seq_len: int,
  51. mm_counts: Mapping[str, int],
  52. ) -> Tuple["SequenceData", Optional["MultiModalDataDict"]]:
  53. """
  54. Create dummy data to be inputted into the model.
  55. Note:
  56. :data:`InputProcessor` is not applied to the dummy data.
  57. """
  58. ...
  59. class _MultiModalCounts(UserDict):
  60. """
  61. Wraps `mm_counts` for a more informative error message
  62. when attempting to access a plugin that does not exist.
  63. """
  64. def __getitem__(self, key: str) -> int:
  65. try:
  66. return super().__getitem__(key)
  67. except KeyError as exc:
  68. msg = (f"There is no multi-modal plugin with the key: {key}. "
  69. f"Available keys: {set(self.keys())}")
  70. raise KeyError(msg) from exc
# Signature shared by all input processors: takes the model's InputContext
# and the current LLMInputs and returns the LLMInputs to use.
InputProcessor = Callable[[InputContext, LLMInputs], LLMInputs]
"""Preprocess the inputs to the model."""
  73. class InputRegistry:
  74. """
  75. A registry to dispatch data processing
  76. according to the target model.
  77. """
  78. def __init__(self) -> None:
  79. self._dummy_factories_by_model_type: Dict[Type[nn.Module],
  80. DummyDataFactory] = {}
  81. self._input_processors_by_model_type: Dict[Type[nn.Module],
  82. InputProcessor] = {}
  83. def _default_dummy_data_factory(
  84. self,
  85. ctx: InputContext,
  86. seq_len: int,
  87. mm_counts: Mapping[str, int],
  88. ) -> Tuple["SequenceData", Optional["MultiModalDataDict"]]:
  89. """
  90. The default dummy data factory represents the longest possible text
  91. that can be inputted to the model.
  92. Note:
  93. :data:`InputProcessor` is not applied to the dummy data.
  94. """
  95. # Avoid circular import
  96. from aphrodite.common.sequence import SequenceData
  97. dummy_seq_data = SequenceData(
  98. array(APHRODITE_TOKEN_ID_ARRAY_TYPE, [0]) * seq_len)
  99. dummy_multi_modal_data = None
  100. return dummy_seq_data, dummy_multi_modal_data
  101. def register_dummy_data(self, factory: DummyDataFactory):
  102. """
  103. Register a dummy data factory to a model class.
  104. During memory profiling, the provided function is invoked to create
  105. dummy data to be inputted into the model. The resulting memory usage
  106. should be an upper bound of what the model would use at inference time.
  107. """
  108. def wrapper(model_cls: N) -> N:
  109. if model_cls in self._dummy_factories_by_model_type:
  110. logger.warning(
  111. f"Model class {model_cls} already has dummy data "
  112. f"registered to {self}. It is overwritten by the new one.")
  113. self._dummy_factories_by_model_type[model_cls] = factory
  114. return model_cls
  115. return wrapper
  116. def dummy_data_for_profiling(
  117. self,
  118. model_config: "ModelConfig",
  119. seq_len: int,
  120. mm_registry: "MultiModalRegistry",
  121. ) -> Tuple["SequenceData", Optional["MultiModalDataDict"]]:
  122. """
  123. Create dummy data for profiling the memory usage of a model.
  124. The model is identified by ``model_config``.
  125. See also:
  126. :ref:`enabling_multimodal_inputs`
  127. Note:
  128. This should be called after
  129. :meth:`~MultiModalRegistry.init_mm_limits_per_prompt`.
  130. """
  131. # Avoid circular import
  132. from aphrodite.modeling.model_loader import get_model_architecture
  133. model_cls, _ = get_model_architecture(model_config)
  134. dummy_factory = self._dummy_factories_by_model_type \
  135. .get(model_cls, self._default_dummy_data_factory)
  136. mm_counts = mm_registry.get_mm_limits_per_prompt(model_config)
  137. seq_data, mm_data = dummy_factory(
  138. InputContext(model_config),
  139. seq_len,
  140. _MultiModalCounts(mm_counts),
  141. )
  142. # Having more tokens is over-conservative but otherwise fine
  143. num_tokens = seq_data.prompt_token_ids
  144. assert len(num_tokens) >= seq_len, (
  145. f"Expected at least {seq_len} dummy tokens for profiling, "
  146. f"but found {len(num_tokens)} tokens instead.")
  147. if mm_data is not None:
  148. for k, v in mm_data.items():
  149. num_items = len(v) if isinstance(v, list) else 1
  150. num_expected = mm_counts[k]
  151. assert num_items >= num_expected, (
  152. f"Expected at least {num_expected} dummy '{k}' instances "
  153. f"for profiling, but found {num_items} instances instead.")
  154. return seq_data, mm_data
  155. def _default_input_processor(self, ctx: InputContext,
  156. inputs: LLMInputs) -> LLMInputs:
  157. """The default input processor is a no-op."""
  158. return inputs
  159. def register_input_processor(self, processor: InputProcessor):
  160. """
  161. Register an input processor to a model class.
  162. The provided function is invoked on each input to the model. This
  163. happens before
  164. :meth:`~aphrodite.multimodal.MultiModalRegistry.map_input`.
  165. See also:
  166. :ref:`input_processing_pipeline`
  167. """
  168. def wrapper(model_cls: N) -> N:
  169. if model_cls in self._input_processors_by_model_type:
  170. logger.warning(
  171. f"Model class {model_cls} already has input processor "
  172. f"registered to {self}. It is overwritten by the new one.")
  173. self._input_processors_by_model_type[model_cls] = processor
  174. return model_cls
  175. return wrapper
  176. def process_input(self, model_config: "ModelConfig",
  177. inputs: LLMInputs) -> LLMInputs:
  178. """
  179. Apply an input processor to an instance of model inputs.
  180. The model is identified by ``model_config``.
  181. See also:
  182. :ref:`input_processing_pipeline`
  183. """
  184. # Avoid circular import
  185. from aphrodite.modeling.model_loader import get_model_architecture
  186. model_cls, _ = get_model_architecture(model_config)
  187. processor = self._input_processors_by_model_type \
  188. .get(model_cls, self._default_input_processor)
  189. return processor(InputContext(model_config), inputs)
  190. def create_input_processor(self, model_config: "ModelConfig"):
  191. """
  192. Create an input processor (see :meth:`process_input`) for a
  193. specific model.
  194. """
  195. return functools.partial(self.process_input, model_config)