1
0

data.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. from typing import (TYPE_CHECKING, Generic, Iterable, List, Optional, Tuple,
  2. Union)
  3. from typing_extensions import NotRequired, TypedDict, TypeVar
  4. if TYPE_CHECKING:
  5. from aphrodite.multimodal import MultiModalDataDict
  6. class TextPrompt(TypedDict):
  7. """Schema for a text prompt."""
  8. prompt: str
  9. """The input text to be tokenized before passing to the model."""
  10. multi_modal_data: NotRequired["MultiModalDataDict"]
  11. """
  12. Optional multi-modal data to pass to the model,
  13. if the model supports it.
  14. """
  15. class TokensPrompt(TypedDict):
  16. """Schema for a tokenized prompt."""
  17. prompt_token_ids: List[int]
  18. """A list of token IDs to pass to the model."""
  19. multi_modal_data: NotRequired["MultiModalDataDict"]
  20. """
  21. Optional multi-modal data to pass to the model,
  22. if the model supports it.
  23. """
  24. class NegativeTextPrompt(TypedDict):
  25. """Schema for a text prompt."""
  26. negative_prompt: str
  27. """The input text to be tokenized before passing to the model."""
  28. class NegativeTokensPrompt(TypedDict):
  29. """Schema for a tokenized prompt."""
  30. negative_prompt_token_ids: List[int]
  31. """A list of token IDs to pass to the model."""
  32. SingletonPromptInputs = Union[str, TextPrompt, TokensPrompt,
  33. NegativeTextPrompt, NegativeTokensPrompt]
  34. """
  35. Set of possible schemas for a single LLM input:
  36. - A text prompt (:class:`str` or :class:`TextPrompt`)
  37. - A tokenized prompt (:class:`TokensPrompt`)
  38. Note that "singleton" is as opposed to a data structure
  39. which encapsulates multiple prompts, i.e. of the sort
  40. which may be utilized for encoder/decoder models when
  41. the user desires to express both the encoder & decoder
  42. prompts explicitly, i.e. ExplicitEncoderDecoderPrompt
  43. A prompt of type SingletonPromptInputs may be employed
  44. as (1) input to a decoder-only model, (2) input to
  45. the encoder of an encoder/decoder model, in the scenario
  46. where the decoder-prompt is not specified explicitly, or
  47. (3) as a member of a larger data structure encapsulating
  48. more than one prompt, i.e. ExplicitEncoderDecoderPrompt
  49. """
  50. _T1_co = TypeVar("_T1_co",
  51. bound=SingletonPromptInputs,
  52. default=SingletonPromptInputs,
  53. covariant=True)
  54. _T2_co = TypeVar("_T2_co",
  55. bound=SingletonPromptInputs,
  56. default=SingletonPromptInputs,
  57. covariant=True)
  58. # TODO: Make fields ReadOnly once mypy supports it
  59. class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
  60. """Represents an encoder/decoder model input prompt,
  61. comprising an explicit encoder prompt and a
  62. decoder prompt.
  63. The encoder and decoder prompts, respectively,
  64. may formatted according to any of the
  65. SingletonPromptInputs schemas, and are not
  66. required to have the same schema.
  67. Only the encoder prompt may have multi-modal data.
  68. Note that an ExplicitEncoderDecoderPrompt may not
  69. be used as an input to a decoder-only model,
  70. and that the `encoder_prompt` and `decoder_prompt`
  71. fields of this data structure may not themselves
  72. must be SingletonPromptInputs instances.
  73. """
  74. encoder_prompt: _T1_co
  75. decoder_prompt: Optional[_T2_co]
  76. PromptInputs = Union[SingletonPromptInputs, ExplicitEncoderDecoderPrompt]
  77. """
  78. Set of possible schemas for an LLM input, including
  79. both decoder-only and encoder/decoder input types:
  80. - A text prompt (:class:`str` or :class:`TextPrompt`)
  81. - A tokenized prompt (:class:`TokensPrompt`)
  82. - A single data structure containing both an encoder and a decoder prompt
  83. (:class:`ExplicitEncoderDecoderPrompt`)
  84. """
  85. class LLMInputs(TypedDict):
  86. """
  87. The inputs in :class:`~aphrodite.AphroditeEngine` before they are
  88. passed to the model executor.
  89. This specifies the data required for decoder-only models.
  90. """
  91. prompt_token_ids: List[int]
  92. """The token IDs of the prompt."""
  93. prompt: NotRequired[Optional[str]]
  94. """
  95. The original prompt text corresponding to the token IDs, if available.
  96. """
  97. multi_modal_data: NotRequired[Optional["MultiModalDataDict"]]
  98. """
  99. Optional multi-modal data to pass to the model,
  100. if the model supports it.
  101. """
  102. negative_prompt_token_ids: NotRequired[Optional[List[int]]]
  103. negative_prompt: NotRequired[Optional[str]]
  104. """
  105. Optional negative prompt data to pass to the model.
  106. """
  107. class EncoderDecoderLLMInputs(LLMInputs):
  108. """
  109. The inputs in :class:`~aphrodite.AphroditeEngine` before they are
  110. passed to the model executor.
  111. This specifies the required data for encoder-decoder models.
  112. """
  113. encoder_prompt_token_ids: List[int]
  114. """The token IDs of the encoder prompt."""
  115. encoder_prompt: NotRequired[Optional[str]]
  116. """
  117. The original encoder prompt text corresponding to the token IDs, if
  118. available.
  119. """
  120. encoder_negative_prompt_token_ids: NotRequired[Optional[List[int]]]
  121. encoder_negative_prompt: NotRequired[Optional[str]]
  122. """
  123. Optional negative prompt data to pass to the model.
  124. """
  125. _T1 = TypeVar("_T1",
  126. bound=SingletonPromptInputs,
  127. default=SingletonPromptInputs)
  128. _T2 = TypeVar("_T2",
  129. bound=SingletonPromptInputs,
  130. default=SingletonPromptInputs)
  131. def build_explicit_enc_dec_prompt(
  132. encoder_prompt: _T1,
  133. decoder_prompt: Optional[_T2],
  134. ) -> ExplicitEncoderDecoderPrompt[_T1, _T2]:
  135. return ExplicitEncoderDecoderPrompt(encoder_prompt=encoder_prompt,
  136. decoder_prompt=decoder_prompt)
  137. def zip_enc_dec_prompts(
  138. enc_prompts: Iterable[_T1],
  139. dec_prompts: Iterable[Optional[_T2]],
  140. ) -> List[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
  141. """
  142. Zip encoder and decoder prompts together into a list of
  143. :class:`ExplicitEncoderDecoderPrompt` instances.
  144. """
  145. return [
  146. build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt)
  147. for (encoder_prompt, decoder_prompt) in zip(enc_prompts, dec_prompts)
  148. ]
  149. def to_enc_dec_tuple_list(
  150. enc_dec_prompts: Iterable[ExplicitEncoderDecoderPrompt[_T1, _T2]],
  151. ) -> List[Tuple[_T1, Optional[_T2]]]:
  152. return [(enc_dec_prompt["encoder_prompt"],
  153. enc_dec_prompt["decoder_prompt"])
  154. for enc_dec_prompt in enc_dec_prompts]