config.py

import contextlib
import enum
import json
from pathlib import Path
from typing import Any, Dict, Optional, Type, Union

import huggingface_hub
from huggingface_hub import (file_exists, hf_hub_download,
                             try_to_load_from_cache)
from loguru import logger
from transformers import GenerationConfig, PretrainedConfig
from transformers.models.auto.image_processing_auto import (
    get_image_processor_config)
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES)
from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME

import aphrodite.common.envs as envs
from aphrodite.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                                  EAGLEConfig,
                                                  InternVLChatConfig,
                                                  JAISConfig, MedusaConfig,
                                                  MLPSpeculatorConfig,
                                                  MPTConfig, Qwen2VLConfig,
                                                  RWConfig, UltravoxConfig)
from aphrodite.transformers_utils.utils import check_gguf_file

APHRODITE_USE_MODELSCOPE = envs.APHRODITE_USE_MODELSCOPE

if APHRODITE_USE_MODELSCOPE:
    from modelscope import AutoConfig
else:
    from transformers import AutoConfig

MISTRAL_CONFIG_NAME = "params.json"

_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    "chatglm": ChatGLMConfig,
    "dbrx": DbrxConfig,
    "mpt": MPTConfig,
    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
    "jais": JAISConfig,
    "mlp_speculator": MLPSpeculatorConfig,
    "medusa": MedusaConfig,
    "internvl_chat": InternVLChatConfig,
    "ultravox": UltravoxConfig,
    "eagle": EAGLEConfig,
    "qwen2_vl": Qwen2VLConfig,
}

# Register the custom config classes with AutoConfig; suppress the ValueError
# that transformers raises when a model type is already registered.
for name, cls in _CONFIG_REGISTRY.items():
    with contextlib.suppress(ValueError):
        AutoConfig.register(name, cls)
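
# Illustrative effect (checkpoint name is hypothetical): after registration,
# AutoConfig.from_pretrained on a repo whose config.json reports
# model_type == "mpt" resolves to MPTConfig rather than the stock
# transformers implementation.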

class ConfigFormat(str, enum.Enum):
    AUTO = "auto"
    HF = "hf"
    MISTRAL = "mistral"
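
# Because ConfigFormat mixes in str, members compare equal to their values:
# ConfigFormat.HF == "hf" and ConfigFormat("mistral") is ConfigFormat.MISTRAL
# both hold, so plain strings can be compared against members directly.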

def file_or_path_exists(model: Union[str, Path], config_name, revision,
                        token) -> bool:
    if Path(model).exists():
        return (Path(model) / config_name).is_file()

    # Offline mode support: check if the config file is cached already
    cached_filepath = try_to_load_from_cache(repo_id=model,
                                             filename=config_name,
                                             revision=revision)
    if isinstance(cached_filepath, str):
        # The config file exists in cache; we can continue trying to load
        return True

    # NB: file_exists will only check for the existence of the config file on
    # hf_hub. This will fail in offline mode.
    try:
        return file_exists(model, config_name, revision=revision, token=token)
    except huggingface_hub.errors.OfflineModeIsEnabled:
        # Don't raise in offline mode; all we know is that we don't have this
        # file cached.
        return False
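
# Illustrative call (repo id is hypothetical):
#
#     file_or_path_exists("org/model", HF_CONFIG_NAME, revision=None,
#                         token=None)
#
# returns True if config.json exists on local disk, in the Hugging Face
# cache, or on the Hub, and returns False rather than raising when offline.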

def get_config(
    model: Union[str, Path],
    trust_remote_code: bool,
    revision: Optional[str] = None,
    code_revision: Optional[str] = None,
    rope_scaling: Optional[dict] = None,
    rope_theta: Optional[float] = None,
    config_format: ConfigFormat = ConfigFormat.AUTO,
    **kwargs,
) -> PretrainedConfig:
    # Separate model folder from file path for GGUF models
    is_gguf = check_gguf_file(model)
    if is_gguf:
        kwargs["gguf_file"] = Path(model).name
        model = Path(model).parent

    if config_format == ConfigFormat.AUTO:
        if is_gguf or file_or_path_exists(model,
                                          HF_CONFIG_NAME,
                                          revision=revision,
                                          token=kwargs.get("token")):
            config_format = ConfigFormat.HF
        elif file_or_path_exists(model,
                                 MISTRAL_CONFIG_NAME,
                                 revision=revision,
                                 token=kwargs.get("token")):
            config_format = ConfigFormat.MISTRAL
        else:
            # If we're in offline mode and found no valid config format, then
            # raise an offline mode error to indicate to the user that they
            # don't have files cached and may need to go online.
            # This is conveniently triggered by calling file_exists().
            file_exists(model,
                        HF_CONFIG_NAME,
                        revision=revision,
                        token=kwargs.get("token"))

            raise ValueError(f"No supported config format found in {model}")

    if config_format == ConfigFormat.HF:
        config_dict, _ = PretrainedConfig.get_config_dict(
            model, revision=revision, code_revision=code_revision, **kwargs)

        # Use custom model class if it's in our registry
        model_type = config_dict.get("model_type")
        if model_type in _CONFIG_REGISTRY:
            config_class = _CONFIG_REGISTRY[model_type]
            config = config_class.from_pretrained(model,
                                                  revision=revision,
                                                  code_revision=code_revision)
        else:
            try:
                config = AutoConfig.from_pretrained(
                    model,
                    trust_remote_code=trust_remote_code,
                    revision=revision,
                    code_revision=code_revision,
                    **kwargs,
                )
            except ValueError as e:
                if (not trust_remote_code
                        and "requires you to execute the configuration file"
                        in str(e)):
                    err_msg = (
                        "Failed to load the model config. If the model "
                        "is a custom model not yet available in the "
                        "HuggingFace transformers library, consider setting "
                        "`trust_remote_code=True` in LLM or using the "
                        "`--trust-remote-code` flag in the CLI.")
                    raise RuntimeError(err_msg) from e
                else:
                    raise

    elif config_format == ConfigFormat.MISTRAL:
        config = load_params_config(model, revision)
    else:
        raise ValueError(f"Unsupported config format: {config_format}")

    # Special architecture mapping check for GGUF models
    if is_gguf:
        if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
            raise RuntimeError(
                f"Can't get gguf config for {config.model_type}.")
        model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
        config.update({"architectures": [model_type]})

    for key, value in [
        ("rope_scaling", rope_scaling),
        ("rope_theta", rope_theta),
    ]:
        if value is not None:
            logger.info(
                f"Updating {key} from {getattr(config, key, None)} to {value}")
            config.update({key: value})

    return config
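
# Illustrative usage (model id and override value are hypothetical):
#
#     config = get_config("org/model", trust_remote_code=False,
#                         rope_theta=1000000.0)
#
# This auto-detects the HF config.json (or a Mistral-format params.json),
# loads it, and overrides rope_theta on the returned PretrainedConfig.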

def load_params_config(model, revision) -> PretrainedConfig:
    """Load a params.json config, used when loading models in Mistral
    format."""
    config_file_name = "params.json"

    config_path = Path(model) / config_file_name
    if not config_path.is_file():
        config_path = Path(
            hf_hub_download(model, config_file_name, revision=revision))

    with open(config_path, "r") as file:
        config_dict = json.load(file)

    config_mapping = {
        "dim": "hidden_size",
        "norm_eps": "rms_norm_eps",
        "n_kv_heads": "num_key_value_heads",
        "n_layers": "num_hidden_layers",
        "n_heads": "num_attention_heads",
        "hidden_dim": "intermediate_size",
    }

    def recurse_elems(elem: Any):
        if isinstance(elem, dict):
            config_dict = {}
            for key, value in elem.items():
                key = config_mapping.get(key, key)
                config_dict[key] = recurse_elems(value)
            return PretrainedConfig(**config_dict)
        else:
            return elem

    config_dict["model_type"] = config_dict.get("model_type", "transformer")
    config_dict["hidden_act"] = config_dict.get("activation", "silu")
    config_dict["tie_word_embeddings"] = config_dict.get(
        "tie_embeddings", False)
    config_dict["max_seq_len"] = config_dict.get("max_seq_len", 128_000)
    config_dict["max_position_embeddings"] = config_dict.get(
        "max_position_embeddings", 128_000)

    if config_dict.get("moe") is not None:
        config_dict["architectures"] = ["MixtralForCausalLM"]
    else:
        config_dict["architectures"] = ["MistralForCausalLM"]

    if config_dict.get("vision_encoder") is not None:
        multimodal_config = config_dict.pop("vision_encoder")
        config_dict = {
            "text_config": config_dict,
            "vision_config": multimodal_config
        }
        config_dict["architectures"] = ["PixtralForConditionalGeneration"]
        config_dict["model_type"] = "pixtral"

    config = recurse_elems(config_dict)
    return config
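
# Illustrative params.json fragment (values are hypothetical):
#
#     {"dim": 4096, "n_layers": 32, "n_heads": 32, "norm_eps": 1e-5}
#
# is renamed through config_mapping to hidden_size=4096,
# num_hidden_layers=32, num_attention_heads=32, and rms_norm_eps=1e-5 on the
# resulting PretrainedConfig.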

def get_hf_image_processor_config(
    model: Union[str, Path],
    revision: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    # ModelScope does not provide an interface for image_processor
    if APHRODITE_USE_MODELSCOPE:
        return dict()
    # Separate model folder from file path for GGUF models
    if Path(model).is_file() and Path(model).suffix == ".gguf":
        model = Path(model).parent
    return get_image_processor_config(model, revision=revision, **kwargs)

def get_hf_text_config(config: PretrainedConfig):
    """Get the "sub" config relevant to the LLM for multimodal models.

    No-op for pure text models.
    """
    if hasattr(config, "text_config"):
        # The code operates under the assumption that text_config should have
        # `num_attention_heads` (among others). Assert here to fail early
        # if the transformers config doesn't align with this assumption.
        assert hasattr(config.text_config, "num_attention_heads")
        return config.text_config
    else:
        return config
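
# Illustrative behavior: for a multimodal config such as the Pixtral one
# built above, get_hf_text_config(config) returns config.text_config; for a
# plain text config it returns the config object unchanged.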

def try_get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[GenerationConfig]:
    try:
        return GenerationConfig.from_pretrained(
            model,
            revision=revision,
        )
    except OSError:  # Not found
        try:
            config = get_config(
                model,
                trust_remote_code=trust_remote_code,
                revision=revision,
            )
            return GenerationConfig.from_model_config(config)
        except OSError:  # Not found
            return None
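
# Illustrative fallback chain (model id is hypothetical):
#
#     gen_cfg = try_get_generation_config("org/model",
#                                         trust_remote_code=False)
#
# first tries the repo's generation_config.json, then derives defaults from
# the model config via GenerationConfig.from_model_config, and returns None
# if neither can be loaded.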