config.py

from typing import Optional

from transformers import AutoConfig, PretrainedConfig
from transformers.models.auto.configuration_auto import CONFIG_MAPPING

from aphrodite.transformers_utils.configs import (BaiChuanConfig, DbrxConfig,
                                                  ChatGLMConfig, MPTConfig,
                                                  QWenConfig, RWConfig)
from aphrodite.common.gguf import GGUFReader

_CONFIG_REGISTRY = {
    "baichuan": BaiChuanConfig,
    "chatglm": ChatGLMConfig,
    "dbrx": DbrxConfig,
    "mpt": MPTConfig,
    "qwen": QWenConfig,
    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
}


def extract_gguf_config(checkpoint):
    """Build a HuggingFace config from the metadata of a GGUF checkpoint."""
    result = GGUFReader(checkpoint)
    architecture = result.fields['general.architecture']
    architecture = str(bytes(architecture.parts[architecture.data[0]]),
                       encoding='utf-8')
    # Only llama-family GGUF checkpoints are supported so far.
    if architecture != "llama":
        raise RuntimeError(f"Unsupported architecture {architecture}, "
                           "only llama is supported.")

    # Reconstruct the model config from the GGUF metadata fields.
    vocab_size = len(result.fields['tokenizer.ggml.token_type'].data)
    context_length = int(result.fields['llama.context_length'].parts[-1])
    n_layer = int(result.fields['llama.block_count'].parts[-1])
    n_head = int(result.fields['llama.attention.head_count'].parts[-1])
    n_local_heads = int(
        result.fields['llama.attention.head_count_kv'].parts[-1])
    intermediate_size = int(
        result.fields['llama.feed_forward_length'].parts[-1])
    norm_eps = float(
        result.fields['llama.attention.layer_norm_rms_epsilon'].parts[-1])
    dim = int(result.fields['llama.embedding_length'].parts[-1])
    # An expert count in the metadata marks a MoE (Mixtral-style) checkpoint.
    if 'llama.expert_count' in result.fields:
        arch = "MixtralForCausalLM"
        name = "mixtral"
    else:
        arch = "LlamaForCausalLM"
        name = "llama"
    model_config = {
        "architectures": [arch],
        "bos_token_id": 1,
        "eos_token_id": 2,
        "hidden_act": "silu",
        "hidden_size": dim,
        "intermediate_size": intermediate_size,
        "max_position_embeddings": context_length,
        "model_type": name,
        "num_attention_heads": n_head,
        "num_hidden_layers": n_layer,
        "num_key_value_heads": n_local_heads,
        "rms_norm_eps": norm_eps,
        "torch_dtype": "float16",
        "vocab_size": vocab_size
    }
    if 'llama.rope.freq_base' in result.fields:
        model_config['rope_theta'] = float(
            result.fields['llama.rope.freq_base'].parts[-1])
    if 'llama.expert_count' in result.fields:
        model_config['num_local_experts'] = int(
            result.fields['llama.expert_count'].parts[-1])
        model_config['num_experts_per_tok'] = int(
            result.fields['llama.expert_used_count'].parts[-1])
    if name in _CONFIG_REGISTRY:
        config_class = _CONFIG_REGISTRY[name]
    else:
        config_class = CONFIG_MAPPING[name]
    hf_config = config_class.from_dict(model_config)
    return hf_config
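
# Illustrative usage, assuming a local llama-family GGUF file (the path below
# is hypothetical). The returned object is a regular HuggingFace config, so
# the usual attributes are available:
#
#   hf_config = extract_gguf_config("/models/llama-2-7b.Q4_K_M.gguf")
#   hf_config.model_type            # "llama", or "mixtral" for MoE checkpoints
#   hf_config.num_key_value_heads   # taken from llama.attention.head_count_kv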


def get_config(model: str,
               trust_remote_code: bool,
               revision: Optional[str] = None,
               code_revision: Optional[str] = None) -> PretrainedConfig:
    # GGUF checkpoints carry their own metadata, so bypass AutoConfig.
    if model.endswith("gguf"):
        return extract_gguf_config(model)
    try:
        config = AutoConfig.from_pretrained(
            model,
            trust_remote_code=trust_remote_code,
            revision=revision,
            code_revision=code_revision)
    except ValueError as e:
        if (not trust_remote_code and
                "requires you to execute the configuration file" in str(e)):
            err_msg = (
                "Failed to load the model config. If the model is a custom "
                "model not yet available in the HuggingFace transformers "
                "library, consider setting `trust_remote_code=True` in LLM "
                "or using the `--trust-remote-code` flag in the CLI.")
            raise RuntimeError(err_msg) from e
        else:
            raise e
    if config.model_type in _CONFIG_REGISTRY:
        config_class = _CONFIG_REGISTRY[config.model_type]
        config = config_class.from_pretrained(model,
                                              revision=revision,
                                              code_revision=code_revision)
    return config
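
# Illustrative dispatch, assuming the path/ids below exist locally or on the
# HuggingFace Hub (they are examples only):
#
#   get_config("/models/llama-2-7b.Q4_K_M.gguf", trust_remote_code=False)
#       -> routed to extract_gguf_config()
#   get_config("mistralai/Mistral-7B-v0.1", trust_remote_code=False)
#       -> AutoConfig.from_pretrained(), then swapped for a class from
#          _CONFIG_REGISTRY when model_type matches (e.g. "mpt", "qwen")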


def get_hf_text_config(config: PretrainedConfig):
    """Get the `sub` config relevant to multimodal models.
    No-op for text models.
    """
    if hasattr(config, "text_config"):
        # The code operates under the assumption that
        # text_config should have `num_attention_heads`
        # (among others). Assert here to fail early
        # if transformer config doesn't align with
        # the assumption.
        assert hasattr(config.text_config, "num_attention_heads")
        return config.text_config
    else:
        return config
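

if __name__ == "__main__":
    # Illustrative smoke test, assuming network access to the HuggingFace Hub;
    # the model ids are examples, not requirements of this module. For a
    # multimodal model such as llava, the language-model settings live under
    # `text_config`; for plain text models the config is returned unchanged.
    cfg = get_config("facebook/opt-125m", trust_remote_code=False)
    assert get_hf_text_config(cfg) is cfg

    llava_cfg = get_config("llava-hf/llava-1.5-7b-hf", trust_remote_code=False)
    print(get_hf_text_config(llava_cfg).num_attention_heads)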