import contextlib
import os
from typing import Dict, Optional, Type

from loguru import logger
from transformers import GenerationConfig, PretrainedConfig

from aphrodite.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                                  JAISConfig, MedusaConfig,
                                                  MLPSpeculatorConfig,
                                                  MPTConfig, RWConfig)
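
# When set to "1", model configs are loaded via ModelScope's AutoConfig
# instead of HuggingFace transformers.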
APHRODITE_USE_MODELSCOPE = os.getenv("APHRODITE_USE_MODELSCOPE", "0") == "1"

if APHRODITE_USE_MODELSCOPE:
    from modelscope import AutoConfig
else:
    from transformers import AutoConfig

_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    "chatglm": ChatGLMConfig,
    "dbrx": DbrxConfig,
    "mpt": MPTConfig,
    "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
    "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
    "jais": JAISConfig,
    "mlp_speculator": MLPSpeculatorConfig,
    "medusa": MedusaConfig,
}
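
# Register the custom config classes with AutoConfig; the ValueError raised
# when a model type is already registered is ignored.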
for name, cls in _CONFIG_REGISTRY.items():
    with contextlib.suppress(ValueError):
        AutoConfig.register(name, cls)


def get_config(model: str,
               trust_remote_code: bool,
               revision: Optional[str] = None,
               code_revision: Optional[str] = None,
               rope_scaling: Optional[dict] = None,
               rope_theta: Optional[float] = None) -> PretrainedConfig:
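    """Load a model's ``PretrainedConfig``.

    Falls back to the custom config classes in ``_CONFIG_REGISTRY`` for model
    types that transformers does not handle natively, and applies any
    ``rope_scaling``/``rope_theta`` overrides on top of the loaded config.
    """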
    try:
        config = AutoConfig.from_pretrained(
            model,
            trust_remote_code=trust_remote_code,
            revision=revision,
            code_revision=code_revision)
    except ValueError as e:
        if (not trust_remote_code and
                "requires you to execute the configuration file" in str(e)):
            err_msg = (
                "Failed to load the model config. If the model is a custom "
                "model not yet available in the HuggingFace transformers "
                "library, consider setting `trust_remote_code=True` in LLM "
                "or using the `--trust-remote-code` flag in the CLI.")
            raise RuntimeError(err_msg) from e
        else:
            raise e
    if config.model_type in _CONFIG_REGISTRY:
        config_class = _CONFIG_REGISTRY[config.model_type]
        config = config_class.from_pretrained(model,
                                              revision=revision,
                                              code_revision=code_revision)
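    # Override rope_scaling / rope_theta if the caller explicitly provided
    # them.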
    for key, value in [("rope_scaling", rope_scaling),
                       ("rope_theta", rope_theta)]:
        if value is not None:
            logger.info(f"Updating {key} from "
                        f"{getattr(config, key, None)} to {value}")
            config.update({key: value})
    return config


def get_hf_text_config(config: PretrainedConfig):
    """Get the "sub" config relevant to the LLM for multimodal models.

    No-op for pure text models.
    """
    if hasattr(config, "text_config"):
        # The code operates under the assumption that text_config should have
        # `num_attention_heads` (among others). Assert here to fail early
        # if the transformers config doesn't align with this assumption.
        assert hasattr(config.text_config, "num_attention_heads")
        return config.text_config
    else:
        return config


def try_get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
) -> Optional[GenerationConfig]:
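    """Load the model's ``GenerationConfig`` if one exists.

    Falls back to deriving it from the model config; returns ``None`` if
    neither can be found.
    """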
    try:
        return GenerationConfig.from_pretrained(
            model,
            revision=revision,
        )
    except OSError:  # Not found
        try:
            config = get_config(
                model,
                trust_remote_code=trust_remote_code,
                revision=revision,
            )
            return GenerationConfig.from_model_config(config)
        except OSError:  # Not found
            return None