123456789101112131415161718192021222324252627282930313233343536 |
- """Utilities for selecting and loading models."""
- import contextlib
- from typing import Tuple, Type
- import torch
- from torch import nn
- from aphrodite.common.config import ModelConfig
- from aphrodite.modeling.models import ModelRegistry
@contextlib.contextmanager
def set_default_torch_dtype(dtype: torch.dtype):
    """Temporarily set the default torch dtype to ``dtype``.

    The dtype that was the default on entry is restored when the ``with``
    block exits, whether it finishes normally or raises.

    Args:
        dtype: The torch dtype to install as the default inside the block.

    Yields:
        None.
    """
    previous_dtype = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        # Restore even on error: the default dtype is process-global state,
        # so a failure inside the block must not leak the temporary dtype.
        torch.set_default_dtype(previous_dtype)
def get_model_architecture(
        model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
    """Resolve the model class for the architectures listed in the HF config.

    Args:
        model_config: Configuration whose ``hf_config.architectures`` and
            ``quantization`` fields are read.

    Returns:
        The result of ``ModelRegistry.resolve_model_cls`` for the selected
        architecture list (annotated as a model class and a name string).
    """
    # The attribute may be absent or explicitly None; treat both as "no
    # architectures" so the membership test below cannot fail on None.
    architectures = getattr(model_config.hf_config, "architectures",
                            None) or []
    # Special handling for quantized Mixtral.
    # FIXME: This is a temporary hack.
    mixtral_supported = ("fp8", "compressed-tensors")
    quantization = model_config.quantization
    uses_quant_mixtral = (quantization is not None
                          and quantization not in mixtral_supported
                          and "MixtralForCausalLM" in architectures)
    if uses_quant_mixtral:
        # Quantization methods outside the supported list are routed to the
        # dedicated quantized Mixtral implementation.
        architectures = ["QuantMixtralForCausalLM"]
    return ModelRegistry.resolve_model_cls(architectures)
def get_architecture_class_name(model_config: ModelConfig) -> str:
    """Return the second element of ``get_model_architecture``'s result.

    That element is annotated as the architecture name string.
    """
    resolved = get_model_architecture(model_config)
    return resolved[1]
|