__init__.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. from typing import Type
  2. from loguru import logger
  3. from aphrodite.quantization.aqlm import AQLMConfig
  4. from aphrodite.quantization.awq import AWQConfig
  5. from aphrodite.quantization.base_config import QuantizationConfig
  6. from aphrodite.quantization.bitsandbytes import BitsandBytesConfig
  7. from aphrodite.quantization.eetq import EETQConfig
  8. from aphrodite.quantization.exl2 import Exl2Config
  9. from aphrodite.quantization.fp8 import Fp8Config
  10. from aphrodite.quantization.gguf import GGUFConfig
  11. from aphrodite.quantization.gptq import GPTQConfig
  12. from aphrodite.quantization.marlin import MarlinConfig
  13. from aphrodite.quantization.quip import QuipConfig
  14. from aphrodite.quantization.squeezellm import SqueezeLLMConfig
  15. try:
  16. from aphrodite._quant_C import quant_ops # noqa: F401
  17. except ImportError:
  18. logger.warning("The Quantization Kernels are not installed. "
  19. "To use quantization with Aphrodite, make sure "
  20. "you've exported the `APHRODITE_INSTALL_QUANT_KERNELS=1`"
  21. "environment variable during the compilation process.")
  22. QUANTIZATION_METHODS = {
  23. "aqlm": AQLMConfig,
  24. "awq": AWQConfig,
  25. "bnb": BitsandBytesConfig,
  26. "eetq": EETQConfig,
  27. "exl2": Exl2Config,
  28. "fp8": Fp8Config,
  29. "gguf": GGUFConfig,
  30. "gptq": GPTQConfig,
  31. "quip": QuipConfig,
  32. "squeezellm": SqueezeLLMConfig,
  33. "marlin": MarlinConfig,
  34. }
  35. def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
  36. if quantization not in QUANTIZATION_METHODS:
  37. raise ValueError(f"Invalid quantization method: {quantization}")
  38. return QUANTIZATION_METHODS[quantization]
  39. __all__ = [
  40. "QuantizationConfig",
  41. "get_quantization_config",
  42. "QUANTIZATION_METHODS",
  43. ]