"""Registry of supported quantization methods.

Maps each user-facing quantization method name (e.g. ``"awq"``, ``"gptq"``)
to its ``QuantizationConfig`` subclass.
"""
from typing import Dict, Type

from aphrodite.quantization.aqlm import AQLMConfig
from aphrodite.quantization.autoquant import AutoQuantConfig
from aphrodite.quantization.awq import AWQConfig
from aphrodite.quantization.awq_marlin import AWQMarlinConfig
from aphrodite.quantization.base_config import QuantizationConfig
from aphrodite.quantization.bitsandbytes import BitsAndBytesConfig
from aphrodite.quantization.compressed_tensors.compressed_tensors import \
    CompressedTensorsConfig
from aphrodite.quantization.deepspeedfp import DeepSpeedFPConfig
from aphrodite.quantization.eetq import EETQConfig
from aphrodite.quantization.exl2 import Exl2Config
from aphrodite.quantization.fbgemm_fp8 import FBGEMMFp8Config
from aphrodite.quantization.fp8 import Fp8Config
from aphrodite.quantization.gguf import GGUFConfig
from aphrodite.quantization.gptq import GPTQConfig
from aphrodite.quantization.gptq_marlin import GPTQMarlinConfig
from aphrodite.quantization.gptq_marlin_24 import GPTQMarlin24Config
from aphrodite.quantization.marlin import MarlinConfig
from aphrodite.quantization.quip import QuipConfig
from aphrodite.quantization.squeezellm import SqueezeLLMConfig
  22. QUANTIZATION_METHODS = {
  23. "aqlm": AQLMConfig,
  24. "awq": AWQConfig,
  25. "autoquant": AutoQuantConfig,
  26. "deepspeedfp": DeepSpeedFPConfig,
  27. "eetq": EETQConfig,
  28. "exl2": Exl2Config,
  29. "fp8": Fp8Config,
  30. "fbgemm_fp8": FBGEMMFp8Config,
  31. "gguf": GGUFConfig,
  32. # The order of gptq methods is important for config.py iteration over
  33. # override_quantization_method(..)
  34. "marlin": MarlinConfig,
  35. "gptq_marlin_24": GPTQMarlin24Config,
  36. "gptq_marlin": GPTQMarlinConfig,
  37. "awq_marlin": AWQMarlinConfig,
  38. "gptq": GPTQConfig,
  39. "quip": QuipConfig,
  40. "squeezellm": SqueezeLLMConfig,
  41. "compressed-tensors": CompressedTensorsConfig,
  42. "bitsandbytes": BitsAndBytesConfig,
  43. }
  44. def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
  45. if quantization not in QUANTIZATION_METHODS:
  46. raise ValueError(f"Invalid quantization method: {quantization}")
  47. return QUANTIZATION_METHODS[quantization]
  48. __all__ = [
  49. "QuantizationConfig",
  50. "get_quantization_config",
  51. "QUANTIZATION_METHODS",
  52. ]