__init__.py

from typing import Type

from aphrodite.quantization.aqlm import AQLMConfig
from aphrodite.quantization.awq import AWQConfig
from aphrodite.quantization.awq_marlin import AWQMarlinConfig
from aphrodite.quantization.base_config import QuantizationConfig
from aphrodite.quantization.bitsandbytes import BitsAndBytesConfig
from aphrodite.quantization.compressed_tensors.compressed_tensors import (
    CompressedTensorsConfig)
from aphrodite.quantization.deepspeedfp import DeepSpeedFPConfig
from aphrodite.quantization.eetq import EETQConfig
from aphrodite.quantization.experts_int8 import ExpertsInt8Config
from aphrodite.quantization.fbgemm_fp8 import FBGEMMFp8Config
from aphrodite.quantization.fp6 import QuantLLMFPConfig
from aphrodite.quantization.fp8 import Fp8Config
from aphrodite.quantization.gguf import GGUFConfig
from aphrodite.quantization.gptq import GPTQConfig
from aphrodite.quantization.gptq_marlin import GPTQMarlinConfig
from aphrodite.quantization.gptq_marlin_24 import GPTQMarlin24Config
from aphrodite.quantization.hqq_marlin import HQQMarlinConfig
from aphrodite.quantization.marlin import MarlinConfig
from aphrodite.quantization.modelopt import ModelOptFp8Config
from aphrodite.quantization.neuron_quant import NeuronQuantConfig
from aphrodite.quantization.qqq import QQQConfig
from aphrodite.quantization.quip import QuipConfig
from aphrodite.quantization.squeezellm import SqueezeLLMConfig
from aphrodite.quantization.tpu_int8 import Int8TpuConfig
QUANTIZATION_METHODS = {
    "aqlm": AQLMConfig,
    "awq": AWQConfig,
    "deepspeedfp": DeepSpeedFPConfig,
    "tpu_int8": Int8TpuConfig,
    "eetq": EETQConfig,
    "fp8": Fp8Config,
    "quant_llm": QuantLLMFPConfig,
    "fbgemm_fp8": FBGEMMFp8Config,
    "modelopt": ModelOptFp8Config,
    "gguf": GGUFConfig,
    # The order of the GPTQ-family entries matters: config.py iterates over
    # this dict and calls override_quantization_method(..) on each entry in
    # turn, so the marlin variants must come before "gptq" (see the sketch
    # after this dict).
  40. "marlin": MarlinConfig,
  41. "gptq_marlin_24": GPTQMarlin24Config,
  42. "gptq_marlin": GPTQMarlinConfig,
  43. "awq_marlin": AWQMarlinConfig,
  44. "gptq": GPTQConfig,
  45. "quip": QuipConfig,
  46. "squeezellm": SqueezeLLMConfig,
  47. "compressed-tensors": CompressedTensorsConfig,
  48. "bitsandbytes": BitsAndBytesConfig,
  49. "qqq": QQQConfig,
  50. "hqq": HQQMarlinConfig,
  51. "experts_int8": ExpertsInt8Config,
  52. # the quant_llm methods
  53. "fp2": QuantLLMFPConfig,
  54. "fp3": QuantLLMFPConfig,
  55. "fp4": QuantLLMFPConfig,
  56. "fp5": QuantLLMFPConfig,
  57. "fp6": QuantLLMFPConfig,
  58. "fp7": QuantLLMFPConfig,
  59. "neuron_quant": NeuronQuantConfig,
  60. }
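
A rough sketch of the override pass the comment above refers to, assuming the Optional[str]-returning override_quantization_method(..) classmethod from base_config; resolve_quant_method is a hypothetical name for illustration, not a function in config.py:

def resolve_quant_method(hf_quant_cfg, user_quant):
    # Illustration only: each config class may "upgrade" the detected method,
    # e.g. a plain "gptq" checkpoint can be claimed by GPTQMarlinConfig, which
    # is why the marlin variants are registered before the vanilla "gptq" key.
    for name, method in QUANTIZATION_METHODS.items():
        override = method.override_quantization_method(hf_quant_cfg,
                                                       user_quant)
        if override is not None:
            return override
    return user_quant
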
def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
    """Return the config class registered under ``quantization``."""
    if quantization not in QUANTIZATION_METHODS:
        raise ValueError(f"Invalid quantization method: {quantization}")
    return QUANTIZATION_METHODS[quantization]
__all__ = [
    "QuantizationConfig",
    "get_quantization_config",
    "QUANTIZATION_METHODS",
]
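
A minimal usage sketch (assuming the package is importable as aphrodite.quantization, matching the import paths above):

from aphrodite.quantization import (QUANTIZATION_METHODS,
                                    get_quantization_config)

gptq_cls = get_quantization_config("gptq")  # -> GPTQConfig
assert gptq_cls is QUANTIZATION_METHODS["gptq"]

# Unregistered names fail loudly rather than returning None.
try:
    get_quantization_config("no-such-method")
except ValueError as err:
    print(err)  # Invalid quantization method: no-such-method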