瀏覽代碼

fix ops in gptq and awq

AlpinDale 10 月之前
父節點
當前提交
483c95a2f8
共有 2 個文件被更改,包括 4 次插入和 2 次删除
  1. +2 -1
      aphrodite/quantization/awq.py
  2. +2 -1
      aphrodite/quantization/gptq.py

+ 2 - 1
aphrodite/quantization/awq.py

@@ -9,6 +9,7 @@ from aphrodite.modeling.layers.fused_moe import (moe_align_block_size,
 from aphrodite.modeling.layers.linear import (LinearMethodBase,
                                               set_weight_attrs)
 from aphrodite.quantization.base_config import (QuantizationConfig)
+from aphrodite._C import ops as _C_ops
 
 HAS_QUANTS = False
 with suppress(ImportError):
@@ -222,7 +223,7 @@ class AWQLinearMethod(LinearMethodBase):
         out = torch.empty((gate_up.shape[:-1] + (gate_up.shape[-1] // 2, )),
                           dtype=x.dtype,
                           device=x.device)
-        ops.silu_and_mul(out, gate_up)
+        _C_ops.silu_and_mul(out, gate_up)
 
         out = ops.awq_group_gemm(out, w2["qweight"], w2["scales"],
                                  w2["qzeros"], topk_weights, sorted_token_ids,

+ 2 - 1
aphrodite/quantization/gptq.py

@@ -12,6 +12,7 @@ from aphrodite.modeling.layers.fused_moe import (fused_moe, fused_topk,
 from aphrodite.modeling.layers.linear import LinearMethodBase, set_weight_attrs
 from aphrodite.quantization.base_config import (
     QuantizationConfig, )
+from aphrodite._C import ops as _C_ops
 
 HAS_QUANTS = False
 with suppress(ImportError):
@@ -321,7 +322,7 @@ class GPTQLinearMethod(LinearMethodBase):
             dtype=x.dtype,
             device=x.device,
         )
-        ops.silu_and_mul(out, gate_up)
+        _C_ops.silu_and_mul(out, gate_up)
 
         out = ops.group_gptq_gemm(
             out,