AlpinDale 10 months ago
commit 483c95a2f8

+ 2 - 1
aphrodite/quantization/awq.py

@@ -9,6 +9,7 @@ from aphrodite.modeling.layers.fused_moe import (moe_align_block_size,
 from aphrodite.modeling.layers.linear import (LinearMethodBase,
                                               set_weight_attrs)
 from aphrodite.quantization.base_config import (QuantizationConfig)
+from aphrodite._C import ops as _C_ops
 
 HAS_QUANTS = False
 with suppress(ImportError):
@@ -222,7 +223,7 @@ class AWQLinearMethod(LinearMethodBase):
         out = torch.empty((gate_up.shape[:-1] + (gate_up.shape[-1] // 2, )),
                           dtype=x.dtype,
                           device=x.device)
-        ops.silu_and_mul(out, gate_up)
+        _C_ops.silu_and_mul(out, gate_up)
 
         out = ops.awq_group_gemm(out, w2["qweight"], w2["scales"],
                                  w2["qzeros"], topk_weights, sorted_token_ids,

+ 2 - 1
aphrodite/quantization/gptq.py

@@ -12,6 +12,7 @@ from aphrodite.modeling.layers.fused_moe import (fused_moe, fused_topk,
 from aphrodite.modeling.layers.linear import LinearMethodBase, set_weight_attrs
 from aphrodite.quantization.base_config import (
     QuantizationConfig, )
+from aphrodite._C import ops as _C_ops
 
 HAS_QUANTS = False
 with suppress(ImportError):
@@ -321,7 +322,7 @@ class GPTQLinearMethod(LinearMethodBase):
             dtype=x.dtype,
             device=x.device,
         )
-        ops.silu_and_mul(out, gate_up)
+        _C_ops.silu_and_mul(out, gate_up)
 
         out = ops.group_gptq_gemm(
             out,