qdq_util.cuh 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. /*
  2. Copied from https://github.com/turboderp/exllamav2
  3. */
  4. #ifndef _qdq_util_cuh
  5. #define _qdq_util_cuh
  6. namespace aphrodite {
  7. namespace gptq {
  8. union half2_uint32
  9. {
  10. uint32_t as_uint32;
  11. half2 as_half2;
  12. __device__ half2_uint32(uint32_t val) : as_uint32(val) {}
  13. __device__ half2_uint32(half2 val) : as_half2(val) {}
  14. };
  15. union half_uint16
  16. {
  17. uint16_t as_uint16;
  18. half as_half;
  19. __device__ half_uint16(uint16_t val) : as_uint16(val) {}
  20. __device__ half_uint16(half val) : as_half(val) {}
  21. };
  22. // Max_scale premultiplied by 1/256
  23. __forceinline__ __device__ half dq_scale(const int qs, const half max_scale)
  24. {
  25. int qs_i = qs + 1;
  26. half qs_h = __int2half_rn(qs_i * qs_i);
  27. qs_h = __hmul(qs_h, max_scale);
  28. return qs_h;
  29. }
  30. __forceinline__ __device__ half dq(const int q, const int qzero, const half scale)
  31. {
  32. return __hmul(__int2half_rn(q - qzero), scale);
  33. }
  34. __forceinline__ __device__ half dq_ns(const int q, const int qzero)
  35. {
  36. //return __hsub(__int2half_rn(q), __int2half_rn(qzero));
  37. return __int2half_rn(q - qzero);
  38. }
  39. __forceinline__ __device__ int exb(const uint32_t q, const int shift, const int mask)
  40. {
  41. return (int)((q >> shift) & mask);
  42. }
  43. __forceinline__ __device__ int exb(const uint32_t q1, const uint32_t q0, const int shift, const int mask)
  44. {
  45. return (int)(__funnelshift_rc(q0, q1, shift) & mask);
  46. }
  47. } // namespace gptq
  48. } // namespace aphrodite
  49. #endif