dtype_int8.cuh 697 B

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #pragma once
  2. #include <stdint.h>
  3. #include "attention_generic.cuh"
  4. #include "dtype_float32.cuh"
  5. namespace aphrodite {
  6. // int8 vector types for quantization of kv cache
  7. template<>
  8. struct Vec<int8_t, 1> {
  9. using Type = int8_t;
  10. };
  11. template<>
  12. struct Vec<int8_t, 2> {
  13. using Type = int16_t;
  14. };
  15. template<>
  16. struct Vec<int8_t, 4> {
  17. using Type = int32_t;
  18. };
  19. template<>
  20. struct Vec<int8_t, 8> {
  21. using Type = int64_t;
  22. };
  23. template<>
  24. struct FloatVec<int8_t> {
  25. using Type = float;
  26. };
  27. template<>
  28. struct FloatVec<int16_t> {
  29. using Type = float2;
  30. };
  31. template<>
  32. struct FloatVec<int32_t> {
  33. using Type = Float4_;
  34. };
  35. template<>
  36. struct FloatVec<int64_t> {
  37. using Type = Float8_;
  38. };
  39. }