dtype_fp8.cuh 564 B

1234567891011121314151617181920212223242526272829303132333435
  1. #pragma once
  2. #include "attention_generic.cuh"
  3. #include <stdint.h>
  4. #ifdef ENABLE_FP8_E5M2
  5. #include <cuda_fp8.h>
  6. #endif
  7. namespace aphrodite {
  8. #if defined(ENABLE_FP8_E5M2) || defined(ENABLE_FP8_E4M3)
  9. // fp8 vector types for quantization of kv cache
  10. template<>
  11. struct Vec<uint8_t, 1> {
  12. using Type = uint8_t;
  13. };
  14. template<>
  15. struct Vec<uint8_t, 2> {
  16. using Type = uint16_t;
  17. };
  18. template<>
  19. struct Vec<uint8_t, 4> {
  20. using Type = uint32_t;
  21. };
  22. template<>
  23. struct Vec<uint8_t, 8> {
  24. using Type = uint2;
  25. };
  26. #endif // ENABLE_FP8_E5M2 || ENABLE_FP8_E4M3
  27. } // namespace aphrodite