dtype_fp8.cuh 451 B

12345678910111213141516171819202122232425262728293031
  1. #pragma once
  2. #include "attention_generic.cuh"
  3. #include <stdint.h>
  4. #include <cuda_fp8.h>
  5. namespace aphrodite {
  6. // FP8 vector types for quantization of KV Cache
  // One-element uint8_t vector: the packed storage type is a single byte.
  // (The surrounding file comment says these uint8_t vectors are used for
  // FP8 quantization of the KV cache.)
  template <>
  struct Vec<uint8_t, 1> {
    typedef uint8_t Type;
  };
  // Two-element uint8_t vector: both bytes are packed into one 16-bit
  // unsigned integer.
  template <>
  struct Vec<uint8_t, 2> {
    typedef uint16_t Type;
  };
  // Four-element uint8_t vector: the four bytes are packed into one 32-bit
  // unsigned integer.
  template <>
  struct Vec<uint8_t, 4> {
    typedef uint32_t Type;
  };
  // Eight-element uint8_t vector: stored as CUDA's built-in uint2
  // (two 32-bit unsigned components, eight bytes in total).
  template <>
  struct Vec<uint8_t, 8> {
    typedef uint2 Type;
  };
  23. } // namespace aphrodite