#pragma once

// FP8 KV-cache support types: the enum that selects the KV-cache storage
// format, plus `Vec` specializations for packed 8-bit storage elements.
//
// The primary `Vec` template is not defined in this header; it is expected to
// be provided by attention_generic.cuh.
#include "attention_generic.cuh"

#include <stdint.h>
#ifdef ENABLE_FP8
#ifndef USE_ROCM
#include <cuda_fp8.h>
#endif  // USE_ROCM
#endif  // ENABLE_FP8

namespace aphrodite {

// Selects how the KV cache is stored. The numeric values are explicit, so
// they must stay stable for any caller that passes them as integers.
enum class Fp8KVCacheDataType {
  // NOTE(review): presumably "no explicit FP8 format requested" -- confirm
  // the exact meaning against the call sites.
  kAuto = 0,
  // FP8 E4M3 format.
  kFp8E4M3 = 1,
  // FP8 E5M2 format.
  kFp8E5M2 = 2,
};

// fp8 vector types for quantization of kv cache
//
// Each specialization maps a pack of N 8-bit elements onto a single unsigned
// type that is exactly N bytes wide, for N = 1, 2, 4 and 8.

// One 8-bit element: stored as a plain byte.
template <>
struct Vec<uint8_t, 1> {
  using Type = uint8_t;
};

// Two 8-bit elements packed into 16 bits.
template <>
struct Vec<uint8_t, 2> {
  using Type = uint16_t;
};

// Four 8-bit elements packed into 32 bits.
template <>
struct Vec<uint8_t, 4> {
  using Type = uint32_t;
};

// Eight 8-bit elements packed into a uint2 (two 32-bit words).
template <>
struct Vec<uint8_t, 8> {
  using Type = uint2;
};

}  // namespace aphrodite