#pragma once

#include "attention_generic.cuh"

#include <stdint.h>
#ifdef ENABLE_FP8_E5M2
#include <cuda_fp8.h>
#endif

namespace aphrodite {
#if defined(ENABLE_FP8_E5M2) || defined(ENABLE_FP8_E4M3)
// FP8 vector types for quantization of the KV cache.
//
// Each explicit specialization maps a pack of N uint8_t elements to a storage
// type that is exactly N bytes wide, exposed as the nested alias `Type`:
//
//   N = 1 -> uint8_t   (1 byte)
//   N = 2 -> uint16_t  (2 bytes)
//   N = 4 -> uint32_t  (4 bytes)
//   N = 8 -> uint2     (8 bytes, CUDA built-in pair of unsigned ints)
//
// NOTE(review): the primary `Vec` template is presumably declared in
// attention_generic.cuh as `template <typename T, int VEC_SIZE> struct Vec`;
// the `<uint8_t, N>` arguments were chosen to match the byte width of each
// `Type` -- confirm against that header.

// One fp8 element, stored as a single byte.
template <>
struct Vec<uint8_t, 1> {
  using Type = uint8_t;
};

// Two fp8 elements packed into one 16-bit word.
template <>
struct Vec<uint8_t, 2> {
  using Type = uint16_t;
};

// Four fp8 elements packed into one 32-bit word.
template <>
struct Vec<uint8_t, 4> {
  using Type = uint32_t;
};

// Eight fp8 elements packed into a pair of 32-bit words.
template <>
struct Vec<uint8_t, 8> {
  using Type = uint2;
};
#endif  // ENABLE_FP8_E5M2 || ENABLE_FP8_E4M3

}  // namespace aphrodite