#pragma once

#include "attention_generic.cuh"

// NOTE(review): the targets of the two system includes were missing from the
// text this file was recovered from. <stdint.h> is required for the
// fixed-width integer types used below; <cuda_fp8.h> is the presumed second
// header (FP8 support) -- confirm against the build before merging.
#include <stdint.h>
#include <cuda_fp8.h>

namespace aphrodite {

// FP8 vector types for quantization of KV Cache.
//
// FP8 values are held as raw bytes (uint8_t). Each specialization below maps
// a pack of N such bytes onto a single unsigned type that is exactly N bytes
// wide, so the whole pack can be moved as one value.
//
// NOTE(review): the template argument lists <uint8_t, N> were reconstructed
// from the width of each `Type` alias (1, 2, 4 and 8 bytes); the primary
// `Vec` template is presumably declared in attention_generic.cuh -- verify
// that its parameters are (element type, element count).

// 1 x FP8 -> 1 byte.
template<>
struct Vec<uint8_t, 1> {
    using Type = uint8_t;
};

// 2 x FP8 -> 2 bytes.
template<>
struct Vec<uint8_t, 2> {
    using Type = uint16_t;
};

// 4 x FP8 -> 4 bytes.
template<>
struct Vec<uint8_t, 4> {
    using Type = uint32_t;
};

// 8 x FP8 -> 8 bytes (CUDA built-in vector of two 32-bit unsigned ints).
template<>
struct Vec<uint8_t, 8> {
    using Type = uint2;
};

} // namespace aphrodite