#pragma once

#include <stdint.h>

#include "attention_generic.cuh"
#include "dtype_float32.cuh"

namespace aphrodite {

// int8 vector types for quantization of kv cache.
//
// Vec<int8_t, N>::Type is an integer type whose width is exactly N bytes, so
// N int8 elements can be held in a single scalar of that type.
// NOTE(review): assumes the primary template is declared in
// attention_generic.cuh as `template<typename T, int VEC_SIZE> struct Vec` --
// confirm against that header.

// 1 x int8 -> 8-bit storage.
template<>
struct Vec<int8_t, 1> {
  using Type = int8_t;
};

// 2 x int8 -> 16-bit storage.
template<>
struct Vec<int8_t, 2> {
  using Type = int16_t;
};

// 4 x int8 -> 32-bit storage.
template<>
struct Vec<int8_t, 4> {
  using Type = int32_t;
};

// 8 x int8 -> 64-bit storage.
template<>
struct Vec<int8_t, 8> {
  using Type = int64_t;
};

// FloatVec<P>::Type names the float vector paired with each packed int8
// storage type P above: one float lane per int8 element held in P.
// Note that the key is the *storage* type (e.g. int16_t means "two int8
// values"), not a 16-bit element type.
// NOTE(review): assumes the primary template is
// `template<typename T> struct FloatVec` from attention_generic.cuh, and that
// Float4_ / Float8_ are the 4- and 8-float aggregates from
// dtype_float32.cuh -- confirm against those headers.

// 1 x int8 -> 1 float.
template<>
struct FloatVec<int8_t> {
  using Type = float;
};

// 2 x int8 (packed in int16_t) -> 2 floats.
template<>
struct FloatVec<int16_t> {
  using Type = float2;
};

// 4 x int8 (packed in int32_t) -> 4 floats.
template<>
struct FloatVec<int32_t> {
  using Type = Float4_;
};

// 8 x int8 (packed in int64_t) -> 8 floats.
template<>
struct FloatVec<int64_t> {
  using Type = Float8_;
};

}  // namespace aphrodite