--- amd_hip_bf16.h 2024-02-06 18:28:58.268699142 +0000 +++ amd_hip_bf16.h.new 2024-02-06 18:28:31.988647133 +0000 @@ -90,10 +90,10 @@ #include "math_fwd.h" // ocml device functions #if defined(__HIPCC_RTC__) -#define __HOST_DEVICE__ __device__ +#define __HOST_DEVICE__ __device__ static #else #include -#define __HOST_DEVICE__ __host__ __device__ +#define __HOST_DEVICE__ __host__ __device__ static inline #endif // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on