1
0

rocm_bf16.patch 545 B

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
  1. --- amd_hip_bf16.h 2024-02-06 18:28:58.268699142 +0000
  2. +++ amd_hip_bf16.h.new 2024-02-06 18:28:31.988647133 +0000
  3. @@ -90,10 +90,10 @@
  4. #include "math_fwd.h" // ocml device functions
  5. #if defined(__HIPCC_RTC__)
  6. -#define __HOST_DEVICE__ __device__
  7. +#define __HOST_DEVICE__ __device__ static
  8. #else
  9. #include <climits>
  10. -#define __HOST_DEVICE__ __host__ __device__
  11. +#define __HOST_DEVICE__ __host__ __device__ static inline
  12. #endif
  13. // Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on