// cuda_utils.h
  1. #pragma once
  2. #if defined(__CUDACC__) || defined(_NVHPC_CUDA)
  3. #define HOST_DEVICE_INLINE __forceinline__ __host__ __device__
  4. #define DEVICE_INLINE __forceinline__ __device__
  5. #define HOST_INLINE __forceinline__ __host__
  6. #else
  7. #define HOST_DEVICE_INLINE inline
  8. #define DEVICE_INLINE inline
  9. #define HOST_INLINE inline
  10. #endif
  11. int64_t get_device_attribute(int64_t attribute, int64_t device_id);
  12. int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id);