1
0

cuda_utils_kernels.cu 842 B

1234567891011121314151617181920212223242526272829
  #include <cstdint>
  #include <limits>
  #include <stdexcept>
  #include <string>

  #ifdef USE_ROCM
    #include <hip/hip_runtime.h>
    #include <hip/hip_runtime_api.h>
  #endif
  // Queries one integer attribute of a device through the CUDA runtime.
  //
  // attribute: numeric value that is cast to cudaDeviceAttr and passed to
  //            cudaDeviceGetAttribute.
  // device_id: ordinal of the device to query. A negative value selects the
  //            device reported by cudaGetDevice for the calling thread.
  //
  // Returns the attribute value widened to int64_t.
  //
  // Throws std::out_of_range when device_id does not fit in an int, and
  // std::runtime_error when either runtime call reports a failure. Without
  // these checks a failed query would return an indeterminate value.
  int64_t get_device_attribute(int64_t attribute, int64_t device_id) {
    int device = 0;
    if (device_id < 0) {
      const cudaError_t status = cudaGetDevice(&device);
      if (status != cudaSuccess) {
        throw std::runtime_error(std::string("cudaGetDevice failed: ") +
                                 cudaGetErrorString(status));
      }
    } else {
      // The runtime API takes an int ordinal; reject values that would be
      // truncated instead of silently aliasing another device.
      if (device_id > static_cast<int64_t>(std::numeric_limits<int>::max())) {
        throw std::out_of_range("get_device_attribute: device_id out of range");
      }
      device = static_cast<int>(device_id);
    }

    int value = 0;
    const cudaError_t status = cudaDeviceGetAttribute(
        &value, static_cast<cudaDeviceAttr>(attribute), device);
    if (status != cudaSuccess) {
      throw std::runtime_error(std::string("cudaDeviceGetAttribute failed: ") +
                               cudaGetErrorString(status));
    }
    return static_cast<int64_t>(value);
  }
  // Looks up the per-block shared-memory limit attribute for a device by
  // forwarding a platform-specific attribute id to get_device_attribute.
  //
  // device_id is passed through unchanged; see get_device_attribute for how
  // it is interpreted.
  //
  // Attribute ids, per the original author's note:
  //   https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
  //   cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 if not is_hip() else 74
  int64_t get_max_shared_memory_per_block_device_attribute(int64_t device_id) {
  #ifdef USE_ROCM
    // ROCm build: use the HIP per-block shared-memory attribute.
    const int64_t attr_id = hipDeviceAttributeMaxSharedMemoryPerBlock;
  #else
    // CUDA build: use the opt-in per-block shared-memory attribute.
    const int64_t attr_id = cudaDevAttrMaxSharedMemoryPerBlockOptin;
  #endif
    return get_device_attribute(attr_id, device_id);
  }