cuda_utils_kernels.cu 835 B

12345678910111213141516171819202122232425262728293031323334
  #include <stdexcept>
  #include <string>

  #ifdef USE_ROCM
  #include <hip/hip_runtime.h>
  #include <hip/hip_runtime_api.h>
  #endif
  // Queries one integer attribute of a GPU device through the runtime API.
  //
  // attribute: numeric value of a cudaDeviceAttr enumerator; it is cast to
  //            cudaDeviceAttr before being handed to the runtime.
  // device_id: ordinal of the device to query. A negative value means "use
  //            whatever device cudaGetDevice reports".
  //
  // Returns the value written by cudaDeviceGetAttribute.
  // Throws std::runtime_error if either runtime call reports an error, so a
  // caller never receives an indeterminate result from a failed query.
  int get_device_attribute(
      int attribute,
      int device_id)
  {
    int device = device_id;
    if (device < 0) {
      // Negative id: resolve to the device reported by the runtime.
      const cudaError_t get_err = cudaGetDevice(&device);
      if (get_err != cudaSuccess) {
        throw std::runtime_error(
            std::string("get_device_attribute: cudaGetDevice failed: ") +
            cudaGetErrorString(get_err));
      }
    }

    // Initialized so the variable never holds an indeterminate value, even
    // though the error path below throws instead of returning it.
    int value = 0;
    const cudaError_t attr_err = cudaDeviceGetAttribute(
        &value, static_cast<cudaDeviceAttr>(attribute), device);
    if (attr_err != cudaSuccess) {
      throw std::runtime_error(
          std::string("get_device_attribute: cudaDeviceGetAttribute failed: ") +
          cudaGetErrorString(attr_err));
    }
    return value;
  }
  // Looks up the "max shared memory per block" attribute for a device.
  //
  // The enumerator is chosen at compile time:
  //   - USE_ROCM defined:  hipDeviceAttributeMaxSharedMemoryPerBlock
  //   - otherwise:         cudaDevAttrMaxSharedMemoryPerBlockOptin
  // The original note in this file records the raw enumerator values as 97 for
  // the CUDA build and 74 for the HIP build. See
  // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
  //
  // device_id is forwarded unchanged to get_device_attribute, whose result is
  // returned as-is.
  int get_max_shared_memory_per_block_device_attribute(int device_id)
  {
  #ifdef USE_ROCM
    const int smem_attr = hipDeviceAttributeMaxSharedMemoryPerBlock;
  #else
    const int smem_attr = cudaDevAttrMaxSharedMemoryPerBlockOptin;
  #endif
    return get_device_attribute(smem_attr, device_id);
  }