// cute_utils.cuh
  1. #pragma once
  2. #include <cute/tensor.hpp>
  3. #include <torch/all.h>
  4. namespace cute {
  5. ////////////////////////////////////////////////////////////////////
  6. // layout utils
  7. ////////////////////////////////////////////////////////////////////
  8. // Permute layout based on indices, example:
  9. // permute_layout<1, 0>(layout) will swap the two dimensions
  10. // permute_layout<0, 2, 1>(layout) will swap the last two dimensions
  11. template <size_t... I, typename Layout>
  12. CUTE_HOST_DEVICE static constexpr auto permute_layout(Layout l) {
  13. static_assert(rank(l) == sizeof...(I), "Invalid permutation, rank mismatch");
  14. return cute::make_layout(cute::get<I>(l)...);
  15. }
  16. // is the layout f(x) = x
  17. template <typename Layout>
  18. CUTE_HOST_DEVICE static constexpr bool is_identity_layout() {
  19. if constexpr (std::is_same_v<Layout, void>)
  20. return true;
  21. else {
  22. constexpr auto coalesced_layout = coalesce(Layout{});
  23. if constexpr (rank(coalesced_layout) == 1 &&
  24. stride<0>(coalesced_layout) == 1) {
  25. return true;
  26. }
  27. return false;
  28. }
  29. }
  30. ////////////////////////////////////////////////////////////////////
  31. // Pointer utils
  32. ////////////////////////////////////////////////////////////////////
  33. template <class PointerType>
  34. static constexpr auto get_logical_ptr(PointerType* ptr) {
  35. if constexpr (cute::sizeof_bits_v<PointerType> < 8) {
  36. return cute::subbyte_iterator<PointerType>(ptr);
  37. } else {
  38. return ptr;
  39. }
  40. }
  41. ////////////////////////////////////////////////////////////////////
  42. // Misc utils
  43. ////////////////////////////////////////////////////////////////////
  44. template <typename T, typename Elements>
  45. CUTE_HOST_DEVICE static constexpr auto create_auto_vectorizing_copy() {
  46. constexpr auto bits = sizeof_bits_v<T> * Elements{};
  47. if constexpr (bits % 128 == 0) {
  48. return AutoVectorizingCopyWithAssumedAlignment<128>{};
  49. } else if constexpr (bits % 64 == 0) {
  50. return AutoVectorizingCopyWithAssumedAlignment<64>{};
  51. } else if constexpr (bits % 32 == 0) {
  52. return AutoVectorizingCopyWithAssumedAlignment<32>{};
  53. } else if constexpr (bits % 16 == 0) {
  54. return AutoVectorizingCopyWithAssumedAlignment<16>{};
  55. } else {
  56. return AutoVectorizingCopyWithAssumedAlignment<8>{};
  57. }
  58. }
  59. }; // namespace cute