1
0

base.cuh 1.1 KB

1234567891011121314151617181920212223242526272829303132
  /*
   * Modified by HandH1998
   * Modified by Neural Magic
   * Copyright (C) Marlin.2024 Elias Frantar
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  #pragma once
  // Integer division of `a` by `b`, rounded up instead of truncated.
  //
  // Computed as `(a + b - 1) / b`, so the result is the ceiling of `a / b` only
  // when `a >= 0` and `b > 0`. The intermediate sum `a + b - 1` is evaluated in
  // `int`; callers must keep it within range (signed overflow is undefined).
  // `b == 0` is a division by zero and is not guarded against.
  constexpr int ceildiv(int a, int b) { return (a + b - 1) / b; }
  20. // Instances of `Vec` are used to organize groups of >>registers<<, as needed
  21. // for instance as inputs to tensor core operations. Consequently, all
  22. // corresponding index accesses must be compile-time constants, which is why we
  23. // extensively use `#pragma unroll` throughout the kernel code to guarantee
  24. // this.
  25. template <typename T, int n>
  26. struct Vec {
  27. T elems[n];
  28. __device__ T& operator[](int i) { return elems[i]; }
  29. };