weightOnlyBatchedGemvBs4Int8b.cu 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  /*
   * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  16. #include "kernel.h"
  // Explicit instantiation definitions of WeightOnlyBatchedGemvKernelLauncher
  // for WeightOnlyQuantType::Int8b with IdentityActivation. Listing them here
  // makes this translation unit emit these nine specializations.
  //
  // NOTE(review): the meaning of the two bool arguments and the three trailing
  // integer arguments is defined by the template declaration, which is not
  // visible in this file (presumably provided by kernel.h) -- confirm there
  // before adding or changing a variant.
  namespace tensorrt_llm
  {
  namespace kernels
  {
  // WeightOnlyPerChannel: a single variant, both bool arguments false,
  // trailing integers 2, 4, 256.
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyPerChannel,
      IdentityActivation, false, false, 2, 4, 256>;

  // WeightOnlyGroupWise<64>: all four combinations of the two bool arguments,
  // trailing integers 2, 4, 128.
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<64>,
      IdentityActivation, true, true, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<64>,
      IdentityActivation, true, false, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<64>,
      IdentityActivation, false, true, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<64>,
      IdentityActivation, false, false, 2, 4, 128>;

  // WeightOnlyGroupWise<128>: the same four bool combinations,
  // trailing integers 2, 4, 128.
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<128>,
      IdentityActivation, true, true, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<128>,
      IdentityActivation, true, false, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<128>,
      IdentityActivation, false, true, 2, 4, 128>;
  template struct WeightOnlyBatchedGemvKernelLauncher<WeightOnlyQuantType::Int8b, WeightOnlyGroupWise<128>,
      IdentityActivation, false, false, 2, 4, 128>;
  } // namespace kernels
  } // namespace tensorrt_llm