cutlass_heuristic.h 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. /*
  2. * Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <vector>
  18. #include <cstddef>
  19. #include <cstdint>
  20. #include "cutlass_extensions/ft_gemm_configs.h"
  21. namespace fastertransformer {
  // Declaration only; the definition is not part of this header.
  // Returns a std::vector<CutlassGemmConfig> by value.
  //
  // sm                - int; presumably the GPU SM / compute-capability version
  //                     the configs are meant for - TODO confirm against the definition.
  // is_weight_only    - bool flag; presumably selects configs for weight-only
  //                     (quantized-weight) GEMMs - TODO confirm.
  // simt_configs_only - bool flag; presumably restricts the result to SIMT
  //                     (non tensor-core) configs - TODO confirm.
  22. std::vector<CutlassGemmConfig> get_candidate_configs(int sm, const bool is_weight_only, const bool simt_configs_only);
  // Declaration only; the definition is not part of this header.
  // Returns a single CutlassGemmConfig by value. Both vectors are taken by
  // const reference, so the callee cannot modify them through these parameters.
  //
  // candidate_configs     - configs to choose among.
  // occupancies           - one int per entry; presumably occupancies[i] belongs to
  //                         candidate_configs[i] - TODO confirm the sizes must match.
  // m, n, k               - 64-bit ints; presumably the GEMM problem dimensions - TODO confirm.
  // num_experts           - presumably the expert count for mixture-of-experts GEMMs - TODO confirm.
  // split_k_limit         - presumably an upper bound on the split-k factor considered - TODO confirm.
  // workspace_bytes       - presumably the size of the scratch workspace available - TODO confirm.
  // multi_processor_count - presumably the number of SMs on the device - TODO confirm.
  // is_weight_only        - NOTE(review): declared as `int` here, while
  //                         get_candidate_configs declares its is_weight_only as `bool`.
  //                         Confirm whether this is intentional; the type is part of the
  //                         signature, so changing it requires updating the definition too.
  23. CutlassGemmConfig estimate_best_config_from_occupancies(const std::vector<CutlassGemmConfig>& candidate_configs,
  24. const std::vector<int>& occupancies,
  25. const int64_t m,
  26. const int64_t n,
  27. const int64_t k,
  28. const int64_t num_experts,
  29. const int split_k_limit,
  30. const size_t workspace_bytes,
  31. const int multi_processor_count,
  32. const int is_weight_only);
  33. } // namespace fastertransformer