|
@@ -145,11 +145,19 @@ ext_modules = []
|
|
|
|
|
|
# We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
|
|
|
# files included in the source distribution, in case the user compiles from source.
|
|
|
-if IS_ROCM:
|
|
|
- if not USE_TRITON_ROCM:
|
|
|
- subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"])
|
|
|
+if os.path.isdir(".git"):
|
|
|
+ subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"], check=True)
|
|
|
+ subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"], check=True)
|
|
|
else:
|
|
|
- subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
|
|
|
+ if IS_ROCM:
|
|
|
+ if not USE_TRITON_ROCM:
|
|
|
+ assert (
|
|
|
+ os.path.exists("csrc/composable_kernel/example/ck_tile/01_fmha/generate.py")
|
|
|
+ ), "csrc/composable_kernel is missing, please use source distribution or git clone"
|
|
|
+ else:
|
|
|
+ assert (
|
|
|
+ os.path.exists("csrc/cutlass/include/cutlass/cutlass.h")
|
|
|
+ ), "csrc/cutlass is missing, please use source distribution or git clone"
|
|
|
|
|
|
if not SKIP_CUDA_BUILD and not IS_ROCM:
|
|
|
print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
|
|
@@ -324,10 +332,10 @@ elif not SKIP_CUDA_BUILD and IS_ROCM:
|
|
|
if not os.path.exists("./build"):
|
|
|
os.makedirs("build")
|
|
|
|
|
|
- os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd --output_dir build --receipt 2")
|
|
|
- os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_appendkv --output_dir build --receipt 2")
|
|
|
- os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_splitkv --output_dir build --receipt 2")
|
|
|
- os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d bwd --output_dir build --receipt 2")
|
|
|
+ subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd", "--output_dir", "build", "--receipt", "2"], check=True)
|
|
|
+ subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_appendkv", "--output_dir", "build", "--receipt", "2"], check=True)
|
|
|
+ subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_splitkv", "--output_dir", "build", "--receipt", "2"], check=True)
|
|
|
+ subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "bwd", "--output_dir", "build", "--receipt", "2"], check=True)
|
|
|
|
|
|
# Check if ATen/CUDAGeneratorImpl.h is found; otherwise, use ATen/cuda/CUDAGeneratorImpl.h
|
|
|
# See https://github.com/pytorch/pytorch/pull/70650
|