há 2 meses atrás · 6b1d059eda
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,6 +3,7 @@ recursive-include csrc *.h
 
				 recursive-include csrc *.cuh
			
 
				 recursive-include csrc *.cpp
			
 
				 recursive-include csrc *.hpp
			
 
				+recursive-include csrc *.py
			
 
				 
			
 
				 recursive-include flash_attn *.cu
			
 
				 recursive-include flash_attn *.h
			
--- a/setup.py
+++ b/setup.py
@@ -145,11 +145,19 @@ ext_modules = []
 
				 
			
 
				 # We want this even if SKIP_CUDA_BUILD because when we run python setup.py sdist we want the .hpp
			
 
				 # files included in the source distribution, in case the user compiles from source.
			
 
				-if IS_ROCM:
			
 
				-    if not USE_TRITON_ROCM:
			
 
				-        subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"])
			
 
				+if os.path.isdir(".git"):
			
 
				+    subprocess.run(["git", "submodule", "update", "--init", "csrc/composable_kernel"], check=True)
			
 
				+    subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"], check=True)
			
 
				 else:
			
 
				-    subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
			
 
				+    if IS_ROCM:
			
 
				+        if not USE_TRITON_ROCM:
			
 
				+            assert (
			
 
				+                os.path.exists("csrc/composable_kernel/example/ck_tile/01_fmha/generate.py")
			
 
				+            ), "csrc/composable_kernel is missing, please use source distribution or git clone"
			
 
				+    else:
			
 
				+        assert (
			
 
				+            os.path.exists("csrc/cutlass/include/cutlass/cutlass.h")
			
 
				+        ), "csrc/cutlass is missing, please use source distribution or git clone"
			
 
				 
			
 
				 if not SKIP_CUDA_BUILD and not IS_ROCM:
			
 
				     print("\n\ntorch.__version__  = {}\n\n".format(torch.__version__))
			
@@ -324,10 +332,10 @@ elif not SKIP_CUDA_BUILD and IS_ROCM:
 
				         if not os.path.exists("./build"):
			
 
				             os.makedirs("build")
			
 
				 
			
 
				-        os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd --output_dir build --receipt 2")
			
 
				-        os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_appendkv --output_dir build --receipt 2")
			
 
				-        os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d fwd_splitkv --output_dir build --receipt 2")
			
 
				-        os.system(f"{sys.executable} {ck_dir}/example/ck_tile/01_fmha/generate.py -d bwd --output_dir build --receipt 2")
			
 
				+        subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd", "--output_dir", "build", "--receipt", "2"], check=True)
			
 
				+        subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_appendkv", "--output_dir", "build", "--receipt", "2"], check=True)
			
 
				+        subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "fwd_splitkv", "--output_dir", "build", "--receipt", "2"], check=True)
			
 
				+        subprocess.run([sys.executable, f"{ck_dir}/example/ck_tile/01_fmha/generate.py", "-d", "bwd", "--output_dir", "build", "--receipt", "2"], check=True)
			
 
				 
			
 
				         # Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h
			
 
				         # See https://github.com/pytorch/pytorch/pull/70650