.. |
__init__.py
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
benchmark_attn.py
|
dfe1a59e4b
Add var-seq-len to FA3 fp16 / bf16 fwd (#1072)
|
4 月之前 |
benchmark_flash_attention.py
|
cdc966e81a
adding files for fp8 changes.
|
4 月之前 |
benchmark_flash_attention_fp8.py
|
df66e974bc
fixed odd-seq-len-k.
|
4 月之前 |
block_info.h
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
epilogue_fwd_sm90_tma.hpp
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
flash.h
|
dfe1a59e4b
Add var-seq-len to FA3 fp16 / bf16 fwd (#1072)
|
4 月之前 |
flash_api.cpp
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
flash_attn_interface.py
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
flash_bwd_hdim128_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_bwd_hdim256_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_bwd_hdim64_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_bwd_kernel.h
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_bwd_launch_template.h
|
cb516f855b
Remove torchlib dependency from cpp files (#1083)
|
4 月之前 |
flash_bwd_preprocess_kernel.h
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_fwd_hdim128_bf16_sm90.cu
|
74b0761ff7
[FA3] BF16 forward
|
5 月之前 |
flash_fwd_hdim128_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_fwd_hdim128_fp8_sm90.cu
|
cdc966e81a
adding files for fp8 changes.
|
4 月之前 |
flash_fwd_hdim256_bf16_sm90.cu
|
74b0761ff7
[FA3] BF16 forward
|
5 月之前 |
flash_fwd_hdim256_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_fwd_hdim256_fp8_sm90.cu
|
cdc966e81a
adding files for fp8 changes.
|
4 月之前 |
flash_fwd_hdim64_bf16_sm90.cu
|
74b0761ff7
[FA3] BF16 forward
|
5 月之前 |
flash_fwd_hdim64_fp16_sm90.cu
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
flash_fwd_hdim64_fp8_sm90.cu
|
cdc966e81a
adding files for fp8 changes.
|
4 月之前 |
flash_fwd_kernel.h
|
dfe1a59e4b
Add var-seq-len to FA3 fp16 / bf16 fwd (#1072)
|
4 月之前 |
flash_fwd_launch_template.h
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
kernel_traits.h
|
fe4c5b59df
undid clang formatting.
|
4 月之前 |
mainloop_fwd_sm90_tma_gmma_ws.hpp
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
named_barrier.hpp
|
74b0761ff7
[FA3] BF16 forward
|
5 月之前 |
seq_len.h
|
dfe1a59e4b
Add var-seq-len to FA3 fp16 / bf16 fwd (#1072)
|
4 月之前 |
setup.py
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
softmax.h
|
7f67966cc7
FA3 initial code release
|
5 月之前 |
static_switch.h
|
dfe1a59e4b
Add var-seq-len to FA3 fp16 / bf16 fwd (#1072)
|
4 月之前 |
test_flash_attn.py
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |
tile_scheduler.hpp
|
74b0761ff7
[FA3] BF16 forward
|
5 月之前 |
utils.h
|
d5893f3c74
Merge branch 'main' into changes_for_fp8
|
4 月之前 |