.. |
instantiations
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
__init__.py
|
7f67966cc7
FA3 initial code release
|
6 сар өмнө |
benchmark_attn.py
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
benchmark_flash_attention_fp8.py
|
a5a75274bc
FA3 kvcache + split kv + gqa parallelization (#1236)
|
2 сар өмнө |
benchmark_split_kv.py
|
a5a75274bc
FA3 kvcache + split kv + gqa parallelization (#1236)
|
2 сар өмнө |
combine.h
|
478ee666cc
Make namespace comment consistent (#1305)
|
2 сар өмнө |
copy_sm90_bulk_reduce.hpp
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
epilogue_bwd.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
epilogue_fwd.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
flash.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
flash_api.cpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
flash_attn_interface.py
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_bwd_kernel_sm80.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_bwd_kernel_sm90.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_bwd_launch_template.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
flash_bwd_postprocess_kernel.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_bwd_preprocess_kernel.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_fwd_combine_kernel.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_fwd_combine_launch_template.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_fwd_combine_sm80.cu
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_fwd_kernel_sm80.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
flash_fwd_kernel_sm90.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
flash_fwd_launch_template.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
generate_kernels.py
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
heuristics.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
mainloop_bwd_sm80.hpp
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
mainloop_bwd_sm90_tma_gmma_ws.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
mainloop_fwd_sm80.hpp
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
mainloop_fwd_sm90_tma_gmma_ws.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
mask.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
named_barrier.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
pack_gqa.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
padding.py
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
paged_kv.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
rotary.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
seqlen.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
setup.py
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
sm90_pipeline_no_cluster.hpp
|
68bf390920
Update Cutlass to fix mem fence
|
2 өдөр өмнө |
softmax.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
static_switch.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
test_attn_kvcache.py
|
a5a75274bc
FA3 kvcache + split kv + gqa parallelization (#1236)
|
2 сар өмнө |
test_flash_attn.py
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
test_kvcache.py
|
a5a75274bc
FA3 kvcache + split kv + gqa parallelization (#1236)
|
2 сар өмнө |
test_util.py
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
tile_scheduler.hpp
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |
tile_size.h
|
7a802796e1
Big refactor and update
|
2 өдөр өмнө |
utils.h
|
7bc3f031a4
Compile for both Sm80 and Sm90
|
1 өдөр өмнө |