Commit 15cf65a
1 file changed, +1 -1 lines changed
Submodule cudnn-frontend updated 91 files
- CMakeLists.txt+1-1
- README.md+20
- benchmark/sdpa_benchmark_training/README.md+59-1
- benchmark/sdpa_benchmark_training/benchmark_single_sdpa.py+266-32
- benchmark/sdpa_benchmark_training/config_types.py+2-2
- benchmark/sdpa_benchmark_training/configs/dsv3.py+1-1
- benchmark/sdpa_benchmark_training/configs/gpt_oss.py+5-5
- benchmark/sdpa_benchmark_training/configs/llama.py+3-3
- benchmark/sdpa_benchmark_training/runner.py+2-9
- include/cudnn_frontend/cudnn_interface.h+11
- include/cudnn_frontend/experimental/attention_utils.h+160-94
- include/cudnn_frontend/experimental/oss_engine_interface.h+12-13
- include/cudnn_frontend/experimental/sm100_rms_norm_silu_engine.h+27-36
- include/cudnn_frontend/experimental/sm100_sdpa_prefill_engine.h+17-27
- include/cudnn_frontend/experimental/sm90_sdpa_prefill_engine.h+16-26
- include/cudnn_frontend/graph_helpers.h-14
- include/cudnn_frontend/graph_interface.h+6-10
- include/cudnn_frontend/graph_properties.h+28-1
- include/cudnn_frontend/node/scaled_dot_product_flash_attention.h+132-7
- include/cudnn_frontend/node/sdpa_fp8_bwd.h+89-146
- include/cudnn_frontend/node/sdpa_support_surface.h+18-10
- include/cudnn_frontend/plans.h+3-3
- include/cudnn_frontend/utils/attn_score_modifiers.h+5
- include/cudnn_frontend_shim.h+6-11
- include/cudnn_frontend_version.h+2-2
- python/cudnn/README.md+16-2
- python/cudnn/__init__.py+123-2
- python/cudnn/api_base.py+45
- python/cudnn/discrete_grouped_gemm/__init__.py+26
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_dswiglu/__init__.py+12
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_dswiglu/api.py+914
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_dswiglu/discrete_B_blockscaled_grouped_gemm_dglu.py+3.7k
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_swiglu/__init__.py+12
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_swiglu/api.py+965
- python/cudnn/discrete_grouped_gemm/discrete_grouped_gemm_swiglu/discrete_B_blockscaled_grouped_gemm_glu.py+2.9k
- python/cudnn/discrete_grouped_gemm/discrete_kernel_utils.py+874
- python/cudnn/discrete_grouped_gemm/moe_persistent_scheduler.py+1.0k
- python/cudnn/discrete_grouped_gemm/moe_sched_extension.py+217
- python/cudnn/discrete_grouped_gemm/moe_utils.py+499
- python/cudnn/gemm_amax/api.py+9
- python/cudnn/gemm_swiglu/api.py+9
- python/cudnn/grouped_gemm/__init__.py+21
- python/cudnn/grouped_gemm/grouped_gemm_dglu/__init__.py+12
- python/cudnn/grouped_gemm/grouped_gemm_dglu/api.py+1.9k
- python/cudnn/grouped_gemm/grouped_gemm_dglu/continugous_blockscaled_grouped_gemm_dglu_quant_dbias_fusion.py+4.4k
- python/cudnn/grouped_gemm/grouped_gemm_dglu/moe_blockscaled_grouped_gemm_dglu.py+3.6k
- python/cudnn/grouped_gemm/grouped_gemm_dswiglu/api.py+219-28
- python/cudnn/grouped_gemm/grouped_gemm_glu/__init__.py+12
- python/cudnn/grouped_gemm/grouped_gemm_glu/api.py+1.8k
- python/cudnn/grouped_gemm/grouped_gemm_glu/continugous_blockscaled_grouped_gemm_glu_quant_bias_fusion.py+3.7k
- python/cudnn/grouped_gemm/grouped_gemm_glu/moe_blockscaled_grouped_gemm_glu.py+2.8k
- python/cudnn/grouped_gemm/grouped_gemm_quant/__init__.py+20
- python/cudnn/grouped_gemm/grouped_gemm_quant/api.py+1.3k
- python/cudnn/grouped_gemm/grouped_gemm_quant/grouped_gemm_quant.py+2.1k
- python/cudnn/grouped_gemm/grouped_gemm_swiglu/api.py+323-46
- python/cudnn/grouped_gemm/grouped_gemm_swiglu/grouped_gemm_swiglu_quant.py+5-2
- python/cudnn/grouped_gemm/moe_kernel_helpers.py+856
- python/cudnn/grouped_gemm/moe_persistent_scheduler.py+1.0k
- python/cudnn/grouped_gemm/moe_sched_extension.py+339
- python/cudnn/grouped_gemm/moe_utils.py+506
- python/cudnn/grouped_gemm/utils.py+404
- python/pygraph/pygraph.h+12-4
- python/pygraph/sdpa.cpp+44-5
- samples/cpp/CMakeLists.txt-1
- samples/cpp/sdpa/fp16_dynamic_shapes.cpp+2-2
- samples/cpp/sdpa/mxfp8_bwd.cpp+7-7
- samples/cpp/sdpa/prefill_oss_engine.cpp+4-9
- test/cpp/CMakeLists.txt-1
- test/python/conftest.py+1
- test/python/fe_api/test_discrete_grouped_gemm_dswiglu.py+555
- test/python/fe_api/test_discrete_grouped_gemm_dswiglu_utils.py+296
- test/python/fe_api/test_discrete_grouped_gemm_swiglu.py+734
- test/python/fe_api/test_discrete_grouped_gemm_swiglu_utils.py+520
- test/python/fe_api/test_fe_api_utils.py+11-10
- test/python/fe_api/test_grouped_gemm_dglu.py+971
- test/python/fe_api/test_grouped_gemm_dswiglu.py+387-2
- test/python/fe_api/test_grouped_gemm_dswiglu_utils.py+124-59
- test/python/fe_api/test_grouped_gemm_glu.py+914
- test/python/fe_api/test_grouped_gemm_quant.py+902
- test/python/fe_api/test_grouped_gemm_quant_utils.py+576
- test/python/fe_api/test_grouped_gemm_swiglu.py+258
- test/python/fe_api/test_grouped_gemm_swiglu_utils.py+95-52
- test/python/sdpa/blocked.py+1-74
- test/python/sdpa/fp16.py+43-36
- test/python/sdpa/fp8.py+199-221
- test/python/sdpa/fp8_ref.py+53-127
- test/python/sdpa/helpers.py+33-3
- test/python/sdpa/mxfp8.py+132-294
- test/python/sdpa/mxfp8_ref.py+52-12
- test/python/sdpa/random_config.py+20-8
- test/python/test_mhas_v2.py+49-58
0 commit comments