Skip to content

Commit

Permalink
Fix issues for Cutlass 3.6
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Joe Todd <[email protected]>
  • Loading branch information
aacostadiaz and joeatodd committed Dec 5, 2024
1 parent b0e09d7 commit cbea514
Show file tree
Hide file tree
Showing 38 changed files with 1,352 additions and 201 deletions.
157 changes: 88 additions & 69 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,17 @@ function(cutlass_example_add_executable NAME)

add_dependencies(cutlass_examples ${NAME})

# Decide whether the `cuda` library is linked: ON for non-SYCL builds, OFF
# for SYCL builds. ADD_CUDA is assigned on both paths so that a same-named
# variable inherited from the calling scope or the cache cannot re-enable the
# CUDA link when CUTLASS_ENABLE_SYCL is set.
if (CUTLASS_ENABLE_SYCL)
  set(ADD_CUDA OFF)
else()
  set(ADD_CUDA ON)
endif()

# Link ${NAME} privately against CUTLASS and cutlass_tools_util_includes, plus
# nvidia::cublas only when CUTLASS_ENABLE_CUBLAS evaluates to true.
# NOTE(review): `cuda` is listed both unconditionally and behind the ADD_CUDA
# generator expression; this looks like the removed and added sides of a
# rendered diff shown together -- confirm which entry the real file keeps.
target_link_libraries(
${NAME}
PRIVATE
CUTLASS
cutlass_tools_util_includes
$<$<BOOL:${CUTLASS_ENABLE_CUBLAS}>:nvidia::cublas>
cuda
$<$<BOOL:${ADD_CUDA}>:cuda>
)

target_include_directories(
Expand All @@ -64,6 +68,13 @@ function(cutlass_example_add_executable NAME)
${CUTLASS_EXAMPLES_UTILS_DIR}
)

# SYCL builds only: make ${NAME} build after the onemkl_project target, add the
# oneMKL include directory and library to it, and pass the target to
# add_sycl_to_target (defined outside this block).
if (CUTLASS_ENABLE_SYCL)
  add_dependencies(${NAME} onemkl_project)
  target_include_directories(${NAME} PRIVATE ${ONEMKL_INCLUDE_DIR})
  # PRIVATE matches the keyword used by the other target_link_libraries call
  # on this target; nothing consumes ${NAME}'s link interface here.
  target_link_libraries(${NAME} PRIVATE ${ONEMKL_LIB})
  add_sycl_to_target(TARGET ${NAME})
endif()

install(
TARGETS ${NAME}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
Expand All @@ -80,71 +91,79 @@ function(cutlass_example_add_executable NAME)

endfunction()

# Add every example directory named below to the build, in the listed order.
foreach(EXAMPLE IN ITEMS
    00_basic_gemm
    01_cutlass_utilities
    02_dump_reg_shmem
    03_visualize_layout
    04_tile_iterator
    05_batched_gemm
    06_splitK_gemm
    07_volta_tensorop_gemm
    08_turing_tensorop_gemm
    09_turing_tensorop_conv2dfprop
    10_planar_complex
    11_planar_complex_array
    12_gemm_bias_relu
    13_two_tensor_op_fusion
    14_ampere_tf32_tensorop_gemm
    15_ampere_sparse_tensorop_gemm
    16_ampere_tensorop_conv2dfprop
    17_fprop_per_channel_bias
    18_ampere_fp64_tensorop_affine2_gemm
    19_tensorop_canonical
    20_simt_canonical
    21_quaternion_gemm
    22_quaternion_conv
    23_ampere_gemm_operand_reduction_fusion
    24_gemm_grouped
    25_ampere_fprop_mainloop_fusion
    26_ampere_wgrad_mainloop_fusion
    27_ampere_3xtf32_fast_accurate_tensorop_gemm
    28_ampere_3xtf32_fast_accurate_tensorop_fprop
    29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
    30_wgrad_split_k
    31_basic_syrk
    32_basic_trmm
    33_ampere_3xtf32_tensorop_symm
    34_transposed_conv2d
    35_gemm_softmax
    36_gather_scatter_fusion
    37_gemm_layernorm_gemm_fusion
    38_syr2k_grouped
    cute
    39_gemm_permute
    41_fused_multi_head_attention
    42_ampere_tensorop_group_conv
    43_ell_block_sparse_gemm
    45_dual_gemm
    46_depthwise_simt_conv2dfprop
    47_ampere_gemm_universal_streamk
    48_hopper_warp_specialized_gemm
    49_hopper_gemm_with_collective_builder
    50_hopper_gemm_with_epilogue_swizzle
    51_hopper_gett
    52_hopper_gather_scatter_fusion
    53_hopper_gemm_permute
    54_hopper_fp8_warp_specialized_gemm
    55_hopper_mixed_dtype_gemm
    56_hopper_ptr_array_batched_gemm
    57_hopper_grouped_gemm
    58_ada_fp8_gemm
    59_ampere_gather_scatter_conv
    61_hopper_gemm_with_topk_and_softmax
    62_hopper_sparse_gemm
    63_hopper_gemm_with_weight_prefetch
  )
  add_subdirectory(${EXAMPLE})
endforeach()
# Choose which example directories join the build. When CUTLASS_ENABLE_SYCL is
# set, only the short SYCL list is added; otherwise the full list is added.
# Directories are added in the order they are written.
if (CUTLASS_ENABLE_SYCL)
  foreach(EXAMPLE IN ITEMS
      14_ampere_tf32_tensorop_gemm
      cute
      sycl
    )
    add_subdirectory(${EXAMPLE})
  endforeach()
else()
  foreach(EXAMPLE IN ITEMS
      00_basic_gemm
      01_cutlass_utilities
      02_dump_reg_shmem
      03_visualize_layout
      04_tile_iterator
      05_batched_gemm
      06_splitK_gemm
      07_volta_tensorop_gemm
      08_turing_tensorop_gemm
      09_turing_tensorop_conv2dfprop
      10_planar_complex
      11_planar_complex_array
      12_gemm_bias_relu
      13_two_tensor_op_fusion
      14_ampere_tf32_tensorop_gemm
      15_ampere_sparse_tensorop_gemm
      16_ampere_tensorop_conv2dfprop
      17_fprop_per_channel_bias
      18_ampere_fp64_tensorop_affine2_gemm
      19_tensorop_canonical
      20_simt_canonical
      21_quaternion_gemm
      22_quaternion_conv
      23_ampere_gemm_operand_reduction_fusion
      24_gemm_grouped
      25_ampere_fprop_mainloop_fusion
      26_ampere_wgrad_mainloop_fusion
      27_ampere_3xtf32_fast_accurate_tensorop_gemm
      28_ampere_3xtf32_fast_accurate_tensorop_fprop
      29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
      30_wgrad_split_k
      31_basic_syrk
      32_basic_trmm
      33_ampere_3xtf32_tensorop_symm
      34_transposed_conv2d
      35_gemm_softmax
      36_gather_scatter_fusion
      37_gemm_layernorm_gemm_fusion
      38_syr2k_grouped
      cute
      39_gemm_permute
      41_fused_multi_head_attention
      42_ampere_tensorop_group_conv
      43_ell_block_sparse_gemm
      45_dual_gemm
      46_depthwise_simt_conv2dfprop
      47_ampere_gemm_universal_streamk
      48_hopper_warp_specialized_gemm
      49_hopper_gemm_with_collective_builder
      50_hopper_gemm_with_epilogue_swizzle
      51_hopper_gett
      52_hopper_gather_scatter_fusion
      53_hopper_gemm_permute
      54_hopper_fp8_warp_specialized_gemm
      55_hopper_mixed_dtype_gemm
      56_hopper_ptr_array_batched_gemm
      57_hopper_grouped_gemm
      58_ada_fp8_gemm
      59_ampere_gather_scatter_conv
      61_hopper_gemm_with_topk_and_softmax
      62_hopper_sparse_gemm
      63_hopper_gemm_with_weight_prefetch
    )
    add_subdirectory(${EXAMPLE})
  endforeach()
endif()
Loading

0 comments on commit cbea514

Please sign in to comment.