Add `_static` variants of the cuda-api-wrappers interface targets, plus an
examples option (CAW_EXAMPLES_STATIC_DEPS) that makes the example programs
link against those variants.

Review fixes folded into this patch:
  * The second branch of the caw_fatbin chain tested `TARGET CUDA::nvfatbin`
    a second time, so the CUDA::nvfatbin_static fallback was unreachable; it
    now tests `TARGET CUDA::nvfatbin_static`.
  * The warning emitted when no static fatbin library is found said
    "non-fatbin creator"; it now says "static NVIDIA fatbin creator".
  * matrixMulCUBLAS was guarded by `TARGET CUDA::cublas` while linking
    `CUDA::cublas${static_suffix}`; the guard now checks the linked target.

NOTE(review): this copy of the patch had its line breaks and indentation
collapsed. Blank lines were placed to satisfy the hunk line counts and the
indentation is assumed to be tabs; the two whitespace-only `elseif(EXISTS ...)`
changes are assumed to be spaces -> tab. The two `"$"` context lines in the
third hunk look truncated (presumably generator expressions). Confirm all of
this against the repository before applying. The `index` post-image hashes
predate the review fixes above.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a7d29ff2..620cb57b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ if(libm_exists)
 endif()
 
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
-	foreach(tgt in nvfatbin nvfatbin_static)
+	foreach(tgt nvfatbin nvfatbin_static)
 		if (NOT TARGET ${tgt})
 			_CUDAToolkit_find_and_add_import_lib(${tgt})
 		endif()
@@ -39,8 +39,13 @@ endif()
 
 set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
 
+set(targets_base runtime-and-driver nvtx rtc fatbin)
+set(targets "")
+
+foreach(tgt ${targets_base})
+	list(APPEND targets ${tgt} ${tgt}_static)
+endforeach()
 
-set(targets runtime-and-driver nvtx rtc fatbin)
 set(prefixed-targets "")
 set(caw_namespace "cuda-api-wrappers")
 
@@ -56,7 +61,10 @@ foreach(wrapper_lib ${targets})
 		"$"
 		"$"
 	)
-	target_link_libraries(${caw_lib} INTERFACE CUDA::cudart CUDA::cuda_driver)
+	target_link_libraries(${caw_lib} INTERFACE CUDA::cuda_driver)
+	string(REGEX MATCH "_static\$" static_suffix ${wrapper_lib})
+	target_link_libraries(${caw_lib} INTERFACE CUDA::cudart${static_suffix})
+
 	# These next three dependencies should be carried by the CUDA libraries themselves...
 	# but they aren't - this is CMake bug 25665
 
@@ -80,8 +88,12 @@ foreach(wrapper_lib ${targets})
 		WINDOWS_EXPORT_ALL_SYMBOLS ON
 	)
 endforeach()
-add_library("cuda-api-wrappers::driver-and-runtime" ALIAS caw_runtime-and-driver)
-target_link_libraries(caw_rtc INTERFACE cuda-api-wrappers::runtime-and-driver CUDA::nvrtc)
+
+foreach(static_suffix "" "_static")
+	add_library(${caw_namespace}::driver-and-runtime${static_suffix} ALIAS caw_runtime-and-driver${static_suffix})
+	target_link_libraries(caw_rtc${static_suffix} INTERFACE cuda-api-wrappers::runtime-and-driver${static_suffix} CUDA::nvrtc${static_suffix})
+endforeach()
+
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
 	if (TARGET CUDA::nvptxcompiler)
 		target_link_libraries(caw_rtc INTERFACE CUDA::nvptxcompiler)
@@ -91,20 +103,35 @@ if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.1)
 	else()
 		message(WARNING "No valid NVIDIA PTX Compiler target is available")
 	endif()
+
+	if (TARGET CUDA::nvptxcompiler_static)
+		target_link_libraries(caw_rtc_static INTERFACE CUDA::nvptxcompiler_static)
+	else()
+		message(WARNING "No static PTX Compiler target is available")
+	endif()
 endif()
 
+
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
 	if (TARGET CUDA::nvfatbin)
 		target_link_libraries(caw_fatbin INTERFACE CUDA::nvfatbin)
-	elseif (TARGET CUDA::nvfatbin)
+	elseif (TARGET CUDA::nvfatbin_static)
 		target_link_libraries(caw_fatbin INTERFACE CUDA::nvfatbin_static)
-    elseif(EXISTS "${CUDA_nvfatbin_LIBRARY}")
+	elseif(EXISTS "${CUDA_nvfatbin_LIBRARY}")
 		target_link_libraries(caw_fatbin INTERFACE "${CUDA_nvfatbin_LIBRARY}")
-    elseif(EXISTS "${CUDA_nvfatbin_static_LIBRARY}")
+	elseif(EXISTS "${CUDA_nvfatbin_static_LIBRARY}")
 		target_link_libraries(caw_fatbin INTERFACE "${CUDA_nvfatbin_static_LIBRARY}")
 	else()
 		message(WARNING "Could not locate a valid NVIDIA fatbin creator target or library file")
 	endif()
+	if (TARGET CUDA::nvfatbin_static)
+		target_link_libraries(caw_fatbin_static INTERFACE CUDA::nvfatbin_static)
+	elseif(EXISTS "${CUDA_nvfatbin_static_LIBRARY}")
+		target_link_libraries(caw_fatbin_static INTERFACE "${CUDA_nvfatbin_static_LIBRARY}")
+	else()
+		message(WARNING "Could not locate a valid static NVIDIA fatbin creator target or library file")
+	endif()
+
 	if (TARGET CUDA::cufilt)
 		target_link_libraries(caw_runtime-and-driver INTERFACE CUDA::cufilt)
 	elseif(EXISTS "${CUDA_cufilt_LIBRARY}")
@@ -112,16 +139,20 @@ if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
 	else()
 		message(WARNING "Could not locate the cufilt demangling library")
 	endif()
-
 endif()
 
-target_link_libraries(caw_fatbin INTERFACE cuda-api-wrappers::runtime-and-driver)
-target_link_libraries(caw_nvtx INTERFACE cuda-api-wrappers::runtime-and-driver)
+foreach(tgt_base caw_fatbin caw_nvtx)
+	foreach(static_suffix "" "_static")
+		target_link_libraries(${tgt_base}${static_suffix} INTERFACE cuda-api-wrappers::runtime-and-driver${static_suffix})
+	endforeach()
+endforeach()
 
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 10.0)
 	target_link_libraries(caw_nvtx INTERFACE CUDA::nvtx3)
+	target_link_libraries(caw_nvtx_static INTERFACE CUDA::nvtx3)
 else()
 	target_link_libraries(caw_nvtx INTERFACE CUDA::nvToolsExt)
+	target_link_libraries(caw_nvtx_static INTERFACE CUDA::nvToolsExt)
 endif()
 
 
@@ -195,7 +226,6 @@ write_basic_package_version_file(
 	COMPATIBILITY ${COMPAT_SETTING}
 )
 
-
 install(
 	FILES "${CMAKE_CURRENT_BINARY_DIR}/cuda-api-wrappers-config-version.cmake"
 	DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/cuda-api-wrappers"
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index bfafd24a..35d33763 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -2,6 +2,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
 include(CheckIncludeFiles)
 
 option(CAW_WITH_COOPERATIVE_GROUPS "Enable cooperative-groups-related code in example programs" ON)
+option(CAW_EXAMPLES_STATIC_DEPS "Have the example programs depend on the static versions of raw CUDA targets" OFF)
 
 enable_language(CXX)
 enable_language(CUDA)
@@ -52,22 +53,28 @@ if(CAW_WITH_COOPERATIVE_GROUPS AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "9.
 	message(STATUS "Using cooperative groups in example programs")
 endif()
 
-link_libraries(cuda-api-wrappers::runtime-and-driver)
+if (CAW_EXAMPLES_STATIC_DEPS)
+	set(static_suffix "_static")
+else()
+	set(static_suffix "")
+endif()
+link_libraries(cuda-api-wrappers::runtime-and-driver${static_suffix})
+set(rtc_lib_tgt cuda-api-wrappers::rtc${static_suffix})
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "bin")
 add_executable(vectorAdd modified_cuda_samples/vectorAdd/vectorAdd.cu)
-if (TARGET CUDA::cublas)
+if (TARGET CUDA::cublas${static_suffix})
 	add_executable(matrixMulCUBLAS modified_cuda_samples/matrixMulCUBLAS/matrixMulCUBLAS.cpp)
-	target_link_libraries(matrixMulCUBLAS CUDA::cublas)
+	target_link_libraries(matrixMulCUBLAS CUDA::cublas${static_suffix})
 endif()
 add_executable(vectorAdd_unique_regions modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu)
 add_executable(vectorAddMapped modified_cuda_samples/vectorAddMapped/vectorAddMapped.cu)
 add_executable(vectorAddManaged modified_cuda_samples/vectorAddManaged/vectorAddManaged.cu)
 add_executable(vectorAdd_nvrtc modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp)
-target_link_libraries(vectorAdd_nvrtc cuda-api-wrappers::rtc)
+target_link_libraries(vectorAdd_nvrtc ${rtc_lib_tgt})
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.1")
 	add_executable(vectorAdd_ptx modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp)
-	target_link_libraries(vectorAdd_ptx cuda-api-wrappers::rtc)
+	target_link_libraries(vectorAdd_ptx ${rtc_lib_tgt})
 endif()
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.2")
 	add_executable(streamOrderedAllocation modified_cuda_samples/streamOrderedAllocation/streamOrderedAllocation.cu)
@@ -85,7 +92,7 @@ endif()
 
 if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.3") # Needs direct cubin access for NVRTC programs
 	add_executable(clock_nvrtc modified_cuda_samples/clock_nvrtc/clock.cpp)
-	target_link_libraries(clock_nvrtc cuda-api-wrappers::rtc)
+	target_link_libraries(clock_nvrtc ${rtc_lib_tgt})
 endif()
 add_executable(simpleDrvRuntimePTX modified_cuda_samples/simpleDrvRuntimePTX/simpleDrvRuntimePTX.cpp)
 add_executable(inlinePTX modified_cuda_samples/inlinePTX/inlinePTX.cu)
@@ -203,11 +210,11 @@ add_executable(unified_addressing by_api_module/unified_addressing.cpp)
 add_executable(io_compute_overlap_with_streams other/io_compute_overlap_with_streams.cu)
 if( WIN32 AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.0")
 	add_executable(vectorAdd_profiled other/vectorAdd_profiled.cu)
-	target_link_libraries(vectorAdd_profiled cuda-api-wrappers::nvtx)
+	target_link_libraries(vectorAdd_profiled cuda-api-wrappers::nvtx${static_suffix})
 endif()
 add_executable(manipulate_current_device other/manipulate_current_device.cu)
 add_executable(inclusion_in_two_translation_units other/inclusion_in_two_translation_units/main.cpp other/inclusion_in_two_translation_units/second_tu.cpp)
-target_link_libraries(inclusion_in_two_translation_units cuda-api-wrappers::rtc cuda-api-wrappers::nvtx)
+target_link_libraries(inclusion_in_two_translation_units ${rtc_lib_tgt} cuda-api-wrappers::nvtx${static_suffix})
 if( NOT MSVC )
 	foreach(std_version "14" "17" "20" "23")
 		if("cxx_std_${std_version}" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
@@ -216,7 +223,7 @@ if( NOT MSVC )
 		endif()
 		set(tgt "cpp_${std_version}")
 		add_executable(${tgt} other/new_cpp_standard/main.cpp)
-		target_link_libraries(${tgt} cuda-api-wrappers::rtc cuda-api-wrappers::nvtx cuda-api-wrappers::fatbin)
+		target_link_libraries(${tgt} ${rtc_lib_tgt} cuda-api-wrappers::nvtx${static_suffix} cuda-api-wrappers::fatbin${static_suffix})
 		if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 			target_link_libraries(${tgt} stdc++fs)
 		endif()
@@ -238,12 +245,12 @@ foreach(hdr ${jitify_headers})
 		POST_BUILD
 		COMMAND ${CMAKE_COMMAND} -E copy ${hdr} ${jitify_headers_target_dir})
 endforeach()
-target_link_libraries(jitify cuda-api-wrappers::rtc)
+target_link_libraries(jitify ${rtc_lib_tgt})
 if(NOT MSVC)
 	target_link_libraries(jitify stdc++fs)
 endif()
 add_executable(module_management by_api_module/module_management.cpp)
-target_link_libraries(module_management cuda-api-wrappers::rtc)
+target_link_libraries(module_management ${rtc_lib_tgt})
 
 
 if(NOT "${CMAKE_CUDA_COMPILER_ID}" STREQUAL "Clang")