forked from UoB-HPC/BabelStream
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOMP.cmake
174 lines (143 loc) · 6.21 KB
/
OMP.cmake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Compiler ID for reference (as of CMake 3.13)
# Absoft = Absoft Fortran (absoft.com)
# ADSP = Analog VisualDSP++ (analog.com)
# AppleClang = Apple Clang (apple.com)
# ARMCC = ARM Compiler (arm.com)
# Bruce = Bruce C Compiler
# CCur = Concurrent Fortran (ccur.com)
# Clang = LLVM Clang (clang.llvm.org)
# Cray = Cray Compiler (cray.com)
# Embarcadero, Borland = Embarcadero (embarcadero.com)
# G95 = G95 Fortran (g95.org)
# GNU = GNU Compiler Collection (gcc.gnu.org)
# HP = Hewlett-Packard Compiler (hp.com)
# IAR = IAR Systems (iar.com)
# Intel = Intel Compiler (intel.com)
# MIPSpro = SGI MIPSpro (sgi.com)
# MSVC = Microsoft Visual Studio (microsoft.com)
# NVIDIA = NVIDIA CUDA Compiler (nvidia.com)
# OpenWatcom = Open Watcom (openwatcom.org)
# PGI = The Portland Group (pgroup.com)
# Flang = Flang Fortran Compiler
# PathScale = PathScale (pathscale.com)
# SDCC = Small Device C Compiler (sdcc.sourceforge.net)
# SunPro = Oracle Solaris Studio (oracle.com)
# TI = Texas Instruments (ti.com)
# TinyCC = Tiny C Compiler (tinycc.org)
# XL, VisualAge, zOS = IBM XL (ibm.com)
# These are only added in CMake 3.15:
# ARMClang = ARM Compiler based on Clang (arm.com)
# These are only added in CMake 3.20:
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
# Power9 = ppc64le
# x64 = x86_64
# arm64 = aarch64
#
#predefined offload flags based on compiler id
set(OMP_FLAGS_OFFLOAD_INTEL
-qnextgen -fiopenmp -fopenmp-targets=spir64)
set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA
-foffload=nvptx-none)
set(OMP_FLAGS_OFFLOAD_GNU_AMD
-foffload=amdgcn-amdhsa)
set(OMP_FLAGS_OFFLOAD_CLANG_NVIDIA
-fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda)
set(OMP_FLAGS_OFFLOAD_CLANG_AMD
-fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa)
set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
-march=) # prefix only, arch appended by the vendor:arch tuple
set(OMP_FLAGS_CPU_INTEL
-qopt-streaming-stores=always)
set(OMP_FLAGS_CPU_GNU_PPC64LE
-mcpu=native)
set(OMP_FLAGS_CPU_XL
-O5 -qarch=auto -qtune=auto)
# NEC
set(OMP_FLAGS_CPU_NEC -O4 -finline)
register_flag_optional(CMAKE_CXX_COMPILER
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
"c++")
register_flag_optional(ARCH
"This overrides CMake's CMAKE_SYSTEM_PROCESSOR detection which uses (uname -p), this is mainly for use with
specialised accelerators only and not to be confused with offload which is is mutually exclusive with this.
Supported values are:
- NEC"
"")
register_flag_optional(OFFLOAD
"Whether to use OpenMP offload, the format is <VENDOR:ARCH?>|ON|OFF.
We support a small set of known offload flags for clang, gcc, and icpx.
However, as offload support is rapidly evolving, we recommend you directly supply them via OFFLOAD_FLAGS.
For example:
* OFFLOAD=NVIDIA:sm_60
* OFFLOAD=AMD:gfx906
* OFFLOAD=INTEL
* OFFLOAD=ON OFFLOAD_FLAGS=..."
OFF)
register_flag_optional(OFFLOAD_FLAGS
"If OFFLOAD is enabled, this *overrides* the default offload flags"
"")
register_flag_optional(OFFLOAD_APPEND_LINK_FLAG
"If enabled, this appends all resolved offload flags (OFFLOAD=<vendor:arch> or directly from OFFLOAD_FLAGS) to the link flags.
This is required for most offload implementations so that offload libraries can linked correctly."
ON)
macro(setup)
find_package(OpenMP REQUIRED)
register_link_library(OpenMP::OpenMP_CXX)
string(TOUPPER ${CMAKE_CXX_COMPILER_ID} COMPILER)
if(NOT ARCH)
string(TOUPPER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
else()
message(STATUS "Using custom arch: ${ARCH}")
endif()
if (("${OFFLOAD}" STREQUAL OFF) OR (NOT DEFINED OFFLOAD))
# no offload
# resolve the CPU specific flags
# starting with ${COMPILER_VENDOR}_${PLATFORM_ARCH}, then try ${COMPILER_VENDOR}, and then give up
register_append_compiler_and_arch_specific_cxx_flags(
OMP_FLAGS_CPU
${COMPILER}
${ARCH}
)
elseif ("${OFFLOAD}" STREQUAL ON)
# offload but with custom flags
register_definitions(OMP_TARGET_GPU)
separate_arguments(OFFLOAD_FLAGS)
set(OMP_FLAGS ${OFFLOAD_FLAGS})
elseif ((DEFINED OFFLOAD) AND OFFLOAD_FLAGS)
# offload but OFFLOAD_FLAGS overrides
register_definitions(OMP_TARGET_GPU)
separate_arguments(OFFLOAD_FLAGS)
list(OMP_FLAGS APPEND ${OFFLOAD_FLAGS})
else ()
# handle the vendor:arch value
string(REPLACE ":" ";" OFFLOAD_TUPLE "${OFFLOAD}")
list(LENGTH OFFLOAD_TUPLE LEN)
if (LEN EQUAL 1)
# offload with <vendor> tuple
list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
# append OMP_FLAGS_OFFLOAD_<vendor> if exists
list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${OFFLOAD_VENDOR}})
elseif (LEN EQUAL 2)
# offload with <vendor:arch> tuple
list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
list(GET OFFLOAD_TUPLE 1 OFFLOAD_ARCH)
# append OMP_FLAGS_OFFLOAD_<compiler>_<vendor> if exists
list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${COMPILER}_${OFFLOAD_VENDOR}})
# append offload arch if OMP_FLAGS_OFFLOAD_<compiler>_ARCH_FLAG if exists
if (DEFINED OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG)
list(APPEND OMP_FLAGS
"${OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG}${OFFLOAD_ARCH}")
endif ()
else ()
message(FATAL_ERROR "Unrecognised OFFLOAD format: `${OFFLOAD}`, consider directly using OFFLOAD_FLAGS")
endif ()
endif ()
message(STATUS "OMP CXX flags : ${OMP_FLAGS}")
message(STATUS "OMP Link flags : ${OMP_LINK_FLAGS}")
# propagate flags to linker so that it links with the offload stuff as well
register_append_cxx_flags(ANY ${OMP_FLAGS})
if (OFFLOAD_APPEND_LINK_FLAG)
register_append_link_flags(${OMP_FLAGS})
endif ()
endmacro()