Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tpetra: More Kokkos timer injection support #12431

Merged
merged 5 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions packages/tpetra/core/src/Tpetra_Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,10 @@ namespace Tpetra {
#endif // defined(HAVE_TPETRACORE_MPI)
initKokkosIfNeeded (argc, argv, myRank);

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
// Add Kokkos calls to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
Tpetra::Details::AddKokkosFunctionsToTimeMonitor();
}
tpetraIsInitialized_ = true;
}
Expand All @@ -265,10 +265,10 @@ namespace Tpetra {
#endif // defined(HAVE_TPETRACORE_MPI)
initKokkosIfNeeded (argc, argv, myRank);

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
// Add Kokkos calls to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
Tpetra::Details::AddKokkosFunctionsToTimeMonitor();
}
tpetraIsInitialized_ = true;

Expand Down Expand Up @@ -312,10 +312,10 @@ namespace Tpetra {
const int myRank = comm->getRank ();
initKokkosIfNeeded (argc, argv, myRank);

// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
// Add Kokkos calls to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
Tpetra::Details::AddKokkosFunctionsToTimeMonitor();
}
tpetraIsInitialized_ = true;
wrappedDefaultComm_ = comm;
Expand Down
13 changes: 13 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,19 @@ bool Behavior::timeKokkosFence()

}

/// Report whether the TPETRA_TIME_KOKKOS_FUNCTIONS environment variable asks
/// for Kokkos function timing; the fallback when nothing is specified is false.
bool Behavior::timeKokkosFunctions()
{
  // Static so that both pieces of state persist across calls; they are handed
  // to idempotentlyGetEnvironmentVariableAsBool together with the variable
  // name and the fallback value.
  static bool cachedSetting = false;
  static bool alreadyQueried = false;

  constexpr bool fallback = false;
  constexpr char variableName[] = "TPETRA_TIME_KOKKOS_FUNCTIONS";

  const bool enabled = idempotentlyGetEnvironmentVariableAsBool(
      cachedSetting, alreadyQueried, variableName, fallback);
  return enabled;
}


} // namespace Details
} // namespace Tpetra

7 changes: 7 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,13 @@ class Behavior {
/// <tt>TPETRA_TIME_KOKKOS_FENCE</tt> environment variable.
static bool timeKokkosFence();

/// \brief Add Teuchos timers for all host calls to Kokkos::parallel_for(),
/// Kokkos::parallel_reduce() and Kokkos::parallel_scan().
///
/// This is disabled by default. You may control this at run time via the
/// <tt>TPETRA_TIME_KOKKOS_FUNCTIONS</tt> environment variable.
static bool timeKokkosFunctions();

/// \brief Warn if more than this many Kokkos spaces are accessed.
///
/// This is disabled by default. You may control this at run time via the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,51 @@



namespace {
// Get a useful label from the deviceId
// NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
std::string deviceIdToString(const uint32_t deviceId) {
using namespace Kokkos::Tools::Experimental;
std::string device_label("(");
ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
if (eid.type == DeviceType::Serial) device_label+="Serial";
else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
else if (eid.type == DeviceType::HIP) device_label+="HIP";
else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
else if (eid.type == DeviceType::HPX) device_label+="HPX";
else if (eid.type == DeviceType::Threads) device_label+="Threats";
else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
else device_label+="Unknown to Tpetra";

if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
device_label += " All Instances)";
else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
device_label += " DeepCopyResource)";
else
device_label += " Instance " + std::to_string(eid.instance_id) + ")";

return device_label;
}

// Print the "overlapping timers" warning banner to std::cout and flush it.
// Called when stopping a timer on the StackedTimer throws std::runtime_error.
void overlappingWarning() {
  // Adjacent string literals are concatenated by the compiler, so the whole
  // banner is written with a single insertion; std::endl keeps the flush so
  // the warning is visible immediately.
  std::cout <<
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n"
    << std::endl;
}

}// anonymous space


namespace Tpetra {
namespace Details {

Expand Down Expand Up @@ -103,23 +148,14 @@ namespace Details {
stackedTimer->stop(timer_->name());
}
catch (std::runtime_error&) {
std::ostringstream warning;
warning <<
"\n*********************************************************************\n"
"WARNING: Overlapping timers detected!\n"
"A TimeMonitor timer was stopped before a nested subtimer was\n"
"stopped. This is not allowed by the StackedTimer. This corner case\n"
"typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
"assigned to a new timer. To disable this warning, either fix the\n"
"ordering of timer creation and destuction or disable the StackedTimer\n";
std::cout << warning.str() << std::endl;
overlappingWarning();
Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
}
#endif
}

}
timer_ = Teuchos::null;
}

}// end DeepCopyTimerInjection

void AddKokkosDeepCopyToTimeMonitor(bool force) {
Expand All @@ -129,10 +165,10 @@ namespace Details {
Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
DeepCopyTimerInjection::initialized_=true;

}
}
}


namespace FenceTimerInjection {
Teuchos::RCP<Teuchos::Time> timer_;
Expand All @@ -148,32 +184,7 @@ namespace Details {
active_handle = (active_handle+1) % 1024;
*handle = active_handle;

// Get a useful label from the deviceId
// NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
std::string device_label("(");
{
using namespace Kokkos::Tools::Experimental;

ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
if (eid.type == DeviceType::Serial) device_label+="Serial";
else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
else if (eid.type == DeviceType::HIP) device_label+="HIP";
else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
else if (eid.type == DeviceType::HPX) device_label+="HPX";
else if (eid.type == DeviceType::Threads) device_label+="Threats";
else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
else device_label+="Unknown to Tpetra";

if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
device_label += " All Instances)";
else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
device_label += " DeepCopyResource)";
else
device_label += " Instance " + std::to_string(eid.instance_id) + ")";
}
std::string device_label = deviceIdToString(deviceId);

timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::fence ")+name + " " + device_label);
timer_->start();
Expand All @@ -198,23 +209,12 @@ namespace Details {
stackedTimer->stop(timer_->name());
}
catch (std::runtime_error&) {
std::ostringstream warning;
warning <<
"\n*********************************************************************\n"
"WARNING: Overlapping timers detected!\n"
"A TimeMonitor timer was stopped before a nested subtimer was\n"
"stopped. This is not allowed by the StackedTimer. This corner case\n"
"typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
"assigned to a new timer. To disable this warning, either fix the\n"
"ordering of timer creation and destuction or disable the StackedTimer\n";
std::cout << warning.str() << std::endl;
overlappingWarning();
Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
}
#endif
}

timer_ = Teuchos::null;

}
timer_ = Teuchos::null;
}
// Else: We've nested our fences, and we need to ignore the inner fences
}
Expand All @@ -233,6 +233,77 @@ namespace Details {
}


// Kokkos profiling callbacks that wrap host-side parallel_for / parallel_reduce /
// parallel_scan launches in a Teuchos timer. Only one kernel is timed at a time:
// a kernel that begins while another one is being timed is ignored.
namespace FunctionsTimerInjection {
  // Timer of the kernel currently being timed; null when none is.
  Teuchos::RCP<Teuchos::Time> timer_;
  // Set once the callbacks have been registered with Kokkos.
  bool initialized_ = false;

  // Handle value given to kernels that are NOT timed (nested launches).
  constexpr uint64_t untimedHandle = 0;
  // Handle given to the kernel that owns timer_. The end callback only stops
  // the timer when it is called with this handle, so the end of an ignored
  // nested kernel cannot stop the outer kernel's timer early.
  uint64_t active_handle_ = untimedHandle;

  // Shared begin callback.
  //   kernelName   - "parallel_for", "parallel_reduce" or "parallel_scan"
  //   kernelPrefix - kernel label supplied by Kokkos
  //   devID        - Kokkos device id, turned into a label by deviceIdToString()
  //   kernelID     - out: handle that is later passed to kokkosp_end_kernel()
  void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
                            uint64_t* kernelID) {
    // Nested kernels are not timed; mark them so their end call is ignored.
    if (timer_ != Teuchos::null) {
      *kernelID = untimedHandle;
      return;
    }

    // Issue a fresh, non-reserved handle for this kernel.
    ++active_handle_;
    if (active_handle_ == untimedHandle)
      ++active_handle_;
    *kernelID = active_handle_;

    const std::string device_label = deviceIdToString(devID);

    timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::") + kernelName + " " + kernelPrefix + " " + device_label);
    timer_->start();
    timer_->incrementNumCalls();
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
    const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
    if (nonnull(stackedTimer))
      stackedTimer->start(timer_->name());
#endif
  }

  // Begin callback for Kokkos::parallel_for.
  void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
    kokkosp_begin_kernel("parallel_for", kernelPrefix, devID, kernelID);
  }

  // Begin callback for Kokkos::parallel_scan.
  void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
    kokkosp_begin_kernel("parallel_scan", kernelPrefix, devID, kernelID);
  }

  // Begin callback for Kokkos::parallel_reduce.
  void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
    kokkosp_begin_kernel("parallel_reduce", kernelPrefix, devID, kernelID);
  }

  // End callback shared by all three kernel kinds.
  //   handle - the value kokkosp_begin_kernel() stored in *kernelID
  void kokkosp_end_kernel(const uint64_t handle) {
    // Ignore the end of any kernel that does not own the running timer.
    if (handle != active_handle_ || timer_ == Teuchos::null)
      return;

    timer_->stop();
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
    try {
      const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
      if (nonnull(stackedTimer))
        stackedTimer->stop(timer_->name());
    }
    catch (std::runtime_error&) {
      // Stopping failed: warn and detach the StackedTimer.
      overlappingWarning();
      Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
    }
#endif
    timer_ = Teuchos::null;
  }
} // end FunctionsTimerInjection

void AddKokkosFunctionsToTimeMonitor(bool force) {
if (!FunctionsTimerInjection::initialized_) {
if (force || Tpetra::Details::Behavior::timeKokkosFunctions()) {
Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);

// The end-call is generic, even though the start-call is not.
Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
FunctionsTimerInjection::initialized_=true;
}
}
}



} // namespace Details
} // namespace Tpetra
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ namespace Details {
// This is used for unit testing the capability
void AddKokkosFenceToTimeMonitor(bool force = false);

// Register Kokkos profiling callbacks that add Teuchos timers around
// parallel_for, parallel_reduce and parallel_scan calls.
// The force option overrides the environment variable control via TPETRA_TIME_KOKKOS_FUNCTIONS.
// This is used for unit testing the capability.
void AddKokkosFunctionsToTimeMonitor(bool force = false);


} // namespace Details
} // namespace Tpetra

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ initializeKokkos ()
std::atexit (finalizeKokkosIfNeeded);

}
// Add Kokkos::deep_copy() to the TimeMonitor if the environment says so
// Add Kokkos calls to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosDeepCopyToTimeMonitor();
// Add Kokkos::fence() to the TimeMonitor if the environment says so
Tpetra::Details::AddKokkosFenceToTimeMonitor();
Tpetra::Details::AddKokkosFunctionsToTimeMonitor();
}

} // namespace Details
Expand Down