forked from intel/pti-gpu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtool.cc
95 lines (77 loc) · 2.31 KB
/
tool.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
//==============================================================
// Copyright (C) Intel Corporation
//
// SPDX-License-Identifier: MIT
// =============================================================
#include <iomanip>
#include <iostream>
#include <set>
#include "cl_metric_collector.h"
// Metric collector used by this tool. It is assigned in EnableProfiling() and
// deleted in DisableProfiling(); it is nullptr until a collector has been
// created successfully.
static ClMetricCollector* collector = nullptr;
// Time point captured at the end of a successful EnableProfiling(); read by
// PrintResults() as the start of the "Total Execution Time" interval.
static std::chrono::steady_clock::time_point start;
// External Tool Interface ////////////////////////////////////////////////////
// Prints the one-line command-line synopsis of the tool to standard output.
extern "C"
#if defined(_WIN32)
__declspec(dllexport)
#endif
void Usage() {
  static const char kSynopsis[] =
      "Usage: ./cl_gpu_query[.exe] <application> <args>";
  // std::endl terminates the line and flushes the stream.
  std::cout << kSynopsis << std::endl;
}
// Command-line parsing hook. This tool recognizes no options of its own:
// both arguments are ignored and the function always returns 1.
// NOTE(review): the caller presumably interprets the result as the index of
// the first application argument - confirm against the loader.
extern "C"
#if defined(_WIN32)
__declspec(dllexport)
#endif
int ParseArgs(int argc, char* argv[]) {
  // Neither parameter is inspected; discard them explicitly.
  static_cast<void>(argc);
  static_cast<void>(argv);
  const int result = 1;
  return result;
}
// Environment setup hook. The body is empty: this tool sets no environment
// variables and performs no other work here.
extern "C"
#if defined(_WIN32)
__declspec(dllexport)
#endif
void SetToolEnv() {
  // Nothing to configure.
}
// Internal Tool Functionality ////////////////////////////////////////////////
static void PrintResults() {
PTI_ASSERT(collector != nullptr);
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
std::chrono::duration<uint64_t, std::nano> time = end - start;
const ClKernelInfoMap& kernel_map = collector->GetKernelInfoMap();
if (kernel_map.empty()) {
return;
}
uint64_t total_duration = 0;
for (auto& value : kernel_map) {
total_duration += value.second.total_time;
}
PTI_ASSERT(total_duration > 0);
std::cerr << std::endl;
std::cerr << "=== Device Metrics: ===" << std::endl;
std::cerr << std::endl;
std::cerr << "Total Execution Time (ns): " << time.count() << std::endl;
std::cerr << "Total Kernel Time (ns): " << total_duration << std::endl;
std::cerr << std::endl;
ClMetricCollector::PrintKernelsTable(kernel_map);
std::cerr << std::endl;
}
// Internal Tool Interface ////////////////////////////////////////////////////
void EnableProfiling() {
cl_device_id device = utils::cl::GetIntelDevice(CL_DEVICE_TYPE_GPU);
if (device == nullptr) {
std::cerr << "[WARNING] Unable to find target GPU device for tracing" <<
std::endl;
return;
}
collector = ClMetricCollector::Create(device, "ComputeBasic");
if (collector == nullptr) {
return;
}
start = std::chrono::steady_clock::now();
}
// Stops profiling: disables tracing on the collector, prints the collected
// results and destroys the collector. Does nothing when no collector exists
// (e.g. EnableProfiling() found no device or collector creation failed).
void DisableProfiling() {
  if (collector == nullptr) {
    return;
  }

  collector->DisableTracing();
  PrintResults();

  delete collector;
  // Reset the global so a repeated call (or any later null check) does not
  // operate on a dangling pointer and delete the object a second time.
  collector = nullptr;
}