Benchmarks: add tags to csv output

* Unify/standardize a few tags
* Update Ruby perf impact script to use names instead of column indices
celerity · Oct 24, 2023 · efd0fb2 · efd0fb2
1 parent 0bde656
commit efd0fb2
Show file tree

Hide file tree

Showing 7 changed files with 320 additions and 310 deletions.
diff --git a/ci/check-perf-impact.rb b/ci/check-perf-impact.rb
@@ -5,9 +5,9 @@
 
 # information regarding the benchmark file
 BENCH_FN = 'ci/perf/gpuc2_bench.csv'
-NAME_COL_1 = 0     # first name column
-NAME_COL_2 = 1     # second name column
-RAW_DATA_COL = 11  # raw data column (array of runs)
+NAME_COL_1 = "test case"       # first name column
+NAME_COL_2 = "benchmark name"  # second name column
+RAW_DATA_COL = "raw"           # raw data column (array of runs)
 
 # customizing chart generation
 MAX_CHARTS_PER_IMAGE = 10   # maximum number of comparisons in a single image
@@ -67,9 +67,9 @@ def get_data_for_version(version = nil)
     throw "failed git checkout (cmd: #{cmd})!" unless $?.success?
   end
   # read the data
-  data = CSV.read(BENCH_FN)
+  data = CSV.read(BENCH_FN, headers: true)
   bench_data_map = {}
-  data[1..].each do |row|
+  data.each do |row|
     raw_data = row[RAW_DATA_COL].delete_prefix('"').delete_suffix('"').split(",").map(&:to_f)
     bench_data_map[row[NAME_COL_1]+" / "+row[NAME_COL_2]] = raw_data
   end
@@ -78,7 +78,7 @@ def get_data_for_version(version = nil)
     `git restore --staged #{BENCH_FN}`
     `git restore #{BENCH_FN}`
   end
-  # return the geometric mean, benchmark data map, and full data set
+  # return the benchmark data map and the full data set
   return bench_data_map, data
 end
 

diff --git a/ci/perf/gpuc2_bench.csv b/ci/perf/gpuc2_bench.csv
diff --git a/ci/perf/gpuc2_bench.md b/ci/perf/gpuc2_bench.md
diff --git a/test/benchmark_reporters.cc b/test/benchmark_reporters.cc
@@ -106,7 +106,7 @@ class benchmark_csv_reporter : public benchmark_reporter_base {
 
 	void testRunStarting(const Catch::TestRunInfo& test_run_info) override {
 		benchmark_reporter_base::testRunStarting(test_run_info);
-		fmt::print(m_stream, "test case,benchmark name,samples,iterations,estimated,mean,low mean,high mean,std dev,low std dev,high std dev,raw\n");
+		fmt::print(m_stream, "test case,benchmark name,samples,iterations,estimated,mean,low mean,high mean,std dev,low std dev,high std dev,tags,raw\n");
 	}
 
 	void benchmarkEnded(const Catch::BenchmarkStats<>& benchmark_stats) override {
@@ -116,6 +116,11 @@ class benchmark_csv_reporter : public benchmark_reporter_base {
 		fmt::print(m_stream, "{:.4f},{:.4f},{:.4f},{:.4f},{:.4f},{:.4f},", benchmark_stats.mean.point.count(), benchmark_stats.mean.lower_bound.count(),
 		    benchmark_stats.mean.upper_bound.count(), benchmark_stats.standardDeviation.point.count(), benchmark_stats.standardDeviation.lower_bound.count(),
 		    benchmark_stats.standardDeviation.upper_bound.count());
+		// Print the benchmark tags for tool-based processing and categorization (as quoted comma-separated values)
+		const auto& tci = currentTestCaseInfo;
+		std::vector<std::string> tags;
+		std::transform(tci->tags.cbegin(), tci->tags.cend(), std::back_inserter(tags), [](const Catch::Tag& t) { return std::string(t.original); });
+		fmt::print(m_stream, "\"{}\",", fmt::join(tags, ","));
 		// Finally print all raw values for custom analyses (as quoted comma-separated values).
 		std::vector<double> raw;
 		raw.reserve(benchmark_stats.samples.size());

diff --git a/test/dag_benchmarks.cc b/test/dag_benchmarks.cc
@@ -17,7 +17,7 @@ using namespace std::chrono_literals;
 struct bench_graph_node : intrusive_graph_node<bench_graph_node> {};
 
 // try to cover the dependency counts we'll see in practice
-TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nodes", "[benchmark][intrusive_graph_node]", ((int N), N), 1, 10, 100) {
+TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nodes", "[benchmark][group:graph-nodes]", ((int N), N), 1, 10, 100) {
 	// note that bench_graph_nodes are created/destroyed *within* the BENCHMARK
 	// in the first two cases while the latter 2 cases only operate on already
 	// existing nodes -- this is intentional; both cases are relevant in practice
@@ -60,7 +60,7 @@ TEMPLATE_TEST_CASE_SIG("benchmark intrusive graph dependency handling with N nod
 	};
 }
 
-TEST_CASE("benchmark task handling", "[benchmark][task]") {
+TEST_CASE("benchmark task handling", "[benchmark][group:task-graph]") {
 	using namespace std::chrono_literals;
 	std::unique_ptr<task_manager> tm;
 
@@ -433,15 +433,16 @@ void run_benchmarks(BenchmarkContextFactory&& make_ctx) {
 	BENCHMARK("jacobi topology") { generate_jacobi_graph(make_ctx(), 50); };
 }
 
-TEST_CASE("generating large task graphs", "[benchmark][task-graph]") {
+TEST_CASE("generating large task graphs", "[benchmark][group:task-graph]") {
 	run_benchmarks([] { return task_manager_benchmark_context{}; });
 }
 
-TEMPLATE_TEST_CASE_SIG("generating large command graphs for N nodes", "[benchmark][command-graph]", ((size_t NumNodes), NumNodes), 1, 4, 16) {
+TEMPLATE_TEST_CASE_SIG("generating large command graphs for N nodes", "[benchmark][group:command-graph]", ((size_t NumNodes), NumNodes), 1, 4, 16) {
 	run_benchmarks([] { return graph_generator_benchmark_context{NumNodes}; });
 }
 
-TEMPLATE_TEST_CASE_SIG("building command graphs in a dedicated scheduler thread for N nodes", "[benchmark][scheduler]", ((size_t NumNodes), NumNodes), 1, 4) {
+TEMPLATE_TEST_CASE_SIG(
+    "building command graphs in a dedicated scheduler thread for N nodes", "[benchmark][group:scheduler]", ((size_t NumNodes), NumNodes), 1, 4) {
 	SECTION("reference: single-threaded immediate graph generation") {
 		run_benchmarks([&] { return graph_generator_benchmark_context{NumNodes}; });
 	}

diff --git a/test/grid_benchmarks.cc b/test/grid_benchmarks.cc
@@ -31,7 +31,7 @@ box_vector<Dims> create_random_boxes(const size_t grid_size, const size_t max_bo
 	return boxes;
 }
 
-TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][grid]") {
+TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][group:grid]") {
 	const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
 	    {"small", 10, 5, 4},
 	    {"medium", 50, 1, 50},
@@ -52,7 +52,7 @@ TEST_CASE("normalizing randomized box sets - 2d", "[benchmark][grid]") {
 	test_utils::render_boxes(normalized_2d, fmt::format("{}-normalized", label));
 }
 
-TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][grid]") {
+TEST_CASE("normalizing randomized box sets - 3d", "[benchmark][group:grid]") {
 	const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
 	    {"small", 10, 5, 4},
 	    {"medium", 50, 1, 50},
@@ -85,7 +85,7 @@ box_vector<Dims> create_box_tiling(const size_t n_per_side) {
 	return boxes;
 }
 
-TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchmark][grid]", ((int Dims), Dims), 1, 2, 3) {
+TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchmark][group:grid]", ((int Dims), Dims), 1, 2, 3) {
 	const auto [label, n] = GENERATE(values<std::tuple<const char*, size_t>>({
 	    {"small", 4},
 	    {"medium", 50},
@@ -111,7 +111,7 @@ TEMPLATE_TEST_CASE_SIG("normalizing a fully mergeable tiling of boxes", "[benchm
 	}
 }
 
-TEST_CASE("performing set operations between randomized regions - 2d", "[benchmark][grid]") {
+TEST_CASE("performing set operations between randomized regions - 2d", "[benchmark][group:grid]") {
 	const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
 	    {"small", 10, 5, 4},
 	    {"medium", 50, 1, 50},
@@ -148,7 +148,7 @@ TEST_CASE("performing set operations between randomized regions - 2d", "[benchma
 	test_utils::render_boxes(difference_2d.get_boxes(), fmt::format("difference-{}", label));
 }
 
-TEST_CASE("performing set operations between randomized regions - 3d", "[benchmark][grid]") {
+TEST_CASE("performing set operations between randomized regions - 3d", "[benchmark][group:grid]") {
 	const auto [label, grid_size, max_box_size, num_boxes] = GENERATE(values<std::tuple<const char*, size_t, size_t, size_t>>({
 	    {"small", 10, 5, 4},
 	    {"medium", 50, 1, 50},
@@ -177,7 +177,7 @@ box_vector<2> create_interlocking_boxes(const size_t num_boxes_per_side) {
 	return boxes;
 }
 
-TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[benchmark][grid]") {
+TEST_CASE("normalizing a fully mergeable, complex tiling of boxes - 2d", "[benchmark][group:grid]") {
 	const auto [label, n] = GENERATE(values<std::tuple<const char*, size_t>>({
 	    {"small", 10},
 	    {"large", 200},

diff --git a/test/system_benchmarks.cc b/test/system_benchmarks.cc
@@ -13,7 +13,7 @@ template <int Dims>
 class bench_runtime_fixture : public test_utils::runtime_fixture {};
 
 TEMPLATE_TEST_CASE_METHOD_SIG(
-    bench_runtime_fixture, "benchmark independent task pattern with N tasks", "[benchmark][system-benchmarks][indep-tasks]", ((int N), N), 100, 1000, 5000) {
+    bench_runtime_fixture, "benchmark independent task pattern with N tasks", "[benchmark][group:system][indep-tasks]", ((int N), N), 100, 1000, 5000) {
 	constexpr size_t num_tasks = N;
 	constexpr size_t num_repeats = 2;
 	constexpr size_t items_per_task = 256;
@@ -69,7 +69,7 @@ TEMPLATE_TEST_CASE_METHOD_SIG(
 }
 
 TEMPLATE_TEST_CASE_METHOD_SIG(
-    bench_runtime_fixture, "benchmark stencil pattern with N time steps", "[benchmark][system-benchmarks][stencil]", ((int N), N), 50, 1000) {
+    bench_runtime_fixture, "benchmark stencil pattern with N time steps", "[benchmark][group:system][stencil]", ((int N), N), 50, 1000) {
 	constexpr size_t num_iterations = N;
 	constexpr int side_length = 128; // sufficiently small to notice large-scale changes in runtime overhead