From ed01cb2b2a325e70533fa44fb1f4db3d686b1e5f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 30 Dec 2024 01:28:47 +0000 Subject: [PATCH] Allow use of benchmark scheduler within benchmarking pipeline This patch adds support for using the newly added benchmark scheduler classes within the benchmarking pipeline to assign a core to be used for benchmarking. Pull Request: https://github.com/google/gematria/pull/273 --- gematria/datasets/pipelines/BUILD.bazel | 3 ++ gematria/datasets/pipelines/benchmark_bbs.py | 16 +++++++++- .../datasets/pipelines/benchmark_bbs_lib.py | 29 +++++++++++++++---- .../pipelines/benchmark_bbs_lib_test.py | 9 ++++-- .../pipelines/benchmark_cpu_scheduler.py | 17 +++++++++++ 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/gematria/datasets/pipelines/BUILD.bazel b/gematria/datasets/pipelines/BUILD.bazel index 954bb2f1..c7fbcdd6 100644 --- a/gematria/datasets/pipelines/BUILD.bazel +++ b/gematria/datasets/pipelines/BUILD.bazel @@ -53,6 +53,7 @@ gematria_py_binary( name = "benchmark_bbs_lib", srcs = ["benchmark_bbs_lib.py"], deps = [ + ":benchmark_cpu_scheduler", "//gematria/datasets/python:exegesis_benchmark", "//gematria/proto:execution_annotation_py_pb2", ], @@ -63,6 +64,7 @@ gematria_py_binary( srcs = ["benchmark_bbs.py"], deps = [ ":benchmark_bbs_lib", + ":benchmark_cpu_scheduler", ], ) @@ -77,6 +79,7 @@ gematria_py_test( ], deps = [ ":benchmark_bbs_lib", + ":benchmark_cpu_scheduler", "//gematria/io/python:tfrecord", "//gematria/proto:execution_annotation_py_pb2", ], diff --git a/gematria/datasets/pipelines/benchmark_bbs.py b/gematria/datasets/pipelines/benchmark_bbs.py index 9e28ee55..d79f61fa 100644 --- a/gematria/datasets/pipelines/benchmark_bbs.py +++ b/gematria/datasets/pipelines/benchmark_bbs.py @@ -20,6 +20,7 @@ from apache_beam.options import pipeline_options from gematria.datasets.pipelines import benchmark_bbs_lib +from gematria.datasets.pipelines import benchmark_cpu_scheduler _INPUT_FILE_PATTERN = flags.DEFINE_string( 'input_file_pattern', @@ -30,6 +31,15 @@ _OUTPUT_FILE_PATTERN = flags.DEFINE_string( 'output_file_pattern', None, 'The output file path/pattern.', required=True ) +_BENCHMARK_SCHEDULER = flags.DEFINE_enum( + 'benchmark_scheduler', + 'NoScheduling', + [ + scheduler_type.name + for scheduler_type in benchmark_cpu_scheduler.BenchmarkSchedulerImplementations + ], + 'The scheduler to use for choosing a core for running benchmarks.', +) def main(argv) -> None: @@ -39,7 +49,11 @@ def main(argv) -> None: beam_options = pipeline_options.PipelineOptions() pipeline_constructor = benchmark_bbs_lib.benchmark_bbs( - _INPUT_FILE_PATTERN.value, _OUTPUT_FILE_PATTERN.value + _INPUT_FILE_PATTERN.value, + _OUTPUT_FILE_PATTERN.value, + benchmark_cpu_scheduler.BenchmarkSchedulerImplementations[ + _BENCHMARK_SCHEDULER.value + ], ) with beam.Pipeline(options=beam_options) as pipeline: diff --git a/gematria/datasets/pipelines/benchmark_bbs_lib.py b/gematria/datasets/pipelines/benchmark_bbs_lib.py index 035a2935..eb3300cb 100644 --- a/gematria/datasets/pipelines/benchmark_bbs_lib.py +++ b/gematria/datasets/pipelines/benchmark_bbs_lib.py @@ -20,6 +20,7 @@ from gematria.proto import execution_annotation_pb2 from gematria.datasets.python import exegesis_benchmark +from gematria.datasets.pipelines import benchmark_cpu_scheduler _BEAM_METRIC_NAMESPACE_NAME = 'benchmark_bbs' @@ -27,8 +28,11 @@ class BenchmarkBasicBlock(beam.DoFn): """A Beam function that benchmarks basic blocks.""" - def setup(self): - self._exegesis_benchmark = exegesis_benchmark.ExegesisBenchmark.create() + def __init__( + self, + benchmark_scheduler_type: benchmark_cpu_scheduler.BenchmarkSchedulerImplementations, + ): + self._benchmark_scheduler_type = benchmark_scheduler_type self._benchmark_success_blocks = metrics.Metrics.counter( _BEAM_METRIC_NAMESPACE_NAME, 'benchmark_bbs_success' ) @@ -36,6 +40,17 @@ def setup(self): _BEAM_METRIC_NAMESPACE_NAME, 'benchmark_blocks_failed' ) + def setup(self): + self._exegesis_benchmark = exegesis_benchmark.ExegesisBenchmark.create() + self._benchmark_scheduler = ( + benchmark_cpu_scheduler.construct_benchmark_scheduler( + self._benchmark_scheduler_type + ) + ) + self._benchmarking_core = ( + self._benchmark_scheduler.setup_and_get_benchmark_core() + ) + def process( self, block_with_annotations: execution_annotation_pb2.BlockWithExecutionAnnotations, @@ -44,8 +59,10 @@ def process( benchmark_code = self._exegesis_benchmark.process_annotated_block( block_with_annotations ) + + self._benchmark_scheduler.verify() benchmark_value = self._exegesis_benchmark.benchmark_basic_block( - benchmark_code + benchmark_code, self._benchmarking_core ) self._benchmark_success_blocks.inc() yield (block_with_annotations.block_hex, benchmark_value) @@ -65,7 +82,9 @@ def process( def benchmark_bbs( - input_file_pattern: str, output_file_pattern: str + input_file_pattern: str, + output_file_pattern: str, + benchmark_scheduler_type: benchmark_cpu_scheduler.BenchmarkSchedulerImplementations, ) -> Callable[[beam.Pipeline], None]: """Creates a pipeline to benchmark BBs.""" @@ -78,7 +97,7 @@ def pipeline(root: beam.Pipeline) -> None: ) annotated_bbs_shuffled = annotated_bbs | 'Shuffle' >> beam.Reshuffle() benchmarked_blocks = annotated_bbs_shuffled | 'Benchmarking' >> beam.ParDo( - BenchmarkBasicBlock() + BenchmarkBasicBlock(benchmark_scheduler_type) ) formatted_output = benchmarked_blocks | 'Formatting' >> beam.ParDo( FormatBBsForOutput() diff --git a/gematria/datasets/pipelines/benchmark_bbs_lib_test.py b/gematria/datasets/pipelines/benchmark_bbs_lib_test.py index 6ab41f5e..2b869c1d 100644 --- a/gematria/datasets/pipelines/benchmark_bbs_lib_test.py +++ b/gematria/datasets/pipelines/benchmark_bbs_lib_test.py @@ -21,6 +21,7 @@ from gematria.datasets.pipelines import benchmark_bbs_lib from gematria.proto import execution_annotation_pb2 from gematria.io.python import tfrecord +from gematria.datasets.pipelines import benchmark_cpu_scheduler BLOCK_FOR_TESTING = execution_annotation_pb2.BlockWithExecutionAnnotations( execution_annotations=execution_annotation_pb2.ExecutionAnnotations( @@ -45,7 +46,9 @@ class BenchmarkBBsTests(absltest.TestCase): def test_benchmark_basic_block(self): - benchmark_transform = benchmark_bbs_lib.BenchmarkBasicBlock() + benchmark_transform = benchmark_bbs_lib.BenchmarkBasicBlock( + benchmark_cpu_scheduler.BenchmarkSchedulerImplementations.NoScheduling + ) benchmark_transform.setup() block_outputs = list(benchmark_transform.process(BLOCK_FOR_TESTING)) @@ -74,7 +77,9 @@ def test_benchmark_bbs(self): output_file_pattern = os.path.join(output_folder, 'bhive-output') pipeline_constructor = benchmark_bbs_lib.benchmark_bbs( - test_tfrecord.full_path, output_file_pattern + test_tfrecord.full_path, + output_file_pattern, + benchmark_cpu_scheduler.BenchmarkSchedulerImplementations.NoScheduling, ) with test_pipeline.TestPipeline() as pipeline_under_test: diff --git a/gematria/datasets/pipelines/benchmark_cpu_scheduler.py b/gematria/datasets/pipelines/benchmark_cpu_scheduler.py index ecaba7c5..0bd370e6 100644 --- a/gematria/datasets/pipelines/benchmark_cpu_scheduler.py +++ b/gematria/datasets/pipelines/benchmark_cpu_scheduler.py @@ -16,6 +16,7 @@ from collections.abc import Iterable import os import re +from enum import Enum class BenchmarkScheduler(ABC): @@ -95,3 +96,19 @@ def verify(self): cpu_mask = list(os.sched_getaffinity(0)) if self._cpu_mask != cpu_mask: raise ValueError('Expected the CPU mask to not change.') + + +class BenchmarkSchedulerImplementations(Enum): + NoScheduling = 1 + Default = 2 + + +def construct_benchmark_scheduler( + scheduler_type: BenchmarkSchedulerImplementations, +) -> BenchmarkScheduler: + if scheduler_type == BenchmarkSchedulerImplementations.NoScheduling: + return NoSchedulingBenchmarkScheduler() + elif scheduler_type == BenchmarkSchedulerImplementations.Default: + return DefaultBenchmarkScheduler() + else: + raise ValueError('Unexpected Benchmark Scheduler Type.')