From 4746aedee878abd662590d76b0d5d7ff3151e3f2 Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Tue, 26 Nov 2024 19:17:45 +0530 Subject: [PATCH 1/2] Remove unnecessary `perf` information from `AnnotatingImporter` state. * Pass pointers to the perf profile proto and the main memory mapping instead of storing them as class members. * The `PerfParser` also does not need to be held on to - it is used only once. * The `PerfReader` must be held on to since it owns all of the perf related data. --- gematria/datasets/annotating_importer.cc | 97 ++++++++++++++---------- gematria/datasets/annotating_importer.h | 40 ++++++---- 2 files changed, 81 insertions(+), 56 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index 82107443..94f3de10 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -37,7 +37,6 @@ #include "gematria/proto/throughput.pb.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" @@ -51,42 +50,50 @@ namespace gematria { AnnotatingImporter::AnnotatingImporter(const Canonicalizer *canonicalizer) - : importer_(canonicalizer), perf_parser_(&perf_reader_) { - quipper::PerfParserOptions parser_opts; - parser_opts.do_remap = true; - parser_opts.discard_unused_events = true; - parser_opts.sort_events_by_time = false; - perf_parser_.set_options(parser_opts); -} + : importer_(canonicalizer) {} -absl::Status AnnotatingImporter::LoadPerfData(std::string_view file_name) { +absl::StatusOr AnnotatingImporter::LoadPerfData( + std::string_view file_name) { // Read and parse the `perf.data`-like file into something more tractable. if (!perf_reader_.ReadFile(std::string(file_name))) { return absl::InvalidArgumentError(absl::StrFormat( "The given `perf.data`-like file (%s) could not be read.", file_name)); } - if (!perf_parser_.ParseRawEvents()) { + + quipper::PerfParser perf_parser( + &perf_reader_, quipper::PerfParserOptions{.do_remap = true, + .discard_unused_events = true, + .sort_events_by_time = false}); + if (!perf_parser.ParseRawEvents()) { return absl::InvalidArgumentError(absl::StrFormat( "The given `perf.data`-like file (%s) could not be parsed.", file_name)); } + return &perf_reader_.proto(); +} + +absl::StatusOr +AnnotatingImporter::GetMainMapping( + const llvm::object::ELFObjectFileBase *elf_object, + const quipper::PerfDataProto *perf_data) { // Find the relevant mapping. // TODO(virajbshah): Make sure the mapping was found. (Use num_mmap_events) - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { // TODO(virajbshah): Not sure if this always works, i.e. does the main // binary always correspond to the first MMapEvent. Implement BuildID or // name based checking. if (event.has_mmap_event() && - event.mmap_event().prot() & 1 /* PROT_READ */ && - event.mmap_event().prot() & 4 /* PROT_EXEC */) { - main_mapping_ = event.mmap_event(); - break; + event.mmap_event().prot() & 0b001 /* PROT_READ */ && + event.mmap_event().prot() & 0b100 /* PROT_EXEC */) { + return &event.mmap_event(); } } - return absl::OkStatus(); + return absl::InvalidArgumentError(absl::StrFormat( + "The given `perf.data`-like file does not have a mapping corresponding " + "to the given object (%s).", + elf_object->getFileName())); } absl::StatusOr> @@ -211,23 +218,24 @@ AnnotatingImporter::GetBlocksFromELF( absl::StatusOr, std::unordered_map>>> -AnnotatingImporter::GetSamples() { - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - const uint64_t mmap_begin_addr = main_mapping_.start(); - const uint64_t mmap_end_addr = main_mapping_.start() + main_mapping_.len(); +AnnotatingImporter::GetSamples( + const quipper::PerfDataProto *perf_data, + const quipper::PerfDataProto_MMapEvent *mapping) { + const uint64_t mmap_begin_addr = mapping->start(); + const uint64_t mmap_end_addr = mmap_begin_addr + mapping->len(); // Extract event type information, - const int num_sample_types = perf_data_proto.event_types_size(); + const int num_sample_types = perf_data->event_types_size(); std::vector sample_types(num_sample_types); std::unordered_map event_code_to_idx; for (int sample_type_idx = 0; sample_type_idx < num_sample_types; ++sample_type_idx) { - const auto &event_type = perf_data_proto.event_types()[sample_type_idx]; + const auto &event_type = perf_data->event_types()[sample_type_idx]; sample_types[sample_type_idx] = event_type.name(); event_code_to_idx[event_type.id()] = sample_type_idx; } std::unordered_map event_id_to_code; - for (const auto &event_type : perf_data_proto.file_attrs()) { + for (const auto &event_type : perf_data->file_attrs()) { // Mask out bits identifying the PMU and not the event. int event_code = event_type.attr().config() & 0xffff; for (int event_id : event_type.ids()) { @@ -243,7 +251,7 @@ AnnotatingImporter::GetSamples() { // Process sample events. std::unordered_map> samples; - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { // Filter out non-sample events. if (!event.has_sample_event()) { continue; @@ -271,13 +279,14 @@ AnnotatingImporter::GetSamples() { absl::StatusOr, std::vector>>> AnnotatingImporter::GetLBRBlocksWithLatency( - const llvm::object::ELFObjectFileBase *elf_object) { + const llvm::object::ELFObjectFileBase *elf_object, + const quipper::PerfDataProto *perf_data, + const quipper::PerfDataProto_MMapEvent *mapping) { // TODO(vbshah): Refactor this and other parameters as function arguments. constexpr int kMaxBlockSizeBytes = 65536; - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - const uint64_t mmap_begin_addr = main_mapping_.start(); - const uint64_t mmap_end_addr = main_mapping_.start() + main_mapping_.len(); + const uint64_t mmap_begin_addr = mapping->start(); + const uint64_t mmap_end_addr = mmap_begin_addr + mapping->len(); // TODO(vbshah): Consider making it possible to use other ELFTs rather than // only ELF64LE since only the implementation of GetMainProgramHeader differs @@ -306,7 +315,7 @@ AnnotatingImporter::GetLBRBlocksWithLatency( std::unordered_map, int, absl::Hash>> index_map; - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { if (!event.has_sample_event() || !event.sample_event().branch_stack_size()) { continue; @@ -317,8 +326,8 @@ AnnotatingImporter::GetLBRBlocksWithLatency( const auto &branch_entry = branch_stack[branch_idx + 1]; const auto &next_branch_entry = branch_stack[branch_idx]; - uint64_t block_begin = branch_entry.to_ip(), - block_end = next_branch_entry.from_ip(); + const uint64_t block_begin = branch_entry.to_ip(); + const uint64_t block_end = next_branch_entry.from_ip(); // Simple validity checks: the block must start before it ends and cannot // be larger than some threshold. @@ -357,30 +366,38 @@ absl::StatusOr> AnnotatingImporter::GetAnnotatedBasicBlockProtos( std::string_view elf_file_name, std::string_view perf_data_file_name, std::string_view source_name) { + // Try to load the binary and cast it down to an ELF object. absl::StatusOr> owning_binary = LoadBinary(elf_file_name); if (!owning_binary.ok()) { return owning_binary.status(); } - absl::Status status = LoadPerfData(perf_data_file_name); - if (!status.ok()) { - return status; - } - - // Try to cast the binary down to an ELF object. const auto elf_object = GetELFFromBinary(owning_binary->getBinary()); if (!elf_object.ok()) { return elf_object.status(); } + // Try to load the perf profile and locate its main mapping, i.e. the one + // corresponding to the executable load segment of the given object file. + absl::StatusOr perf_data = + LoadPerfData(perf_data_file_name); + if (!perf_data.ok()) { + return perf_data.status(); + } + auto main_mapping = GetMainMapping(*elf_object, *perf_data); + if (!main_mapping.ok()) { + return main_mapping.status(); + } + // Get the raw basic blocks, perf samples, and LBR data for annotation. absl::StatusOr, std::vector>>> - basic_blocks = GetLBRBlocksWithLatency(*elf_object); + basic_blocks = + GetLBRBlocksWithLatency(*elf_object, *perf_data, *main_mapping); if (!basic_blocks.ok()) { return basic_blocks.status(); } - const auto sample_types_and_samples = GetSamples(); + const auto sample_types_and_samples = GetSamples(*perf_data, *main_mapping); if (!sample_types_and_samples.ok()) { return sample_types_and_samples.status(); } diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index 65f58bed..d44d35ec 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -59,9 +59,16 @@ class AnnotatingImporter { std::string_view source_name); private: - // Loads a `perf.data`-like file into the importer. Must be called before - // `GetSamples`, `GetLBRData`, and `GetLBRBlocksWithLatency`. - absl::Status LoadPerfData(std::string_view file_name); + // Loads a `perf.data`-like file for use by the importer. The returned pointer + // is valid only as long as this instance of `AnnotatingImporter` is alive. + absl::StatusOr LoadPerfData( + std::string_view file_name); + + // Searches all MMap events for the one that most likely corresponds to the + // executable load segment of the given object. + absl::StatusOr GetMainMapping( + const llvm::object::ELFObjectFileBase* elf_object, + const quipper::PerfDataProto* perf_data); // Loads a binary into for use by the importer. absl::StatusOr> LoadBinary( @@ -91,28 +98,29 @@ class AnnotatingImporter { absl::StatusOr>> GetBlocksFromELF(const llvm::object::ELFObjectFileBase* elf_object); - // Extracts samples from the `perf.data`-file loaded using `LoadPerfData`, - // usually obtained from `perf record`. Returns a {`sample_types`, `samples`} - // pair. `sample_types` is a vector of sample type names, while `samples` is - // a mapping between sample addresses and the corresponding sample values. + // Extracts samples belonging to `mapping` from the `perf_data`. Returns a + // {`sample_types`, `samples`} pair. `sample_types` is a vector of sample + // type names, while `samples` is a mapping between sample addresses and the + // corresponding sample values. // The ordering of the sample values matches the ordering of types in the // heading. absl::StatusOr, std::unordered_map>>> - GetSamples(); + GetSamples(const quipper::PerfDataProto* perf_data, + const quipper::PerfDataProto_MMapEvent* mapping); - // Extracts start and end pairs, as well as latencies in cycles, of sequences - // of straight-run code from branch stacks. - // LBR data is extracted from the `perf.data`-like file loaded using - // `LoadPerfData`. + // Extracts start and end pairs belonging to the given mapping, as well as + // their latencies in cycles, of sequences of straight-run code from + // LBR branch stacks (pseudo-basic blocks). absl::StatusOr, std::vector>>> - GetLBRBlocksWithLatency(const llvm::object::ELFObjectFileBase* elf_object); + GetLBRBlocksWithLatency(const llvm::object::ELFObjectFileBase* elf_object, + const quipper::PerfDataProto* perf_data, + const quipper::PerfDataProto_MMapEvent* mapping); BHiveImporter importer_; - quipper::PerfReader perf_reader_; - quipper::PerfParser perf_parser_; - quipper::PerfDataProto::MMapEvent main_mapping_; + quipper::PerfReader + perf_reader_; // Has ownership of the `PerfDataProto` used throughout. }; template From 2840701f0039e3e688ae8bdd8d0f586273c45974 Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Sat, 21 Dec 2024 18:43:10 +0530 Subject: [PATCH 2/2] Fix nits. --- gematria/datasets/annotating_importer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index d44d35ec..9f39a7c6 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -119,8 +119,9 @@ class AnnotatingImporter { const quipper::PerfDataProto_MMapEvent* mapping); BHiveImporter importer_; - quipper::PerfReader - perf_reader_; // Has ownership of the `PerfDataProto` used throughout. + + // Owns the `PerfDataProto` that samples and branch records are read from. + quipper::PerfReader perf_reader_; }; template