From 4746aedee878abd662590d76b0d5d7ff3151e3f2 Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Tue, 26 Nov 2024 19:17:45 +0530 Subject: [PATCH 1/4] Remove unnecessary `perf` information from `AnnotatingImporter` state. * Pass pointers to the perf profile proto and the main memory mapping instead of storing them as class members. * The `PerfParser` also does not need to be held on to - it is used only once. * The `PerfReader` must be held on to since it owns all of the perf related data. --- gematria/datasets/annotating_importer.cc | 97 ++++++++++++++---------- gematria/datasets/annotating_importer.h | 40 ++++++---- 2 files changed, 81 insertions(+), 56 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index 82107443..94f3de10 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -37,7 +37,6 @@ #include "gematria/proto/throughput.pb.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ELFTypes.h" @@ -51,42 +50,50 @@ namespace gematria { AnnotatingImporter::AnnotatingImporter(const Canonicalizer *canonicalizer) - : importer_(canonicalizer), perf_parser_(&perf_reader_) { - quipper::PerfParserOptions parser_opts; - parser_opts.do_remap = true; - parser_opts.discard_unused_events = true; - parser_opts.sort_events_by_time = false; - perf_parser_.set_options(parser_opts); -} + : importer_(canonicalizer) {} -absl::Status AnnotatingImporter::LoadPerfData(std::string_view file_name) { +absl::StatusOr AnnotatingImporter::LoadPerfData( + std::string_view file_name) { // Read and parse the `perf.data`-like file into something more tractable. if (!perf_reader_.ReadFile(std::string(file_name))) { return absl::InvalidArgumentError(absl::StrFormat( "The given `perf.data`-like file (%s) could not be read.", file_name)); } - if (!perf_parser_.ParseRawEvents()) { + + quipper::PerfParser perf_parser( + &perf_reader_, quipper::PerfParserOptions{.do_remap = true, + .discard_unused_events = true, + .sort_events_by_time = false}); + if (!perf_parser.ParseRawEvents()) { return absl::InvalidArgumentError(absl::StrFormat( "The given `perf.data`-like file (%s) could not be parsed.", file_name)); } + return &perf_reader_.proto(); +} + +absl::StatusOr +AnnotatingImporter::GetMainMapping( + const llvm::object::ELFObjectFileBase *elf_object, + const quipper::PerfDataProto *perf_data) { // Find the relevant mapping. // TODO(virajbshah): Make sure the mapping was found. (Use num_mmap_events) - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { // TODO(virajbshah): Not sure if this always works, i.e. does the main // binary always correspond to the first MMapEvent. Implement BuildID or // name based checking. if (event.has_mmap_event() && - event.mmap_event().prot() & 1 /* PROT_READ */ && - event.mmap_event().prot() & 4 /* PROT_EXEC */) { - main_mapping_ = event.mmap_event(); - break; + event.mmap_event().prot() & 0b001 /* PROT_READ */ && + event.mmap_event().prot() & 0b100 /* PROT_EXEC */) { + return &event.mmap_event(); } } - return absl::OkStatus(); + return absl::InvalidArgumentError(absl::StrFormat( + "The given `perf.data`-like file does not have a mapping corresponding " + "to the given object (%s).", + elf_object->getFileName())); } absl::StatusOr> @@ -211,23 +218,24 @@ AnnotatingImporter::GetBlocksFromELF( absl::StatusOr, std::unordered_map>>> -AnnotatingImporter::GetSamples() { - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - const uint64_t mmap_begin_addr = main_mapping_.start(); - const uint64_t mmap_end_addr = main_mapping_.start() + main_mapping_.len(); +AnnotatingImporter::GetSamples( + const quipper::PerfDataProto *perf_data, + const quipper::PerfDataProto_MMapEvent *mapping) { + const uint64_t mmap_begin_addr = mapping->start(); + const uint64_t mmap_end_addr = mmap_begin_addr + mapping->len(); // Extract event type information, - const int num_sample_types = perf_data_proto.event_types_size(); + const int num_sample_types = perf_data->event_types_size(); std::vector sample_types(num_sample_types); std::unordered_map event_code_to_idx; for (int sample_type_idx = 0; sample_type_idx < num_sample_types; ++sample_type_idx) { - const auto &event_type = perf_data_proto.event_types()[sample_type_idx]; + const auto &event_type = perf_data->event_types()[sample_type_idx]; sample_types[sample_type_idx] = event_type.name(); event_code_to_idx[event_type.id()] = sample_type_idx; } std::unordered_map event_id_to_code; - for (const auto &event_type : perf_data_proto.file_attrs()) { + for (const auto &event_type : perf_data->file_attrs()) { // Mask out bits identifying the PMU and not the event. int event_code = event_type.attr().config() & 0xffff; for (int event_id : event_type.ids()) { @@ -243,7 +251,7 @@ AnnotatingImporter::GetSamples() { // Process sample events. std::unordered_map> samples; - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { // Filter out non-sample events. if (!event.has_sample_event()) { continue; @@ -271,13 +279,14 @@ AnnotatingImporter::GetSamples() { absl::StatusOr, std::vector>>> AnnotatingImporter::GetLBRBlocksWithLatency( - const llvm::object::ELFObjectFileBase *elf_object) { + const llvm::object::ELFObjectFileBase *elf_object, + const quipper::PerfDataProto *perf_data, + const quipper::PerfDataProto_MMapEvent *mapping) { // TODO(vbshah): Refactor this and other parameters as function arguments. constexpr int kMaxBlockSizeBytes = 65536; - const quipper::PerfDataProto &perf_data_proto = perf_reader_.proto(); - const uint64_t mmap_begin_addr = main_mapping_.start(); - const uint64_t mmap_end_addr = main_mapping_.start() + main_mapping_.len(); + const uint64_t mmap_begin_addr = mapping->start(); + const uint64_t mmap_end_addr = mmap_begin_addr + mapping->len(); // TODO(vbshah): Consider making it possible to use other ELFTs rather than // only ELF64LE since only the implementation of GetMainProgramHeader differs @@ -306,7 +315,7 @@ AnnotatingImporter::GetLBRBlocksWithLatency( std::unordered_map, int, absl::Hash>> index_map; - for (const auto &event : perf_data_proto.events()) { + for (const auto &event : perf_data->events()) { if (!event.has_sample_event() || !event.sample_event().branch_stack_size()) { continue; @@ -317,8 +326,8 @@ AnnotatingImporter::GetLBRBlocksWithLatency( const auto &branch_entry = branch_stack[branch_idx + 1]; const auto &next_branch_entry = branch_stack[branch_idx]; - uint64_t block_begin = branch_entry.to_ip(), - block_end = next_branch_entry.from_ip(); + const uint64_t block_begin = branch_entry.to_ip(); + const uint64_t block_end = next_branch_entry.from_ip(); // Simple validity checks: the block must start before it ends and cannot // be larger than some threshold. @@ -357,30 +366,38 @@ absl::StatusOr> AnnotatingImporter::GetAnnotatedBasicBlockProtos( std::string_view elf_file_name, std::string_view perf_data_file_name, std::string_view source_name) { + // Try to load the binary and cast it down to an ELF object. absl::StatusOr> owning_binary = LoadBinary(elf_file_name); if (!owning_binary.ok()) { return owning_binary.status(); } - absl::Status status = LoadPerfData(perf_data_file_name); - if (!status.ok()) { - return status; - } - - // Try to cast the binary down to an ELF object. const auto elf_object = GetELFFromBinary(owning_binary->getBinary()); if (!elf_object.ok()) { return elf_object.status(); } + // Try to load the perf profile and locate its main mapping, i.e. the one + // corresponding to the executable load segment of the given object file. + absl::StatusOr perf_data = + LoadPerfData(perf_data_file_name); + if (!perf_data.ok()) { + return perf_data.status(); + } + auto main_mapping = GetMainMapping(*elf_object, *perf_data); + if (!main_mapping.ok()) { + return main_mapping.status(); + } + // Get the raw basic blocks, perf samples, and LBR data for annotation. absl::StatusOr, std::vector>>> - basic_blocks = GetLBRBlocksWithLatency(*elf_object); + basic_blocks = + GetLBRBlocksWithLatency(*elf_object, *perf_data, *main_mapping); if (!basic_blocks.ok()) { return basic_blocks.status(); } - const auto sample_types_and_samples = GetSamples(); + const auto sample_types_and_samples = GetSamples(*perf_data, *main_mapping); if (!sample_types_and_samples.ok()) { return sample_types_and_samples.status(); } diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index 65f58bed..d44d35ec 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -59,9 +59,16 @@ class AnnotatingImporter { std::string_view source_name); private: - // Loads a `perf.data`-like file into the importer. Must be called before - // `GetSamples`, `GetLBRData`, and `GetLBRBlocksWithLatency`. - absl::Status LoadPerfData(std::string_view file_name); + // Loads a `perf.data`-like file for use by the importer. The returned pointer + // is valid only as long as this instance of `AnnotatingImporter` is alive. + absl::StatusOr LoadPerfData( + std::string_view file_name); + + // Searches all MMap events for the one that most likely corresponds to the + // executable load segment of the given object. + absl::StatusOr GetMainMapping( + const llvm::object::ELFObjectFileBase* elf_object, + const quipper::PerfDataProto* perf_data); // Loads a binary into for use by the importer. absl::StatusOr> LoadBinary( @@ -91,28 +98,29 @@ class AnnotatingImporter { absl::StatusOr>> GetBlocksFromELF(const llvm::object::ELFObjectFileBase* elf_object); - // Extracts samples from the `perf.data`-file loaded using `LoadPerfData`, - // usually obtained from `perf record`. Returns a {`sample_types`, `samples`} - // pair. `sample_types` is a vector of sample type names, while `samples` is - // a mapping between sample addresses and the corresponding sample values. + // Extracts samples belonging to `mapping` from the `perf_data`. Returns a + // {`sample_types`, `samples`} pair. `sample_types` is a vector of sample + // type names, while `samples` is a mapping between sample addresses and the + // corresponding sample values. // The ordering of the sample values matches the ordering of types in the // heading. absl::StatusOr, std::unordered_map>>> - GetSamples(); + GetSamples(const quipper::PerfDataProto* perf_data, + const quipper::PerfDataProto_MMapEvent* mapping); - // Extracts start and end pairs, as well as latencies in cycles, of sequences - // of straight-run code from branch stacks. - // LBR data is extracted from the `perf.data`-like file loaded using - // `LoadPerfData`. + // Extracts start and end pairs belonging to the given mapping, as well as + // their latencies in cycles, of sequences of straight-run code from + // LBR branch stacks (pseudo-basic blocks). absl::StatusOr, std::vector>>> - GetLBRBlocksWithLatency(const llvm::object::ELFObjectFileBase* elf_object); + GetLBRBlocksWithLatency(const llvm::object::ELFObjectFileBase* elf_object, + const quipper::PerfDataProto* perf_data, + const quipper::PerfDataProto_MMapEvent* mapping); BHiveImporter importer_; - quipper::PerfReader perf_reader_; - quipper::PerfParser perf_parser_; - quipper::PerfDataProto::MMapEvent main_mapping_; + quipper::PerfReader + perf_reader_; // Has ownership of the `PerfDataProto` used throughout. }; template From 188f52492d84bb6ff5389d851974dfeafa1d43d6 Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Wed, 27 Nov 2024 01:42:38 +0530 Subject: [PATCH 2/4] Add a binary name check while identifying the main mapping. --- gematria/datasets/annotating_importer.cc | 23 +++++++++++++------ gematria/datasets/annotating_importer.h | 5 ++++ .../python/import_annotated_basic_blocks.py | 1 + 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index 94f3de10..67c2dc08 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -73,17 +73,26 @@ absl::StatusOr AnnotatingImporter::LoadPerfData( return &perf_reader_.proto(); } +namespace { + +std::string GetFileNameFromPath(const std::string &path) { + int idx = path.find_last_of('/'); + if (idx == std::string::npos) { + return path; + } + return path.substr(idx + 1); +} + +} // namespace + absl::StatusOr AnnotatingImporter::GetMainMapping( const llvm::object::ELFObjectFileBase *elf_object, const quipper::PerfDataProto *perf_data) { - // Find the relevant mapping. - // TODO(virajbshah): Make sure the mapping was found. (Use num_mmap_events) + std::string file_name = GetFileNameFromPath(elf_object->getFileName().str()); for (const auto &event : perf_data->events()) { - // TODO(virajbshah): Not sure if this always works, i.e. does the main - // binary always correspond to the first MMapEvent. Implement BuildID or - // name based checking. if (event.has_mmap_event() && + GetFileNameFromPath(event.mmap_event().filename()) == file_name && event.mmap_event().prot() & 0b001 /* PROT_READ */ && event.mmap_event().prot() & 0b100 /* PROT_EXEC */) { return &event.mmap_event(); @@ -199,8 +208,8 @@ AnnotatingImporter::GetBlocksFromELF( for (const llvm::object::BBAddrMap::BBRangeEntry &bb_range : map.getBBRanges()) { for (const llvm::object::BBAddrMap::BBEntry &bb : bb_range.BBEntries) { - uint64_t begin_idx = function_addr + bb.Offset, - end_idx = begin_idx + bb.Size; + uint64_t begin_idx = function_addr + bb.Offset; + uint64_t end_idx = begin_idx + bb.Size; if (begin_idx == end_idx) { continue; // Skip any empty basic blocks. } diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index d44d35ec..501f83d0 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -66,6 +66,11 @@ class AnnotatingImporter { // Searches all MMap events for the one that most likely corresponds to the // executable load segment of the given object. + // This requires that the ELF object's filename has not changed from when it + // was profiled, since we check its name against the filenames from the + // recorded MMap events. Note the object file can still be moved, since we + // check only the name and not the path. + // TODO(virajbshah): Find a better way to identify the relevant mapping. absl::StatusOr GetMainMapping( const llvm::object::ELFObjectFileBase* elf_object, const quipper::PerfDataProto* perf_data); diff --git a/gematria/datasets/python/import_annotated_basic_blocks.py b/gematria/datasets/python/import_annotated_basic_blocks.py index 571fc50d..642d1298 100644 --- a/gematria/datasets/python/import_annotated_basic_blocks.py +++ b/gematria/datasets/python/import_annotated_basic_blocks.py @@ -93,6 +93,7 @@ def main(argv: Sequence[str]) -> None: with tf.io.TFRecordWriter(_OUTPUT_TFRECORD_FILE.value) as writer: for proto in protos: writer.write(proto.SerializeToString()) + print(f'Wrote {len(protos)} (pseudo-)basic block(s).') if __name__ == '__main__': From f8b23cbff2436c056e9d5343d4139fd4805528af Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Wed, 27 Nov 2024 01:48:04 +0530 Subject: [PATCH 3/4] Add some more checks for whether the LBR block belongs to the given object. * A new check of the sample PID against the main mapping PID. * Remove the check for whether the block's code was within the bounds of the executable segment in terms of file addresses, this is redundant as long as the mapping is valid. --- gematria/datasets/annotating_importer.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index 67c2dc08..fdf4565b 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -267,6 +267,9 @@ AnnotatingImporter::GetSamples( } // Filter out sample events from outside the profiled binary. + if (!event.sample_event().has_pid() || + !(event.sample_event().pid() == mapping->pid())) + continue; uint64_t sample_ip = event.sample_event().ip(); if (sample_ip < mmap_begin_addr || sample_ip >= mmap_end_addr) continue; @@ -329,6 +332,13 @@ AnnotatingImporter::GetLBRBlocksWithLatency( !event.sample_event().branch_stack_size()) { continue; } + + // Check if the sample PID matches that of the relevant mapping. + if (!event.sample_event().has_pid() || + !(event.sample_event().pid() == mapping->pid())) { + continue; + } + const auto &branch_stack = event.sample_event().branch_stack(); for (int branch_idx = branch_stack.size() - 2; branch_idx >= 0; --branch_idx) { @@ -345,9 +355,6 @@ AnnotatingImporter::GetLBRBlocksWithLatency( // Remove blocks not belonging to the binary we are importing from. if (block_begin < mmap_begin_addr || mmap_end_addr < block_end) continue; - if (block_begin < main_header->p_offset || - main_header->p_offset + main_header->p_filesz < block_end) - continue; uint32_t block_latency = next_branch_entry.cycles(); From f53a5fb654e6152e8ac8a6d574fdbc4d04fb2ff7 Mon Sep 17 00:00:00 2001 From: Viraj Shah Date: Mon, 13 Jan 2025 20:52:16 +0530 Subject: [PATCH 4/4] Address review comments. --- gematria/datasets/annotating_importer.cc | 51 +++++++++++++++++------- gematria/datasets/annotating_importer.h | 4 +- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/gematria/datasets/annotating_importer.cc b/gematria/datasets/annotating_importer.cc index fdf4565b..8a44aad7 100644 --- a/gematria/datasets/annotating_importer.cc +++ b/gematria/datasets/annotating_importer.cc @@ -49,6 +49,11 @@ namespace gematria { +// Memory mapping protection flag bits on Linux, from `sys/mman.h`. +constexpr int kProtRead = 0b001; /* PROT_READ */ +constexpr int kProtWrite = 0b010; /* PROT_WRITE */ +constexpr int kProtExec = 0b100; /* PROT_EXEC */ + AnnotatingImporter::AnnotatingImporter(const Canonicalizer *canonicalizer) : importer_(canonicalizer) {} @@ -63,7 +68,8 @@ absl::StatusOr AnnotatingImporter::LoadPerfData( quipper::PerfParser perf_parser( &perf_reader_, quipper::PerfParserOptions{.do_remap = true, .discard_unused_events = true, - .sort_events_by_time = false}); + .sort_events_by_time = false, + .combine_mappings = true}); if (!perf_parser.ParseRawEvents()) { return absl::InvalidArgumentError(absl::StrFormat( "The given `perf.data`-like file (%s) could not be parsed.", @@ -75,9 +81,9 @@ absl::StatusOr AnnotatingImporter::LoadPerfData( namespace { -std::string GetFileNameFromPath(const std::string &path) { +llvm::StringRef GetBasenameFromPath(const llvm::StringRef path) { int idx = path.find_last_of('/'); - if (idx == std::string::npos) { + if (idx == llvm::StringRef::npos) { return path; } return path.substr(idx + 1); @@ -89,19 +95,23 @@ absl::StatusOr AnnotatingImporter::GetMainMapping( const llvm::object::ELFObjectFileBase *elf_object, const quipper::PerfDataProto *perf_data) { - std::string file_name = GetFileNameFromPath(elf_object->getFileName().str()); + llvm::StringRef file_name = + GetBasenameFromPath(elf_object->getFileName().str()); + // TODO(vbshah): There may be multiple mappings corresponding to the profiled + // binary. Record and match samples from all of them instead of assuming + // there is only one and returning after finding it. for (const auto &event : perf_data->events()) { if (event.has_mmap_event() && - GetFileNameFromPath(event.mmap_event().filename()) == file_name && - event.mmap_event().prot() & 0b001 /* PROT_READ */ && - event.mmap_event().prot() & 0b100 /* PROT_EXEC */) { + GetBasenameFromPath(event.mmap_event().filename()) == file_name && + event.mmap_event().prot() & kProtRead && + event.mmap_event().prot() & kProtExec) { return &event.mmap_event(); } } return absl::InvalidArgumentError(absl::StrFormat( - "The given `perf.data`-like file does not have a mapping corresponding " - "to the given object (%s).", + "The given `perf.data`-like file does not have a mapping corresponding" + " to the given object (%s).", elf_object->getFileName())); } @@ -268,10 +278,13 @@ AnnotatingImporter::GetSamples( // Filter out sample events from outside the profiled binary. if (!event.sample_event().has_pid() || - !(event.sample_event().pid() == mapping->pid())) + !(event.sample_event().pid() == mapping->pid())) { continue; + } uint64_t sample_ip = event.sample_event().ip(); - if (sample_ip < mmap_begin_addr || sample_ip >= mmap_end_addr) continue; + if (sample_ip < mmap_begin_addr || sample_ip >= mmap_end_addr) { + continue; + } std::vector &samples_at_same_addr = samples[sample_ip]; if (samples_at_same_addr.empty()) { @@ -350,11 +363,17 @@ AnnotatingImporter::GetLBRBlocksWithLatency( // Simple validity checks: the block must start before it ends and cannot // be larger than some threshold. - if (block_begin >= block_end) continue; - if (block_end - block_begin > kMaxBlockSizeBytes) continue; + if (block_begin >= block_end) { + continue; + } + if (block_end - block_begin > kMaxBlockSizeBytes) { + continue; + } // Remove blocks not belonging to the binary we are importing from. - if (block_begin < mmap_begin_addr || mmap_end_addr < block_end) continue; + if (block_begin < mmap_begin_addr || mmap_end_addr < block_end) { + continue; + } uint32_t block_latency = next_branch_entry.cycles(); @@ -437,7 +456,9 @@ AnnotatingImporter::GetAnnotatedBasicBlockProtos( uint64_t instruction_addr = basic_block_proto.basic_block() .machine_instructions()[instruction_idx] .address(); - if (!samples.count(instruction_addr)) continue; + if (!samples.count(instruction_addr)) { + continue; + } const std::vector &annotations = samples.at(instruction_addr); auto &instruction_proto = basic_block_proto.mutable_basic_block() diff --git a/gematria/datasets/annotating_importer.h b/gematria/datasets/annotating_importer.h index 501f83d0..48416e8e 100644 --- a/gematria/datasets/annotating_importer.h +++ b/gematria/datasets/annotating_importer.h @@ -144,8 +144,8 @@ AnnotatingImporter::GetMainProgramHeader( program_header.p_flags & llvm::ELF::PF_X) { if (found_main_header) { return absl::InvalidArgumentError( - "The given object has multiple executable segments. This is " - "currently not supported."); + "The given object has multiple executable segments. This is" + " currently not supported."); } main_header = program_header; found_main_header = true;