From 35c7ef70fb2ac307ac9b547eb13ea708fe5b7c7d Mon Sep 17 00:00:00 2001 From: br0kej Date: Sun, 8 Oct 2023 20:02:28 +0100 Subject: [PATCH 01/20] [feature/refactor] Adding support for adding metadata to all nodes in all call graph variants + simplification --- src/agcj.rs | 34 +++++-- src/main.rs | 257 +++++++++++++++++++++++----------------------------- 2 files changed, 140 insertions(+), 151 deletions(-) diff --git a/src/agcj.rs b/src/agcj.rs index 936290f..68b258d 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -165,13 +165,20 @@ impl AGCJFunctionCallGraphs { global_cg: &AGCJFile, output_path: &String, binary_name: &str, + with_metadata: &bool, ) { let mut graph = self.build_local_call_graph(); self.get_callees_of_callees(global_cg, &mut graph); - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop") + if *with_metadata { + let networkx_graph = + NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); + self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "1hop") + } else { + let networkx_graph = NetworkxDiGraph::from(graph); + self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop") + }; } pub fn to_petgraph_with_callers( @@ -179,11 +186,19 @@ impl AGCJFunctionCallGraphs { global_cg: &AGCJFile, output_path: &String, binary_name: &str, + with_metadata: &bool ) { let mut graph = self.build_local_call_graph(); self.get_target_func_callers(global_cg, &mut graph); - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "cg-callers") + + if *with_metadata { + let networkx_graph = + NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); + self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "cg-callers") + } else { + let networkx_graph = NetworkxDiGraph::from(graph); + self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "cg-callers") + }; } pub fn one_hop_to_petgraph_with_callers( @@ -191,14 +206,21 @@ impl AGCJFunctionCallGraphs { global_cg: &AGCJFile, output_path: &String, binary_name: &str, + with_metadata: &bool, ) { let mut graph = self.build_local_call_graph(); self.get_target_func_callers(global_cg, &mut graph); self.get_callees_of_callees(global_cg, &mut graph); - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop-callers") + if *with_metadata { + let networkx_graph = + NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); + self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "1hop-callers") + } else { + let networkx_graph = NetworkxDiGraph::from(graph); + self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop-callers") + }; } pub fn print_callees(&self) { diff --git a/src/main.rs b/src/main.rs index 50d681e..1fd1a31 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,8 @@ // the error and some extra info #![allow(clippy::expect_fun_call)] +use std::fmt; +use std::fmt::write; use clap::{Parser, Subcommand}; #[macro_use] extern crate log; @@ -63,6 +65,19 @@ enum DataType { Invalid, } +impl fmt::Display for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DataType::Cfg => write!(f, "Control Flow Graph"), + DataType::Cg=> write!(f, "Call Graph"), + DataType::CgWithCallers => write!(f, "Call Graph with Callers"), + DataType::OneHopCg => write!(f, "One Hop Call Graph"), + DataType::OneHopCgWithcallers => write!(f, "One Hop Call Graph with Callers"), + DataType::Invalid => write!(f, "Invalid"), + } + } +} + /// Turn binaries into machine learning ready formats #[derive(Parser)] #[command(author, version, about, long_about = None)] @@ -322,9 +337,8 @@ fn main() { error!("{} does not exist!", path); exit(1) } - + info!("Chosen Graph Type: {}", graph_type); if graph_type == DataType::Cfg { - info!("Chosen Graph Type: Control Flow Graph"); if feature_type.is_some() { let feature_vec_type = match feature_type.as_ref().unwrap().as_str() { "gemini" => FeatureType::Gemini, @@ -396,8 +410,8 @@ fn main() { } else { error!("--feature-type/-f is required for creating CFG's") } - } else if graph_type == DataType::Cg { - info!("Chosen Graph Type: Call Graph"); + } else { + // If its only one file if Path::new(path).is_file() { let mut file = if *with_features { if metadata_path.is_none() { @@ -429,16 +443,40 @@ fn main() { }; file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); + if graph_type == DataType::Cg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_type == DataType::OneHopCg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features); + } + } else if graph_type == DataType::CgWithCallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } + } else if graph_type == DataType::OneHopCgWithcallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } } } else { + // its more than one file if metadata_path.is_none() { error!("with features active - require --metadata-path argument"); exit(1) @@ -449,9 +487,7 @@ fn main() { "{} files found. Beginning Processing.", file_paths_vec.len() ); - // if without metadata - if !with_features { for path in file_paths_vec.iter() { let mut file = AGCJFile { @@ -464,13 +500,37 @@ fn main() { file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); + if graph_type == DataType::Cg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_type == DataType::OneHopCg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features); + } + } else if graph_type == DataType::CgWithCallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } + } else if graph_type == DataType::OneHopCgWithcallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } } } } else { @@ -507,134 +567,41 @@ fn main() { file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); + if graph_type == DataType::Cg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_type == DataType::OneHopCg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features); + } + } else if graph_type == DataType::CgWithCallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } + } else if graph_type == DataType::OneHopCgWithcallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features + ); + } } } } } - } else if graph_type == DataType::OneHopCg { - info!("Chosen Graph Type: One Hop Call Graph"); - if Path::new(path).is_file() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename); - } - } else { - let file_paths_vec = get_json_paths_from_dir(path, Some("_cg".to_string())); - info!( - "{} files found. Beginning Processing.", - file_paths_vec.len() - ); - for path in file_paths_vec.iter() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename); - } - } - } - } else if graph_type == DataType::CgWithCallers { - if Path::new(path).is_file() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph_with_callers(&file, &file.output_path, &file.filename); - } - } else { - let file_paths_vec = get_json_paths_from_dir(path, Some("_cg".to_string())); - info!( - "{} files found. Beginning Processing.", - file_paths_vec.len() - ); - for path in file_paths_vec.iter() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - ); - } - } - } - } else if graph_type == DataType::OneHopCgWithcallers { - if Path::new(path).is_file() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - ); - } - } - } else { - let file_paths_vec = get_json_paths_from_dir(path, Some("_cg".to_string())); - info!( - "{} files found. Beginning Processing.", - file_paths_vec.len() - ); - for path in file_paths_vec.iter() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - ); - } - } } } GenerateSubCommands::Metadata { From 9f7943695c0f39c6727e64130dface1ccc92685a Mon Sep 17 00:00:00 2001 From: br0kej Date: Sun, 8 Oct 2023 20:10:05 +0100 Subject: [PATCH 02/20] [refactor] removing code re-use --- src/agcj.rs | 46 +++++++++++++--------------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/src/agcj.rs b/src/agcj.rs index 68b258d..906a11a 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -147,15 +147,7 @@ impl AGCJFunctionCallGraphs { with_metadata: &bool, ) { let graph = self.build_local_call_graph(); - - if *with_metadata { - let networkx_graph = - NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "cg") - } else { - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "cg") - }; + self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "cg") } // Creates a petgraph object of a given function, all of the functions called functions and @@ -170,15 +162,7 @@ impl AGCJFunctionCallGraphs { let mut graph = self.build_local_call_graph(); self.get_callees_of_callees(global_cg, &mut graph); - - if *with_metadata { - let networkx_graph = - NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "1hop") - } else { - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop") - }; + self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "1hop") } pub fn to_petgraph_with_callers( @@ -190,15 +174,7 @@ impl AGCJFunctionCallGraphs { ) { let mut graph = self.build_local_call_graph(); self.get_target_func_callers(global_cg, &mut graph); - - if *with_metadata { - let networkx_graph = - NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "cg-callers") - } else { - let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "cg-callers") - }; + self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "cg-callers"); } pub fn one_hop_to_petgraph_with_callers( @@ -212,18 +188,22 @@ impl AGCJFunctionCallGraphs { self.get_target_func_callers(global_cg, &mut graph); self.get_callees_of_callees(global_cg, &mut graph); + self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "1hop-callers"); + + } + pub fn print_callees(&self) { + println!("{:?}", self.imports) + } + + fn convert_graph_to_networkx(&self, graph: Graph, global_cg: &AGCJFile, binary_name: &str, output_path: &String, with_metadata: &bool, type_suffix: &str) { if *with_metadata { let networkx_graph = NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, "1hop-callers") + self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, type_suffix) } else { let networkx_graph = NetworkxDiGraph::from(graph); - self.graph_to_json_func_node(binary_name, output_path, networkx_graph, "1hop-callers") + self.graph_to_json_func_node(binary_name, output_path, networkx_graph, type_suffix) }; } - - pub fn print_callees(&self) { - println!("{:?}", self.imports) - } } From 547e37fa04c4712034f7e59d7e12958489038344 Mon Sep 17 00:00:00 2001 From: br0kej Date: Sun, 8 Oct 2023 20:21:36 +0100 Subject: [PATCH 03/20] [tweak] file path is changed when creating call graphs with metadata --- src/agcj.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/agcj.rs b/src/agcj.rs index 906a11a..0f62c5d 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -1,3 +1,4 @@ +use std::fmt::format; use crate::files::AGCJFile; use crate::networkx::{CallGraphFuncNameNode, CallGraphFuncWithMetadata, NetworkxDiGraph}; use crate::utils::{check_or_create_dir, get_save_file_path}; @@ -198,6 +199,7 @@ impl AGCJFunctionCallGraphs { fn convert_graph_to_networkx(&self, graph: Graph, global_cg: &AGCJFile, binary_name: &str, output_path: &String, with_metadata: &bool, type_suffix: &str) { if *with_metadata { + let type_suffix = format!("{}-{}", type_suffix, "meta").as_str(); let networkx_graph = NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, type_suffix) From e8d73ff9598528dfbd253d6c61d670aade5faf75 Mon Sep 17 00:00:00 2001 From: br0kej Date: Sun, 8 Oct 2023 20:47:09 +0100 Subject: [PATCH 04/20] [tweak] fixing metadata file check + fixing call graph file name generation --- .../graphs/test_bin_cg/entry0-cg.json | 24 -------------- .../processed/graphs/test_bin_cg/main-cg.json | 24 -------------- .../sym.__do_global_dtors_aux-cg.json | 33 ------------------- .../test_bin_cg/sym.__libc_csu_init-cg.json | 33 ------------------- .../graphs/test_bin_cg/sym._init-cg.json | 24 -------------- src/agcj.rs | 4 +-- src/main.rs | 11 ++++--- 7 files changed, 8 insertions(+), 145 deletions(-) delete mode 100644 data-examples/processed/graphs/test_bin_cg/entry0-cg.json delete mode 100644 data-examples/processed/graphs/test_bin_cg/main-cg.json delete mode 100644 data-examples/processed/graphs/test_bin_cg/sym.__do_global_dtors_aux-cg.json delete mode 100644 data-examples/processed/graphs/test_bin_cg/sym.__libc_csu_init-cg.json delete mode 100644 data-examples/processed/graphs/test_bin_cg/sym._init-cg.json diff --git a/data-examples/processed/graphs/test_bin_cg/entry0-cg.json b/data-examples/processed/graphs/test_bin_cg/entry0-cg.json deleted file mode 100644 index 91dc603..0000000 --- a/data-examples/processed/graphs/test_bin_cg/entry0-cg.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "adjacency": [ - [ - { - "id": 1, - "weight": 0 - } - ], - [] - ], - "directed": "True", - "graph": [], - "multigraph": false, - "nodes": [ - { - "id": 0, - "funcName": "entry0" - }, - { - "id": 1, - "funcName": "unk.0x3fe0" - } - ] -} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg/main-cg.json b/data-examples/processed/graphs/test_bin_cg/main-cg.json deleted file mode 100644 index c6341fd..0000000 --- a/data-examples/processed/graphs/test_bin_cg/main-cg.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "adjacency": [ - [ - { - "id": 1, - "weight": 0 - } - ], - [] - ], - "directed": "True", - "graph": [], - "multigraph": false, - "nodes": [ - { - "id": 0, - "funcName": "main" - }, - { - "id": 1, - "funcName": "sym.imp.printf" - } - ] -} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg/sym.__do_global_dtors_aux-cg.json b/data-examples/processed/graphs/test_bin_cg/sym.__do_global_dtors_aux-cg.json deleted file mode 100644 index fdcc0d6..0000000 --- a/data-examples/processed/graphs/test_bin_cg/sym.__do_global_dtors_aux-cg.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "adjacency": [ - [ - { - "id": 2, - "weight": 0 - }, - { - "id": 1, - "weight": 0 - } - ], - [], - [] - ], - "directed": "True", - "graph": [], - "multigraph": false, - "nodes": [ - { - "id": 0, - "funcName": "sym.__do_global_dtors_aux" - }, - { - "id": 1, - "funcName": "sym..plt.got" - }, - { - "id": 2, - "funcName": "sym.deregister_tm_clones" - } - ] -} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg/sym.__libc_csu_init-cg.json b/data-examples/processed/graphs/test_bin_cg/sym.__libc_csu_init-cg.json deleted file mode 100644 index 2c7e467..0000000 --- a/data-examples/processed/graphs/test_bin_cg/sym.__libc_csu_init-cg.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "adjacency": [ - [ - { - "id": 2, - "weight": 0 - }, - { - "id": 1, - "weight": 0 - } - ], - [], - [] - ], - "directed": "True", - "graph": [], - "multigraph": false, - "nodes": [ - { - "id": 0, - "funcName": "sym.__libc_csu_init" - }, - { - "id": 1, - "funcName": "sym._init" - }, - { - "id": 2, - "funcName": "rsp" - } - ] -} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg/sym._init-cg.json b/data-examples/processed/graphs/test_bin_cg/sym._init-cg.json deleted file mode 100644 index fd2c80b..0000000 --- a/data-examples/processed/graphs/test_bin_cg/sym._init-cg.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "adjacency": [ - [ - { - "id": 1, - "weight": 0 - } - ], - [] - ], - "directed": "True", - "graph": [], - "multigraph": false, - "nodes": [ - { - "id": 0, - "funcName": "sym._init" - }, - { - "id": 1, - "funcName": "unk.0xffffffffffffffff" - } - ] -} \ No newline at end of file diff --git a/src/agcj.rs b/src/agcj.rs index 0f62c5d..f78a289 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -199,10 +199,10 @@ impl AGCJFunctionCallGraphs { fn convert_graph_to_networkx(&self, graph: Graph, global_cg: &AGCJFile, binary_name: &str, output_path: &String, with_metadata: &bool, type_suffix: &str) { if *with_metadata { - let type_suffix = format!("{}-{}", type_suffix, "meta").as_str(); + let type_suffix = type_suffix.to_owned() + "-meta"; let networkx_graph = NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, type_suffix) + self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, type_suffix.as_str()) } else { let networkx_graph = NetworkxDiGraph::from(graph); self.graph_to_json_func_node(binary_name, output_path, networkx_graph, type_suffix) diff --git a/src/main.rs b/src/main.rs index 1fd1a31..0a2f055 100644 --- a/src/main.rs +++ b/src/main.rs @@ -476,11 +476,6 @@ fn main() { } } } else { - // its more than one file - if metadata_path.is_none() { - error!("with features active - require --metadata-path argument"); - exit(1) - }; let mut file_paths_vec = get_json_paths_from_dir(path, Some("_cg".to_string())); info!( @@ -534,6 +529,12 @@ fn main() { } } } else { + // its more than one file + if metadata_path.is_none() { + error!("with features active - require --metadata-path argument"); + exit(1) + }; + let mut metadata_paths_vec = get_json_paths_from_dir( &metadata_path.as_ref().unwrap(), Some("finfo".to_string()), From 555c9053c03759c3ecaa158a96f6031ba6ce3df0 Mon Sep 17 00:00:00 2001 From: br0kej Date: Sun, 8 Oct 2023 20:48:07 +0100 Subject: [PATCH 05/20] adding up to date data samples for call graphs with and without metadata --- .../entry0-1hop-callers-meta.json | 1 + .../main-1hop-callers-meta.json | 1 + ...do_global_dtors_aux-1hop-callers-meta.json | 1 + ...sym.__libc_csu_init-1hop-callers-meta.json | 1 + .../sym._init-1hop-callers-meta.json | 1 + .../entry0-1hop-meta.json | 1 + .../test_bin_cg-1hop-meta/main-1hop-meta.json | 1 + .../sym.__do_global_dtors_aux-1hop-meta.json | 1 + .../sym.__libc_csu_init-1hop-meta.json | 1 + .../sym._init-1hop-meta.json | 1 + .../entry0-cg-callers-meta.json | 1 + .../main-cg-callers-meta.json | 1 + ...__do_global_dtors_aux-cg-callers-meta.json | 1 + .../sym.__libc_csu_init-cg-callers-meta.json | 1 + .../sym._init-cg-callers-meta.json | 1 + .../entry0-cg-callers.json | 1 + .../main-cg-callers.json | 1 + .../sym.__do_global_dtors_aux-cg-callers.json | 1 + .../sym.__libc_csu_init-cg-callers.json | 1 + .../sym._init-cg-callers.json | 1 + .../test_bin_cg-cg-meta/entry0-cg-meta.json | 1 + .../test_bin_cg-cg-meta/main-cg-meta.json | 1 + .../sym.__do_global_dtors_aux-cg-meta.json | 1 + .../sym.__libc_csu_init-cg-meta.json | 1 + .../sym._init-cg-meta.json | 1 + .../graphs/test_bin_cg-cg/entry0-cg.json | 1 + .../graphs/test_bin_cg-cg/main-cg.json | 24 +++++++++++++++++++ .../sym.__do_global_dtors_aux-cg.json | 1 + .../sym.__libc_csu_init-cg.json | 1 + .../graphs/test_bin_cg-cg/sym._init-cg.json | 1 + 30 files changed, 53 insertions(+) create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/entry0-1hop-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/main-1hop-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__do_global_dtors_aux-1hop-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__libc_csu_init-1hop-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym._init-1hop-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-meta/entry0-1hop-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-meta/main-1hop-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__do_global_dtors_aux-1hop-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__libc_csu_init-1hop-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-1hop-meta/sym._init-1hop-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers-meta/entry0-cg-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers-meta/main-cg-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__do_global_dtors_aux-cg-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__libc_csu_init-cg-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym._init-cg-callers-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers/entry0-cg-callers.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers/main-cg-callers.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__do_global_dtors_aux-cg-callers.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__libc_csu_init-cg-callers.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-callers/sym._init-cg-callers.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-meta/entry0-cg-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-meta/main-cg-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__do_global_dtors_aux-cg-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__libc_csu_init-cg-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg-meta/sym._init-cg-meta.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg/entry0-cg.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg/main-cg.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg/sym.__do_global_dtors_aux-cg.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg/sym.__libc_csu_init-cg.json create mode 100644 data-examples/processed/graphs/test_bin_cg-cg/sym._init-cg.json diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/entry0-1hop-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/entry0-1hop-callers-meta.json new file mode 100644 index 0000000..8880d49 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/entry0-1hop-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}},{"id":1,"funcName":"unk.0x3fe0","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/main-1hop-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/main-1hop-callers-meta.json new file mode 100644 index 0000000..f4104bb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/main-1hop-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":3,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__do_global_dtors_aux-1hop-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__do_global_dtors_aux-1hop-callers-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__do_global_dtors_aux-1hop-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__libc_csu_init-1hop-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__libc_csu_init-1hop-callers-meta.json new file mode 100644 index 0000000..ed4cccb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym.__libc_csu_init-1hop-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[{"id":3,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}},{"id":3,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym._init-1hop-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym._init-1hop-callers-meta.json new file mode 100644 index 0000000..c84f3ac --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-callers-meta/sym._init-1hop-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[],[{"id":0,"weight":0}]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":1,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}},{"id":2,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-meta/entry0-1hop-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-meta/entry0-1hop-meta.json new file mode 100644 index 0000000..8880d49 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-meta/entry0-1hop-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}},{"id":1,"funcName":"unk.0x3fe0","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-meta/main-1hop-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-meta/main-1hop-meta.json new file mode 100644 index 0000000..f4104bb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-meta/main-1hop-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":3,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__do_global_dtors_aux-1hop-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__do_global_dtors_aux-1hop-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__do_global_dtors_aux-1hop-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__libc_csu_init-1hop-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__libc_csu_init-1hop-meta.json new file mode 100644 index 0000000..ed4cccb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym.__libc_csu_init-1hop-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[{"id":3,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}},{"id":3,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym._init-1hop-meta.json b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym._init-1hop-meta.json new file mode 100644 index 0000000..a9b9b30 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-1hop-meta/sym._init-1hop-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":1,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/entry0-cg-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/entry0-cg-callers-meta.json new file mode 100644 index 0000000..8880d49 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/entry0-cg-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}},{"id":1,"funcName":"unk.0x3fe0","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/main-cg-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/main-cg-callers-meta.json new file mode 100644 index 0000000..f4104bb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/main-cg-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":3,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__do_global_dtors_aux-cg-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__do_global_dtors_aux-cg-callers-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__do_global_dtors_aux-cg-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__libc_csu_init-cg-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__libc_csu_init-cg-callers-meta.json new file mode 100644 index 0000000..64ab240 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym.__libc_csu_init-cg-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym._init-cg-callers-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym._init-cg-callers-meta.json new file mode 100644 index 0000000..c84f3ac --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers-meta/sym._init-cg-callers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[],[{"id":0,"weight":0}]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":1,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}},{"id":2,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers/entry0-cg-callers.json b/data-examples/processed/graphs/test_bin_cg-cg-callers/entry0-cg-callers.json new file mode 100644 index 0000000..1ef8dbe --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers/entry0-cg-callers.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0"},{"id":1,"funcName":"unk.0x3fe0"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers/main-cg-callers.json b/data-examples/processed/graphs/test_bin_cg-cg-callers/main-cg-callers.json new file mode 100644 index 0000000..7318bbd --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers/main-cg-callers.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main"},{"id":1,"funcName":"sym.imp.printf"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__do_global_dtors_aux-cg-callers.json b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__do_global_dtors_aux-cg-callers.json new file mode 100644 index 0000000..a4eaaf2 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__do_global_dtors_aux-cg-callers.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux"},{"id":1,"funcName":"sym..plt.got"},{"id":2,"funcName":"sym.deregister_tm_clones"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__libc_csu_init-cg-callers.json b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__libc_csu_init-cg-callers.json new file mode 100644 index 0000000..80d5b6a --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym.__libc_csu_init-cg-callers.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init"},{"id":1,"funcName":"sym._init"},{"id":2,"funcName":"rsp"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-callers/sym._init-cg-callers.json b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym._init-cg-callers.json new file mode 100644 index 0000000..0ea5c06 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-callers/sym._init-cg-callers.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[],[{"id":0,"weight":0}]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init"},{"id":1,"funcName":"unk.0xffffffffffffffff"},{"id":2,"funcName":"sym.__libc_csu_init"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-meta/entry0-cg-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-meta/entry0-cg-meta.json new file mode 100644 index 0000000..8880d49 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-meta/entry0-cg-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}},{"id":1,"funcName":"unk.0x3fe0","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-meta/main-cg-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-meta/main-cg-meta.json new file mode 100644 index 0000000..f4104bb --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-meta/main-cg-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":3,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__do_global_dtors_aux-cg-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__do_global_dtors_aux-cg-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__do_global_dtors_aux-cg-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__libc_csu_init-cg-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__libc_csu_init-cg-meta.json new file mode 100644 index 0000000..64ab240 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym.__libc_csu_init-cg-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg-meta/sym._init-cg-meta.json b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym._init-cg-meta.json new file mode 100644 index 0000000..a9b9b30 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg-meta/sym._init-cg-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":1,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":1,"funcName":"unk.0xffffffffffffffff","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg/entry0-cg.json b/data-examples/processed/graphs/test_bin_cg-cg/entry0-cg.json new file mode 100644 index 0000000..1ef8dbe --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg/entry0-cg.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0"},{"id":1,"funcName":"unk.0x3fe0"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg/main-cg.json b/data-examples/processed/graphs/test_bin_cg-cg/main-cg.json new file mode 100644 index 0000000..c6341fd --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg/main-cg.json @@ -0,0 +1,24 @@ +{ + "adjacency": [ + [ + { + "id": 1, + "weight": 0 + } + ], + [] + ], + "directed": "True", + "graph": [], + "multigraph": false, + "nodes": [ + { + "id": 0, + "funcName": "main" + }, + { + "id": 1, + "funcName": "sym.imp.printf" + } + ] +} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg/sym.__do_global_dtors_aux-cg.json b/data-examples/processed/graphs/test_bin_cg-cg/sym.__do_global_dtors_aux-cg.json new file mode 100644 index 0000000..a4eaaf2 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg/sym.__do_global_dtors_aux-cg.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux"},{"id":1,"funcName":"sym..plt.got"},{"id":2,"funcName":"sym.deregister_tm_clones"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg/sym.__libc_csu_init-cg.json b/data-examples/processed/graphs/test_bin_cg-cg/sym.__libc_csu_init-cg.json new file mode 100644 index 0000000..80d5b6a --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg/sym.__libc_csu_init-cg.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init"},{"id":1,"funcName":"sym._init"},{"id":2,"funcName":"rsp"}]} \ No newline at end of file diff --git a/data-examples/processed/graphs/test_bin_cg-cg/sym._init-cg.json b/data-examples/processed/graphs/test_bin_cg-cg/sym._init-cg.json new file mode 100644 index 0000000..d4c2564 --- /dev/null +++ b/data-examples/processed/graphs/test_bin_cg-cg/sym._init-cg.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym._init"},{"id":1,"funcName":"unk.0xffffffffffffffff"}]} \ No newline at end of file From 0a0f8131e189bf40c57263cb0a3b31a58fd11071 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 07:12:44 +0100 Subject: [PATCH 06/20] [tweak] adding additional logging for debugging + a check to see if finfo is already present, if so skip extraction --- src/extract.rs | 46 ++++++++++++++++++++++++++++------------------ src/files.rs | 2 +- src/main.rs | 12 ++++++++++-- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/extract.rs b/src/extract.rs index 911bb27..77b00e5 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -403,25 +403,35 @@ impl FileToBeProcessed { pub fn extract_function_info(&self, debug: &bool) { info!("Starting function metdata extraction"); - let mut r2p = self.setup_r2_pipe(&self.file_path, debug); - let function_details = self.get_function_name_list(&mut r2p); - let mut function_info: Vec> = Vec::new(); - info!("Extracting function metadata"); - for function in function_details.iter() { - debug!("Processing {}", function.name); - let ret = self.get_function_info(function.offset, &mut r2p); - debug!("Metadata Collected: {:?}", ret); - function_info.push(ret); - } - info!("All functions processed"); - r2p.close(); - info!("r2p closed"); + let mut fp_filename = Path::new(self.file_path.as_str()) + .file_name() + .expect("Unable to get filename") + .to_string_lossy() + .to_string(); - info!("Writing extracted data to file"); - self.write_to_json(&json!(function_info - .into_iter() - .flatten() - .collect::>())) + fp_filename = fp_filename + "_" + &self.job_type_suffix.clone(); + let f_name = format!("{}/{}.json", self.output_path, fp_filename); + if !Path::new(&f_name).exists() { + let mut r2p = self.setup_r2_pipe(&self.file_path, debug); + let function_details = self.get_function_name_list(&mut r2p); + let mut function_info: Vec> = Vec::new(); + info!("Extracting function metadata"); + for function in function_details.iter() { + debug!("Processing {}", function.name); + let ret = self.get_function_info(function.offset, &mut r2p); + debug!("Metadata Collected: {:?}", ret); + function_info.push(ret); + } + info!("All functions processed"); + r2p.close(); + info!("r2p closed"); + + info!("Writing extracted data to file"); + self.write_to_json(&json!(function_info + .into_iter() + .flatten() + .collect::>())) + } } // r2 commands to structs diff --git a/src/files.rs b/src/files.rs index eb3af7e..770f2a2 100644 --- a/src/files.rs +++ b/src/files.rs @@ -389,7 +389,7 @@ impl AFIJFile { pub fn subset(&mut self) -> Vec { let mut func_info_subsets: Vec = Vec::new(); - info!("Starting to process functions"); + debug!("Starting to subset functions"); for function in self.function_info.as_ref().unwrap().iter() { let subset = AFIJFeatureSubset::from(function); func_info_subsets.push(subset) diff --git a/src/main.rs b/src/main.rs index 50d681e..b24f0e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -439,6 +439,8 @@ fn main() { ); } } else { + debug!("Multiple files found"); + if metadata_path.is_none() { error!("with features active - require --metadata-path argument"); exit(1) @@ -453,6 +455,7 @@ fn main() { // if without metadata if !with_features { + debug!("Creating call graphs without any node features"); for path in file_paths_vec.iter() { let mut file = AGCJFile { filename: path.to_owned(), @@ -474,6 +477,8 @@ fn main() { } } } else { + debug!("Creating call graphs with node features"); + debug!("Getting metadata file paths"); let mut metadata_paths_vec = get_json_paths_from_dir( &metadata_path.as_ref().unwrap(), Some("finfo".to_string()), @@ -492,9 +497,10 @@ fn main() { function_info: None, output_path: "".to_string(), }; + debug!("Attempting to load metadata file: {}", metadata_path); let _ = metadata .load_and_deserialize() - .expect("Unable to load file"); + .expect("Unable to load assocaited metadata file"); let metadata_subset = metadata.subset(); AGCJFile { filename: path.to_owned(), @@ -503,10 +509,11 @@ fn main() { function_metadata: Some(metadata_subset), } }; - debug!("Proceissing {}", file.filename); + debug!("Attempting to load {}", file.filename); file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); + debug!("Generating call graphs using loaded cgs + metadata"); for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph( &file, @@ -515,6 +522,7 @@ fn main() { with_features, ); } + debug!("Finished generating cgs + metadata for {}", file.filename); } } } From d512440782aba06e3e45810df4c96ca0a67583b6 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 07:55:34 +0100 Subject: [PATCH 07/20] [feature] adding parallel processing to cg generation --- src/main.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index b24f0e6..2e51856 100644 --- a/src/main.rs +++ b/src/main.rs @@ -488,22 +488,25 @@ fn main() { metadata_paths_vec.sort(); assert_eq!(file_paths_vec.len(), metadata_paths_vec.len()); - for (path, metadata_path) in - file_paths_vec.iter().zip(metadata_paths_vec) - { + let combined_cgs_metadata = file_paths_vec + .into_iter() + .zip(metadata_paths_vec) + .collect::>(); + + combined_cgs_metadata.par_iter().for_each(|tup| { let mut file = { let mut metadata = AFIJFile { - filename: metadata_path.clone(), + filename: tup.1.clone(), function_info: None, output_path: "".to_string(), }; - debug!("Attempting to load metadata file: {}", metadata_path); + debug!("Attempting to load metadata file: {}", tup.1); let _ = metadata .load_and_deserialize() .expect("Unable to load assocaited metadata file"); let metadata_subset = metadata.subset(); AGCJFile { - filename: path.to_owned(), + filename: tup.0.to_owned(), function_call_graphs: None, output_path: output_path.to_owned(), function_metadata: Some(metadata_subset), @@ -523,7 +526,7 @@ fn main() { ); } debug!("Finished generating cgs + metadata for {}", file.filename); - } + }); } } } else if graph_type == DataType::OneHopCg { From bcc25d8ea8a06083480d4574ea58516368922d75 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 10:52:32 +0100 Subject: [PATCH 08/20] fixing the naming convention of folders to line up with cli input --- src/agcj.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/src/agcj.rs b/src/agcj.rs index f78a289..fcbbb74 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -1,10 +1,10 @@ -use std::fmt::format; use crate::files::AGCJFile; use crate::networkx::{CallGraphFuncNameNode, CallGraphFuncWithMetadata, NetworkxDiGraph}; use crate::utils::{check_or_create_dir, get_save_file_path}; use itertools::Itertools; use petgraph::prelude::Graph; use serde::{Deserialize, Serialize}; +use std::fmt::format; use std::fs::File; #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -114,7 +114,7 @@ impl AGCJFunctionCallGraphs { let callee = graph.add_node(ele.clone()); let import_node_index = graph.node_indices().find(|i| &graph[*i] == import).unwrap(); - debug!("{:?} -> {:?}", import, ele); + trace!("{:?} -> {:?}", import, ele); graph.update_edge(import_node_index, callee, 0); } } @@ -148,7 +148,14 @@ impl AGCJFunctionCallGraphs { with_metadata: &bool, ) { let graph = self.build_local_call_graph(); - self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "cg") + self.convert_graph_to_networkx( + graph, + global_cg, + binary_name, + output_path, + with_metadata, + "cg", + ) } // Creates a petgraph object of a given function, all of the functions called functions and @@ -163,7 +170,14 @@ impl AGCJFunctionCallGraphs { let mut graph = self.build_local_call_graph(); self.get_callees_of_callees(global_cg, &mut graph); - self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "1hop") + self.convert_graph_to_networkx( + graph, + global_cg, + binary_name, + output_path, + with_metadata, + "onehopcg", + ) } pub fn to_petgraph_with_callers( @@ -171,11 +185,18 @@ impl AGCJFunctionCallGraphs { global_cg: &AGCJFile, output_path: &String, binary_name: &str, - with_metadata: &bool + with_metadata: &bool, ) { let mut graph = self.build_local_call_graph(); self.get_target_func_callers(global_cg, &mut graph); - self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "cg-callers"); + self.convert_graph_to_networkx( + graph, + global_cg, + binary_name, + output_path, + with_metadata, + "cgcallers", + ); } pub fn one_hop_to_petgraph_with_callers( @@ -189,20 +210,39 @@ impl AGCJFunctionCallGraphs { self.get_target_func_callers(global_cg, &mut graph); self.get_callees_of_callees(global_cg, &mut graph); - self.convert_graph_to_networkx(graph,global_cg, binary_name, output_path, with_metadata, "1hop-callers"); - + self.convert_graph_to_networkx( + graph, + global_cg, + binary_name, + output_path, + with_metadata, + "onehopcgcallers", + ); } pub fn print_callees(&self) { println!("{:?}", self.imports) } - fn convert_graph_to_networkx(&self, graph: Graph, global_cg: &AGCJFile, binary_name: &str, output_path: &String, with_metadata: &bool, type_suffix: &str) { + fn convert_graph_to_networkx( + &self, + graph: Graph, + global_cg: &AGCJFile, + binary_name: &str, + output_path: &String, + with_metadata: &bool, + type_suffix: &str, + ) { if *with_metadata { let type_suffix = type_suffix.to_owned() + "-meta"; let networkx_graph = NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata(binary_name, output_path, networkx_graph, type_suffix.as_str()) + self.graph_to_json_func_metadata( + binary_name, + output_path, + networkx_graph, + type_suffix.as_str(), + ) } else { let networkx_graph = NetworkxDiGraph::from(graph); self.graph_to_json_func_node(binary_name, output_path, networkx_graph, type_suffix) From 080da035ef7c065a38db7c09cecb52f244afa35c Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 10:53:31 +0100 Subject: [PATCH 09/20] adding check for if file exists when generating cgs with metadata --- src/main.rs | 103 ++++++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 48 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7fdbda6..e3a0dfb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ use indicatif::{ParallelProgressIterator, ProgressIterator}; use mimalloc::MiMalloc; use rayon::iter::ParallelIterator; use rayon::prelude::IntoParallelRefIterator; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process::exit; use walkdir::WalkDir; @@ -40,6 +40,7 @@ use crate::dedup::EsilFuncStringCorpus; use crate::extract::ExtractionJobType; use crate::files::{AFIJFile, AGCJFile}; use crate::tokeniser::{train_byte_bpe_tokeniser, TokeniserType}; +use crate::utils::get_save_file_path; use bb::{FeatureType, InstructionMode}; #[cfg(feature = "goblin")] use binnfo::goblin_info; @@ -320,7 +321,7 @@ fn main() { with_features, metadata_path, } => { - let graph_type = match graph_type.as_str() { + let graph_data_type = match graph_type.as_str() { "cfg" => DataType::Cfg, "cg" => DataType::Cg, "onehopcg" => DataType::OneHopCg, @@ -329,7 +330,7 @@ fn main() { _ => DataType::Invalid, }; - if graph_type == DataType::Cfg && *with_features == true { + if graph_data_type == DataType::Cfg && *with_features == true { warn!("The 'with_features' toggle is set but is not support for CFG generation. Will ignore.") }; @@ -337,8 +338,8 @@ fn main() { error!("{} does not exist!", path); exit(1) } - info!("Chosen Graph Type: {}", graph_type); - if graph_type == DataType::Cfg { + info!("Chosen Graph Type: {}", graph_data_type); + if graph_data_type == DataType::Cfg { if feature_type.is_some() { let feature_vec_type = match feature_type.as_ref().unwrap().as_str() { "gemini" => FeatureType::Gemini, @@ -443,7 +444,7 @@ fn main() { }; file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - if graph_type == DataType::Cg { + if graph_data_type == DataType::Cg { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph( &file, @@ -452,7 +453,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::OneHopCg { + } else if graph_data_type == DataType::OneHopCg { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph( &file, @@ -461,7 +462,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::CgWithCallers { + } else if graph_data_type == DataType::CgWithCallers { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph_with_callers( &file, @@ -470,7 +471,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::OneHopCgWithcallers { + } else if graph_data_type == DataType::OneHopCgWithcallers { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph_with_callers( &file, @@ -508,7 +509,7 @@ fn main() { file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - if graph_type == DataType::Cg { + if graph_data_type == DataType::Cg { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph( &file, @@ -517,7 +518,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::OneHopCg { + } else if graph_data_type == DataType::OneHopCg { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph( &file, @@ -526,7 +527,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::CgWithCallers { + } else if graph_data_type == DataType::CgWithCallers { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph_with_callers( &file, @@ -535,7 +536,7 @@ fn main() { with_features, ); } - } else if graph_type == DataType::OneHopCgWithcallers { + } else if graph_data_type == DataType::OneHopCgWithcallers { for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph_with_callers( &file, @@ -570,44 +571,48 @@ fn main() { .collect::>(); combined_cgs_metadata.par_iter().for_each(|tup| { - let mut file = { - let mut metadata = AFIJFile { - filename: tup.1.clone(), - function_info: None, - output_path: "".to_string(), + let suffix = format!("{}-meta", graph_type.to_owned()); + let full_output_path = + PathBuf::from(get_save_file_path(&tup.0, output_path, Some(suffix))); + if !full_output_path.is_dir() { + let mut file = { + let mut metadata = AFIJFile { + filename: tup.1.clone(), + function_info: None, + output_path: "".to_string(), + }; + debug!("Attempting to load metadata file: {}", tup.1); + let _ = metadata + .load_and_deserialize() + .expect("Unable to load assocaited metadata file"); + let metadata_subset = metadata.subset(); + AGCJFile { + filename: tup.0.to_owned(), + function_call_graphs: None, + output_path: output_path.to_owned(), + function_metadata: Some(metadata_subset), + } }; - debug!("Attempting to load metadata file: {}", tup.1); - let _ = metadata - .load_and_deserialize() - .expect("Unable to load assocaited metadata file"); - let metadata_subset = metadata.subset(); - AGCJFile { - filename: tup.0.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: Some(metadata_subset), - } - }; - debug!("Attempting to load {}", file.filename); - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - if graph_type == DataType::Cg { - debug!("Generating call graphs using loaded cgs + metadata"); - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_type == DataType::OneHopCg { + debug!("Attempting to load {}", file.filename); + file.load_and_deserialize() + .expect("Unable to load and desearilize JSON"); + + if graph_data_type == DataType::Cg { + debug!("Generating call graphs using loaded cgs + metadata"); + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_data_type == DataType::OneHopCg { debug!("Generating one hop call graphs using loaded cgs + metadata"); for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features); } - } else if graph_type == DataType::CgWithCallers { + } else if graph_data_type == DataType::CgWithCallers { debug!("Generating call graphs with callers using loaded cgs + metadata"); for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.to_petgraph_with_callers( @@ -617,7 +622,7 @@ fn main() { with_features ); } - } else if graph_type == DataType::OneHopCgWithcallers { + } else if graph_data_type == DataType::OneHopCgWithcallers { debug!("Generating one hop call graphs with callers using loaded cgs + metadata"); for fcg in file.function_call_graphs.as_ref().unwrap() { fcg.one_hop_to_petgraph_with_callers( @@ -629,7 +634,9 @@ fn main() { } } debug!("Finished generating cgs + metadata for {}", file.filename); - }); + } else { + info!("Skipping {} as already exists", full_output_path.to_string_lossy()) + }}); } } } From b92f99574d83408c31d66807ea1bd6a5a493c815 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 11:16:47 +0100 Subject: [PATCH 10/20] [feature] adding parallel support to cgs without metadata --- src/main.rs | 112 +++++++++++++++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 49 deletions(-) diff --git a/src/main.rs b/src/main.rs index e3a0dfb..5733865 100644 --- a/src/main.rs +++ b/src/main.rs @@ -133,7 +133,7 @@ enum GenerateSubCommands { embed_dim: Option, /// Toggle for call graphs to include AFIJ feature subsets - #[arg(long)] + #[arg(long, default_value = "false")] with_features: bool, /// Filepath to the AFIJ function metadata @@ -484,7 +484,7 @@ fn main() { } else { debug!("Multiple files found"); - if metadata_path.is_none() { + if metadata_path.is_none() & with_features { error!("with features active - require --metadata-path argument"); exit(1) }; @@ -498,55 +498,69 @@ fn main() { // if without metadata if !with_features { debug!("Creating call graphs without any node features"); - for path in file_paths_vec.iter() { - let mut file = AGCJFile { - filename: path.to_owned(), - function_call_graphs: None, - output_path: output_path.to_owned(), - function_metadata: None, - }; - debug!("Proceissing {}", file.filename); - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - - if graph_data_type == DataType::Cg { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_data_type == DataType::OneHopCg { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_data_type == DataType::CgWithCallers { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_data_type == DataType::OneHopCgWithcallers { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - with_features, - ); + //for path in file_paths_vec.iter() { + file_paths_vec.par_iter().for_each(|path| { + let suffix = format!("{}", graph_type.to_owned()); + let full_output_path = PathBuf::from(get_save_file_path( + path, + output_path, + Some(suffix), + )); + if !full_output_path.is_dir() { + let mut file = AGCJFile { + filename: path.to_owned(), + function_call_graphs: None, + output_path: output_path.to_owned(), + function_metadata: None, + }; + debug!("Proceissing {}", file.filename); + file.load_and_deserialize() + .expect("Unable to load and desearilize JSON"); + + if graph_data_type == DataType::Cg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_data_type == DataType::OneHopCg { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_data_type == DataType::CgWithCallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } + } else if graph_data_type == DataType::OneHopCgWithcallers { + for fcg in file.function_call_graphs.as_ref().unwrap() { + fcg.one_hop_to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features, + ); + } } + } else { + info!( + "Skipping {} as already exists", + full_output_path.to_string_lossy() + ) } - } + }) } else { debug!("Creating call graphs with node features"); debug!("Getting metadata file paths"); From 2b0c501b035b7656bac79b612756019e16bb2950 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Sat, 14 Oct 2023 11:30:46 +0100 Subject: [PATCH 11/20] [tidy] clippy and rust fmt --- src/afij.rs | 4 ++-- src/agcj.rs | 3 +-- src/extract.rs | 1 - src/main.rs | 11 +++++------ src/networkx.rs | 4 ++-- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/afij.rs b/src/afij.rs index d962adf..417eb90 100644 --- a/src/afij.rs +++ b/src/afij.rs @@ -115,8 +115,8 @@ impl From<&AFIJFunctionInfo> for AFIJFeatureSubset { fn from(src: &AFIJFunctionInfo) -> AFIJFeatureSubset { AFIJFeatureSubset { name: src.name.clone(), - ninstrs: src.ninstrs.clone(), - edges: src.edges.clone(), + ninstrs: src.ninstrs, + edges: src.edges, indegree: src.indegree.unwrap_or(0), outdegree: src.outdegree.unwrap_or(0), nlocals: src.nlocals.unwrap_or(0), diff --git a/src/agcj.rs b/src/agcj.rs index fcbbb74..3eea8ae 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -4,7 +4,6 @@ use crate::utils::{check_or_create_dir, get_save_file_path}; use itertools::Itertools; use petgraph::prelude::Graph; use serde::{Deserialize, Serialize}; -use std::fmt::format; use std::fs::File; #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -30,7 +29,7 @@ impl AGCJFunctionCallGraphs { if self.imports.is_some() { for ele in self.imports.as_ref().unwrap().iter() { let callee = graph.add_node(ele.clone()); - graph.update_edge(calling_func.clone(), callee, 0); + graph.update_edge(calling_func, callee, 0); } graph } else { diff --git a/src/extract.rs b/src/extract.rs index 77b00e5..2847844 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -9,7 +9,6 @@ use serde::{Deserialize, Serialize}; use serde_aux::prelude::*; use serde_json; use serde_json::{json, Value}; -use serde_with::serde_as; use std::collections::HashMap; use std::fs; use std::fs::File; diff --git a/src/main.rs b/src/main.rs index 5733865..6f2238e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,6 @@ use clap::{Parser, Subcommand}; use std::fmt; -use std::fmt::write; #[macro_use] extern crate log; use clap::builder::TypedValueParser; @@ -330,7 +329,7 @@ fn main() { _ => DataType::Invalid, }; - if graph_data_type == DataType::Cfg && *with_features == true { + if graph_data_type == DataType::Cfg && *with_features { warn!("The 'with_features' toggle is set but is not support for CFG generation. Will ignore.") }; @@ -424,7 +423,7 @@ fn main() { function_info: None, output_path: "".to_string(), }; - let _ = metadata + metadata .load_and_deserialize() .expect("Unable to load file"); let metadata_subset = metadata.subset(); @@ -500,7 +499,7 @@ fn main() { debug!("Creating call graphs without any node features"); //for path in file_paths_vec.iter() { file_paths_vec.par_iter().for_each(|path| { - let suffix = format!("{}", graph_type.to_owned()); + let suffix = graph_type.to_owned().to_string(); let full_output_path = PathBuf::from(get_save_file_path( path, output_path, @@ -571,7 +570,7 @@ fn main() { }; let mut metadata_paths_vec = get_json_paths_from_dir( - &metadata_path.as_ref().unwrap(), + metadata_path.as_ref().unwrap(), Some("finfo".to_string()), ); @@ -596,7 +595,7 @@ fn main() { output_path: "".to_string(), }; debug!("Attempting to load metadata file: {}", tup.1); - let _ = metadata + metadata .load_and_deserialize() .expect("Unable to load assocaited metadata file"); let metadata_subset = metadata.subset(); diff --git a/src/networkx.rs b/src/networkx.rs index 1d11213..7b00e54 100644 --- a/src/networkx.rs +++ b/src/networkx.rs @@ -194,11 +194,11 @@ impl From<(Graph, &Vec)> let mut nodes: Vec = vec![]; for (i, node_weight) in node_weights.enumerate() { let subset_object = src_graph.1.iter().find(|ele| &ele.name == node_weight); - if subset_object.is_some() { + if let Some(subset_object) = subset_object { nodes.push(CallGraphFuncWithMetadata { id: i as i64, func_name: node_weight.to_owned(), - function_feature_subset: subset_object.unwrap().clone(), + function_feature_subset: subset_object.clone(), }) } else { nodes.push(CallGraphFuncWithMetadata { From 368c4edda8970cfcf23f96a0a9e2b5f8c5bec700 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Wed, 22 Nov 2023 20:11:45 +0000 Subject: [PATCH 12/20] adding support for onehopcg dedup + bumping version --- Cargo.toml | 2 +- src/afij.rs | 2 +- src/dedup.rs | 132 +++++++++++++++++++++++++++++++++++++++++++++++- src/main.rs | 49 ++++++++++++++---- src/networkx.rs | 6 +-- 5 files changed, 173 insertions(+), 18 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 003dd31..27d882b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bin2ml" -version = "0.2.3" +version = "0.2.4" edition = "2021" [dependencies] diff --git a/src/afij.rs b/src/afij.rs index 417eb90..cc8e4ca 100644 --- a/src/afij.rs +++ b/src/afij.rs @@ -98,7 +98,7 @@ pub struct Regvar { pub ref_field: String, } -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct AFIJFeatureSubset { pub name: String, diff --git a/src/dedup.rs b/src/dedup.rs index 33e5055..36a3457 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -1,15 +1,18 @@ +use crate::networkx::{CallGraphFuncWithMetadata, NetworkxDiGraph}; use anyhow::Result; +use indicatif::ProgressIterator; use itertools::Itertools; use prettytable::row; use prettytable::Table; use serde::{Deserialize, Serialize}; use serde_json::json; use std::collections::hash_map::DefaultHasher; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::{read_to_string, File}; use std::hash::{Hash, Hasher}; +use std::path::Path; use std::string::String; -use std::vec; +use std::{fs, vec}; use walkdir::{DirEntry, WalkDir}; #[derive(Serialize, Deserialize, Debug)] @@ -273,3 +276,128 @@ impl EsilFuncStringCorpus { } } } + +#[derive(Debug)] +pub struct OneHopCGCorpus { + pub loaded_data: Vec>, + pub filepaths: Vec, + pub output_path: String, +} + +impl OneHopCGCorpus { + pub fn new(directory: &String, output_path: &String) -> Result { + if !Path::new(output_path).exists() { + fs::create_dir(output_path).expect("Failed to create output directory!"); + info!("Output path not found - Creating {}", output_path) + } + + let mut filepaths = Vec::new(); + let mut loaded_data = Vec::new(); + + for file in WalkDir::new(directory) + .into_iter() + .filter_map(|file| file.ok()) + { + if file.path().to_string_lossy().ends_with(".json") { + filepaths.push(file.clone().path().to_string_lossy().to_string()); + } + } + info!("Loading the filepaths"); + for ele in filepaths.iter().progress() { + let data = read_to_string(&ele).expect(&format!("Unable to read file - {:?}", ele)); + + let json: NetworkxDiGraph = serde_json::from_str(&data) + .expect(&format!("Unable to load function data from {}", ele)); + + if !json.nodes.is_empty() { + loaded_data.push(Some(json)) + } else { + loaded_data.push(None) + } + //info!("Load complete - {}", loaded_data.len()) + } + info!("Len Pre Filtering: {}", filepaths.len()); + info!("Removing any None loads"); + loaded_data.retain(|c| c.is_some()); + + info!("Starting to deduplicate the corpus"); + let (loaded_data, filepaths) = Self::dedup_corpus(loaded_data, filepaths); + let loaded_data = loaded_data.into_iter().flatten().collect(); + + Ok(OneHopCGCorpus { + loaded_data, + filepaths, + output_path: output_path.to_string(), + }) + } + + fn calculate_hash(t: &T) -> u64 { + let mut s = DefaultHasher::new(); + t.hash(&mut s); + s.finish() + } + + // This is very slow O(N)^2 + fn dedup_corpus( + mut data: Vec>>, + mut filepaths: Vec, + ) -> ( + Vec>>, + Vec, + ) { + info!("Creating the removal index"); + + let mut seen = HashSet::new(); + let mut indices_to_remove = Vec::new(); + for (i, data_ele) in data.iter_mut().enumerate().progress() { + let hash_value = Self::calculate_hash(&data_ele); + + if seen.contains(&hash_value) { + indices_to_remove.push(i) + } else { + seen.insert(hash_value); + } + } + info!("Starting the duplicate removal!"); + for ele in indices_to_remove.iter().rev().progress() { + data.remove(*ele); + filepaths.remove(ele.clone()); + } + return (data, filepaths); + } + + pub fn save_corpus(self) { + info!("Saving Deduplicated files..."); + for (data_ele, filepath) in self + .loaded_data + .iter() + .zip(self.filepaths.iter()) + .progress() + { + // need last two bits + + let fixed_path: Vec<_> = Path::new(filepath) + .components() + .rev() + .take(2) + .collect::>(); + + let fixed_path = fixed_path + .iter() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .rev() + .collect::>(); + + let dirs = format!("{}{}", self.output_path, fixed_path[0]); + fs::create_dir_all(&dirs).expect("Failed to create output directory!"); + + let fixed_path = format!("{}/{}", dirs, fixed_path[1]); + debug!("Path: {:?}", fixed_path); + serde_json::to_writer( + &File::create(fixed_path).expect("Failed to create writer"), + &data_ele, + ) + .expect("Unable to write JSON"); + } + } +} diff --git a/src/main.rs b/src/main.rs index 6f2238e..f581f75 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,7 +35,7 @@ pub mod processors; pub mod tokeniser; pub mod utils; -use crate::dedup::EsilFuncStringCorpus; +use crate::dedup::{EsilFuncStringCorpus, OneHopCGCorpus}; use crate::extract::ExtractionJobType; use crate::files::{AFIJFile, AGCJFile}; use crate::tokeniser::{train_byte_bpe_tokeniser, TokeniserType}; @@ -269,6 +269,15 @@ enum Commands { #[arg(short, long, value_name = "FILENAME")] filename: String, + /// Type of dedup + #[arg(short, long, value_name = "TYPE", value_parser = clap::builder::PossibleValuesParser::new(["esilfstr", "onehopcgs"]) + .map(|s| s.parse::().unwrap()))] + datatype: String, + + /// Output path to save dedup corpus - Only works for onehopcgs atm + #[arg(short, long, value_name = "OUTPUT_PATH")] + output_path: String, + /// Toggle to print statistics of number of functions before and after dedup #[arg(long, default_value = "false")] print_stats: bool, @@ -497,7 +506,7 @@ fn main() { // if without metadata if !with_features { debug!("Creating call graphs without any node features"); - //for path in file_paths_vec.iter() { + file_paths_vec.par_iter().for_each(|path| { let suffix = graph_type.to_owned().to_string(); let full_output_path = PathBuf::from(get_save_file_path( @@ -860,20 +869,38 @@ fn main() { } Commands::Dedup { filename, + datatype, + output_path, print_stats, just_stats, num_threads, just_hash_value, } => { - warn!("THIS ONLY SUPPORTS FILES WITH THE FOLLOWING NAMING CONVENTION: ---_-.json"); - rayon::ThreadPoolBuilder::new() - .num_threads(*num_threads) - .build_global() - .unwrap(); - let corpus = EsilFuncStringCorpus::new(filename).unwrap(); - corpus.uniq_binaries.par_iter().progress().for_each(|name| { - corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) - }); + if datatype == "esilfstr" { + warn!("THIS ONLY SUPPORTS FILES WITH THE FOLLOWING NAMING CONVENTION: ---_-.json"); + rayon::ThreadPoolBuilder::new() + .num_threads(*num_threads) + .build_global() + .unwrap(); + let corpus = EsilFuncStringCorpus::new(filename).unwrap(); + corpus.uniq_binaries.par_iter().progress().for_each(|name| { + corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) + }); + } else if datatype == "onehopcgs" { + println!("Onehopcgs - {}!", filename); + if Path::new(filename).exists() { + info!("Starting decuplication process for One Hop Call Graphs"); + let corpus = OneHopCGCorpus::new(filename, output_path).unwrap(); + println!( + "{:?} - {:?}", + &corpus.filepaths.len(), + &corpus.loaded_data.len() + ); + corpus.save_corpus() + } else { + error!("Filename provided does not exist! - {}", filename) + } + } } } } diff --git a/src/networkx.rs b/src/networkx.rs index 7b00e54..7b0f9c2 100644 --- a/src/networkx.rs +++ b/src/networkx.rs @@ -5,7 +5,7 @@ use petgraph::prelude::Graph; use petgraph::visit::EdgeRef; use serde::{Deserialize, Serialize}; -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct NetworkxDiGraph { pub adjacency: Vec>, @@ -15,7 +15,7 @@ pub struct NetworkxDiGraph { pub nodes: Vec, } -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Adjacency { pub id: usize, @@ -141,7 +141,7 @@ pub struct CallGraphFuncNameNode { pub func_name: String, } -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct CallGraphFuncWithMetadata { pub id: i64, From 39519f19a31ba542fb8fa40bcfa84b2f98fdbc29 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Mon, 27 Nov 2023 12:13:37 +0000 Subject: [PATCH 13/20] adding rayon support to speed up dedup --- src/dedup.rs | 172 +++++++++++++++++++++++++++++++++++---------------- src/utils.rs | 4 ++ 2 files changed, 121 insertions(+), 55 deletions(-) diff --git a/src/dedup.rs b/src/dedup.rs index 36a3457..0254491 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -1,9 +1,10 @@ use crate::networkx::{CallGraphFuncWithMetadata, NetworkxDiGraph}; use anyhow::Result; -use indicatif::ProgressIterator; +use indicatif::ParallelProgressIterator; use itertools::Itertools; use prettytable::row; use prettytable::Table; +use rayon::prelude::*; use serde::{Deserialize, Serialize}; use serde_json::json; use std::collections::hash_map::DefaultHasher; @@ -12,6 +13,8 @@ use std::fs::{read_to_string, File}; use std::hash::{Hash, Hasher}; use std::path::Path; use std::string::String; + +use std::sync::{Arc, Mutex}; use std::{fs, vec}; use walkdir::{DirEntry, WalkDir}; @@ -292,8 +295,9 @@ impl OneHopCGCorpus { } let mut filepaths = Vec::new(); - let mut loaded_data = Vec::new(); + let mut fp_binaries = Vec::new(); + // Load all JSON filepaths for file in WalkDir::new(directory) .into_iter() .filter_map(|file| file.ok()) @@ -302,27 +306,85 @@ impl OneHopCGCorpus { filepaths.push(file.clone().path().to_string_lossy().to_string()); } } - info!("Loading the filepaths"); - for ele in filepaths.iter().progress() { - let data = read_to_string(&ele).expect(&format!("Unable to read file - {:?}", ele)); - let json: NetworkxDiGraph = serde_json::from_str(&data) - .expect(&format!("Unable to load function data from {}", ele)); + // Process the file paths to get the associated binary of each path + for file in &filepaths { + let binary_intermediate = Path::new(file).parent().unwrap().file_name().unwrap(); + let binary = binary_intermediate + .to_string_lossy() + .split("_") + .nth(1) + .unwrap() + .to_string(); + + fp_binaries.push(binary) + } - if !json.nodes.is_empty() { - loaded_data.push(Some(json)) - } else { - loaded_data.push(None) - } - //info!("Load complete - {}", loaded_data.len()) + // Generate binary specific filepath vectors + let unqiue_binaries: Vec<_> = fp_binaries.iter().unique().collect(); + let mut unique_binaries_fps: Vec> = vec![Vec::new(); unqiue_binaries.len()]; + + for (file, binary) in filepaths.iter().zip(fp_binaries.iter()) { + unique_binaries_fps + [unqiue_binaries.iter().position(|&x| x == binary).unwrap() as usize] + .push(file.clone()); } - info!("Len Pre Filtering: {}", filepaths.len()); - info!("Removing any None loads"); - loaded_data.retain(|c| c.is_some()); - info!("Starting to deduplicate the corpus"); - let (loaded_data, filepaths) = Self::dedup_corpus(loaded_data, filepaths); - let loaded_data = loaded_data.into_iter().flatten().collect(); + // Create a Vec of Vec where each vec is a unique binary + let deduped_data = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); + let deduped_paths = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); + + info!("Loading the filepaths"); + unique_binaries_fps + .par_iter() + .progress() + .enumerate() + .for_each(|(idx, fp_subset)| { + let mut subset_loaded_data = Vec::new(); + + for ele in fp_subset.iter() { + let data = + read_to_string(&ele).expect(&format!("Unable to read file - {:?}", ele)); + + let json: NetworkxDiGraph = + serde_json::from_str(&data) + .expect(&format!("Unable to load function data from {}", ele)); + + if !json.nodes.is_empty() { + subset_loaded_data.push(Some(json)) + } else { + subset_loaded_data.push(None) + } + } + + //info!("Len Pre Filtering: {:?}", fp_subset.len()); + //info!("Removing any None loads"); + subset_loaded_data.retain(|c| c.is_some()); + + //info!("Starting to deduplicate the corpus"); + let (subset_loaded_data, fp_subset) = + Self::dedup_corpus(&mut subset_loaded_data, fp_subset.to_vec()); + let subset_loaded_data: Vec> = + subset_loaded_data.into_iter().filter_map(|x| x).collect(); + + deduped_data + .lock() + .unwrap() + .insert(idx, subset_loaded_data.clone()); + deduped_paths.lock().unwrap().insert(idx, fp_subset); + }); + info!("File loading complete"); + let deduped_data = Arc::try_unwrap(deduped_data).unwrap().into_inner().unwrap(); + let deduped_paths = Arc::try_unwrap(deduped_paths) + .unwrap() + .into_inner() + .unwrap(); + + let loaded_data = deduped_data.into_iter().flatten().collect(); + let filepaths: Vec = deduped_paths.into_iter().flatten().collect(); + let filepaths = filepaths.iter().map(|x| x.to_string()).collect(); + + info!("Returning One Hop CG Corpus Struct"); Ok(OneHopCGCorpus { loaded_data, @@ -339,17 +401,17 @@ impl OneHopCGCorpus { // This is very slow O(N)^2 fn dedup_corpus( - mut data: Vec>>, + data: &mut Vec>>, mut filepaths: Vec, ) -> ( Vec>>, Vec, ) { - info!("Creating the removal index"); + //info!("Creating the removal index"); let mut seen = HashSet::new(); let mut indices_to_remove = Vec::new(); - for (i, data_ele) in data.iter_mut().enumerate().progress() { + for (i, data_ele) in data.iter_mut().enumerate() { let hash_value = Self::calculate_hash(&data_ele); if seen.contains(&hash_value) { @@ -358,46 +420,46 @@ impl OneHopCGCorpus { seen.insert(hash_value); } } - info!("Starting the duplicate removal!"); - for ele in indices_to_remove.iter().rev().progress() { + //info!("Starting the duplicate removal!"); + for ele in indices_to_remove.iter().rev() { data.remove(*ele); filepaths.remove(ele.clone()); } - return (data, filepaths); + return (data.to_vec(), filepaths); } pub fn save_corpus(self) { info!("Saving Deduplicated files..."); - for (data_ele, filepath) in self - .loaded_data - .iter() - .zip(self.filepaths.iter()) + //for (data_ele, filepath) in self.loaded_data.par_iter().zip(self.filepaths.par_iter()) { + // need last two bits + self.loaded_data + .par_iter() + .zip(self.filepaths.par_iter()) .progress() - { - // need last two bits - - let fixed_path: Vec<_> = Path::new(filepath) - .components() - .rev() - .take(2) - .collect::>(); - - let fixed_path = fixed_path - .iter() - .map(|c| c.as_os_str().to_string_lossy().to_string()) - .rev() - .collect::>(); - - let dirs = format!("{}{}", self.output_path, fixed_path[0]); - fs::create_dir_all(&dirs).expect("Failed to create output directory!"); - - let fixed_path = format!("{}/{}", dirs, fixed_path[1]); - debug!("Path: {:?}", fixed_path); - serde_json::to_writer( - &File::create(fixed_path).expect("Failed to create writer"), - &data_ele, - ) - .expect("Unable to write JSON"); - } + .for_each(|(data_ele, filepath)| { + let fixed_path: Vec<_> = Path::new(filepath) + .components() + .rev() + .take(2) + .collect::>(); + + let fixed_path = fixed_path + .iter() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .rev() + .collect::>(); + + let dirs = format!("{}{}", self.output_path, fixed_path[0]); + fs::create_dir_all(&dirs).expect("Failed to create output directory!"); + + let fixed_path = format!("{}/{}", dirs, fixed_path[1]); + debug!("Path: {:?}", fixed_path); + serde_json::to_writer( + &File::create(fixed_path).expect("Failed to create writer"), + &data_ele, + ) + .expect("Unable to write JSON"); + }); + info!("All files saved!"); } } diff --git a/src/utils.rs b/src/utils.rs index d795561..10995ca 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -19,6 +19,10 @@ pub fn get_save_file_path( output_path: &String, optional_suffix: Option, ) -> String { + debug!( + "Building Filepath - Binary Path: {:?} Output Path: {:?}", + binary_path, output_path + ); let file_name = Path::new(binary_path) .file_stem() .unwrap() From eb5a2742bb4116cc5b7a34c33b39a21b0ee861be Mon Sep 17 00:00:00 2001 From: Br0kej Date: Mon, 27 Nov 2023 12:15:45 +0000 Subject: [PATCH 14/20] removing un-needed print + fix typo --- src/main.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index f581f75..81f0ef3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -887,9 +887,8 @@ fn main() { corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) }); } else if datatype == "onehopcgs" { - println!("Onehopcgs - {}!", filename); if Path::new(filename).exists() { - info!("Starting decuplication process for One Hop Call Graphs"); + info!("Starting duplication process for One Hop Call Graphs"); let corpus = OneHopCGCorpus::new(filename, output_path).unwrap(); println!( "{:?} - {:?}", From 3478cdee851fc49a80d638257ebbf3fed4aaf2ce Mon Sep 17 00:00:00 2001 From: Br0kej Date: Tue, 28 Nov 2023 11:35:57 +0000 Subject: [PATCH 15/20] fixing logging --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 81f0ef3..53a938c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -807,7 +807,7 @@ fn main() { .progress() .for_each(|path| path.extract_register_behaviour(debug)); } else if job.job_type == ExtractionJobType::FunctionXrefs { - info!("Extraction Job Type: Register Behaviour"); + info!("Extraction Job Type: Function Xrefs"); info!("Starting Parallel generation."); #[allow(clippy::redundant_closure)] job.files_to_be_processed @@ -815,7 +815,7 @@ fn main() { .progress() .for_each(|path| path.extract_function_xrefs(debug)); } else if job.job_type == ExtractionJobType::CallGraphs { - info!("Extraction Job Type: Register Behaviour"); + info!("Extraction Job Type: Call Graphs"); info!("Starting Parallel generation."); #[allow(clippy::redundant_closure)] job.files_to_be_processed From e0b414cdd8643c7b1043cdf54a4c3cedbbe03c0a Mon Sep 17 00:00:00 2001 From: Br0kej Date: Tue, 28 Nov 2023 12:12:00 +0000 Subject: [PATCH 16/20] refactored the parellel processing of onehopcg dedup to a specific function rather than the new() method and added saving in by default --- src/dedup.rs | 139 ++++++++++++++++++++++----------------------------- src/main.rs | 7 +-- 2 files changed, 62 insertions(+), 84 deletions(-) diff --git a/src/dedup.rs b/src/dedup.rs index 0254491..e9ce20f 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -282,7 +282,6 @@ impl EsilFuncStringCorpus { #[derive(Debug)] pub struct OneHopCGCorpus { - pub loaded_data: Vec>, pub filepaths: Vec, pub output_path: String, } @@ -295,7 +294,6 @@ impl OneHopCGCorpus { } let mut filepaths = Vec::new(); - let mut fp_binaries = Vec::new(); // Load all JSON filepaths for file in WalkDir::new(directory) @@ -307,8 +305,54 @@ impl OneHopCGCorpus { } } + info!("Returning One Hop CG Corpus Struct"); + + Ok(OneHopCGCorpus { + filepaths, + output_path: output_path.to_string(), + }) + } + + fn calculate_hash(t: &T) -> u64 { + let mut s = DefaultHasher::new(); + t.hash(&mut s); + s.finish() + } + + // This is very slow O(N)^2 + fn dedup_corpus( + data: &mut Vec>>, + mut filepaths: Vec, + ) -> ( + Vec>>, + Vec, + ) { + //info!("Creating the removal index"); + + let mut seen = HashSet::new(); + let mut indices_to_remove = Vec::new(); + for (i, data_ele) in data.iter_mut().enumerate() { + let hash_value = Self::calculate_hash(&data_ele); + + if seen.contains(&hash_value) { + indices_to_remove.push(i) + } else { + seen.insert(hash_value); + } + } + //info!("Starting the duplicate removal!"); + for ele in indices_to_remove.iter().rev() { + data.remove(*ele); + filepaths.remove(ele.clone()); + } + return (data.to_vec(), filepaths); + } + + pub fn process_corpus(self) { + let mut fp_binaries = Vec::new(); // Process the file paths to get the associated binary of each path - for file in &filepaths { + info!("Processing Filepaths to get binaries"); + for file in &self.filepaths { let binary_intermediate = Path::new(file).parent().unwrap().file_name().unwrap(); let binary = binary_intermediate .to_string_lossy() @@ -324,15 +368,15 @@ impl OneHopCGCorpus { let unqiue_binaries: Vec<_> = fp_binaries.iter().unique().collect(); let mut unique_binaries_fps: Vec> = vec![Vec::new(); unqiue_binaries.len()]; - for (file, binary) in filepaths.iter().zip(fp_binaries.iter()) { + for (file, binary) in self.filepaths.iter().zip(fp_binaries.iter()) { unique_binaries_fps [unqiue_binaries.iter().position(|&x| x == binary).unwrap() as usize] .push(file.clone()); } // Create a Vec of Vec where each vec is a unique binary - let deduped_data = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); - let deduped_paths = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); + //let deduped_data = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); + //let deduped_paths = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); info!("Loading the filepaths"); unique_binaries_fps @@ -357,85 +401,26 @@ impl OneHopCGCorpus { } } - //info!("Len Pre Filtering: {:?}", fp_subset.len()); - //info!("Removing any None loads"); subset_loaded_data.retain(|c| c.is_some()); - //info!("Starting to deduplicate the corpus"); + info!("Starting to deduplicate the corpus - {}", idx); let (subset_loaded_data, fp_subset) = Self::dedup_corpus(&mut subset_loaded_data, fp_subset.to_vec()); let subset_loaded_data: Vec> = subset_loaded_data.into_iter().filter_map(|x| x).collect(); - - deduped_data - .lock() - .unwrap() - .insert(idx, subset_loaded_data.clone()); - deduped_paths.lock().unwrap().insert(idx, fp_subset); + info!("Starting to save - {}", idx); + self.save_corpus(subset_loaded_data, fp_subset); + info!("File processing complete - {}", idx); }); - info!("File loading complete"); - let deduped_data = Arc::try_unwrap(deduped_data).unwrap().into_inner().unwrap(); - let deduped_paths = Arc::try_unwrap(deduped_paths) - .unwrap() - .into_inner() - .unwrap(); - - let loaded_data = deduped_data.into_iter().flatten().collect(); - let filepaths: Vec = deduped_paths.into_iter().flatten().collect(); - let filepaths = filepaths.iter().map(|x| x.to_string()).collect(); - - info!("Returning One Hop CG Corpus Struct"); - - Ok(OneHopCGCorpus { - loaded_data, - filepaths, - output_path: output_path.to_string(), - }) - } - - fn calculate_hash(t: &T) -> u64 { - let mut s = DefaultHasher::new(); - t.hash(&mut s); - s.finish() } - - // This is very slow O(N)^2 - fn dedup_corpus( - data: &mut Vec>>, - mut filepaths: Vec, - ) -> ( - Vec>>, - Vec, + pub fn save_corpus( + &self, + subset_loaded_data: Vec>, + fp_subset: Vec, ) { - //info!("Creating the removal index"); - - let mut seen = HashSet::new(); - let mut indices_to_remove = Vec::new(); - for (i, data_ele) in data.iter_mut().enumerate() { - let hash_value = Self::calculate_hash(&data_ele); - - if seen.contains(&hash_value) { - indices_to_remove.push(i) - } else { - seen.insert(hash_value); - } - } - //info!("Starting the duplicate removal!"); - for ele in indices_to_remove.iter().rev() { - data.remove(*ele); - filepaths.remove(ele.clone()); - } - return (data.to_vec(), filepaths); - } - - pub fn save_corpus(self) { - info!("Saving Deduplicated files..."); - //for (data_ele, filepath) in self.loaded_data.par_iter().zip(self.filepaths.par_iter()) { - // need last two bits - self.loaded_data - .par_iter() - .zip(self.filepaths.par_iter()) - .progress() + subset_loaded_data + .iter() + .zip(fp_subset.iter()) .for_each(|(data_ele, filepath)| { let fixed_path: Vec<_> = Path::new(filepath) .components() @@ -453,13 +438,11 @@ impl OneHopCGCorpus { fs::create_dir_all(&dirs).expect("Failed to create output directory!"); let fixed_path = format!("{}/{}", dirs, fixed_path[1]); - debug!("Path: {:?}", fixed_path); serde_json::to_writer( &File::create(fixed_path).expect("Failed to create writer"), &data_ele, ) .expect("Unable to write JSON"); }); - info!("All files saved!"); } } diff --git a/src/main.rs b/src/main.rs index 53a938c..69e7807 100644 --- a/src/main.rs +++ b/src/main.rs @@ -890,12 +890,7 @@ fn main() { if Path::new(filename).exists() { info!("Starting duplication process for One Hop Call Graphs"); let corpus = OneHopCGCorpus::new(filename, output_path).unwrap(); - println!( - "{:?} - {:?}", - &corpus.filepaths.len(), - &corpus.loaded_data.len() - ); - corpus.save_corpus() + corpus.process_corpus(); } else { error!("Filename provided does not exist! - {}", filename) } From 9438f7caa56fcbb53b4ec6bcb1cc1789f280cfc1 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Wed, 29 Nov 2023 14:05:32 +0000 Subject: [PATCH 17/20] tidying up comments/logging --- src/dedup.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/dedup.rs b/src/dedup.rs index e9ce20f..fd0a2ca 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -327,7 +327,7 @@ impl OneHopCGCorpus { Vec>>, Vec, ) { - //info!("Creating the removal index"); + debug!("Creating the removal index"); let mut seen = HashSet::new(); let mut indices_to_remove = Vec::new(); @@ -340,7 +340,7 @@ impl OneHopCGCorpus { seen.insert(hash_value); } } - //info!("Starting the duplicate removal!"); + debug!("Starting the duplicate removal!"); for ele in indices_to_remove.iter().rev() { data.remove(*ele); filepaths.remove(ele.clone()); @@ -374,10 +374,6 @@ impl OneHopCGCorpus { .push(file.clone()); } - // Create a Vec of Vec where each vec is a unique binary - //let deduped_data = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); - //let deduped_paths = Arc::new(Mutex::new(vec![Vec::new(); unqiue_binaries.len()])); - info!("Loading the filepaths"); unique_binaries_fps .par_iter() From 022547198b4dca4d60bf51de6d5b5582993dba07 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Wed, 29 Nov 2023 14:06:18 +0000 Subject: [PATCH 18/20] applying clippy suggestions --- src/dedup.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/dedup.rs b/src/dedup.rs index fd0a2ca..78e5202 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -14,7 +14,7 @@ use std::hash::{Hash, Hasher}; use std::path::Path; use std::string::String; -use std::sync::{Arc, Mutex}; + use std::{fs, vec}; use walkdir::{DirEntry, WalkDir}; @@ -343,9 +343,9 @@ impl OneHopCGCorpus { debug!("Starting the duplicate removal!"); for ele in indices_to_remove.iter().rev() { data.remove(*ele); - filepaths.remove(ele.clone()); + filepaths.remove(*ele); } - return (data.to_vec(), filepaths); + (data.to_vec(), filepaths) } pub fn process_corpus(self) { @@ -356,7 +356,7 @@ impl OneHopCGCorpus { let binary_intermediate = Path::new(file).parent().unwrap().file_name().unwrap(); let binary = binary_intermediate .to_string_lossy() - .split("_") + .split('_') .nth(1) .unwrap() .to_string(); @@ -370,7 +370,7 @@ impl OneHopCGCorpus { for (file, binary) in self.filepaths.iter().zip(fp_binaries.iter()) { unique_binaries_fps - [unqiue_binaries.iter().position(|&x| x == binary).unwrap() as usize] + [unqiue_binaries.iter().position(|&x| x == binary).unwrap()] .push(file.clone()); } @@ -384,7 +384,7 @@ impl OneHopCGCorpus { for ele in fp_subset.iter() { let data = - read_to_string(&ele).expect(&format!("Unable to read file - {:?}", ele)); + read_to_string(ele).expect(&format!("Unable to read file - {:?}", ele)); let json: NetworkxDiGraph = serde_json::from_str(&data) @@ -403,7 +403,7 @@ impl OneHopCGCorpus { let (subset_loaded_data, fp_subset) = Self::dedup_corpus(&mut subset_loaded_data, fp_subset.to_vec()); let subset_loaded_data: Vec> = - subset_loaded_data.into_iter().filter_map(|x| x).collect(); + subset_loaded_data.into_iter().flatten().collect(); info!("Starting to save - {}", idx); self.save_corpus(subset_loaded_data, fp_subset); info!("File processing complete - {}", idx); From fa6294a4f9fafcd1b060808f786357e120071a53 Mon Sep 17 00:00:00 2001 From: Br0kej Date: Wed, 29 Nov 2023 14:07:28 +0000 Subject: [PATCH 19/20] fixing --- src/bb.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bb.rs b/src/bb.rs index 1e0618e..77d367d 100644 --- a/src/bb.rs +++ b/src/bb.rs @@ -98,7 +98,7 @@ pub struct ACFJBlock { } impl FeatureType { - // Returns the correpsonding feature map given a provided FeatureType + // Returns the corresponding feature map given a provided FeatureType // These feature maps are used to provide the functionality that handles // writing the output graphs to Networkx compatible JSON with // node attribute names. @@ -115,7 +115,7 @@ impl FeatureType { impl ACFJBlock { // Generates integer encodings of a basic blocks ESIL instructions // - // This is to provide a means of conduting the extraction and tokenisation + // This is to provide a means of conducting the extraction and tokenisation // of ESIL instructions within Rust but then load the features in Python // to conduct inference and convert them into embeddings. #[cfg(feature = "inference")] @@ -183,7 +183,7 @@ impl ACFJBlock { // Generates the features from the Gemini paper // - // Setting reduced = True is equalivant of generating the basic block + // Setting reduced = True is equivalent of generating the basic block // features from the DISCOVRE paper (Eshweiler et al (2016)) // // Note: The Betweenness feature used in Gemini is calculated down stream using From fdc35ec61a9a8de67306f9af8f2a8ea057f39a4a Mon Sep 17 00:00:00 2001 From: Br0kej Date: Wed, 6 Dec 2023 15:47:55 +0000 Subject: [PATCH 20/20] [tweak] updating warning when using dedup that it currently only supports Cisco Talos naming convetion --- src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 69e7807..dcbf2e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -877,7 +877,7 @@ fn main() { just_hash_value, } => { if datatype == "esilfstr" { - warn!("THIS ONLY SUPPORTS FILES WITH THE FOLLOWING NAMING CONVENTION: ---_-.json"); + warn!("This only supports the Cisco Talos Binary Sim Dataset naming convention"); rayon::ThreadPoolBuilder::new() .num_threads(*num_threads) .build_global() @@ -887,6 +887,7 @@ fn main() { corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) }); } else if datatype == "onehopcgs" { + warn!("This only supports the Cisco Talos Binary Sim Dataset naming convention"); if Path::new(filename).exists() { info!("Starting duplication process for One Hop Call Graphs"); let corpus = OneHopCGCorpus::new(filename, output_path).unwrap();