diff --git a/Cargo.toml b/Cargo.toml index 27d882b..f6cc545 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bin2ml" -version = "0.2.4" +version = "0.2.6" edition = "2021" [dependencies] @@ -26,6 +26,7 @@ log = "0.4.19" env_logger = "0.10.0" thiserror = "1.0.47" enum-as-inner = "0.6.0" +ordered-float = { version = "4.2.0", features = ["serde"] } [dependencies.petgraph] version = "0.6.2" features = ["serde-1"] diff --git a/src/afij.rs b/src/afij.rs index cc8e4ca..18c6e43 100644 --- a/src/afij.rs +++ b/src/afij.rs @@ -7,7 +7,7 @@ use serde_json::Value; pub struct AFIJFunctionInfo { pub offset: i64, pub name: String, - pub size: i64, + pub size: i128, #[serde(rename = "is-pure")] pub is_pure: String, pub realsz: i64, @@ -26,8 +26,8 @@ pub struct AFIJFunctionInfo { pub edges: i64, pub ebbs: i64, pub signature: String, - pub minbound: i64, - pub maxbound: i64, + pub minbound: u64, + pub maxbound: i128, pub callrefs: Option>, // TODO: Need to fix this and change to string instead of i64 to get round large random numbers pub datarefs: Option>, diff --git a/src/agcj.rs b/src/agcj.rs index 3eea8ae..0c24c8e 100644 --- a/src/agcj.rs +++ b/src/agcj.rs @@ -1,5 +1,7 @@ use crate::files::AGCJFile; -use crate::networkx::{CallGraphFuncNameNode, CallGraphFuncWithMetadata, NetworkxDiGraph}; +use crate::networkx::{ + CallGraphFuncNameNode, CallGraphFuncWithMetadata, CallGraphTikNibFeatures, NetworkxDiGraph, +}; use crate::utils::{check_or_create_dir, get_save_file_path}; use itertools::Itertools; use petgraph::prelude::Graph; @@ -23,25 +25,41 @@ pub struct AGCJParsedObjects { } impl AGCJFunctionCallGraphs { - fn build_local_call_graph(&self) -> Graph { - let mut graph = Graph::::new(); - let calling_func = graph.add_node(self.name.clone()); - if self.imports.is_some() { - for ele in self.imports.as_ref().unwrap().iter() { - let callee = graph.add_node(ele.clone()); - graph.update_edge(calling_func, callee, 0); - } - graph - } else { - graph + fn 
graph_to_json_func_node( + &self, + binary_name: &str, + output_path: &String, + networkx_graph: NetworkxDiGraph, + type_suffix: &str, + ) { + let full_output_path = + get_save_file_path(binary_name, output_path, Some(type_suffix.to_string())); + check_or_create_dir(&full_output_path); + + let mut function_name = self.name.clone(); + + // This is a pretty dirty fix and may break things + if function_name.chars().count() > 100 { + function_name = self.name[..75].to_string(); } + + let filename = format!( + "{}/{}-{}.json", + full_output_path, function_name, type_suffix + ); + + serde_json::to_writer( + &File::create(filename).expect("Failed to create writer"), + &networkx_graph, + ) + .expect("Unable to write JSON"); } - fn graph_to_json_func_node( + fn graph_to_json_func_metadata_tiknib( &self, binary_name: &str, output_path: &String, - networkx_graph: NetworkxDiGraph, + networkx_graph: NetworkxDiGraph, type_suffix: &str, ) { let full_output_path = @@ -67,7 +85,7 @@ impl AGCJFunctionCallGraphs { .expect("Unable to write JSON"); } - fn graph_to_json_func_metadata( + fn graph_to_json_func_metadata_finfo( &self, binary_name: &str, output_path: &String, @@ -97,9 +115,39 @@ impl AGCJFunctionCallGraphs { .expect("Unable to write JSON"); } - fn get_callees_of_callees(&self, global_cg: &AGCJFile, graph: &mut Graph) { + fn build_local_call_graph(&self, include_unk: &bool) -> Graph { + let mut graph = Graph::::new(); + let calling_func = graph.add_node(self.name.clone()); if self.imports.is_some() { + for ele in self.imports.as_ref().unwrap().iter() { + if !include_unk { + if !ele.starts_with("unk.") { + let callee = graph.add_node(ele.clone()); + graph.update_edge(calling_func, callee, 0); + } + } else { + let callee = graph.add_node(ele.clone()); + graph.update_edge(calling_func, callee, 0); + } + } + graph + } else { + graph + } + } + + fn get_callees_of_callees( + &self, + global_cg: &AGCJFile, + graph: &mut Graph, + include_unk: &bool, + ) { + trace!("Starting getting 
callees of callees for: {:?}", self.name); + trace!("Graph: {:?}", graph); + if self.imports.is_some() { + trace!("Imports: {:?}", self.imports); for import in self.imports.as_ref().unwrap().iter() { + trace! {"Starting to Process {:?}", import}; let import_object: &Vec<&AGCJFunctionCallGraphs> = &global_cg .function_call_graphs .as_ref() @@ -108,13 +156,16 @@ impl AGCJFunctionCallGraphs { .filter(|cg| cg.name == *import) .collect_vec(); if !import_object.is_empty() { + trace!("Import Object: {:?}", import_object); for entry in import_object { - for ele in entry.imports.as_ref().unwrap().iter() { - let callee = graph.add_node(ele.clone()); - let import_node_index = - graph.node_indices().find(|i| &graph[*i] == import).unwrap(); - trace!("{:?} -> {:?}", import, ele); - graph.update_edge(import_node_index, callee, 0); + for importee in entry.imports.as_ref().unwrap().iter() { + if !include_unk { + if !importee.starts_with("unk.") { + self.process_callee(graph, import, importee) + } + } else { + self.process_callee(graph, import, importee) + } } } } @@ -122,7 +173,33 @@ impl AGCJFunctionCallGraphs { } } - fn get_target_func_callers(&self, global_cg: &AGCJFile, graph: &mut Graph) { + fn process_callee(&self, graph: &mut Graph, import: &String, importee: &String) { + let import_node_index = graph.node_indices().find(|i| &graph[*i] == import).unwrap(); + let importee_node_index = graph.node_indices().find(|i| &graph[*i] == importee); + + if let Some(importee_node_index_value) = importee_node_index { + trace!( + "Importee Present - Import -> Ele: {:?} -> {:?}", + import, + importee + ); + graph.update_edge(import_node_index, importee_node_index_value, 0); + } else { + let importee_node_index = graph.add_node(importee.clone()); + trace!( + "Importee Not Present - Import -> Ele: {:?} -> {:?}", + import, + importee + ); + graph.update_edge(import_node_index, importee_node_index, 0); + } + } + fn get_target_func_callers( + &self, + global_cg: &AGCJFile, + graph: &mut Graph, + 
include_unk: &bool, + ) { let callers = &global_cg .function_call_graphs .as_ref() @@ -133,8 +210,15 @@ impl AGCJFunctionCallGraphs { for cg in callers.iter() { let caller = graph.add_node(cg.name.clone()); - let func_target_index = graph.node_indices().find(|i| graph[*i] == self.name); - graph.update_edge(caller, func_target_index.unwrap(), 0); + if !include_unk { + if !cg.name.starts_with("unk.") { + let func_target_index = graph.node_indices().find(|i| graph[*i] == self.name); + graph.update_edge(caller, func_target_index.unwrap(), 0); + } + } else { + let func_target_index = graph.node_indices().find(|i| graph[*i] == self.name); + graph.update_edge(caller, func_target_index.unwrap(), 0); + } } } @@ -145,14 +229,18 @@ impl AGCJFunctionCallGraphs { output_path: &String, binary_name: &str, with_metadata: &bool, + include_unk: &bool, + node_feature_type: Option, ) { - let graph = self.build_local_call_graph(); + let graph = self.build_local_call_graph(include_unk); + debug!("{:?}", graph); self.convert_graph_to_networkx( graph, global_cg, binary_name, output_path, with_metadata, + node_feature_type, "cg", ) } @@ -165,16 +253,19 @@ impl AGCJFunctionCallGraphs { output_path: &String, binary_name: &str, with_metadata: &bool, + include_unk: &bool, + node_feature_type: Option, ) { - let mut graph = self.build_local_call_graph(); - - self.get_callees_of_callees(global_cg, &mut graph); + let mut graph = self.build_local_call_graph(include_unk); + self.get_callees_of_callees(global_cg, &mut graph, include_unk); + debug!("{:?}", graph); self.convert_graph_to_networkx( graph, global_cg, binary_name, output_path, with_metadata, + node_feature_type, "onehopcg", ) } @@ -185,15 +276,19 @@ impl AGCJFunctionCallGraphs { output_path: &String, binary_name: &str, with_metadata: &bool, + include_unk: &bool, + node_feature_type: Option, ) { - let mut graph = self.build_local_call_graph(); - self.get_target_func_callers(global_cg, &mut graph); + let mut graph = 
self.build_local_call_graph(include_unk); + self.get_target_func_callers(global_cg, &mut graph, include_unk); + debug!("{:?}", graph); self.convert_graph_to_networkx( graph, global_cg, binary_name, output_path, with_metadata, + node_feature_type, "cgcallers", ); } @@ -204,17 +299,21 @@ impl AGCJFunctionCallGraphs { output_path: &String, binary_name: &str, with_metadata: &bool, + include_unk: &bool, + node_feature_type: Option, ) { - let mut graph = self.build_local_call_graph(); + let mut graph = self.build_local_call_graph(include_unk); - self.get_target_func_callers(global_cg, &mut graph); - self.get_callees_of_callees(global_cg, &mut graph); + self.get_target_func_callers(global_cg, &mut graph, include_unk); + self.get_callees_of_callees(global_cg, &mut graph, include_unk); + debug!("{:?}", graph); self.convert_graph_to_networkx( graph, global_cg, binary_name, output_path, with_metadata, + node_feature_type, "onehopcgcallers", ); } @@ -223,6 +322,7 @@ impl AGCJFunctionCallGraphs { println!("{:?}", self.imports) } + #[allow(clippy::too_many_arguments)] fn convert_graph_to_networkx( &self, graph: Graph, @@ -230,21 +330,167 @@ impl AGCJFunctionCallGraphs { binary_name: &str, output_path: &String, with_metadata: &bool, + node_feature_type: Option, type_suffix: &str, ) { - if *with_metadata { - let type_suffix = type_suffix.to_owned() + "-meta"; - let networkx_graph = - NetworkxDiGraph::from((graph, global_cg.function_metadata.as_ref().unwrap())); - self.graph_to_json_func_metadata( - binary_name, - output_path, - networkx_graph, - type_suffix.as_str(), - ) + // TODO: It look likes in downstream datasets, there are cases where graphs with a single node + // can make it through and dont't play very well with the loading in PyG. + // Need to devise a plan to format these correctly so they can still be loaded! 
+ // One option may be to include a self loop - Or probably better, just bounce em' + if *with_metadata & node_feature_type.is_some() { + if node_feature_type.as_ref().unwrap() == "finfo" { + let type_suffix = type_suffix.to_owned() + "-meta"; + let networkx_graph = NetworkxDiGraph::from(( + graph, + global_cg + .function_metadata + .as_ref() + .unwrap() + .as_afij() + .unwrap(), + )); + self.graph_to_json_func_metadata_finfo( + binary_name, + output_path, + networkx_graph, + type_suffix.as_str(), + ) + } else if node_feature_type.as_ref().unwrap() == "tiknib" { + let type_suffix = type_suffix.to_owned() + "-tiknib"; + let networkx_graph: NetworkxDiGraph = + NetworkxDiGraph::from(( + graph, + global_cg + .function_metadata + .as_ref() + .unwrap() + .as_agfj() + .unwrap(), + )); + self.graph_to_json_func_metadata_tiknib( + binary_name, + output_path, + networkx_graph, + type_suffix.as_str(), + ) + } } else { let networkx_graph = NetworkxDiGraph::from(graph); self.graph_to_json_func_node(binary_name, output_path, networkx_graph, type_suffix) }; } } + +#[cfg(test)] +mod tests { + use crate::files::AGCJFile; + use env_logger; + + fn return_test_file_oject() -> AGCJFile { + let mut call_graph_file = AGCJFile { + filename: "test-files/ls_cg.json".to_string(), + function_call_graphs: None, + output_path: "".to_string(), + function_metadata: None, + include_unk: false, + }; + + call_graph_file + .load_and_deserialize() + .expect("Failed to load data"); + call_graph_file + } + #[test] + fn test_function_call_graph_without_unks() { + let mut call_graph_file = return_test_file_oject(); + + // Get main function - No Unks + let raw_call_graph_data = &call_graph_file.function_call_graphs.clone().unwrap()[0]; + assert_eq!(raw_call_graph_data.name, "main".to_string()); + let local_call_graph = raw_call_graph_data.build_local_call_graph(&true); + assert_eq!(local_call_graph.node_count(), 20); + assert_eq!(local_call_graph.edge_count(), 19); + let local_call_graph = 
raw_call_graph_data.build_local_call_graph(&false); + assert_eq!(local_call_graph.node_count(), 20); + assert_eq!(local_call_graph.edge_count(), 19); + } + + #[test] + fn test_function_call_graph_with_callees_without_unks() { + let mut call_graph_file = return_test_file_oject(); + + // include_unk = true + let raw_call_graph_data = &call_graph_file.function_call_graphs.clone().unwrap()[0]; + assert_eq!(raw_call_graph_data.name, "main".to_string()); + let mut local_call_graph = raw_call_graph_data.build_local_call_graph(&true); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &true); + assert_eq!(local_call_graph.node_count(), 37); + assert_eq!(local_call_graph.edge_count(), 39); + + // include_unk = false + let mut local_call_graph = raw_call_graph_data.build_local_call_graph(&false); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &false); + assert_eq!(local_call_graph.node_count(), 37); + assert_eq!(local_call_graph.edge_count(), 39); + } + + #[test] + fn test_function_call_graph_with_unks() { + let call_graph_file = return_test_file_oject(); + + // sym.func.100004d11 - One unknown + let raw_call_graph_data = &call_graph_file.function_call_graphs.unwrap()[2]; + assert_eq!(raw_call_graph_data.name, "sym.func.100004d11".to_string()); + + let local_call_graph = raw_call_graph_data.build_local_call_graph(&true); + assert_eq!(local_call_graph.node_count(), 26); + assert_eq!(local_call_graph.edge_count(), 25); + let local_call_graph = raw_call_graph_data.build_local_call_graph(&false); + assert_eq!(local_call_graph.node_count(), 25); + assert_eq!(local_call_graph.edge_count(), 24); + } + + #[test] + fn test_function_call_graph_with_callees_with_unks() { + let mut call_graph_file = return_test_file_oject(); + + // sym.func.100004d11 - One unknown + let raw_call_graph_data = &call_graph_file.function_call_graphs.clone().unwrap()[2]; + assert_eq!(raw_call_graph_data.name, "sym.func.100004d11".to_string()); + + let mut 
local_call_graph = raw_call_graph_data.build_local_call_graph(&true); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &true); + assert_eq!(local_call_graph.node_count(), 31); + assert_eq!(local_call_graph.edge_count(), 33); + + let mut local_call_graph = raw_call_graph_data.build_local_call_graph(&false); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &false); + assert_eq!(local_call_graph.node_count(), 30); + assert_eq!(local_call_graph.edge_count(), 32); + } + + #[test] + fn test_function_call_graph_callees_and_callers_with_unks() { + let mut call_graph_file = return_test_file_oject(); + + // sym.func.100004d11 - One unknown + let raw_call_graph_data = &call_graph_file.function_call_graphs.clone().unwrap()[2]; + assert_eq!(raw_call_graph_data.name, "sym.func.100004d11".to_string()); + + let mut local_call_graph = raw_call_graph_data.build_local_call_graph(&true); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &true); + raw_call_graph_data.get_target_func_callers(&call_graph_file, &mut local_call_graph, &true); + assert_eq!(local_call_graph.node_count(), 32); + assert_eq!(local_call_graph.edge_count(), 34); + + let mut local_call_graph = raw_call_graph_data.build_local_call_graph(&false); + raw_call_graph_data.get_callees_of_callees(&call_graph_file, &mut local_call_graph, &false); + raw_call_graph_data.get_target_func_callers( + &call_graph_file, + &mut local_call_graph, + &false, + ); + assert_eq!(local_call_graph.node_count(), 31); + assert_eq!(local_call_graph.edge_count(), 33); + } +} diff --git a/src/agfj.rs b/src/agfj.rs index 097fa3d..83c52aa 100644 --- a/src/agfj.rs +++ b/src/agfj.rs @@ -1,9 +1,10 @@ -use crate::bb::{ACFJBlock, FeatureType}; +use crate::bb::{ACFJBlock, FeatureType, TikNibFeaturesBB}; #[cfg(feature = "inference")] use crate::inference::InferenceJob; use crate::networkx::{DGISNode, DiscovreNode, GeminiNode, NetworkxDiGraph, 
NodeType}; -use crate::utils::{check_or_create_dir, get_save_file_path}; +use crate::utils::{average, check_or_create_dir, get_save_file_path}; use itertools::Itertools; +use ordered_float::OrderedFloat; use petgraph::prelude::Graph; use petgraph::visit::Dfs; use serde::{Deserialize, Serialize}; @@ -30,7 +31,7 @@ struct EdgePair { #[derive(Serialize, Deserialize, Debug)] pub struct AGFJFunc { - name: String, + pub name: String, nargs: u64, ninstr: u64, nlocals: u64, @@ -421,6 +422,120 @@ impl AGFJFunc { graph[idx] = format!("{hex:#x} / {hex}"); } } + + pub fn generate_tiknib_cfg_features(&self, architecture: &String) -> TikNibFunc { + let mut basic_block_features = Vec::new(); + + for block in &self.blocks { + let feats = block.get_tiknib_features(architecture); + basic_block_features.push(feats) + } + + TikNibFunc::from((&self.name, basic_block_features)) + } +} + +#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)] +pub struct TikNibFunc { + pub name: String, + pub features: TikNibFuncFeatures, +} + +impl Default for TikNibFunc { + fn default() -> Self { + TikNibFunc { + name: "default".to_string(), + features: TikNibFuncFeatures::default(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)] +pub struct TikNibFuncFeatures { + // Averages + pub avg_arithshift: OrderedFloat, + pub avg_compare: OrderedFloat, + pub avg_ctransfer: OrderedFloat, + pub avg_ctransfercond: OrderedFloat, + pub avg_dtransfer: OrderedFloat, + pub avg_float: OrderedFloat, + pub avg_total: OrderedFloat, + // Sum + pub sum_arithshift: OrderedFloat, + pub sum_compare: OrderedFloat, + pub sum_ctransfer: OrderedFloat, + pub sum_ctransfercond: OrderedFloat, + pub sum_dtransfer: OrderedFloat, + pub sum_float: OrderedFloat, + pub sum_total: OrderedFloat, +} + +impl Default for TikNibFuncFeatures { + fn default() -> Self { + TikNibFuncFeatures { + avg_arithshift: OrderedFloat(0.0), + avg_compare: OrderedFloat(0.0), + avg_ctransfer: OrderedFloat(0.0), + 
avg_ctransfercond: OrderedFloat(0.0), + avg_dtransfer: OrderedFloat(0.0), + avg_float: OrderedFloat(0.0), + avg_total: OrderedFloat(0.0), + sum_arithshift: OrderedFloat(0.0), + sum_compare: OrderedFloat(0.0), + sum_ctransfer: OrderedFloat(0.0), + sum_ctransfercond: OrderedFloat(0.0), + sum_dtransfer: OrderedFloat(0.0), + sum_float: OrderedFloat(0.0), + sum_total: OrderedFloat(0.0), + } + } +} + +// This is a bit odd but is to make sure the JSON output is formatted nice! Each avg_* field is `average` over the per-block values of the matching counter and each sum_* field is their plain sum. +impl From<(&String, Vec)> for TikNibFunc { + fn from(input: (&String, Vec)) -> Self { + TikNibFunc { + name: input.0.to_string(), + features: TikNibFuncFeatures { + avg_arithshift: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.arithshift).collect(), + )), + avg_compare: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.compare).collect(), + )), + avg_ctransfer: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.ctransfer).collect(), + )), + avg_ctransfercond: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.ctransfercond).collect(), + )), + avg_dtransfer: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.dtransfer).collect(), + )), + avg_float: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.float).collect(), + )), + avg_total: OrderedFloat::from(average( + input.1.iter().map(|ele| ele.total).collect(), + )), + sum_arithshift: OrderedFloat::from( + input.1.iter().map(|ele| ele.arithshift).sum::(), + ), + sum_compare: OrderedFloat::from(input.1.iter().map(|ele| ele.compare).sum::()), + sum_ctransfer: OrderedFloat::from( + input.1.iter().map(|ele| ele.ctransfer).sum::(), + ), + sum_ctransfercond: OrderedFloat::from( + input.1.iter().map(|ele| ele.ctransfercond).sum::(), + ), + sum_dtransfer: OrderedFloat::from( + input.1.iter().map(|ele| ele.dtransfer).sum::(), + ), + sum_float: OrderedFloat::from(input.1.iter().map(|ele| ele.float).sum::()), + sum_total: OrderedFloat::from(input.1.iter().map(|ele| ele.total).sum::()), + }, + } + } } 
#[cfg(test)] diff --git a/src/bb.rs b/src/bb.rs index 77d367d..271bd5d 100644 --- a/src/bb.rs +++ b/src/bb.rs @@ -18,6 +18,7 @@ pub enum FeatureType { Gemini, DiscovRE, DGIS, + Tiknib, ModelEmbedded, Encoded, Invalid, @@ -97,6 +98,18 @@ pub struct ACFJBlock { pub switchop: Option, } +// Data Transfer + Misc have been removed. +// Paper shows its a weak feature +pub struct TikNibFeaturesBB { + pub arithshift: f32, + pub compare: f32, + pub ctransfer: f32, + pub ctransfercond: f32, + pub dtransfer: f32, + pub float: f32, + pub total: f32, +} + impl FeatureType { // Returns the corresponding feature map given a provided FeatureType // These feature maps are used to provide the functionality that handles @@ -477,6 +490,139 @@ impl ACFJBlock { n_ins } + + pub fn get_tiknib_features(&self, architecture: &String) -> TikNibFeaturesBB { + let mut features = TikNibFeaturesBB { + arithshift: 0.0, + compare: 0.0, + ctransfer: 0.0, + ctransfercond: 0.0, + dtransfer: 0.0, + float: 0.0, + total: 0.0, + }; + + for ins in self.ops.iter() { + if ins.r#type != "invalid" { + let opcode = ins + .opcode + .as_ref() + .unwrap() + .split_whitespace() + .next() + .unwrap(); + if architecture == "ARM" { + // Arith + Shifts + if ARM_GRP_ARITH.contains(&opcode) || ARM_GRP_SHIFT.contains(&opcode) { + features.arithshift += 1.0 + } + // Compare + if ARM_GRP_CMP.contains(&opcode) || ARM_GRP_FLOAT_CMP.contains(&opcode) { + features.compare += 1.0 + } + // Call Transfer + if ARM_GRP_CTRANSFER.contains(&opcode) { + features.ctransfer += 1.0 + } + // Call Transfer + Cond + if ARM_GRP_CTRANSFER.contains(&opcode) + || ARM_GRP_COND_CTRANSFER.contains(&opcode) + { + features.ctransfercond += 1.0 + } + // Data Transfer + if ARM_GRP_DTRANSFER.contains(&opcode) + || ARM_GRP_FLOAT_DTRANSFER.contains(&opcode) + { + features.dtransfer += 1.0 + } + + // FLoat Operations + if ARM_GRP_FLOAT_DTRANSFER.contains(&opcode) + || ARM_GRP_FLOAT_CMP.contains(&opcode) + || ARM_GRP_FLOAT_ARITH.contains(&opcode) + { + 
features.float += 1.0 + } + // total + features.total += 1.0 + } else if architecture == "MIPS" { + // Arith + Shifts + if MIPS_GRP_ARITH.contains(&opcode) || MIPS_GRP_SHIFT.contains(&opcode) { + features.arithshift += 1.0 + } + // Compare + if MIPS_GRP_CMP.contains(&opcode) || MIPS_GRP_FLOAT_CMP.contains(&opcode) { + features.compare += 1.0 + } + // Call Transfer + if MIPS_GRP_CTRANSFER.contains(&opcode) { + features.ctransfer += 1.0 + } + // Call Transfer + Cond + if MIPS_GRP_CTRANSFER.contains(&opcode) + || MIPS_GRP_COND_CTRANSFER.contains(&opcode) + { + features.ctransfercond += 1.0 + } + // Data Transfer + if MIPS_GRP_DTRANSFER.contains(&opcode) + || MIPS_GRP_FLOAT_DTRANSFER.contains(&opcode) + { + features.dtransfer += 1.0 + } + + // FLoat Operations + if MIPS_GRP_FLOAT_DTRANSFER.contains(&opcode) + || MIPS_GRP_FLOAT_CMP.contains(&opcode) + || MIPS_GRP_FLOAT_ARITH.contains(&opcode) + { + features.float += 1.0 + } + // total + features.total += 1.0 + } else if architecture == "X86" { + // Arith + Shifts + if X86_GRP_ARITH.contains(&opcode) || X86_GRP_SHIFT.contains(&opcode) { + features.arithshift += 1.0 + } + // Compare + if X86_GRP_CMP.contains(&opcode) || X86_GRP_FLOAT_CMP.contains(&opcode) { + features.compare += 1.0 + } + // Call Transfer + if X86_GRP_CTRANSFER.contains(&opcode) { + features.ctransfer += 1.0 + } + // Call Transfer + Cond + if X86_GRP_CTRANSFER.contains(&opcode) + || X86_GRP_COND_CTRANSFER.contains(&opcode) + { + features.ctransfercond += 1.0 + } + // Data Transfer + if X86_GRP_DTRANSFER.contains(&opcode) + || X86_GRP_FLOAT_DTRANSFER.contains(&opcode) + { + features.dtransfer += 1.0 + } + + // FLoat Operations + if X86_GRP_FLOAT_DTRANSFER.contains(&opcode) + || X86_GRP_FLOAT_CMP.contains(&opcode) + || X86_GRP_FLOAT_ARITH.contains(&opcode) + { + features.float += 1.0 + } + // total + features.total += 1.0 + } else { + unreachable!("The architecture provided is not possible.") + } + } + } + features + } } mod tests { diff --git 
a/src/consts.rs b/src/consts.rs index 607c88c..575d211 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -2611,3 +2611,1097 @@ pub const MIPS_CALL: [&str; 8] = [ ]; pub const MIPS_COMPARE: [&str; 4] = ["slt", "sltu", "slti", "sltiu"]; + +// TikNib Instruction Categories +// Shamlessly taken from https://github.com/SoftSec-KAIST/TikNib/blob/bb8d3f33808d4cbe8128d52e252525ebd6f05c3e/tiknib/feature/asm_const.py +// I think all of these have been derived from Capstone some how - Something to look at another day +pub const X86_GRP_DTRANSFER: [&str; 147] = [ + // general purpose instructions + "cmov", + "cmova", + "cmovae", + "cmovb", + "cmovbe", + "cmovc", + "cmove", + "cmovg", + "cmovge", + "cmovl", + "cmovle", + "cmovna", + "cmovnae", + "cmovnb", + "cmovnbe", + "cmovnc", + "cmovne", + "cmovng", + "cmovnge", + "cmovnl", + "cmovnle", + "cmovno", + "cmovnp", + "cmovns", + "cmovnz", + "cmovo", + "cmovp", + "cmovpe", + "cmovpo", + "cmovs", + "cmovz", + "bswap", + "xchg", + "xadd", + "cmpxchg", + "cmpxchg8b", + "pop", + "popa", + "popad", + "push", + "pusha", + "pushad", + "cdq", + "cdqe", + "cbw", + "cwd", + "cwde", + "mov", + "movd", + "movq", + "movabs", + "movsx", + "movsxd", + "movzx", + "movzxd", + // string + "movs", + "movsb", + "movsd", + "movsw", + "stos", + "stosb", + "stosd", + "stosw", + "lods", + "lodsb", + "lodsd", + "lodsw", + // segment register + "lds", + "les", + "lfs", + "lgs", + "lss", + // user mode extended + "xsave", + "xsavec", + "xsaveopt", + "xrstor", + "xgetbv", + "xsetbv", + // bmi1, bmi2 + "bextr", + "blsi", + "pdep", + "pext", + // mmx + "packssdw", + "packsswb", + "packusdw", + "packuswb", + "punpckhbw", + "punpckhdq", + "punpckhwd", + "punpcklbw", + "punpckldq", + "punpcklwd", + "emms", + // sse 64-bit integer + "pmovmskb", + "pshufw", + // sse2 128-bit integer + "movdqa", + "movdqu", + "movq2dq", + "movdq2q", + "pshuflw", + "pshufhw", + "pshufd", + "punpcklqdq", + "punpckhqdq", + // ssse2 + "pshufb", + "palignr", + // sse4 + "movntdqa", + 
"pblendvb", + "pblendw", + "pinsrb", + "pinsrd", + "pinsrq", + "pextrb", + "pextrw", + "pextrd", + "pextrq", + "pmovsxbw", + "pmovzxbw", + "pmovsxbd", + "pmovzxbd", + "pmovsxwd", + "pmovzxwd", + "pmovsxbq", + "pmovzxbq", + "pmovsxwq", + "pmovzxwq", + "pmovsxdq", + "pmovzxdq", + "packusdw", + "lgdt", + "sgdt", + "lldt", + "sldt", + "ltr", + "str", + "lidt", + "sidt", + "mov", + "lmsw", + "smsw", + "clts", + "lsl", + "lar", + "verr", + "verw", + // 64-bit + "cdqe", + "cqo", +]; + +pub const X86_GRP_FLOAT_DTRANSFER: [&str; 126] = [ + // floating point instrutions + "fld", + "fst", + "fstp", + "fild", + "fist", + "fistp", + "fbld", + "fbstp", + "fxch", + "fcmovb", + "fcmovbe", + "fcmove", + "fcmovnb", + "fcmovnbe", + "fcmovne", + "fcmovnu", + "fcmovu", + // floating point load const instructions + "fld1", + "fldz", + "fldpi", + "fldl2e", + "fldln2", + "fldl2t", + "fldlg2", + // fpu register related + "fclex", + "ffree", + "finit", + "fldcw", + "fldenv", + "fnclex", + "fninit", + "fnop", + "fnsave", + "fnstcw", + "fnstenv", + "fnstsw", + "frstor", + "fsave", + "fstcw", + "fstenv", + "fstsw", + // sse + "movaps", + "movups", + "movhps", + "movhlps", + "movlps", + "movlhps", + "movmskps", + "movss", + // sse2 + "movapd", + "movupd", + "movhpd", + "movhlpd", + "movlpd", + "movlhpd", + "movmskpd", + "movsd", + // sse shuffle + "shufps", + "unpckhps", + "unpcklps", + // sse2 shuffle + "shufpd", + "unpckhpd", + "unpcklpd", + // sse conversion + "cvtpi2ps", + "cvtsi2ss", + "cvtps2pi", + "cvttps2pi", + "cvtss2si", + "cvttss2si", + // sse2 conversion + "cvtpd2pi", + "cvttpd2pi", + "cvtpi2pd", + "cvtpd2dq", + "cvttpd2dq", + "cvtdq2pd", + "cvtps2pd", + "cvtpd2ps", + "cvtss2sd", + "cvtsd2ss", + "cvtsd2si", + "cvttsd2si", + "cvtsi2sd", + "cvtdq2ps", + "cvtps2dq", + "cvttps2dq", + // sse mxcsr state + "ldmxcsr", + "stmxcsr", + // sse 64-bit + "pextrw", + "pinsrw", + // sse cache + "maskmovq", + "movntq", + "movntps", + "prefetch", + "sfence", + // sse3 + "fisttp", + "lddqu", + 
"movshdup", + "movsldup", + "movddup", + // sse4 + "blendpd", + "blendps", + "blendvpd", + "blendvps", + "extractps", + "insertps", + // 16-bit fp + "vcvtps2ph", + "vcvtps2ph", + // vector + "valign", + "vblend", + "vcompress", + "vextract", + "vinsert", + "vmov", + "vfixup", + "vget", + "vexpand", + "vcvt", + "vpblend", + "vpbroad", + "vpcompress", + "vperm", + "vpexpand", + "vpmov", + "vpscatter", + "vscatter", + "vshuf", +]; + +// Miscellaneous Instructions: +pub const X86_GRP_MISC: [&str; 19] = [ + "nop", + "ud", + "ud2", + "lea", + "xlat", + "xlatb", + "cpuid", + "movbe", + "prefetchw", + "prefetchwt1", + "clflush", + "clflushopt", + // sse2 cache + "clflush", + "lfence", + "mfence", + "maskmovdqu", + "movntpd", + "movntdq", + "movnti", +]; + +pub const X86_GRP_ARITH: [&str; 106] = [ + // general purpose binary arithmetic instructions + "adcx", + "adox", + "adc", + "add", + "xadd", + "sub", + "sbb", + "imul", + "mul", + "idiv", + "div", + "inc", + "dec", + "neg", + "cmp", + // decimal arithmetic instructions + "daa", + "das", + "aaa", + "aas", + "aam", + "aad", + // flag + "stc", + "clc", + "cmc", + "cld", + "std", + // bmi1, bmi2 + "mulx", + // mmx + "padd", + "paddb", + "paddw", + "paddd", + "paddsb", + "paddsw", + "paddusb", + "paddusw", + "psub", + "psubb", + "psubw", + "psubd", + "psubsb", + "psubsw", + "psubusb", + "psubusw", + "pmulhw", + "pmullw", + "pmaddwd", + // sse 64bit integer + "pavgb", + "pavgw", + "pmaxub", + "pmaxsb", + "pminub", + "pminsb", + "pmulhuw", + "psadbw", + // sse 128-bit integer + "pmuludq", + "paddq", + "psubq", + // ssse3 + "phaddw", + "phaddsw", + "phaddd", + "phsubw", + "phsubsw", + "phsubd", + "pabsb", + "pabsw", + "pabsd", + "pabsq", + "pmaddubsw", + "pmulhrsw", + "psignb", + "psignw", + "psignd", + // sse4 + "pmulld", + "pmuldq", + "pminuw", + "pminud", + "pminsb", + "pminsd", + "pmaxuw", + "pmaxud", + "pmaxsb", + "pmaxsd", + "roundps", + "roundpd", + "roundss", + "roundsd", + "pmpsadbw", + // aesni + "aesdec", + 
"aesdeclast", + "aesenc", + "aesenclast", + "aesimc", + "aeskeygenassist", + "pclmulqdq", + // sha1 + "sha1msg1", + "sha1msg2", + "sha1nexte", + "sha1rnds4", + "sha256msg1", + "sha256msg2", + "sha256rnds2", + "crc32", + // bmi1, bmi2 + "blsmsk", + "blsr", + "clac", + "stac", +]; + +pub const X86_GRP_FLOAT_CMP: [&str; 39] = [ + // floating point compare instructions + "fcom", "fcomp", "fcompp", "fucom", "fucomp", "fucompp", "ficom", "ficomp", "fcomi", "fucomi", + "fcomip", "fucomip", "ftst", "fxam", // sse + "cmpps", "cmpeqps", "cmpneqps", "cmpltps", "cmpnltps", "cmpss", "cmpeqss", "cmpneqss", + "cmpltss", "cmpnltss", "comiss", "ucomiss", "cmppd", "cmpeqpd", "cmpneqpd", "cmpltpd", + "cmpnltpd", "cmpsd", "cmpeqsd", "cmpneqsd", "cmpltsd", "cmpnltsd", "comisd", "ucomisd", + // vector + "vpcmp", +]; + +pub const X86_GRP_FLOAT_ARITH: [&str; 87] = [ + // - floating point instructions: + "fadd", "faddp", "fiadd", "fsub", "fsubp", "fisub", "fsubr", "fsubrp", "fisubr", "fmul", + "fmulp", "fimul", "fdiv", "fdivp", "fidiv", "fdivr", "fdivrp", "fidivr", "fprem", "fprem1", + "fabs", "fchs", "frndint", "fscale", "fsqrt", "fxtract", + // floating point transcendental instructions + "fsin", "fcos", "fsincos", "fptan", "fpatan", "f2xm1", "fyl2x", "fyl2xp1", + // fpu register related + "fincstp", "fdecstp", // sse + "addps", "addss", "subps", "subss", "mulps", "mulss", "divps", "divss", "rcpps", "rcpss", + "sqrtps", "sqrtss", "rsqrtps", "rsqrtss", "maxps", "maxss", "minps", "minss", + // sse2 + "addsd", "subsd", "mulsd", "divsd", "rcpsd", "sqrtsd", "rsqrtsd", "maxsd", "minsd", + // sse3 + "addsubps", "addsubpd", "haddps", "hsubps", "haddpd", "hsubpd", // sse4 + "dppd", "dpps", // vector + "vpmax", "vpmin", "vrcp", "vrndscal", "vrsqrt", "vscale", "addpd", "addsd", "mulpd", "mulsd", + "subpd", "subsd", "divpd", "divsd", "rcppd", "rcpsd", +]; + +pub const X86_GRP_CMP: [&str; 25] = [ + "cmp", + "comi", + "clt", + // from dtransfer + "cmpxchg", + "cmpxchg8b", + // from bit + "test", + // 
from string + "cmps", + "cmpsb", + "cmpsd", + "cmpsw", + // mmx + "pcmpeqb", + "pcmpeqw", + "pcmpeqd", + "pcmpgtb", + "pcmpgtw", + "pcmpgtd", + // sse4 + "phminposuw", + "ptest", + "pcmpeqq", + // sse4.2 + "pcmpestri", + "pcmpestrm", + "pcmpistri", + "pcmpistrm", + "pcmpgtq", + // vector + "vptest", +]; + +// Shift and Rotate Instructions: +pub const X86_GRP_SHIFT: [&str; 29] = [ + // general purpose instructions + "sar", "shr", "sal", "shl", "shrd", "shld", "ror", "rol", "rcr", "rcl", + // bmi1, bmi2 + "rorx", "sarx", "shlx", "shrx", // mmx + "psllw", "pslld", "psllq", "psrlw", "psrld", "psrlq", "psraw", "psrad", + // sse2 128-bit integer + "pslldq", "psrldq", // vector + "vprol", "vpror", "vpsra", "vpsll", "vpsra", +]; + +// Logical Instructions: +pub const X86_GRP_LOGIC: [&str; 18] = [ + // general purpose instructions + "and", "not", "or", "xor", // bmi1, bmi2 + "andn", // mmx + "pand", "pandn", "por", "pxor", // sse + "andps", "andnps", "orps", "xorps", // sse2 + "andpd", "andnpd", "orpd", "xorpd", // vector + "vpterlog", +]; + +// bit and byte instructions: +pub const X86_GRP_BIT: [&str; 46] = [ + // general purpose instructions + "seta", "setae", "setb", "setbe", "setc", "sete", "setg", "setge", "setl", "setle", "setna", + "setnae", "setnb", "setnbe", "setnc", "setne", "setng", "setnge", "setnl", "setnle", "setno", + "setnp", "setns", "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "test", "crc32", + // bmi1, bmi2 + "blsmsk", "blsr", "clac", "stac", // from bit + "test", "bt", "bts", "btr", "btc", "bsf", "bsr", "popcnt", "tzcnt", "lzcnt", +]; + +// control transfer instructions: +pub const X86_GRP_CTRANSFER: [&str; 36] = [ + // general purpose instructions + "jmp", "call", "ret", "iret", "int", "into", "bound", "enter", "leave", // flag + "cli", "sti", // sse2 + "pause", // sse3 + "monitor", "mwait", "xabort", "xacquire", "xrelease", "xbegin", "xend", "xtest", "hlt", + "syscall", "sysenter", "sysexit", "sysret", "fwait", "wait", + // vm related 
instructions + "vmcall", "vmlaunch", "vmmcall", "vmresume", "vmrun", "vmfunc", "vmclear", "vmxon", "vmxoff", +]; + +pub const X86_GRP_COND_CTRANSFER: [&str; 44] = [ + // general purpose instructions + "ja", + "jae", + "jb", + "jbe", + "jc", + "jcxz", + "je", + "jecxz", + "jrcxz", + "jg", + "jge", + "jl", + "jle", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", + "loop", + "loope", + "loopne", + "loopnz", + "loopz", + // string + "rep", + "rep movsq", + "rep stosq", + "repne", + "repnz", + "repe", + "repz", +]; + +// ==================== ARM 32 ============================================= +pub const ARM_GRP_DTRANSFER: [&str; 101] = [ + // general purpose instructions + "lda", "adr", "adrp", "ldr", "ldrd", "ldrb", "ldrbt", "ldrh", "ldrs", "ldrsb", "ldrsbt", + "ldrsh", "ldrsht", "ldrt", "ldrht", "str", "strb", "strd", "strh", "strbt", "strt", "ldm", + "ldmda", "ldmdb", "ldmib", "stm", "stmda", "stmdb", "stmib", "pld", "swp", "mov", "movi", + "movk", "movz", "movt", "movn", "mvn", "mvni", "stp", "ldp", "rfeib", + // coprocessor data operations + "cdp", "mcr", "mcrr", "mrc", "mrr", "ldc", "ldcl", "stc", "stcl", "push", "sbfx", "sbfiz", + "bfx", "bfxil", "ubfx", "ubfiz", "vld", "vst", "vst2", "vstmdb", "vtbl", "vtbx", "zip", "zip1", + "zip2", "uzp", "uzp1", "uzp2", "xtn", "xtn2", "csel", "ld1", "ld2", "ld4", "st1", "st2", "st4", + "ldpsw", "ldrsw", "sxtab", "sxtb", "sxth", "sxtw", "ext", "extr", "ins", "uxtab", "uxtb", + "uxth", "uxtw", "bfc", "bfi", "bic", "clz", "rev", "rev16", "rev32", "rev64", "cset", +]; + +pub const ARM_GRP_FLOAT_DTRANSFER: [&str; 33] = [ + // floating point data transfer instructions + "fcpy", "fcvtms", "fcvtmu", "fcvtzs", "fcvtzu", "fcvt", "fld", "fst", "fmr", "fmd", "fms", + "fmx", "fsito", "fuito", "ftosi", "ftoui", "fmov", "umov", "ldur", "ldurb", "ldurh", "ldursb", + "ldursh", "ldursw", "stur", "sturb", "sturh", 
"stursb", "stursh", "stursw", "dup", "scvtf", + "ucvtf", +]; + +pub const ARM_GRP_MISC: [&str; 13] = [ + "udf", "nop", "mrs", "msr", "mar", "mra", "vmrs", "vmsr", "dbg", "dmb", "dsb", "isb", "setend", +]; + +// binary arithmetic instructions: +pub const ARM_GRP_ARITH: [&str; 106] = [ + // general purpose instructions + "add", "addw", "addp", "addv", "adc", "sub", "sbc", "rsb", "rsc", "cmn", "clz", "mul", "mla", + "mls", "cinc", "cinv", "neg", "negs", "div", "smax", "smaxv", "smin", "sminv", "umull", + "umlal", "umlal2", "smla", "smlal", "smlaltt", "smul", "smsub", "madd", "mneg", "msub", + "smaddl", "smnegl", "smsubl", "smulh", "smull", "umaddl", "umnegl", "umsubl", "umulh", "umull", + "sdiv", "udiv", "mia", "qadd", "qsub", "qdadd", "qdsub", "qasx", "sadd", "saddw", "saddw2", + "sasx", "shadd", "shasx", "smlsd", "smmla", "smuad", "smusd", "ssub", "sat", "sax", "uadd", + "uaddw", "uaddw2", "usat", "usax", "uasx", "uhadd", "uhasx", "umlsd", "ummla", "uqadd", + "uqsax", "uqsub", "uhsax", "vaba", "vabd", "max", "min", "vmla", "vmls", "vnmul", "vnmla", + "vnmls", "vfms", "vfms", "vfma", "vfms", "vfnma", "vfnms", "vrecpe", "vsqrt", "vqrsh", "umull", + "umaal", "umlal", "usada8", "vneg", "cneg", "csinc", "csinv", "csneg", +]; + +pub const ARM_GRP_FLOAT_ARITH: [&str; 26] = [ + // floating point arithmetic instructions + "fabs", "fabd", "fadd", "fsub", "fdiv", "fmul", "fnmul", "fsqrt", "fmac", "fnmac", "fmsc", + "fnmsc", "fneg", "fmadd", "fmsub", "fnmadd", "fnmsub", "fpint", "fcsel", "fmax", "fmin", + "fmla", "fmls", "frintm", "frintp", "frint", +]; + +pub const ARM_GRP_SHIFT: [&str; 17] = [ + // shift operations + "asr", "lsl", "lsr", "ror", "rrx", "pkhbt", "pkhtb", "shl", "ushl", "ushll", "ushll2", "ushr", + "usra", "sshl", "sshll", "sshll2", "sshr", +]; + +pub const ARM_GRP_CMP: [&str; 14] = [ + // compare instructions + "cmeq", "cmgt", "cmhi", "cmhs", "cmp", "ccmn", "ccmp", "vceq", "vcge", "vcgt", "vcle", "vclt", + // from bit + "tst", "teq", +]; + +pub const 
ARM_GRP_FLOAT_CMP: [&str; 8] = [ + "vcmp", "vcmpe", "fcmpe", "fcmgt", "fcm", "fcmp", "fccmp", "vcm", +]; + +// Logical Instructions: +pub const ARM_GRP_LOGIC: [&str; 5] = ["and", "orr", "eor", "eon", "orn"]; + +// bit and byte instructions: +pub const ARM_GRP_BIT: [&str; 15] = [ + "tst", "teq", "bsl", "bif", "bit", "bfc", "bfi", "bic", "clz", "rbit", "rev", "rev16", "rev32", + "rev64", "cset", +]; + +// control transfer instructions: +pub const ARM_GRP_CTRANSFER: [&str; 29] = [ + "b", "br", "bl", "blr", "bx", "blx", "bxj", "bal", "blal", "bxal", "blxal", "bxjal", "swi", + "bkpt", "ret", "yield", "wfe", "wfi", "sev", "sevl", "cps", "brk", "hlt", "svc", "hvc", "smc", + "trap", "eret", // arm pop is return + "pop", +]; + +pub const ARM_GRP_COND_CTRANSFER: [&str; 74] = [ + "beq", "bne", "bcs", "bcc", "bmi", "bpl", "bvs", "bvc", "bhi", "bls", "bge", "blt", "bgt", + "ble", "bleq", "blne", "blcs", "blcc", "blmi", "blpl", "blvs", "blvc", "blhi", "blls", "blge", + "bllt", "blgt", "blle", "bxeq", "bxne", "bxcs", "bxcc", "bxmi", "bxpl", "bxvs", "bxvc", "bxhi", + "bxls", "bxge", "bxlt", "bxgt", "bxle", "blxeq", "blxne", "blxcs", "blxcc", "blxmi", "blxpl", + "blxvs", "blxvc", "blxhi", "blxls", "blxge", "blxlt", "blxgt", "blxle", "bxjeq", "bxjne", + "bxjcs", "bxjcc", "bxjmi", "bxjpl", "bxjvs", "bxjvc", "bxjhi", "bxjls", "bxjge", "bxjlt", + "bxjgt", "bxjle", "tbz", "tbnz", // combined instructions + "cbz", "cbnz", +]; + +// ==================== MIPS 32 ============================================= +// data transfer +// reference : https://www.cs.cornell.edu/courses/cs3410/2008fa/MIPS_Vol2.pdf +pub const MIPS_GRP_DTRANSFER: [&str; 75] = [ + "lb", "lbu", "lh", "lhu", "ll", "lw", "lwu", "ld", "ldl", "ldr", "lwl", "lwr", "pref", "sb", + "sc", "sd", "sdl", "sdr", "sh", "st", "sw", "swl", "swr", "sync", "lui", "ldxc1", "lwxc1", + "sdxc1", "swxc1", "mfhi", "mflo", "mov", "movf", "movn", "movt", "movz", "mthi", "mtlo", + "move", "cvt", "ldc", "lwc", "sdc", "swc", // move + "cfc", "ctc",
"mfc", "mtc", "pref", "sync", "splat", "cfcmsa", "ctcmsa", "copy", "push", "seh", + "seb", "wsbh", "dsbh", "dshd", "mtc0", "mfc0", "ldc3", "lwc3", "sdc3", "swc3", + // coprocessor load, store + "cop2", "ldc2", "lwc2", "sdc2", "swc2", // cop move + "cfc2", "ctc2", "mfc2", "mtc2", +]; + +pub const MIPS_GRP_FLOAT_DTRANSFER: [&str; 34] = [ + // floating point + "frint", "fclass", // load, store, memory + "ldc1", "lwc1", "sdc1", "swc1", // move + "cfc1", "ctc1", "mfc1", "fmov", "movf", "movn", "movt", "movz", "mtc1", // convert + "fex", "ffint", "ffq", "ftint", "ftrun", "ftq", "fcvt", "floor", "round", "trunc", "ffloor", + "fround", "ftrunc", "dmfc", "dmfc1", "dmtc", "dmtc1", "mthc1", "mfhc1", +]; + +// binary arithmetic instructions: +pub const MIPS_GRP_ARITH: [&str; 57] = [ + // general purpose instructions + "add", "addi", "addu", "addiu", "sub", "subu", "mul", "mult", "multu", "clo", "clz", "div", + "divu", "madd", "maddu", "msub", "msubu", "aadd", "asub", "abs", "neg", "negu", + // additional + "daa", "dsub", "dsubu", "dsubiu", "ddiv", "ddivu", "ddiviu", "dmul", "dmult", "dmultu", "dotp", + "dpadd", "dpsub", "madd", "max", "min", "msub", "mod", "sat", "hsub", "sqrt", "aui", "daui", + "dahi", "dati", "addiupc", "auipc", "aluipc", "dadd", "daddu", "daddiu", "dclz", + // from bit + "bmz", "bmn", "bneg", +]; + +pub const MIPS_GRP_CMP: [&str; 45] = [ + "slt", "slti", "sltiu", "sltu", // compare instructions + "cmp", "ceq", "cle", "clt", "cf", "cun", "ceq", "cueq", "colt", "cult", "cole", "cule", "csf", + "cngle", "cseq", "cngl", "clt", "cnge", "cle", "cngt", "cmp", "ceq", "cle", "clt", "cf", "cun", + "ceq", "cueq", "colt", "cult", "cole", "cule", "csf", "cngle", "cseq", "cngl", "clt", "cnge", + "cle", "cngt", "c", +]; + +pub const MIPS_GRP_FLOAT_CMP: [&str; 3] = [ + // floating point compare instructions + "facf", "fc", "fs", +]; + +pub const MIPS_GRP_SHIFT: [&str; 25] = [ + // shift operation + "sll", "sllv", "srl", "srlv", "sra", "srav", "shl", "shr", "sld", "dsll", 
"dsll32", "dsllv", + "dsra", "dsra32", "dsrav", "dsrl", "dsrl32", "dsrlv", "rotr", "rotrv", "drotr", "drotr32", + "drotrv", "lsa", "dlsa", +]; + +pub const MIPS_GRP_FLOAT_ARITH: [&str; 19] = [ + // floating point + "fabs", "fadd", "fdiv", "fmadd", "fmsub", "fmul", "fneg", "fnmadd", "fnmsub", "fexp", "flog", + "fmax", "fmin", "frcp", "recip", "frecip", "frsqrt", "fsqrt", "fsub", +]; + +// Logical Instructions: +pub const MIPS_GRP_LOGIC: [&str; 8] = ["and", "andi", "nor", "or", "not", "ori", "xor", "xori"]; + +// bit and byte instructions: +pub const MIPS_GRP_BIT: [&str; 14] = [ + "bins", "dins", "dext", "ext", "ins", "bmz", "bmn", "bneg", "bsel", "bset", "bclr", + // bit wise count + "nloc", "nlzc", "pcnt", +]; + +pub const MIPS_GRP_MISC: [&str; 7] = ["nop", "ssnop", "cache", "tlbp", "tlbr", "tlbwi", "tlbwr"]; + +// control transfer instructions: +pub const MIPS_GRP_CTRANSFER: [&str; 32] = [ + "b", "bal", "j", "jal", "jr", "jalr", "break", "syscall", "pause", "wait", "hlt", "eret", + "deret", "sdbbp", "bkpt", "ret", "mfc0", "mtc0", // mips pop is return + "pop", // float + "bc1", "bc1f", "bc1t", "bc1fl", "bc1tl", // cop + "bc2f", "bc2t", "bc2fl", "bc2tl", "bc3f", "bc3t", "bc3fl", "bc3tl", +]; + +pub const MIPS_GRP_COND_CTRANSFER: [&str; 33] = [ + "beq", "beqz", "bne", "bge", "bgez", "bgezal", "bgtz", "blez", "bltz", "bltzal", "bnel", + "bnez", "bnz", "teq", "teqi", "tge", "tgei", "tgeiu", "tgeu", "tlt", "tlti", "tltiu", "tltu", + "tne", "tnei", "beql", "bgezall", "bgezl", "bgtzl", "blezl", "bltzall", "bltzl", "bnel", +]; + +// ================= POWERPC 32 ============================================= +// data transfer +// reference : https://www.ibm.com/docs/en/aix/7.3?topic=reference-appendix-f-powerpc-instructions +// reference : https://files.openpower.foundation/s/dAYSdGzTfW4j2r2 +pub const PPC_UNKOWN: [&str; 8] = [ + // condition register _ + "creqv", "crmove", "crnot", "crset", "mcrf", // move condition register field + "eqv", "isync", // instruction 
synchronize + // unknown 64-bit + "vsel", +]; + +pub const PPC_GRP_DTRANSFER: [&str; 178] = [ + // load + "la", + // load byte + "lbz", + "lbzcix", + "lbzu", + "lbzux", + "lbzx", + // load double + "ld", + "ldarx", + "ldbrx", + "ldcix", + "ldu", + "ldux", + "ldx", + // load half + "lha", + "lhau", + "lhaux", + "lhax", + "lhbrx", + "lhz", + "lhzcix", + "lhzu", + "lhzux", + "lhzx", + // load immediate + "li", + "lis", + "lmw", + "lswi", + // load vector + "lvebx", + "lvehx", + "lvewx", + "lvsl", + "lvsr", + "lvx", + "lvxl", + // load word + "lwa", + "lwarx", + "lwaux", + "lwax", + "lwbrx", + "lwsync", + "lwz", + "lwzcix", + "lwzu", + "lwzux", + "lwzx", + // load vsx + "lxsdx", + "lxvd2x", + "lxvdsx", + "lxvw4x", + // store byte + "stb", + "stbcix", + "stbu", + "stbux", + "stbx", + // store double + "std", + "stdbrx", + "stdcix", + "stdcx", + "stdu", + "stdux", + "stdx", + // store half word + "sth", + "sthbrx", + "sthcix", + "sthu", + "sthux", + "sthx", + "stmw", + "stswi", + // store vector + "stvebx", + "stvehx", + "stvewx", + "stvx", + "stvxl", + // store word + "stw", + "stwbrx", + "stwcix", + "stwcx", + "stwu", + "stwux", + "stwx", + // store vsx + "stxsdx", + "stxvd2x", + "stxvw4x", + "mr", + // move from + "mfamr", + "mfasr", + "mfbr0", + "mfbr1", + "mfbr2", + "mfbr3", + "mfbr4", + "mfbr5", + "mfbr6", + "mfbr7", + "mfcfar", + "mfcr", + "mfctr", + "mfdar", + "mfdbatl", + "mfdbatu", + "mfdccr", + "mfdcr", + "mfdear", + "mfdscr", + "mfdsisr", + "mfesr", + "mffs", + "mfibatl", + "mfibatu", + "mficcr", + "mflr", + "mfmsr", + "mfocrf", + "mfpid", + "mfpvr", + "mfrtcl", + "mfrtcu", + "mfspefscr", + "mfspr", + "mfsr", + "mfsrin", + "mfsrr2", + "mfsrr3", + "mftb", + "mftbhi", + "mftblo", + "mftbu", + "mftcr", + "mfvscr", + "mfxer", + // move to + "mtamr", + "mtbr0", + "mtbr1", + "mtbr2", + "mtbr3", + "mtbr4", + "mtbr5", + "mtbr6", + "mtbr7", + "mtcfar", + "mtcr", + "mtcrf", + "mtctr", + "mtdar", + "mtdbatl", + "mtdbatu", + "mtdccr", + "mtdcr", + "mtdear", +
"mtdscr", + "mtdsisr", + "mtesr", + "mtfsb0", + "mtfsb1", + "mtfsf", + "mtfsfi", + "mtibatl", + "mtibatu", + "mticcr", + "mtlr", + "mtmsr", + "mtmsrd", + "mtocrf", + "mtpid", + "mtspefscr", + "mtspr", + "mtsr", + "mtsrin", + "mtsrr2", + "mtsrr3", + "mttbhi", + "mttbl", + "mttblo", + "mttbu", + "mttcr", + "mtvscr", + "mtxer", +]; + +pub const PPC_GRP_FLOAT_DTRANSFER: [&str; 25] = [ + "fmr", "lfd", "lfdu", "lfdux", "lfdx", "lfiwax", "lfiwzx", "lfs", "lfsu", "lfsux", "lfsx", + "stfd", "stfdu", "stfdux", "stfdx", "stfiwx", "stfs", "stfsu", "stfsux", "stfsx", + // vector merge + "vmrghb", "vmrglb", // vector pack + "vpkuhum", "vpkuwum", // vector permute + "vperm", +]; + +// binary arithmetic instructions: +pub const PPC_GRP_ARITH: [&str; 28] = [ + "neg", "add", "addc", "adde", "addi", "addic", "addis", "addme", "addze", "divd", "divdu", + "divw", "divwu", "mulhd", "mulhdu", "mulhw", "mulhwu", "mulld", "mulli", "mullw", "sub", + "subc", "subf", "subfc", "subfe", "subfic", "subfme", "subfze", +]; + +// floating point arithmetic instructions +pub const PPC_GRP_FLOAT_ARITH: [&str; 24] = [ + "fabs", "fmadd", "fmsub", "fneg", "fadd", "fadds", "fdiv", "fdivs", "fmul", "fmuls", "fsub", + "fsubs", "frsp", // floating round to single-precision + "fsqrt", // floating convert + "fcfid", "fctidz", "fctiwz", // vector add + "vaddubm", "vadduhm", "vadduwm", // vector multiply + "vmsumuhm", "vmulouh", // vector subtract + "vsubuhs", "vsubuwm", +]; + +pub const PPC_GRP_CMP: [&str; 9] = [ + "cmpb", "cmpd", "cmpdi", "cmpld", "cmpldi", "cmplw", "cmplwi", "cmpw", "cmpwi", +]; + +// floating point compare instructions +pub const PPC_GRP_FLOAT_CMP: [&str; 8] = [ + "fcmpo", "fcmpu", // vector compare + "vcmpequb", "vcmpequw", "vcmpgtsh", "vcmpgtsw", "vcmpgtub", "vcmpgtuw", +]; + +// shift operation +pub const PPC_GRP_SHIFT: [&str; 43] = [ + "rotlw", "rotlwi", "rotld", "rotldi", "slbia", "slbie", "slbmfee", "slbmte", "sld", "sldi", + "slw", "slwi", "srad", "sradi", "sraw", "srawi", "srd", 
"srw", "srwi", "rldcl", "rldcr", + "rldic", "rldicl", "rldicr", "rldimi", "rlwimi", "rlwinm", "rlwnm", + // vector shift left + "vslb", "vsldoi", "vslh", "vslw", // vector splat + "vspltb", "vsplth", "vspltisb", "vspltish", "vspltisw", "vspltw", + // vector shift right + "vsraw", "vsrb", "vsrh", "vsrw", // vector rotate + "vrlw", +]; + +// Logical Instructions: +pub const PPC_GRP_LOGIC: [&str; 49] = [ + "not", "and", "andc", "andi", "andis", "crand", "crandc", "crnand", "crnor", "cror", "crorc", + "crxor", "evand", "evandc", "evnand", "evnor", "evor", "evorc", "evxor", "nand", "nor", "or", + "orc", "ori", "oris", "qvfand", "qvfandc", "qvfnand", "qvfnor", "qvfor", "qvforc", "qvfxor", + "vand", "vandc", "vnand", "vnor", "vor", "vorc", "vxor", "xor", "xori", "xoris", "xxland", + "xxlandc", "xxlnand", "xxlnor", "xxlor", "xxlorc", "xxlxor", +]; + +// bit and byte instructions: +pub const PPC_GRP_BIT: [&str; 8] = [ + // condition register clear + "crclr", // count leading zeros + "cntlzd", "cntlzw", // clear high-order bits register + "clrldi", "clrlwi", // extend sign + "extsb", "extsw", "extsh", +]; + +pub const PPC_GRP_MISC: [&str; 1] = ["nop"]; + +// control transfer instructions: +pub const PPC_GRP_CTRANSFER: [&str; 26] = [ + "b", "bl", "bctrl", "bctr", "bla", "blr", "blrl", "ba", "sc", // trap + "trap", "tw", "tweq", "tweqi", "twgt", "twgti", "twi", "twlgt", "twlgti", "twllt", "twllti", + "twlt", "twlti", "twne", "twnei", "twu", "twui", +]; + +pub const PPC_GRP_COND_CTRANSFER: [&str; 61] = [ + "bc", "bca", "bcctr", "bcctrl", "bcl", "bcla", "bclr", "bclrl", "bct", "bdnz", "bdnza", + "bdnzf", "bdnzfa", "bdnzfl", "bdnzfla", "bdnzflrl", "bdnzl", "bdnzla", "bdnzlr", "bdnzlrl", + "bdnzt", "bdnzta", "bdnztl", "bdnztla", "bdnztlr", "bdnztlrl", "bdz", "bdza", "bdzf", "bdzfa", + "bdzfl", "bdzfla", "bdzflr", "bdzflrl", "bdzl", "bdzla", "bdzlr", "bdzlrl", "bdzt", "bdzta", + "bdztl", "bdztla", "bdztlr", "bdztlrl", "bf", "bfa", "bfctr", "bfctrl", "bfl", "bfla", "bflr", + 
"bflrl", "brinc", "bt", "bta", "btctr", "btctrl", "btl", "btla", "btlr", "btlrl", +]; diff --git a/src/dedup.rs b/src/dedup.rs index 78e5202..2ffbc1b 100644 --- a/src/dedup.rs +++ b/src/dedup.rs @@ -1,4 +1,4 @@ -use crate::networkx::{CallGraphFuncWithMetadata, NetworkxDiGraph}; +use crate::networkx::{CallGraphNodeFeatureType, CallGraphTypes}; use anyhow::Result; use indicatif::ParallelProgressIterator; use itertools::Itertools; @@ -14,7 +14,6 @@ use std::hash::{Hash, Hasher}; use std::path::Path; use std::string::String; - use std::{fs, vec}; use walkdir::{DirEntry, WalkDir}; @@ -90,11 +89,12 @@ pub struct EsilFuncStringCorpus { pub binary_name_index: Vec, pub uniq_binaries: Vec, pub arch_index: Vec, + pub output_path: String, } /// A collection of processed Esil Function String files impl EsilFuncStringCorpus { - pub fn new(directory: &String) -> Result { + pub fn new(directory: &String, output_path: &String) -> Result { let mut filepaths = Vec::new(); let mut binary_name_index = Vec::new(); let mut uniq_binaries = Vec::new(); @@ -122,12 +122,20 @@ impl EsilFuncStringCorpus { } } } + + let output_path: String = if !output_path.ends_with('/') { + format!("{}{}", output_path, "/") + } else { + output_path.to_string() + }; + Ok(EsilFuncStringCorpus { loaded_data: None, filepaths, binary_name_index, uniq_binaries, arch_index, + output_path, }) } @@ -270,7 +278,7 @@ impl EsilFuncStringCorpus { if !just_stats { let uniques_to_drop = json!(unique_func_hash_tuples); - let fname_string = format!("{}-dedup.json", &target_binary_name); + let fname_string = format!("{}{}-dedup.json", self.output_path, &target_binary_name); serde_json::to_writer( &File::create(fname_string).expect("Failed to create writer"), &uniques_to_drop, @@ -280,14 +288,22 @@ impl EsilFuncStringCorpus { } } +/// Struct and Impl for de-duplicating Call Graph Corpus's #[derive(Debug)] -pub struct OneHopCGCorpus { +pub struct CGCorpus { pub filepaths: Vec, pub output_path: String, + pub filepath_format: 
String, + pub node_type: CallGraphNodeFeatureType, } -impl OneHopCGCorpus { - pub fn new(directory: &String, output_path: &String) -> Result { +impl CGCorpus { + pub fn new( + directory: &String, + output_path: &String, + filepath_format: &String, + node_type: CallGraphNodeFeatureType, + ) -> Result { if !Path::new(output_path).exists() { fs::create_dir(output_path).expect("Failed to create output directory!"); info!("Output path not found - Creating {}", output_path) @@ -307,9 +323,17 @@ impl OneHopCGCorpus { info!("Returning One Hop CG Corpus Struct"); - Ok(OneHopCGCorpus { + let output_path = if output_path.ends_with('/') { + output_path.to_owned() + } else { + output_path.to_owned() + &*"/".to_string() + }; + + Ok(CGCorpus { filepaths, output_path: output_path.to_string(), + filepath_format: filepath_format.to_string(), + node_type, }) } @@ -319,14 +343,8 @@ impl OneHopCGCorpus { s.finish() } - // This is very slow O(N)^2 - fn dedup_corpus( - data: &mut Vec>>, - mut filepaths: Vec, - ) -> ( - Vec>>, - Vec, - ) { + //fn dedup_corpus(data: &mut Vec>, filepaths: &mut Vec) { + fn dedup_corpus(data: &mut Vec>, filepaths: &mut Vec) { debug!("Creating the removal index"); let mut seen = HashSet::new(); @@ -345,75 +363,108 @@ impl OneHopCGCorpus { data.remove(*ele); filepaths.remove(*ele); } - (data.to_vec(), filepaths) } - pub fn process_corpus(self) { + fn get_binary_name_cisco(filepath: &String) -> String { + // Example: x86-gcc-9-O3_nping_cg-onehopcgcallers-meta + let binary_intermediate = Path::new(filepath).parent().unwrap().file_name().unwrap(); + binary_intermediate + .to_string_lossy() + .split('_') + .nth(1) + .unwrap() + .to_string() + } + fn get_binary_name_binkit(filepath: &String) -> String { + // Example: tar-1.34_gcc-8.2.0_x86_32_O3_rmt_cg-onehopcgcallers-meta + let binary_intermediate = Path::new(filepath).parent().unwrap().file_name().unwrap(); + binary_intermediate + .to_string_lossy() + .split('_') + .rev() + .nth(1) + .unwrap() + .to_string() + } + 
+ fn extract_binary_from_fps(&self) -> Vec { let mut fp_binaries = Vec::new(); // Process the file paths to get the associated binary of each path info!("Processing Filepaths to get binaries"); for file in &self.filepaths { - let binary_intermediate = Path::new(file).parent().unwrap().file_name().unwrap(); - let binary = binary_intermediate - .to_string_lossy() - .split('_') - .nth(1) - .unwrap() - .to_string(); - + let binary = match self.filepath_format.as_str() { + "cisco" => Self::get_binary_name_cisco(file), + "binkit" => Self::get_binary_name_binkit(file), + "trex" => Self::get_binary_name_binkit(file), + _ => unreachable!(), + }; + trace!("Extracted Binary Name: {:?} from {:?}", binary, file); fp_binaries.push(binary) } + fp_binaries + } + fn get_unique_binary_fps(&self, fp_binaries: Vec) -> Vec> { // Generate binary specific filepath vectors - let unqiue_binaries: Vec<_> = fp_binaries.iter().unique().collect(); - let mut unique_binaries_fps: Vec> = vec![Vec::new(); unqiue_binaries.len()]; + let unique_binaries: Vec<_> = fp_binaries.iter().unique().collect(); + let mut unique_binaries_fps: Vec> = vec![Vec::new(); unique_binaries.len()]; for (file, binary) in self.filepaths.iter().zip(fp_binaries.iter()) { - unique_binaries_fps - [unqiue_binaries.iter().position(|&x| x == binary).unwrap()] + unique_binaries_fps[unique_binaries.iter().position(|&x| x == binary).unwrap()] .push(file.clone()); } - info!("Loading the filepaths"); unique_binaries_fps - .par_iter() - .progress() - .enumerate() - .for_each(|(idx, fp_subset)| { - let mut subset_loaded_data = Vec::new(); + } - for ele in fp_subset.iter() { - let data = - read_to_string(ele).expect(&format!("Unable to read file - {:?}", ele)); + fn load_subset(&self, fp_subset: &[String]) -> Vec> { + let mut subset_loaded_data = Vec::new(); + for ele in fp_subset.iter() { + let data = read_to_string(ele).expect(&format!("Unable to read file - {:?}", ele)); - let json: NetworkxDiGraph = - serde_json::from_str(&data) - 
.expect(&format!("Unable to load function data from {}", ele)); + let json = serde_json::from_str::(&data) + .expect(&format!("Unable to load function data from {}", ele)); - if !json.nodes.is_empty() { - subset_loaded_data.push(Some(json)) - } else { - subset_loaded_data.push(None) - } - } + let nodes_empty = match self.node_type { + CallGraphNodeFeatureType::CGName => json.as_cg_name().unwrap().nodes.is_empty(), + CallGraphNodeFeatureType::CGMeta => json.as_cg_meta().unwrap().nodes.is_empty(), + CallGraphNodeFeatureType::TikNib => json.as_tik_nib().unwrap().nodes.is_empty(), + }; + + if !nodes_empty { + subset_loaded_data.push(Some(json)) + } else { + subset_loaded_data.push(None) + } + } + println!("{:?}", subset_loaded_data); + subset_loaded_data + } + + pub fn process_corpus(self) { + let fp_binaries = self.extract_binary_from_fps(); - subset_loaded_data.retain(|c| c.is_some()); + // Generate binary specific filepath vectors + let mut unique_binaries_fps = self.get_unique_binary_fps(fp_binaries); - info!("Starting to deduplicate the corpus - {}", idx); - let (subset_loaded_data, fp_subset) = - Self::dedup_corpus(&mut subset_loaded_data, fp_subset.to_vec()); - let subset_loaded_data: Vec> = + info!("Loading the filepaths"); + unique_binaries_fps + .par_iter_mut() + .progress() + .enumerate() + .for_each(|(idx, fp_subset)| { + let mut subset_loaded_data: Vec> = + self.load_subset(fp_subset); + debug!("Starting to deduplicate the corpus - {}", idx); + Self::dedup_corpus(&mut subset_loaded_data, fp_subset); + let subset_loaded_data: Vec = subset_loaded_data.into_iter().flatten().collect(); - info!("Starting to save - {}", idx); + debug!("Starting to save - {}", idx); self.save_corpus(subset_loaded_data, fp_subset); - info!("File processing complete - {}", idx); + debug!("File processing complete - {}", idx); }); } - pub fn save_corpus( - &self, - subset_loaded_data: Vec>, - fp_subset: Vec, - ) { + pub fn save_corpus(&self, subset_loaded_data: Vec, fp_subset: &mut 
[String]) { subset_loaded_data .iter() .zip(fp_subset.iter()) @@ -423,17 +474,18 @@ impl OneHopCGCorpus { .rev() .take(2) .collect::>(); - + trace!("Fixed Path (First Pass): {:?}", fixed_path); let fixed_path = fixed_path .iter() .map(|c| c.as_os_str().to_string_lossy().to_string()) .rev() .collect::>(); - + trace!("Fixed Path (Second Pass): {:?}", fixed_path); let dirs = format!("{}{}", self.output_path, fixed_path[0]); fs::create_dir_all(&dirs).expect("Failed to create output directory!"); let fixed_path = format!("{}/{}", dirs, fixed_path[1]); + trace!("Fixed Path (Final Pass): {:?}", fixed_path); serde_json::to_writer( &File::create(fixed_path).expect("Failed to create writer"), &data_ele, @@ -442,3 +494,288 @@ impl OneHopCGCorpus { }); } } + +mod tests { + + // Test Dedup on typed CG's + #[test] + fn test_cg_corpus_gen() { + // CG Corpus Generation + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGName, + ); + assert_eq!(corpus.as_ref().unwrap().filepaths.len(), 12); + assert_eq!( + corpus.as_ref().unwrap().output_path, + "test-files/cg_dedup/deduped/".to_string() + ); + assert_eq!( + corpus.as_ref().unwrap().filepath_format, + "cisco".to_string() + ); + + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped/".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGName, + ); + assert_eq!(corpus.as_ref().unwrap().filepaths.len(), 12); + assert_eq!( + corpus.as_ref().unwrap().output_path, + "test-files/cg_dedup/deduped/".to_string() + ); + assert_eq!( + corpus.as_ref().unwrap().filepath_format, + "cisco".to_string() + ); + } + + #[test] + fn test_extract_binary_from_fps() { + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGMeta, + ); + + let 
fp_binaries = corpus.unwrap().extract_binary_from_fps(); + assert_eq!(fp_binaries.len(), 12); + assert_eq!( + fp_binaries, + vec![ + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin".to_string(), + "testbin2".to_string(), + "testbin2".to_string(), + "testbin2".to_string(), + "testbin2".to_string(), + ] + ) + } + + #[test] + fn test_get_unique_binary_fps() { + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGMeta, + ) + .unwrap(); + let fp_binaries = corpus.extract_binary_from_fps(); + let unique_binary_fps = corpus.get_unique_binary_fps(fp_binaries); + + assert_eq!(unique_binary_fps.len(), 2); + assert_eq!(unique_binary_fps[0].len(), 8); + assert_eq!(unique_binary_fps[1].len(), 4); + } + + #[test] + fn test_processing_unique_binary_collection() { + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGMeta, + ) + .unwrap(); + + let fp_binaries = corpus.extract_binary_from_fps(); + let unique_binary_fps = corpus.get_unique_binary_fps(fp_binaries); + + // Load the first collection which has dups + let mut subset_loaded = corpus.load_subset(&unique_binary_fps[0]); + assert_eq!(subset_loaded.len(), 8); + subset_loaded.retain(|c| c.is_some()); + assert_eq!(subset_loaded.len(), 8); + } + + #[test] + fn test_dedup_binary_subset() { + let corpus = CGCorpus::new( + &"test-files/cg_dedup/to_dedup".to_string(), + &"test-files/cg_dedup/deduped".to_string(), + &"cisco".to_string(), + CallGraphNodeFeatureType::CGMeta, + ) + .unwrap(); + let fp_binaries = corpus.extract_binary_from_fps(); + let mut unique_binary_fps = corpus.get_unique_binary_fps(fp_binaries); + + // Load the first collection which has 
dups + let mut subset_loaded = corpus.load_subset(&unique_binary_fps[0]); + subset_loaded.retain(|c| c.is_some()); + + // Prior to dedup + assert_eq!(subset_loaded.len(), 8); + CGCorpus::dedup_corpus(&mut subset_loaded, &mut unique_binary_fps[0]); + + // Subset + assert_eq!(subset_loaded.len(), 4); + + // Filepaths + assert_eq!(unique_binary_fps[0].len(), 4); + + // Check first node - should be function name + for (loaded_ele, filepath) in subset_loaded.iter().zip(unique_binary_fps[0].iter()) { + let inner = &loaded_ele.clone().unwrap(); + let loaded_func_name = &inner.as_cg_meta().unwrap().nodes[0].func_name; + let filepath_func_name: Vec<_> = Path::new(filepath) + .components() + .rev() + .take(1) + .collect::>(); + + let filepath_func_name = filepath_func_name[0] + .as_os_str() + .to_string_lossy() + .to_string(); + + let filepath_func_name = filepath_func_name.split("-").next().unwrap(); + + assert_eq!(loaded_func_name.to_owned(), filepath_func_name) + } + let subset_loaded: Vec = subset_loaded.into_iter().flatten().collect(); + + // Save corpus! + corpus.save_corpus(subset_loaded, &mut unique_binary_fps[0]); + + // Check the files saved! 
+ for file in WalkDir::new(&corpus.output_path) + .into_iter() + .filter_map(|file| file.ok()) + { + if file.path().to_string_lossy().ends_with(".json") { + let data = read_to_string(file.path()) + .expect(&format!("Unable to read file - {:?}", file)); + let json: NetworkxDiGraph = + serde_json::from_str::>(&data) + .expect(&format!("Unable to load function data from {:?}", file)); + + let filepath_func_name: Vec<_> = Path::new(file.file_name()) + .components() + .rev() + .take(1) + .collect::>(); + + let filepath_func_name = filepath_func_name[0] + .as_os_str() + .to_string_lossy() + .to_string(); + + let filepath_func_name = filepath_func_name.split("-").next().unwrap(); + + assert_eq!(json.nodes[0].func_name, filepath_func_name) + } + } + + // clean up + fs::remove_dir_all(&corpus.output_path).expect("Unable to remove directory!"); + } + + // Test binary name extraction + #[test] + fn test_binkit_binary_extraction() { + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"which-2.21_gcc-9.4.0_arm_32_O2_which_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string() + ), + "which" + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"recutils-1.9_gcc-11.2.0_mips_64_O3_recins_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string() + ), + "recins" + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"recutils-1.9_gcc-11.2.0_mips_64_O3_recsel_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string(), + ), + "recsel", + ); + } + + #[test] + fn test_cisco_binary_extraction() { + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"arm64-clang-9-Os_curl_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json".to_string() + ), + "curl" + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"x86-clang-9-Os_libcrypto.so.3_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string() + ), + 
"libcrypto.so.3" + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"x86-gcc-9-O3_unrar_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string(), + ), + "unrar", + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"/random/path/before/x86-gcc-9-O3_unrar_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json +".to_string(), + ), + "unrar", + ); + } + + #[test] + fn test_trex_binary_extraction() { + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"arm-32_binutils-2.34-O0_elfedit_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json".to_string() + ), + "elfedit" + ); + + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"arm-32_binutils-2.34-O0_objdump_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json".to_string() + ), + "objdump" + ); + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"arm-32_binutils-2.34-O0_nm-new_cg-onehopcgcallers-meta/sym.dummy-func-onehopcgcallers-meta.json".to_string() + ), + "nm-new" + ); + // __ for c++ bins that sometimes crop up + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit( + &"arm-32_binutils-2.34-O0_nm-new_cg-onehopcgcallers-meta/sym.dummy___func__-onehopcgcallers-meta.json".to_string() + ), + "nm-new" + ); + + assert_eq!( + crate::dedup::CGCorpus::get_binary_name_binkit(&"fast-disk/Dataset-2/cgs/x86-32_coreutils-8.32-O1_stat_cg-onehopcgcallers-meta/main-onehopcgcallers-meta.json".to_string()), + "stat" + ); + + assert_eq!(crate::dedup::CGCorpus::get_binary_name_binkit(&"/fast-disk/processed_datasets/Dataset-2/arm-32_binutils-2.34-O0_addr2line_cg-onehopcgcallers-meta/sym.adjust_relative_path-onehopcgcallers-meta.json".to_string()), + "addr2line") + } +} diff --git a/src/extract.rs b/src/extract.rs index 94a653f..8d01bd3 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -63,37 +63,37 @@ impl std::fmt::Display for ExtractionJob { #[derive(Default, Debug, Clone, PartialEq, 
Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct AFLJFuncDetails { - pub offset: i64, + pub offset: u64, pub name: String, - pub size: i64, + pub size: u64, #[serde(rename = "is-pure")] pub is_pure: String, - pub realsz: i64, + pub realsz: u64, pub noreturn: bool, - pub stackframe: i64, + pub stackframe: u64, pub calltype: String, - pub cost: i64, - pub cc: i64, - pub bits: i64, + pub cost: u64, + pub cc: u64, + pub bits: u64, #[serde(rename = "type")] pub type_field: String, - pub nbbs: i64, + pub nbbs: u64, #[serde(rename = "is-lineal")] pub is_lineal: bool, - pub ninstrs: i64, - pub edges: i64, - pub ebbs: i64, + pub ninstrs: u64, + pub edges: u64, + pub ebbs: u64, pub signature: String, pub minbound: i64, - pub maxbound: i64, + pub maxbound: u64, #[serde(default)] pub callrefs: Vec, #[serde(default)] pub datarefs: Vec, - pub indegree: Option, - pub outdegree: Option, - pub nlocals: Option, - pub nargs: Option, + pub indegree: Option, + pub outdegree: Option, + pub nlocals: Option, + pub nargs: Option, pub bpvars: Option>, pub spvars: Option>, pub regvars: Option>, @@ -101,7 +101,7 @@ pub struct AFLJFuncDetails { #[serde(default)] pub codexrefs: Option>, #[serde(default)] - pub dataxrefs: Option>, + pub dataxrefs: Option>, } #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -113,10 +113,10 @@ pub struct DataRef { #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Callref { - pub addr: i128, + pub addr: u64, #[serde(rename = "type")] pub type_field: String, - pub at: i64, + pub at: u64, } #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -151,10 +151,10 @@ pub struct Regvar { #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Codexref { - pub addr: i64, + pub addr: u64, #[serde(rename = "type")] pub type_field: String, - pub at: i64, + pub at: u64, } // Structs related to 
AEAFJ @@ -437,16 +437,18 @@ impl FileToBeProcessed { fn get_function_name_list(&self, r2p: &mut R2Pipe) -> Vec { info!("Getting function information from binary"); - let json = r2p.cmd("aflj").expect("aflj command failed"); - let json_obj: Vec = - serde_json::from_str(&json).expect("Unable to convert to JSON object!"); + let json = r2p + .cmd("aflj") + .expect(&format!("aflj command failed for {}", self.file_path)); + let json_obj: Vec = serde_json::from_str(&json) + .expect(&format!("Unable to convert to JSON object! - {}", json)); json_obj } fn get_function_xref_details( &self, - function_addr: i64, + function_addr: u64, r2p: &mut R2Pipe, ) -> Vec { info!("Getting function xref details"); @@ -471,11 +473,11 @@ impl FileToBeProcessed { json_obj } - fn get_function_info(&self, function_addr: i64, r2p: &mut R2Pipe) -> Vec { + fn get_function_info(&self, function_addr: u64, r2p: &mut R2Pipe) -> Vec { Self::go_to_address(r2p, function_addr); let json = r2p.cmd("afij").expect("afij command failed"); - let json_obj: Vec = - serde_json::from_str(&json).expect("Unable to convert to JSON object!"); + let json_obj: Vec = serde_json::from_str(&json) + .expect(&format!("Unable to convert to JSON object! 
- {}", json)); json_obj } @@ -497,7 +499,7 @@ impl FileToBeProcessed { .unwrap_or_else(|_| panic!("the world is ending: {}", f_name)); } - fn go_to_address(r2p: &mut R2Pipe, function_addr: i64) { + fn go_to_address(r2p: &mut R2Pipe, function_addr: u64) { r2p.cmd(format!("s @ {}", function_addr).as_str()) .expect("failed to seek addr"); } diff --git a/src/files.rs b/src/files.rs index 770f2a2..08a61b2 100644 --- a/src/files.rs +++ b/src/files.rs @@ -1,12 +1,13 @@ use crate::afij::{AFIJFeatureSubset, AFIJFunctionInfo}; use crate::agcj::AGCJFunctionCallGraphs; -use crate::agfj::AGFJFunc; +use crate::agfj::{AGFJFunc, TikNibFunc}; use crate::bb::{FeatureType, InstructionMode}; use crate::consts::*; use crate::errors::FileLoadError; #[cfg(feature = "inference")] use crate::inference::InferenceJob; use crate::utils::get_save_file_path; +use enum_as_inner::EnumAsInner; use indicatif::ParallelProgressIterator; use rayon::iter::ParallelIterator; use rayon::prelude::{IntoParallelRefIterator, IntoParallelRefMutIterator}; @@ -67,7 +68,7 @@ impl AGFJFile { /// Detects the architecture of a file by iterating through the functions /// until a call instruction type is found. Once found, the opcode is then /// matched with architecture specific options. 
- fn detect_architecture(&self) -> Option { + pub fn detect_architecture(&self) -> Option { let mut call_op: Option = None; for func in self.functions.as_ref().unwrap() { @@ -76,7 +77,6 @@ impl AGFJFile { if op.r#type == "call" || op.r#type == "rcall" { call_op = Some(op.disasm.as_ref().unwrap().clone()) } - if call_op.is_some() { let opcode = call_op.as_ref().unwrap().split_whitespace().next().unwrap(); if X86_CALL.contains(&opcode) { @@ -324,6 +324,26 @@ impl AGFJFile { }); } + pub fn tiknib_func_level_feature_gen(self) { + let arch = self.detect_architecture(); + + let mut func_feature_vectors = Vec::new(); + + for func in self.functions.unwrap().iter() { + let feature_vec = func[0].generate_tiknib_cfg_features(arch.as_ref().unwrap()); + func_feature_vectors.push(feature_vec); + } + + let json = json!(&func_feature_vectors); + let fname_string: String = get_save_file_path(&self.filename, &self.output_path, None); + let fname_string = format!("{}-tiknib.json", fname_string); + serde_json::to_writer( + &File::create(fname_string).expect("Failed to create writer"), + &json, + ) + .expect("Unable to write JSON"); + } + /// EXPERIMENTAL /// /// Generate a CFG where each basic blocks contents is embedded using a provided @@ -347,12 +367,20 @@ impl AGFJFile { } } +#[derive(Debug, Deserialize, Serialize, EnumAsInner)] +#[serde(untagged)] +pub enum FunctionMetadataTypes { + AFIJ(Vec), + AGFJ(Vec), +} + #[derive(Serialize, Deserialize, Debug)] pub struct AGCJFile { pub filename: String, pub function_call_graphs: Option>, pub output_path: String, - pub function_metadata: Option>, + pub function_metadata: Option, + pub include_unk: bool, } impl AGCJFile { @@ -387,14 +415,14 @@ impl AFIJFile { Ok(()) } - pub fn subset(&mut self) -> Vec { + pub fn subset(&mut self) -> FunctionMetadataTypes { let mut func_info_subsets: Vec = Vec::new(); debug!("Starting to subset functions"); for function in self.function_info.as_ref().unwrap().iter() { let subset = 
AFIJFeatureSubset::from(function); func_info_subsets.push(subset) } - func_info_subsets + FunctionMetadataTypes::AFIJ(func_info_subsets) } pub fn subset_and_save(&mut self) { let func_info_subsets = self.subset(); @@ -407,3 +435,27 @@ impl AFIJFile { .expect("Unable to write JSON"); } } + +#[derive(Serialize, Deserialize, Debug)] +pub struct TikNibFuncMetaFile { + pub filename: String, + pub function_info: Option>, + pub output_path: String, +} + +impl TikNibFuncMetaFile { + pub fn load_and_deserialize(&mut self) -> Result<(), FileLoadError> { + let data = read_to_string(&self.filename)?; + + #[allow(clippy::expect_fun_call)] + // Kept in to ensure that the JSON decode error message is printed alongside the filename + let json: Vec = serde_json::from_str(&data)?; + + self.function_info = Some(json); + Ok(()) + } + + pub fn subset(&mut self) -> FunctionMetadataTypes { + FunctionMetadataTypes::AGFJ(self.function_info.clone().unwrap()) + } +} diff --git a/src/main.rs b/src/main.rs index dcbf2e8..35a4cc8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -35,11 +35,13 @@ pub mod processors; pub mod tokeniser; pub mod utils; -use crate::dedup::{EsilFuncStringCorpus, OneHopCGCorpus}; +use crate::dedup::{CGCorpus, EsilFuncStringCorpus}; use crate::extract::ExtractionJobType; -use crate::files::{AFIJFile, AGCJFile}; +use crate::files::{AFIJFile, AGCJFile, FunctionMetadataTypes, TikNibFuncMetaFile}; use crate::tokeniser::{train_byte_bpe_tokeniser, TokeniserType}; use crate::utils::get_save_file_path; + +use crate::networkx::CallGraphNodeFeatureType; use bb::{FeatureType, InstructionMode}; #[cfg(feature = "goblin")] use binnfo::goblin_info; @@ -131,13 +133,26 @@ enum GenerateSubCommands { #[arg(short, long, value_name = "EMBED_DIM")] embed_dim: Option, - /// Toggle for call graphs to include AFIJ feature subsets + /// Num Threads + #[arg(short, long)] + num_threads: usize, + + /// Toggle for call graphs to include AFIJ feature subsets (For call graphs) #[arg(long, default_value = 
"false")] with_features: bool, - /// Filepath to the AFIJ function metadata + /// Filepath to the AFIJ function metadata (For call graphs) #[arg(long)] metadata_path: Option, + + /// Include unknown functions (For call graphs) + #[arg(long, default_value = "false")] + include_unk: bool, + + /// Metadata Type (For call graphs) + #[arg(short, long, value_name = "METADATA_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["finfo", "tiknib"]) + .map(|s| s.parse::().unwrap()),)] + metadata_type: Option, }, /// Generate NLP data from extracted data Nlp { @@ -183,6 +198,10 @@ enum GenerateSubCommands { /// The path for the generated output #[arg(short, long, value_name = "OUTPUT_PATH")] output_path: String, + /// Data Source Type + #[arg(short, long, value_parser = clap::builder::PossibleValuesParser::new(["finfo", "agfj"]) + .map(|s| s.parse::().unwrap()))] + data_source_type: String, }, /// Generate tokenisers from extracted data Tokeniser { @@ -265,16 +284,44 @@ enum Commands { }, /// Utility to remove duplicate entries within processed data Dedup { + #[command(subcommand)] + subcommands: DedupSubCommands, + }, +} + +#[derive(Subcommand, Clone)] +enum DedupSubCommands { + /// De-Dup generated call graphs + Cgs { /// The filename to dedup #[arg(short, long, value_name = "FILENAME")] filename: String, - /// Type of dedup - #[arg(short, long, value_name = "TYPE", value_parser = clap::builder::PossibleValuesParser::new(["esilfstr", "onehopcgs"]) - .map(|s| s.parse::().unwrap()))] - datatype: String, + /// Output path to save dedup corpus + #[arg(short, long, value_name = "OUTPUT_PATH")] + output_path: String, + + /// Number of threads to use with Rayon + #[arg(short, long, value_name = "NUM_THREADS", default_value = "2")] + num_threads: usize, + + /// The filepath_format of the dataset + #[arg(long,value_parser = clap::builder::PossibleValuesParser::new(["cisco", "binkit", "trex"]) + .map(|s| s.parse::().unwrap()), required = true)] + filepath_format: String, 
+ + /// The node feature type for call graphs + #[arg(long,value_parser = clap::builder::PossibleValuesParser::new(["cgmeta", "cgname", "tiknib"]) + .map(|s| s.parse::().unwrap()), required = true)] + node_feature_type: String, + }, + /// De-dup generate ESIL strings + Esil { + /// The filename to dedup + #[arg(short, long, value_name = "FILENAME")] + filename: String, - /// Output path to save dedup corpus - Only works for onehopcgs atm + /// Output path to save dedup corpus #[arg(short, long, value_name = "OUTPUT_PATH")] output_path: String, @@ -298,7 +345,7 @@ enum Commands { fn main() { let env = Env::default() - .filter_or("LOG_LEVEL", "info") + .filter_or("LOG_LEVEL", "error") .write_style_or("LOG_STYLE", "always"); env_logger::init_from_env(env); @@ -328,6 +375,9 @@ fn main() { embed_dim, with_features, metadata_path, + include_unk, + num_threads, + metadata_type, } => { let graph_data_type = match graph_type.as_str() { "cfg" => DataType::Cfg, @@ -338,6 +388,11 @@ fn main() { _ => DataType::Invalid, }; + rayon::ThreadPoolBuilder::new() + .num_threads(*num_threads) + .build_global() + .unwrap(); + if graph_data_type == DataType::Cfg && *with_features { warn!("The 'with_features' toggle is set but is not support for CFG generation. 
Will ignore.") }; @@ -441,6 +496,7 @@ fn main() { function_call_graphs: None, output_path: output_path.to_owned(), function_metadata: Some(metadata_subset), + include_unk: *include_unk, } } else { AGCJFile { @@ -448,6 +504,7 @@ fn main() { function_call_graphs: None, output_path: output_path.to_owned(), function_metadata: None, + include_unk: *include_unk, } }; file.load_and_deserialize() @@ -459,6 +516,8 @@ fn main() { &file.output_path, &file.filename, with_features, + &file.include_unk, + metadata_type.clone(), ); } } else if graph_data_type == DataType::OneHopCg { @@ -468,6 +527,8 @@ fn main() { &file.output_path, &file.filename, with_features, + &file.include_unk, + metadata_type.clone(), ); } } else if graph_data_type == DataType::CgWithCallers { @@ -477,6 +538,8 @@ fn main() { &file.output_path, &file.filename, with_features, + &file.include_unk, + metadata_type.clone(), ); } } else if graph_data_type == DataType::OneHopCgWithcallers { @@ -486,6 +549,8 @@ fn main() { &file.output_path, &file.filename, with_features, + &file.include_unk, + metadata_type.clone(), ); } } @@ -504,10 +569,9 @@ fn main() { file_paths_vec.len() ); // if without metadata - if !with_features { + if !with_features & metadata_type.is_none() { debug!("Creating call graphs without any node features"); - - file_paths_vec.par_iter().for_each(|path| { + file_paths_vec.par_iter().progress().for_each(|path| { let suffix = graph_type.to_owned().to_string(); let full_output_path = PathBuf::from(get_save_file_path( path, @@ -520,46 +584,55 @@ fn main() { function_call_graphs: None, output_path: output_path.to_owned(), function_metadata: None, + include_unk: *include_unk, }; debug!("Proceissing {}", file.filename); file.load_and_deserialize() .expect("Unable to load and desearilize JSON"); - if graph_data_type == DataType::Cg { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if 
graph_data_type == DataType::OneHopCg { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_data_type == DataType::CgWithCallers { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - with_features, - ); - } - } else if graph_data_type == DataType::OneHopCgWithcallers { - for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph_with_callers( - &file, - &file.output_path, - &file.filename, - with_features, - ); + for fcg in file.function_call_graphs.as_ref().unwrap() { + match graph_data_type { + DataType::Cg => { + fcg.to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + &file.include_unk, + None, + ); + } + DataType::OneHopCg => { + fcg.one_hop_to_petgraph( + &file, + &file.output_path, + &file.filename, + with_features, + &file.include_unk, + None, + ); + } + DataType::CgWithCallers => { + fcg.to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features, + &file.include_unk, + None, + ); + } + DataType::OneHopCgWithcallers => { + fcg.one_hop_to_petgraph_with_callers( + &file, + &file.output_path, + &file.filename, + with_features, + &file.include_unk, + None, + ); + } + _ => unreachable!("Not possible hopefully! 
:O"), } } } else { @@ -578,9 +651,12 @@ fn main() { exit(1) }; + if with_features & metadata_type.is_none() { + error!("with features requires metadata_type to be set") + } let mut metadata_paths_vec = get_json_paths_from_dir( metadata_path.as_ref().unwrap(), - Some("finfo".to_string()), + Some(metadata_type.as_ref().unwrap().to_string()), ); file_paths_vec.sort(); @@ -592,27 +668,43 @@ fn main() { .zip(metadata_paths_vec) .collect::>(); - combined_cgs_metadata.par_iter().for_each(|tup| { + combined_cgs_metadata.par_iter().progress().for_each(|tup| { let suffix = format!("{}-meta", graph_type.to_owned()); let full_output_path = PathBuf::from(get_save_file_path(&tup.0, output_path, Some(suffix))); if !full_output_path.is_dir() { let mut file = { - let mut metadata = AFIJFile { - filename: tup.1.clone(), - function_info: None, - output_path: "".to_string(), - }; - debug!("Attempting to load metadata file: {}", tup.1); - metadata - .load_and_deserialize() - .expect("Unable to load assocaited metadata file"); - let metadata_subset = metadata.subset(); + let metadata: Option; + if metadata_type.clone().unwrap() == *"finfo" { + let mut metadata_file = AFIJFile { + filename: tup.1.clone(), + function_info: None, + output_path: "".to_string(), + }; + debug!("Attempting to load metadata file: {}", tup.1); + metadata_file + .load_and_deserialize() + .expect("Unable to load associated metadata file"); + metadata = Some(metadata_file.subset()); + } else if metadata_type.clone().unwrap() == *"tiknib" { + let mut metadata_file = TikNibFuncMetaFile { + filename: tup.1.clone(), + function_info: None, + output_path: "".to_string(), + }; + + metadata_file.load_and_deserialize().expect("Unable to load associated metadata file"); + metadata = Some(metadata_file.subset()); + } else { + metadata = None + } + AGCJFile { filename: tup.0.to_owned(), function_call_graphs: None, output_path: output_path.to_owned(), - function_metadata: Some(metadata_subset), + function_metadata: metadata, + 
include_unk: *include_unk, } }; debug!("Attempting to load {}", file.filename); @@ -627,12 +719,14 @@ fn main() { &file.output_path, &file.filename, with_features, + &file.include_unk, + metadata_type.clone() ); } } else if graph_data_type == DataType::OneHopCg { debug!("Generating one hop call graphs using loaded cgs + metadata"); for fcg in file.function_call_graphs.as_ref().unwrap() { - fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features); + fcg.one_hop_to_petgraph(&file, &file.output_path, &file.filename, with_features, &file.include_unk, metadata_type.clone()); } } else if graph_data_type == DataType::CgWithCallers { debug!("Generating call graphs with callers using loaded cgs + metadata"); @@ -641,7 +735,9 @@ fn main() { &file, &file.output_path, &file.filename, - with_features + with_features, + &file.include_unk, + metadata_type.clone() ); } } else if graph_data_type == DataType::OneHopCgWithcallers { @@ -651,7 +747,9 @@ fn main() { &file, &file.output_path, &file.filename, - with_features + with_features, + &file.include_unk, + metadata_type.clone() ); } } @@ -666,18 +764,35 @@ fn main() { GenerateSubCommands::Metadata { input_path, output_path, + data_source_type, } => { - let mut file = AFIJFile { - filename: input_path.to_owned(), - function_info: None, - output_path: output_path.to_owned(), - }; - info!("Generating function metadata subsets"); - file.load_and_deserialize() - .expect("Unable to load and desearilize JSON"); - info!("Successfully loaded JSON"); - file.subset_and_save(); - info!("Generation complete"); + if data_source_type == "finfo" { + let mut file = AFIJFile { + filename: input_path.to_owned(), + function_info: None, + output_path: output_path.to_owned(), + }; + info!("Generating function metadata subsets"); + file.load_and_deserialize() + .expect("Unable to load and desearilize JSON"); + info!("Successfully loaded JSON"); + file.subset_and_save(); + info!("Generation complete"); + } else if data_source_type 
== "agfj" { + warn!("This currently only supports making TikNib features for single files"); + let mut file = AGFJFile { + functions: None, + filename: input_path.to_owned(), + output_path: output_path.to_string(), + min_blocks: 1, // Dummy + feature_type: None, + architecture: None, + reg_norm: false, // Dummy + }; + + file.load_and_deserialize().expect("Unable to load data"); + file.tiknib_func_level_feature_gen() + } } GenerateSubCommands::Nlp { path, @@ -867,35 +982,50 @@ fn main() { sequence, ); } - Commands::Dedup { - filename, - datatype, - output_path, - print_stats, - just_stats, - num_threads, - just_hash_value, - } => { - if datatype == "esilfstr" { - warn!("This only supports the Cisco Talos Binary Sim Dataset naming convention"); + Commands::Dedup { subcommands } => match subcommands { + DedupSubCommands::Cgs { + filename, + output_path, + num_threads, + filepath_format, + node_feature_type, + } => { rayon::ThreadPoolBuilder::new() .num_threads(*num_threads) .build_global() .unwrap(); - let corpus = EsilFuncStringCorpus::new(filename).unwrap(); - corpus.uniq_binaries.par_iter().progress().for_each(|name| { - corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) - }); - } else if datatype == "onehopcgs" { + warn!("This only supports the Cisco Talos Binary Sim Dataset naming convention"); if Path::new(filename).exists() { + let node_feature_type = CallGraphNodeFeatureType::new(node_feature_type); info!("Starting duplication process for One Hop Call Graphs"); - let corpus = OneHopCGCorpus::new(filename, output_path).unwrap(); + let corpus = + CGCorpus::new(filename, output_path, filepath_format, node_feature_type) + .unwrap(); corpus.process_corpus(); } else { error!("Filename provided does not exist! 
- {}", filename) } } - } + DedupSubCommands::Esil { + filename, + print_stats, + just_stats, + just_hash_value, + num_threads, + output_path, + } => { + rayon::ThreadPoolBuilder::new() + .num_threads(*num_threads) + .build_global() + .unwrap(); + + warn!("This only supports the Cisco Talos Binary Sim Dataset naming convention"); + let corpus = EsilFuncStringCorpus::new(filename, output_path).unwrap(); + corpus.uniq_binaries.par_iter().progress().for_each(|name| { + corpus.dedup_subset(name, *print_stats, *just_stats, *just_hash_value) + }); + } + }, } } diff --git a/src/networkx.rs b/src/networkx.rs index 7b0f9c2..be59509 100644 --- a/src/networkx.rs +++ b/src/networkx.rs @@ -1,4 +1,5 @@ use crate::afij::AFIJFeatureSubset; +use crate::agfj::TikNibFunc; use crate::bb::FeatureType; use enum_as_inner::EnumAsInner; use petgraph::prelude::Graph; @@ -29,6 +30,32 @@ pub enum NodeType { Discovere(DiscovreNode), } +#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize, EnumAsInner)] +#[serde(untagged)] +pub enum CallGraphTypes { + TikNib(NetworkxDiGraph), + CGMeta(NetworkxDiGraph), + CGName(NetworkxDiGraph), +} + +#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize)] +#[serde(untagged)] +pub enum CallGraphNodeFeatureType { + TikNib, + CGMeta, + CGName, +} + +impl CallGraphNodeFeatureType { + pub fn new(node_feature_type: &str) -> CallGraphNodeFeatureType { + match node_feature_type { + "cgmeta" => CallGraphNodeFeatureType::CGMeta, + "cgname" => CallGraphNodeFeatureType::CGName, + "tiknib" => CallGraphNodeFeatureType::TikNib, + _ => unreachable!("Invalid node type"), + } + } +} #[derive(Default, Copy, Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct GeminiNode { @@ -134,7 +161,7 @@ impl From<(i64, &Vec)> for DiscovreNode { } } -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] +#[derive(Default, Debug, Clone, PartialEq, Hash, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub 
struct CallGraphFuncNameNode { pub id: i64, @@ -234,6 +261,62 @@ impl From<(Graph, &Vec)> } } +#[derive(Default, Debug, Clone, PartialEq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct CallGraphTikNibFeatures { + pub id: i64, + pub func_name: String, + pub features: TikNibFunc, +} + +impl From<(Graph, &Vec)> for NetworkxDiGraph { + fn from( + src_graph: (Graph, &Vec), + ) -> NetworkxDiGraph { + let node_weights = src_graph.0.node_weights(); + let mut nodes: Vec = vec![]; + for (i, node_weight) in node_weights.enumerate() { + let subset_object = src_graph.1.iter().find(|ele| &ele.name == node_weight); + if let Some(subset_object) = subset_object { + nodes.push(CallGraphTikNibFeatures { + id: i as i64, + func_name: node_weight.to_owned(), + features: subset_object.clone(), + }) + } else { + nodes.push(CallGraphTikNibFeatures { + id: i as i64, + func_name: node_weight.to_owned(), + features: Default::default(), + }) + } + } + let mut adjacency: Vec> = vec![]; + let node_indices = src_graph.0.node_indices(); + + for node in node_indices { + let mut node_adjacency_vec = vec![]; + let node_edges = src_graph.0.edges(node); + for edge in node_edges { + let edge_entry = Adjacency { + id: edge.target().index(), + weight: edge.weight().to_owned(), + }; + node_adjacency_vec.push(edge_entry) + } + adjacency.push(node_adjacency_vec) + } + + NetworkxDiGraph { + adjacency, + directed: "True".to_string(), + graph: vec![], + multigraph: false, + nodes, + } + } +} + impl From<(&Graph, &Vec>, FeatureType)> for NetworkxDiGraph { fn from( input: (&Graph, &Vec>, FeatureType), diff --git a/src/utils.rs b/src/utils.rs index 10995ca..77fa795 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -78,6 +78,10 @@ pub fn check_or_create_dir(full_output_path: &String) { } } +/// Average +pub fn average(numbers: Vec) -> f32 { + numbers.iter().sum::() / numbers.len() as f32 +} #[cfg(test)] mod tests { use super::*; diff --git a/test-files/cg_dedup/raw/test_bin_2_cg.json 
b/test-files/cg_dedup/raw/test_bin_2_cg.json new file mode 100644 index 0000000..f261c69 --- /dev/null +++ b/test-files/cg_dedup/raw/test_bin_2_cg.json @@ -0,0 +1 @@ +[{"imports":["unk.0x3fe0"],"name":"entry0","size":47},{"imports":["sym..plt.got","sym.deregister_tm_clones"],"name":"sym.__do_global_dtors_aux","size":57},{"imports":["sym._init","rsp"],"name":"sym.__libc_csu_init","size":101},{"imports":["sym.imp.printf"],"name":"main","size":161}] \ No newline at end of file diff --git a/test-files/cg_dedup/raw/test_bin_2_finfo.json b/test-files/cg_dedup/raw/test_bin_2_finfo.json new file mode 100644 index 0000000..d6a06d0 --- /dev/null +++ b/test-files/cg_dedup/raw/test_bin_2_finfo.json @@ -0,0 +1 @@ +[{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":6,"is-lineal":true,"is-pure":"false","maxbound":4187,"minbound":4176,"name":"sym.imp.printf","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4176,"outdegree":0,"realsz":11,"regvars":[],"signature":"int sym.imp.printf (const char *format);","size":11,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":16,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4239,"minbound":4192,"name":"entry0","nargs":1,"nbbs":1,"ninstrs":13,"nlocals":0,"noreturn":false,"offset":4192,"outdegree":1,"realsz":47,"regvars":[{"kind":"reg","name":"arg3","ref":"rdx","type":"int64_t"}],"signature":"entry0 (int64_t 
arg3);","size":47,"spvars":[],"stackframe":8,"type":"fcn"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":14,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":4,"indegree":1,"is-lineal":false,"is-pure":"false","maxbound":4281,"minbound":4240,"name":"sym.deregister_tm_clones","nargs":0,"nbbs":4,"ninstrs":9,"nlocals":0,"noreturn":false,"offset":4240,"outdegree":0,"realsz":34,"regvars":[],"signature":"sym.deregister_tm_clones ();","size":41,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":19,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":4,"indegree":0,"is-lineal":false,"is-pure":"false","maxbound":4345,"minbound":4288,"name":"sym.register_tm_clones","nargs":0,"nbbs":4,"ninstrs":14,"nlocals":0,"noreturn":false,"offset":4288,"outdegree":0,"realsz":51,"regvars":[],"signature":"sym.register_tm_clones ();","size":57,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":24,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":5,"indegree":0,"is-lineal":false,"is-pure":"false","maxbound":4409,"minbound":4352,"name":"sym.__do_global_dtors_aux","nargs":0,"nbbs":5,"ninstrs":14,"nlocals":0,"noreturn":false,"offset":4352,"outdegree":2,"realsz":54,"regvars":[],"signature":"sym.__do_global_dtors_aux ();","size":57,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":1,"is-lineal":true,"is-pure":"false","maxbound":4171,"minbound":4160,"name":"sym..plt.got","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4160,"outdegree":0,"realsz":11,"regvars":[],"signature":"sym..plt.got 
();","size":11,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":3,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4425,"minbound":4416,"name":"sym.frame_dummy","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4416,"outdegree":0,"realsz":9,"regvars":[],"signature":"sym.frame_dummy ();","size":9,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":2,"codexrefs":null,"cost":13,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":3,"indegree":1,"is-lineal":true,"is-pure":"false","maxbound":4123,"minbound":4096,"name":"sym._init","nargs":0,"nbbs":3,"ninstrs":8,"nlocals":0,"noreturn":false,"offset":4096,"outdegree":0,"realsz":27,"regvars":[],"signature":"sym._init ();","size":27,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4709,"minbound":4704,"name":"sym.__libc_csu_fini","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4704,"outdegree":0,"realsz":5,"regvars":[],"signature":"sym.__libc_csu_fini ();","size":5,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":6,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4725,"minbound":4712,"name":"sym._fini","nargs":0,"nbbs":1,"ninstrs":4,"nlocals":0,"noreturn":false,"offset":4712,"outdegree":0,"realsz":13,"regvars":[],"signature":"sym._fini 
();","size":13,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":3,"codexrefs":null,"cost":43,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":5,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4693,"minbound":4592,"name":"sym.__libc_csu_init","nargs":3,"nbbs":4,"ninstrs":34,"nlocals":0,"noreturn":false,"offset":4592,"outdegree":2,"realsz":101,"regvars":[{"kind":"reg","name":"arg1","ref":"rdi","type":"int64_t"},{"kind":"reg","name":"arg2","ref":"rsi","type":"int64_t"},{"kind":"reg","name":"arg3","ref":"rdx","type":"int64_t"}],"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);","size":101,"spvars":[],"stackframe":56,"type":"sym"},{"bits":64,"bpvars":[{"kind":"var","name":"var_4h","ref":{"base":"rbp","offset":-4},"type":"int64_t"},{"kind":"var","name":"var_8h","ref":{"base":"rbp","offset":-8},"type":"int64_t"}],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":58,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":11,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4586,"minbound":4425,"name":"main","nargs":0,"nbbs":9,"ninstrs":38,"nlocals":2,"noreturn":false,"offset":4425,"outdegree":6,"realsz":161,"regvars":[],"signature":"int main (int argc, char **argv, char **envp);","size":161,"spvars":[],"stackframe":24,"type":"sym"}] \ No newline at end of file diff --git a/test-files/cg_dedup/raw/test_bin_cg.json b/test-files/cg_dedup/raw/test_bin_cg.json new file mode 100644 index 0000000..f261c69 --- /dev/null +++ b/test-files/cg_dedup/raw/test_bin_cg.json @@ -0,0 +1 @@ +[{"imports":["unk.0x3fe0"],"name":"entry0","size":47},{"imports":["sym..plt.got","sym.deregister_tm_clones"],"name":"sym.__do_global_dtors_aux","size":57},{"imports":["sym._init","rsp"],"name":"sym.__libc_csu_init","size":101},{"imports":["sym.imp.printf"],"name":"main","size":161}] \ No newline at end of file diff --git 
a/test-files/cg_dedup/raw/test_bin_finfo.json b/test-files/cg_dedup/raw/test_bin_finfo.json new file mode 100644 index 0000000..d6a06d0 --- /dev/null +++ b/test-files/cg_dedup/raw/test_bin_finfo.json @@ -0,0 +1 @@ +[{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":6,"is-lineal":true,"is-pure":"false","maxbound":4187,"minbound":4176,"name":"sym.imp.printf","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4176,"outdegree":0,"realsz":11,"regvars":[],"signature":"int sym.imp.printf (const char *format);","size":11,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":16,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4239,"minbound":4192,"name":"entry0","nargs":1,"nbbs":1,"ninstrs":13,"nlocals":0,"noreturn":false,"offset":4192,"outdegree":1,"realsz":47,"regvars":[{"kind":"reg","name":"arg3","ref":"rdx","type":"int64_t"}],"signature":"entry0 (int64_t arg3);","size":47,"spvars":[],"stackframe":8,"type":"fcn"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":14,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":4,"indegree":1,"is-lineal":false,"is-pure":"false","maxbound":4281,"minbound":4240,"name":"sym.deregister_tm_clones","nargs":0,"nbbs":4,"ninstrs":9,"nlocals":0,"noreturn":false,"offset":4240,"outdegree":0,"realsz":34,"regvars":[],"signature":"sym.deregister_tm_clones 
();","size":41,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":19,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":4,"indegree":0,"is-lineal":false,"is-pure":"false","maxbound":4345,"minbound":4288,"name":"sym.register_tm_clones","nargs":0,"nbbs":4,"ninstrs":14,"nlocals":0,"noreturn":false,"offset":4288,"outdegree":0,"realsz":51,"regvars":[],"signature":"sym.register_tm_clones ();","size":57,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":24,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":2,"edges":5,"indegree":0,"is-lineal":false,"is-pure":"false","maxbound":4409,"minbound":4352,"name":"sym.__do_global_dtors_aux","nargs":0,"nbbs":5,"ninstrs":14,"nlocals":0,"noreturn":false,"offset":4352,"outdegree":2,"realsz":54,"regvars":[],"signature":"sym.__do_global_dtors_aux ();","size":57,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":1,"is-lineal":true,"is-pure":"false","maxbound":4171,"minbound":4160,"name":"sym..plt.got","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4160,"outdegree":0,"realsz":11,"regvars":[],"signature":"sym..plt.got ();","size":11,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":3,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4425,"minbound":4416,"name":"sym.frame_dummy","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4416,"outdegree":0,"realsz":9,"regvars":[],"signature":"sym.frame_dummy 
();","size":9,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":2,"codexrefs":null,"cost":13,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":3,"indegree":1,"is-lineal":true,"is-pure":"false","maxbound":4123,"minbound":4096,"name":"sym._init","nargs":0,"nbbs":3,"ninstrs":8,"nlocals":0,"noreturn":false,"offset":4096,"outdegree":0,"realsz":27,"regvars":[],"signature":"sym._init ();","size":27,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":4,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4709,"minbound":4704,"name":"sym.__libc_csu_fini","nargs":0,"nbbs":1,"ninstrs":2,"nlocals":0,"noreturn":false,"offset":4704,"outdegree":0,"realsz":5,"regvars":[],"signature":"sym.__libc_csu_fini ();","size":5,"spvars":[],"stackframe":0,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":1,"codexrefs":null,"cost":6,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":0,"indegree":0,"is-lineal":true,"is-pure":"true","maxbound":4725,"minbound":4712,"name":"sym._fini","nargs":0,"nbbs":1,"ninstrs":4,"nlocals":0,"noreturn":false,"offset":4712,"outdegree":0,"realsz":13,"regvars":[],"signature":"sym._fini 
();","size":13,"spvars":[],"stackframe":8,"type":"sym"},{"bits":64,"bpvars":[],"callrefs":null,"calltype":"amd64","cc":3,"codexrefs":null,"cost":43,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":5,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4693,"minbound":4592,"name":"sym.__libc_csu_init","nargs":3,"nbbs":4,"ninstrs":34,"nlocals":0,"noreturn":false,"offset":4592,"outdegree":2,"realsz":101,"regvars":[{"kind":"reg","name":"arg1","ref":"rdi","type":"int64_t"},{"kind":"reg","name":"arg2","ref":"rsi","type":"int64_t"},{"kind":"reg","name":"arg3","ref":"rdx","type":"int64_t"}],"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);","size":101,"spvars":[],"stackframe":56,"type":"sym"},{"bits":64,"bpvars":[{"kind":"var","name":"var_4h","ref":{"base":"rbp","offset":-4},"type":"int64_t"},{"kind":"var","name":"var_8h","ref":{"base":"rbp","offset":-8},"type":"int64_t"}],"callrefs":null,"calltype":"amd64","cc":4,"codexrefs":null,"cost":58,"datarefs":null,"dataxrefs":null,"difftype":"new","ebbs":1,"edges":11,"indegree":0,"is-lineal":true,"is-pure":"false","maxbound":4586,"minbound":4425,"name":"main","nargs":0,"nbbs":9,"ninstrs":38,"nlocals":2,"noreturn":false,"offset":4425,"outdegree":6,"realsz":161,"regvars":[],"signature":"int main (int argc, char **argv, char **envp);","size":161,"spvars":[],"stackframe":24,"type":"sym"}] \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json new file mode 100644 index 0000000..11877d3 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json @@ -0,0 +1 @@ 
+{"adjacency":[[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json new file mode 100644 index 0000000..99d9b13 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":0,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json @@ -0,0 +1 @@ 
+{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json new file mode 100644 index 0000000..8bf1d8c --- /dev/null +++ b/test-files/cg_dedup/to_dedup/arm32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git 
a/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json new file mode 100644 index 0000000..11877d3 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json new file mode 100644 index 0000000..99d9b13 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/main-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":0,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ 
b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json new file mode 100644 index 0000000..8bf1d8c --- /dev/null +++ b/test-files/cg_dedup/to_dedup/mips32-gcc-9-03_testbin_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym._init 
();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json new file mode 100644 index 0000000..11877d3 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/entry0-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"entry0","functionFeatureSubset":{"name":"entry0","ninstrs":13,"edges":0,"indegree":0,"outdegree":1,"nlocals":0,"nargs":1,"signature":"entry0 (int64_t arg3);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/main-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/main-onehopcgcallers-meta.json new file mode 100644 index 0000000..99d9b13 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/main-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":1,"weight":0}],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"main","functionFeatureSubset":{"name":"main","ninstrs":38,"edges":11,"indegree":0,"outdegree":6,"nlocals":2,"nargs":0,"signature":"int main (int argc, char **argv, char **envp);"}},{"id":1,"funcName":"sym.imp.printf","functionFeatureSubset":{"name":"sym.imp.printf","ninstrs":2,"edges":0,"indegree":6,"outdegree":0,"nlocals":0,"nargs":0,"signature":"int sym.imp.printf (const char *format);"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json 
b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json new file mode 100644 index 0000000..3ee8566 --- /dev/null +++ b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__do_global_dtors_aux-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__do_global_dtors_aux","functionFeatureSubset":{"name":"sym.__do_global_dtors_aux","ninstrs":14,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":0,"signature":"sym.__do_global_dtors_aux ();"}},{"id":1,"funcName":"sym..plt.got","functionFeatureSubset":{"name":"sym..plt.got","ninstrs":2,"edges":0,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym..plt.got ();"}},{"id":2,"funcName":"sym.deregister_tm_clones","functionFeatureSubset":{"name":"sym.deregister_tm_clones","ninstrs":9,"edges":4,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym.deregister_tm_clones ();"}}]} \ No newline at end of file diff --git a/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json new file mode 100644 index 0000000..8bf1d8c --- /dev/null +++ b/test-files/cg_dedup/to_dedup/x86-gcc-9-03_testbin2_cg-onehopcgcallers/sym.__libc_csu_init-onehopcgcallers-meta.json @@ -0,0 +1 @@ +{"adjacency":[[{"id":2,"weight":0},{"id":1,"weight":0}],[],[]],"directed":"True","graph":[],"multigraph":false,"nodes":[{"id":0,"funcName":"sym.__libc_csu_init","functionFeatureSubset":{"name":"sym.__libc_csu_init","ninstrs":34,"edges":5,"indegree":0,"outdegree":2,"nlocals":0,"nargs":3,"signature":"sym.__libc_csu_init (int64_t arg1, int64_t arg2, int64_t 
arg3);"}},{"id":1,"funcName":"sym._init","functionFeatureSubset":{"name":"sym._init","ninstrs":8,"edges":3,"indegree":1,"outdegree":0,"nlocals":0,"nargs":0,"signature":"sym._init ();"}},{"id":2,"funcName":"rsp","functionFeatureSubset":{"name":"","ninstrs":0,"edges":0,"indegree":0,"outdegree":0,"nlocals":0,"nargs":0,"signature":""}}]} \ No newline at end of file diff --git a/test-files/ls_cg.json b/test-files/ls_cg.json new file mode 100644 index 0000000..db52aba --- /dev/null +++ b/test-files/ls_cg.json @@ -0,0 +1,383 @@ +[ + { + "imports": [ + "sym.func.1000071a9", + "sym.imp.setlocale", + "sym.imp.isatty", + "sym.imp.getenv", + "sym.imp.atoi", + "sym.imp.ioctl", + "sym.imp.getuid", + "sym.imp.getopt", + "sym.imp.setenv", + "sym.imp.compat_mode", + "sym.imp.tgetent", + "sym.imp.tgetstr", + "sym.imp.signal", + "sym.func.1000067f7", + "sym.imp.getbsize", + "sym.imp.sysctlbyname", + "sym.imp.err", + "sym.func.100004a3c", + "sym.imp.exit" + ], + "name": "main", + "size": 2153 + }, + { + "imports": [ + "sym.imp.fts_open_INODE64", + "sym.imp.fts_children_INODE64", + "sym.func.100004d11", + "sym.imp.fts_read_INODE64", + "sym.imp.printf", + "sym.imp.compat_mode", + "sym.func.100007266", + "sym.imp.fts_set_INODE64", + "sym.func.100007230", + "sym.func.1000071fa", + "sym.imp.__error", + "sym.imp.fts_close_INODE64", + "sym.func.1000071d0", + "sym.func.1000071e5" + ], + "name": "sym.func.100004a3c", + "size": 594 + }, + { + "imports": [ + "sym.imp.getenv", + "sym.imp.strlen", + "sym.imp.malloc", + "sym.imp.sscanf", + "sym.func.100006de9", + "sym.imp.snprintf", + "sym.imp.user_from_uid", + "sym.imp.group_from_gid", + "sym.imp.fflagstostr", + "sym.imp.free", + "sym.imp.strdup", + "sym.imp.calloc", + "sym.imp.strcpy", + "sym.imp.listxattr", + "sym.imp.reallocf", + "sym.imp.getxattr", + "sym.imp.acl_get_link_np", + "sym.imp.acl_get_entry", + "sym.imp.acl_free", + "sym.func.1000072cc", + "unk.0x100008530", + "sym.imp.__stack_chk_fail", + "sym.func.1000072b7", + 
"sym.func.1000072a2", + "sym.func.10000730a" + ], + "name": "sym.func.100004d11", + "size": 2507 + }, + { + "imports": [ + "sym.imp.strerror", + "sym.imp.warnx" + ], + "name": "sym.func.100007266", + "size": 60 + }, + { + "imports": [ + "sym.imp.strerror", + "sym.imp.warnx" + ], + "name": "sym.func.100007230", + "size": 54 + }, + { + "imports": [ + "sym.imp.warnx", + "sym.imp.compat_mode" + ], + "name": "sym.func.1000071fa", + "size": 54 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.1000071d0", + "size": 21 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.1000071e5", + "size": 21 + }, + { + "imports": [ + "sym.imp.compat_mode", + "sym.imp.printf", + "sym.func.1000057af", + "sym.imp.putchar", + "sym.func.10000731f" + ], + "name": "sym.func.100005700", + "size": 175 + }, + { + "imports": [ + "sym.imp.printf", + "sym.func.1000062a8", + "sym.func.100006368", + "sym.imp.tputs", + "sym.func.10000639e" + ], + "name": "sym.func.1000057af", + "size": 281 + }, + { + "imports": [ + "sym.imp.printf", + "sym.imp.strmode", + "sym.func.100006100", + "sym.func.100006168", + "sym.func.1000062a8", + "sym.func.100006368", + "sym.imp.tputs", + "sym.func.10000639e", + "sym.imp.snprintf", + "sym.imp.readlink", + "sym.imp.__error", + "sym.imp.strerror", + "sym.imp.fprintf", + "sym.imp.putchar", + "sym.imp.strlen", + "sym.imp.acl_get_entry", + "sym.imp.acl_get_tag_type", + "sym.imp.acl_get_flagset_np", + "sym.imp.acl_get_permset", + "sym.imp.acl_get_qualifier", + "sym.imp.malloc", + "sym.imp.mbr_identifier_translate", + "sym.imp.uuid_unparse_upper", + "sym.imp.acl_free", + "sym.imp.acl_get_flag_np", + "sym.imp.free", + "sym.imp.acl_get_perm_np", + "sym.imp.__stack_chk_fail", + "sym.func.100007342" + ], + "name": "sym.func.1000058c8", + "size": 2104 + }, + { + "imports": [ + "sym.imp.humanize_number", + "sym.imp.printf" + ], + "name": "sym.func.100006100", + "size": 104 + }, + { + "imports": [ + "sym.imp.nl_langinfo", + "sym.imp.time", + 
"sym.imp.compat_mode", + "sym.imp.localtime", + "sym.imp.strftime", + "sym.imp.fputs", + "sym.imp.__stack_chk_fail" + ], + "name": "sym.func.100006168", + "size": 320 + }, + { + "imports": [ + "sym.func.100006a30" + ], + "name": "sym.func.1000062a8", + "size": 143 + }, + { + "imports": [ + "sym.imp.putchar" + ], + "name": "sym.func.10000639e", + "size": 130 + }, + { + "imports": [ + "sym.imp.strlen", + "sym.imp.putchar", + "sym.func.1000057af", + "sym.imp.printf" + ], + "name": "sym.func.10000644c", + "size": 200 + }, + { + "imports": [ + "sym.imp.realloc", + "sym.imp.__bzero", + "sym.imp.printf", + "sym.func.1000057af", + "sym.imp.putchar", + "sym.imp.warn", + "sym.func.10000737a", + "sym.func.100007357" + ], + "name": "sym.func.100006514", + "size": 739 + }, + { + "imports": [ + "sym.imp.strlen", + "sym.imp.fwrite", + "sym.imp.__tolower", + "sym.imp.fprintf" + ], + "name": "sym.func.1000067f7", + "size": 412 + }, + { + "imports": [ + "sym.imp.tputs", + "sym.imp.signal", + "sym.imp.getpid" + ], + "name": "sym.func.100006993", + "size": 106 + }, + { + "imports": [ + "sym.imp.write" + ], + "name": "sym.func.1000069fd", + "size": 38 + }, + { + "imports": [ + "sym.imp.putchar" + ], + "name": "sym.func.100006a23", + "size": 13 + }, + { + "imports": [ + "sym.imp.tputs", + "sym.imp.tgoto" + ], + "name": "sym.func.100006a30", + "size": 167 + }, + { + "imports": [ + "sym.imp.mbrtowc", + "sym.imp.putchar", + "sym.imp.__maskrune", + "sym.imp.wcwidth", + "sym.imp.printf", + "sym.imp.__stack_chk_fail" + ], + "name": "sym.func.100006ad7", + "size": 398 + }, + { + "imports": [ + "sym.imp.mbrtowc", + "sym.imp.putchar", + "sym.imp.__maskrune", + "sym.imp.wcwidth", + "sym.imp.__stack_chk_fail" + ], + "name": "sym.func.100006c65", + "size": 388 + }, + { + "imports": [ + "sym.imp.mbrtowc", + "sym.imp.__maskrune", + "sym.imp.__stack_chk_fail" + ], + "name": "sym.func.100006de9", + "size": 342 + }, + { + "imports": [ + "sym.imp.mbrtowc", + "sym.imp.strlen", + "sym.imp.__maskrune", + 
"sym.imp.memchr", + "sym.imp.putchar", + "sym.imp.wcwidth", + "sym.imp.__stack_chk_fail" + ], + "name": "sym.func.100006f3f", + "size": 618 + }, + { + "imports": [ + "sym.imp.fprintf", + "sym.imp.exit" + ], + "name": "sym.func.1000071a9", + "size": 38 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.1000072a2", + "size": 21 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.1000072b7", + "size": 21 + }, + { + "imports": [ + "sym.imp.strerror", + "sym.imp.warnx" + ], + "name": "sym.func.1000072cc", + "size": 62 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.10000730a", + "size": 21 + }, + { + "imports": [ + "sym.imp.__assert_rtn" + ], + "name": "sym.func.10000731f", + "size": 35 + }, + { + "imports": [ + "sym.imp.err" + ], + "name": "sym.func.100007342", + "size": 21 + }, + { + "imports": [ + "sym.imp.__assert_rtn" + ], + "name": "sym.func.100007357", + "size": 35 + }, + { + "imports": [ + "sym.imp.__assert_rtn" + ], + "name": "sym.func.10000737a", + "size": 35 + } +] \ No newline at end of file