
Commit 624d8d0

merging remote main

br0kej committed Apr 15, 2024
2 parents a703bfa + 5c1220a

Showing 6 changed files with 334 additions and 15 deletions.
127 changes: 116 additions & 11 deletions src/agfj.rs
@@ -1,8 +1,11 @@
use crate::bb::{ACFJBlock, FeatureType, TikNibFeaturesBB};
#[cfg(feature = "inference")]
use crate::inference::InferenceJob;
use crate::networkx::{DGISNode, DiscovreNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode};
use crate::networkx::{
DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode,
};
use crate::utils::{average, check_or_create_dir, get_save_file_path};
use enum_as_inner::EnumAsInner;
use itertools::Itertools;
use ordered_float::OrderedFloat;
use petgraph::prelude::Graph;
@@ -47,6 +50,12 @@ pub struct AGFJFunc {
graph: Option<Graph<String, u32>>,
}

#[derive(EnumAsInner, Serialize, Deserialize, Debug)]
pub enum StringOrF64 {
String(Vec<Vec<String>>),
F64(Vec<Vec<f64>>),
}

impl AGFJFunc {
pub fn create_graph_struct_members(&mut self, min_blocks: &u16) {
self.create_bb_edge_list(min_blocks);
@@ -332,8 +341,7 @@ impl AGFJFunc {
feature_type: FeatureType,
architecture: &String,
) {
let full_output_path =
get_save_file_path(path, output_path, Some(".json".to_string()), None, None);
let full_output_path = get_save_file_path(path, output_path, None, None, None);
check_or_create_dir(&full_output_path);
let file_name = path.file_name().unwrap();
let binding = file_name.to_string_lossy().to_string();
@@ -358,25 +366,69 @@ impl AGFJFunc {
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();
let mut edge_list = Vec::<(u32, u32, u32)>::new();
let mut feature_vecs = Vec::<_>::new();

let mut feature_vecs: StringOrF64 = match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => StringOrF64::F64(Vec::new()),
FeatureType::Esil | FeatureType::Disasm => StringOrF64::String(Vec::new()),
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
return;
}
};

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);
for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset);
bb.generate_bb_feature_vec(&mut feature_vecs, feature_type, architecture);
}
match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => {
let feature_vecs = feature_vecs.as_f64_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture);
}
}
FeatureType::Esil | FeatureType::Disasm => {
let feature_vecs = feature_vecs.as_string_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.generate_bb_feature_strings(feature_vecs, feature_type, true);
}
}
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
return;
}
};

if !edge_list.is_empty() {
let mut graph = Graph::<std::string::String, u32>::from_edges(&edge_list);

Self::str_to_hex_node_idxs(&mut graph, &mut addr_idxs);

let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((&graph, &feature_vecs, feature_type));

// Unpack the NodeTypes to the inner Types
if feature_type == FeatureType::Gemini {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<GeminiNode> =
NetworkxDiGraph::<GeminiNode>::from(networkx_graph);

@@ -387,6 +439,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::DGIS {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DGISNode> =
NetworkxDiGraph::<DGISNode>::from(networkx_graph);
info!("Saving to JSON..");
@@ -396,6 +455,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::DiscovRE {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DiscovreNode> =
NetworkxDiGraph::<DiscovreNode>::from(networkx_graph);
info!("Saving to JSON..");
@@ -405,6 +471,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Tiknib {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<TiknibNode> =
NetworkxDiGraph::<TiknibNode>::from(networkx_graph);
info!("Saving to JSON..");
@@ -413,6 +486,38 @@ impl AGFJFunc {
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Disasm {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_string().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DisasmNode> =
NetworkxDiGraph::<DisasmNode>::from(networkx_graph);
info!("Saving to JSON..");
serde_json::to_writer(
&File::create(fname_string).expect("Failed to create writer"),
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Esil {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_string().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<EsilNode> =
NetworkxDiGraph::<EsilNode>::from(networkx_graph);
info!("Saving to JSON..");
serde_json::to_writer(
&File::create(fname_string).expect("Failed to create writer"),
&networkx_graph_inners,
)
.expect("Unable to write JSON");
}
} else {
info!("Function {} has no edges. Skipping...", self.name)
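For readers skimming the diff: the heart of this change is the StringOrF64 wrapper together with the accessors that enum_as_inner derives for it (as_f64_mut, as_string_mut, as_f64, as_string), so the numeric feature types (Gemini, DiscovRE, DGIS, Tiknib) keep collecting Vec<Vec<f64>> while the new Disasm and Esil types collect Vec<Vec<String>>. Below is a minimal, self-contained sketch of that dispatch pattern (it needs the enum_as_inner crate, which the commit imports); the serde derives and the real per-block feature extraction are stripped out, and none of this is code from the crate.

// A sketch of the StringOrF64 dispatch, minus the serde derives and the real
// per-block feature extraction; not code from the crate.
use enum_as_inner::EnumAsInner;

#[derive(EnumAsInner, Debug)]
enum StringOrF64 {
    String(Vec<Vec<String>>),
    F64(Vec<Vec<f64>>),
}

fn collect_features(numeric: bool) -> StringOrF64 {
    // Pick the container variant up front, then use the derived accessors
    // to get a typed &mut for the per-block loop.
    let mut feature_vecs = if numeric {
        StringOrF64::F64(Vec::new())
    } else {
        StringOrF64::String(Vec::new())
    };

    if numeric {
        // as_f64_mut() -> Option<&mut Vec<Vec<f64>>>, generated by EnumAsInner
        feature_vecs.as_f64_mut().unwrap().push(vec![1.0, 2.0, 3.0]);
    } else {
        // as_string_mut() -> Option<&mut Vec<Vec<String>>>
        feature_vecs
            .as_string_mut()
            .unwrap()
            .push(vec!["mov eax, 1".to_string()]);
    }
    feature_vecs
}

fn main() {
    println!("{:?}", collect_features(true));
    println!("{:?}", collect_features(false));
}

Keeping both payloads behind one enum lets the block loop choose a container once, then downcast to the concrete vector type only where the later NetworkxDiGraph conversion needs it.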
23 changes: 22 additions & 1 deletion src/bb.rs
@@ -19,6 +19,8 @@ pub enum FeatureType {
DiscovRE,
DGIS,
Tiknib,
Disasm,
Esil,
ModelEmbedded,
Encoded,
Invalid,
@@ -217,7 +219,26 @@ impl ACFJBlock {
};

if feature_vector.is_empty() {
println!("Empty feature vector. This means that the feature type is wrong!")
error!("Empty feature vector. This means that the feature type is wrong!")
} else {
feature_vecs.push(feature_vector);
}
}

pub fn generate_bb_feature_strings(
&self,
feature_vecs: &mut Vec<Vec<String>>,
feature_type: FeatureType,
normalise: bool,
) {
let feature_vector: Vec<String> = match feature_type {
FeatureType::Disasm => self.get_disasm_bb(normalise),
FeatureType::Esil => self.get_esil_bb(normalise),
_ => unreachable!(),
};

if feature_vector.is_empty() {
error!("Empty feature vector. This means that the feature type is wrong!")
} else {
feature_vecs.push(feature_vector);
}
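The bb.rs side is small: two new FeatureType variants plus a generate_bb_feature_strings counterpart to generate_bb_feature_vec that pushes one Vec<String> per basic block and logs, rather than panicking on, an empty result. A rough stand-in sketch of that flow follows; ToyBlock and its fields are hypothetical and the instruction strings are illustrative, since the real ACFJBlock pulls its disassembly and ESIL from radare2 output.

// Stand-in types only; the real ACFJBlock and its radare2-derived fields live in src/bb.rs.
#[derive(Clone, Copy, Debug)]
enum FeatureType {
    Disasm,
    Esil,
}

struct ToyBlock {
    disasm: Vec<String>,
    esil: Vec<String>,
}

impl ToyBlock {
    // Mirrors the shape of generate_bb_feature_strings: one Vec<String> per block,
    // pushed only when the extractor produced something; empty results are logged and skipped.
    fn generate_bb_feature_strings(
        &self,
        feature_vecs: &mut Vec<Vec<String>>,
        feature_type: FeatureType,
        _normalise: bool, // normalisation elided in this sketch
    ) {
        let feature_vector: Vec<String> = match feature_type {
            FeatureType::Disasm => self.disasm.clone(),
            FeatureType::Esil => self.esil.clone(),
        };

        if feature_vector.is_empty() {
            // The crate logs via the log crate's error!; eprintln! keeps the sketch dependency-free.
            eprintln!("Empty feature vector. This means that the feature type is wrong!");
        } else {
            feature_vecs.push(feature_vector);
        }
    }
}

fn main() {
    let bb = ToyBlock {
        disasm: vec!["push rbp".into(), "mov rbp, rsp".into()],
        esil: vec!["8,rsp,-=,rbp,rsp,=[8]".into()], // illustrative ESIL-like string
    };
    let mut feature_vecs: Vec<Vec<String>> = Vec::new();
    bb.generate_bb_feature_strings(&mut feature_vecs, FeatureType::Disasm, true);
    bb.generate_bb_feature_strings(&mut feature_vecs, FeatureType::Esil, true);
    println!("{feature_vecs:?}");
}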
16 changes: 14 additions & 2 deletions src/main.rs
@@ -36,6 +36,7 @@ pub mod normalisation;
pub mod processors;
pub mod tokeniser;
pub mod utils;
mod validate;

use crate::dedup::{CGCorpus, EsilFuncStringCorpus};
use crate::extract::ExtractionJobType;
@@ -45,6 +46,7 @@ use crate::utils::get_save_file_path;

use crate::combos::{ComboJob, FinfoTiknibFile};
use crate::networkx::CallGraphNodeFeatureType;
use crate::validate::validate_input;
use bb::{FeatureType, InstructionMode};
#[cfg(feature = "goblin")]
use binnfo::goblin_info;
@@ -110,7 +112,7 @@ enum GenerateSubCommands {
output_path: PathBuf,

/// The type of features to generate per basic block (node)
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib"])
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil"])
.map(|s| s.parse::<String>().unwrap()),)]
feature_type: Option<String>,

@@ -444,6 +446,8 @@ fn main() {
"dgis" => FeatureType::DGIS,
"encode" => FeatureType::Encoded,
"tiknib" => FeatureType::Tiknib,
"disasm" => FeatureType::Disasm,
"esil" => FeatureType::Esil,
#[cfg(feature = "inference")]
"embed" => FeatureType::ModelEmbedded,
_ => FeatureType::Invalid,
@@ -456,13 +460,16 @@ fn main() {
|| feature_vec_type == FeatureType::DiscovRE
|| feature_vec_type == FeatureType::DGIS
|| feature_vec_type == FeatureType::Tiknib
|| feature_vec_type == FeatureType::Disasm
|| feature_vec_type == FeatureType::Esil
{
info!(
"Creating graphs with {:?} feature vectors.",
feature_vec_type
);

if Path::new(path).is_file() {
validate_input(path, "cfg");
info!("Single file found");
agfj_graph_statistical_features(
path,
@@ -476,6 +483,7 @@ fn main() {
WalkDir::new(path).into_iter().filter_map(|file| file.ok())
{
if file.path().to_string_lossy().ends_with(".json") {
validate_input(path, "cfg");
agfj_graph_statistical_features(
file.path(),
&min_blocks.unwrap(),
@@ -511,6 +519,7 @@ fn main() {
error!("--feature-type/-f is required for creating CFG's")
}
} else if Path::new(path).is_file() {
validate_input(path, "cg");
let mut file = match with_features {
true => {
let mut metadata = AFIJFile {
@@ -722,6 +731,7 @@ fn main() {
extended,
} => {
if data_source_type == "finfo" {
validate_input(input_path, "metadata_finfo");
let mut file = AFIJFile {
filename: input_path.to_owned(),
function_info: None,
@@ -737,6 +747,7 @@ fn main() {
warn!("This currently only supports making TikNib features for single files");

if input_path.is_file() {
validate_input(input_path, "metadata_tiknib");
let mut file = AGFJFile {
functions: None,
filename: input_path.to_owned(),
@@ -828,6 +839,7 @@ fn main() {

if Path::new(path).is_file() {
info!("Single file found");
validate_input(path, "nlp");
let file = AGFJFile {
functions: None,
filename: path.to_owned(),
@@ -841,7 +853,7 @@ fn main() {
file.execute_data_generation(format_type, instruction_type, random_walk, *pairs)
} else {
info!("Multiple files found. Will parallel process.");
let file_paths_vec = get_json_paths_from_dir(path, None);
let file_paths_vec = get_json_paths_from_dir(path, Some("_cfg".to_string()));
info!(
"{} files found. Beginning Processing.",
file_paths_vec.len()
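On the CLI side, the change is just two new accepted values for --feature-type plus the match arms that map them onto FeatureType::Disasm and FeatureType::Esil. A self-contained sketch of that wiring follows; it assumes clap 4 with the derive feature and trims FeatureType down to the variants relevant here.

// Assumes clap 4 with the "derive" feature in Cargo.toml; FeatureType is trimmed
// to the variants this commit touches.
use clap::Parser;

#[derive(Debug)]
enum FeatureType {
    Gemini,
    DiscovRE,
    DGIS,
    Tiknib,
    Disasm,
    Esil,
    Invalid,
}

#[derive(Parser)]
struct Cli {
    /// The type of features to generate per basic block (node)
    #[arg(short, long, value_name = "FEATURE_TYPE",
          value_parser = clap::builder::PossibleValuesParser::new(
              ["gemini", "discovre", "dgis", "tiknib", "disasm", "esil"]))]
    feature_type: Option<String>,
}

fn main() {
    let cli = Cli::parse();
    let feature_vec_type = match cli.feature_type.as_deref() {
        Some("gemini") => FeatureType::Gemini,
        Some("discovre") => FeatureType::DiscovRE,
        Some("dgis") => FeatureType::DGIS,
        Some("tiknib") => FeatureType::Tiknib,
        Some("disasm") => FeatureType::Disasm,
        Some("esil") => FeatureType::Esil,
        _ => FeatureType::Invalid,
    };
    println!("Selected feature type: {feature_vec_type:?}");
}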
(Diffs for the remaining 3 changed files are not shown here.)
