Skip to content

Commit

Permalink
Merge pull request #10 from br0kej/dev
Browse files Browse the repository at this point in the history
[feature] Adding support for generating networkx graphs with Disasm or ESIL instructions as nodes
  • Loading branch information
br0kej authored Apr 15, 2024
2 parents 0ad6ae2 + 06cc90c commit 5c1220a
Show file tree
Hide file tree
Showing 4 changed files with 260 additions and 11 deletions.
124 changes: 115 additions & 9 deletions src/agfj.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use crate::bb::{ACFJBlock, FeatureType, TikNibFeaturesBB};
#[cfg(feature = "inference")]
use crate::inference::InferenceJob;
use crate::networkx::{DGISNode, DiscovreNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode};
use crate::networkx::{
DGISNode, DisasmNode, DiscovreNode, EsilNode, GeminiNode, NetworkxDiGraph, NodeType, TiknibNode,
};
use crate::utils::{average, check_or_create_dir, get_save_file_path};
use enum_as_inner::EnumAsInner;
use itertools::Itertools;
use ordered_float::OrderedFloat;
use petgraph::prelude::Graph;
Expand Down Expand Up @@ -47,6 +50,12 @@ pub struct AGFJFunc {
graph: Option<Graph<String, u32>>,
}

/// Container for the per-basic-block feature vectors collected while building
/// a function graph, holding either string features or numeric features.
///
/// The variant is picked from the requested `FeatureType`: `String` for
/// `Esil` / `Disasm`, `F64` for `Tiknib` / `Gemini` / `DiscovRE` / `DGIS`.
/// The `EnumAsInner` derive supplies the `as_string*` / `as_f64*` accessors
/// used to reach the inner vectors.
#[derive(EnumAsInner, Serialize, Deserialize, Debug)]
pub enum StringOrF64 {
/// String feature vectors (one inner `Vec` per basic block that produced features).
String(Vec<Vec<String>>),
/// Numeric feature vectors (one inner `Vec` per basic block that produced features).
F64(Vec<Vec<f64>>),
}

impl AGFJFunc {
pub fn create_graph_struct_members(&mut self, min_blocks: &u16) {
self.create_bb_edge_list(min_blocks);
Expand Down Expand Up @@ -357,25 +366,69 @@ impl AGFJFunc {
if self.blocks.len() >= (*min_blocks).into() && self.blocks[0].offset != 1 {
let mut addr_idxs = Vec::<i64>::new();
let mut edge_list = Vec::<(u32, u32, u32)>::new();
let mut feature_vecs = Vec::<_>::new();

let mut feature_vecs: StringOrF64 = match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => StringOrF64::F64(Vec::new()),
FeatureType::Esil | FeatureType::Disasm => StringOrF64::String(Vec::new()),
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
return;
}
};

let min_offset: u64 = self.offset;
let max_offset: u64 = self.offset + self.size.unwrap_or(0);
for bb in &self.blocks {
bb.get_block_edges(&mut addr_idxs, &mut edge_list, max_offset, min_offset);
bb.generate_bb_feature_vec(&mut feature_vecs, feature_type, architecture);
}
match feature_type {
FeatureType::Tiknib
| FeatureType::Gemini
| FeatureType::DiscovRE
| FeatureType::DGIS => {
let feature_vecs = feature_vecs.as_f64_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.generate_bb_feature_vec(feature_vecs, feature_type, architecture);
}
}
FeatureType::Esil | FeatureType::Disasm => {
let feature_vecs = feature_vecs.as_string_mut().unwrap();
for bb in &self.blocks {
bb.get_block_edges(
&mut addr_idxs,
&mut edge_list,
max_offset,
min_offset,
);
bb.generate_bb_feature_strings(feature_vecs, feature_type, true);
}
}
FeatureType::ModelEmbedded | FeatureType::Encoded | FeatureType::Invalid => {
info!("Invalid Feature Type. Skipping..");
return;
}
};

if !edge_list.is_empty() {
let mut graph = Graph::<std::string::String, u32>::from_edges(&edge_list);

Self::str_to_hex_node_idxs(&mut graph, &mut addr_idxs);

let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((&graph, &feature_vecs, feature_type));

// Unpack the NodeTypes to the inner Types
if feature_type == FeatureType::Gemini {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<GeminiNode> =
NetworkxDiGraph::<GeminiNode>::from(networkx_graph);

Expand All @@ -386,6 +439,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::DGIS {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DGISNode> =
NetworkxDiGraph::<DGISNode>::from(networkx_graph);
info!("Saving to JSON..");
Expand All @@ -395,6 +455,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::DiscovRE {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DiscovreNode> =
NetworkxDiGraph::<DiscovreNode>::from(networkx_graph);
info!("Saving to JSON..");
Expand All @@ -404,6 +471,13 @@ impl AGFJFunc {
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Tiknib {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_f64().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<TiknibNode> =
NetworkxDiGraph::<TiknibNode>::from(networkx_graph);
info!("Saving to JSON..");
Expand All @@ -412,6 +486,38 @@ impl AGFJFunc {
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Disasm {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_string().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<DisasmNode> =
NetworkxDiGraph::<DisasmNode>::from(networkx_graph);
info!("Saving to JSON..");
serde_json::to_writer(
&File::create(fname_string).expect("Failed to create writer"),
&networkx_graph_inners,
)
.expect("Unable to write JSON");
} else if feature_type == FeatureType::Esil {
let networkx_graph: NetworkxDiGraph<NodeType> =
NetworkxDiGraph::<NodeType>::from((
&graph,
feature_vecs.as_string().unwrap(),
feature_type,
));

let networkx_graph_inners: NetworkxDiGraph<EsilNode> =
NetworkxDiGraph::<EsilNode>::from(networkx_graph);
info!("Saving to JSON..");
serde_json::to_writer(
&File::create(fname_string).expect("Failed to create writer"),
&networkx_graph_inners,
)
.expect("Unable to write JSON");
}
} else {
info!("Function {} has no edges. Skipping...", self.name)
Expand Down
23 changes: 22 additions & 1 deletion src/bb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ pub enum FeatureType {
DiscovRE,
DGIS,
Tiknib,
Disasm,
Esil,
ModelEmbedded,
Encoded,
Invalid,
Expand Down Expand Up @@ -217,7 +219,26 @@ impl ACFJBlock {
};

if feature_vector.is_empty() {
println!("Empty feature vector. This means that the feature type is wrong!")
error!("Empty feature vector. This means that the feature type is wrong!")
} else {
feature_vecs.push(feature_vector);
}
}

pub fn generate_bb_feature_strings(
&self,
feature_vecs: &mut Vec<Vec<String>>,
feature_type: FeatureType,
normalise: bool,
) {
let feature_vector: Vec<String> = match feature_type {
FeatureType::Disasm => self.get_disasm_bb(normalise),
FeatureType::Esil => self.get_esil_bb(normalise),
_ => unreachable!(),
};

if feature_vector.is_empty() {
error!("Empty feature vector. This means that the feature type is wrong!")
} else {
feature_vecs.push(feature_vector);
}
Expand Down
6 changes: 5 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ enum GenerateSubCommands {
output_path: PathBuf,

/// The type of features to generate per basic block (node)
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib"])
#[arg(short, long, value_name = "FEATURE_TYPE", value_parser = clap::builder::PossibleValuesParser::new(["gemini", "discovre", "dgis", "tiknib", "disasm", "esil"])
.map(|s| s.parse::<String>().unwrap()),)]
feature_type: Option<String>,

Expand Down Expand Up @@ -446,6 +446,8 @@ fn main() {
"dgis" => FeatureType::DGIS,
"encode" => FeatureType::Encoded,
"tiknib" => FeatureType::Tiknib,
"disasm" => FeatureType::Disasm,
"esil" => FeatureType::Esil,
#[cfg(feature = "inference")]
"embed" => FeatureType::ModelEmbedded,
_ => FeatureType::Invalid,
Expand All @@ -458,6 +460,8 @@ fn main() {
|| feature_vec_type == FeatureType::DiscovRE
|| feature_vec_type == FeatureType::DGIS
|| feature_vec_type == FeatureType::Tiknib
|| feature_vec_type == FeatureType::Disasm
|| feature_vec_type == FeatureType::Esil
{
info!(
"Creating graphs with {:?} feature vectors.",
Expand Down
118 changes: 118 additions & 0 deletions src/networkx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ pub enum NodeType {
Dgis(DGISNode),
Discovere(DiscovreNode),
Tiknib(TiknibNode),
Disasm(DisasmNode),
Esil(EsilNode),
}

#[derive(Debug, Clone, PartialEq, Hash, Serialize, Deserialize, EnumAsInner)]
Expand Down Expand Up @@ -60,6 +62,36 @@ impl CallGraphNodeFeatureType {
}
}

/// Graph node whose features are the strings produced for a basic block when
/// the `Disasm` feature type is selected.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DisasmNode {
/// Node identifier; the graph conversion assigns the feature vector's index.
pub id: i64,
/// Feature strings for this node.
pub features: Vec<String>,
}

impl From<(i64, &Vec<String>)> for DisasmNode {
fn from(src: (i64, &Vec<String>)) -> DisasmNode {
DisasmNode {
id: src.0,
features: src.1.to_owned(),
}
}
}

/// Graph node whose features are the strings produced for a basic block when
/// the `Esil` feature type is selected.
#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EsilNode {
/// Node identifier; the graph conversion assigns the feature vector's index.
pub id: i64,
/// Feature strings for this node.
pub features: Vec<String>,
}

impl From<(i64, &Vec<String>)> for EsilNode {
fn from(src: (i64, &Vec<String>)) -> EsilNode {
EsilNode {
id: src.0,
features: src.1.to_owned(),
}
}
}

#[derive(Copy, Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TiknibNode {
pub id: i64,
Expand Down Expand Up @@ -372,6 +404,53 @@ impl From<(Graph<String, u32>, &Vec<FinfoTiknib>)>
}
}

/// Builds a networkx-style directed graph whose nodes carry string features.
///
/// `input` is `(graph, per-node feature strings, feature type)`. One node is
/// created per feature vector, in order, using the vector's position as the
/// node id. Only `FeatureType::Disasm` and `FeatureType::Esil` yield nodes;
/// for any other feature type an error is logged for each feature vector and
/// no node is added. The adjacency list is read from the edges of `graph`.
impl From<(&Graph<String, u32>, &Vec<Vec<String>>, FeatureType)> for NetworkxDiGraph<NodeType> {
    fn from(
        input: (&Graph<String, u32>, &Vec<Vec<String>>, FeatureType),
    ) -> NetworkxDiGraph<NodeType> {
        let (cfg, feature_strings, feature_type) = input;

        // Wrap every feature vector in the node variant matching the feature type.
        let mut nodes: Vec<NodeType> = Vec::new();
        for (idx, features) in feature_strings.iter().enumerate() {
            let node_id = idx as i64;
            match feature_type {
                FeatureType::Disasm => {
                    nodes.push(NodeType::Disasm(DisasmNode::from((node_id, features))));
                }
                FeatureType::Esil => {
                    nodes.push(NodeType::Esil(EsilNode::from((node_id, features))));
                }
                _ => {
                    error!("Failed to create node for input!")
                }
            }
        }

        // One adjacency row per graph node, listing the targets of its edges.
        let adjacency: Vec<Vec<Adjacency>> = cfg
            .node_indices()
            .map(|source| {
                cfg.edges(source)
                    .map(|edge| Adjacency {
                        id: edge.target().index(),
                        weight: edge.weight().to_owned(),
                    })
                    .collect()
            })
            .collect();

        NetworkxDiGraph {
            adjacency,
            directed: String::from("True"),
            graph: vec![],
            multigraph: false,
            nodes,
        }
    }
}

impl From<(&Graph<String, u32>, &Vec<Vec<f64>>, FeatureType)> for NetworkxDiGraph<NodeType> {
fn from(
input: (&Graph<String, u32>, &Vec<Vec<f64>>, FeatureType),
Expand All @@ -392,6 +471,7 @@ impl From<(&Graph<String, u32>, &Vec<Vec<f64>>, FeatureType)> for NetworkxDiGrap
FeatureType::Tiknib => {
Some(NodeType::Tiknib(TiknibNode::from((i as i64, node_vector))))
}

_ => None,
};

Expand Down Expand Up @@ -504,3 +584,41 @@ impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<TiknibNode> {
}
}
}

/// Unwraps a graph of generic [`NodeType`] nodes into a graph of [`DisasmNode`]s.
///
/// The adjacency list and `directed` flag are moved across unchanged; `graph`
/// is reset to an empty list and `multigraph` to `false`.
///
/// # Panics
///
/// Panics if any node in `src` is not a `NodeType::Disasm`.
impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<DisasmNode> {
    fn from(src: NetworkxDiGraph<NodeType>) -> NetworkxDiGraph<DisasmNode> {
        // Borrow the nodes rather than cloning the whole graph (adjacency
        // included) first; only the inner node has to be copied out of its
        // enum wrapper.
        let inner_nodes_types: Vec<DisasmNode> = src
            .nodes
            .iter()
            .map(|el| {
                el.as_disasm()
                    .expect("every node of a Disasm graph must be NodeType::Disasm")
                    .clone()
            })
            .collect();

        NetworkxDiGraph {
            adjacency: src.adjacency,
            directed: src.directed,
            graph: vec![],
            multigraph: false,
            nodes: inner_nodes_types,
        }
    }
}

/// Unwraps a graph of generic [`NodeType`] nodes into a graph of [`EsilNode`]s.
///
/// The adjacency list and `directed` flag are moved across unchanged; `graph`
/// is reset to an empty list and `multigraph` to `false`.
///
/// # Panics
///
/// Panics if any node in `src` is not a `NodeType::Esil`.
impl From<NetworkxDiGraph<NodeType>> for NetworkxDiGraph<EsilNode> {
    fn from(src: NetworkxDiGraph<NodeType>) -> NetworkxDiGraph<EsilNode> {
        // Borrow the nodes rather than cloning the whole graph (adjacency
        // included) first; only the inner node has to be copied out of its
        // enum wrapper.
        let inner_nodes_types: Vec<EsilNode> = src
            .nodes
            .iter()
            .map(|el| {
                el.as_esil()
                    .expect("every node of an Esil graph must be NodeType::Esil")
                    .clone()
            })
            .collect();

        NetworkxDiGraph {
            adjacency: src.adjacency,
            directed: src.directed,
            graph: vec![],
            multigraph: false,
            nodes: inner_nodes_types,
        }
    }
}

0 comments on commit 5c1220a

Please sign in to comment.