Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][TA] Analyze out-of-BT func with tainted parameters #47

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <memory>
#include <string>
#include <algorithm>

using namespace llvm;
using namespace crash_analyzer;
Expand All @@ -32,6 +33,7 @@ struct Node {
static unsigned NextID;
bool IsCrashNode;
bool IsContant;
unsigned Depth = 0;

// Call instruction that performed the call to function that is out
// of bt.
Expand Down Expand Up @@ -161,13 +163,13 @@ class TaintDataFlowGraph {
public:
// Map operand to the latest taint node.
// FIXME: This should be private.
std::map<TaintInfo, std::shared_ptr<Node>> lastTaintedNode;
std::map<const MachineOperand*, std::shared_ptr<Node>> lastTaintedNode;

void addEdge(std::shared_ptr<Node> src, std::shared_ptr<Node> dest,
EdgeType e_type = EdgeType::Assigment);
void addNode(std::shared_ptr<Node> n);

void updateLastTaintedNode(TaintInfo Op, std::shared_ptr<Node> N);
void updateLastTaintedNode(const MachineOperand* Op, std::shared_ptr<Node> N);
unsigned getBlameNodesSize() { return Nodes.size(); }

Node *getCrashNode() { return Nodes[0].get(); }
Expand Down
34 changes: 29 additions & 5 deletions llvm-15.0.3/llvm-crash-analyzer/include/Target/CATargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Host.h"

Expand All @@ -39,7 +41,8 @@ class CATargetInfo {
std::unordered_map<unsigned, RegAliasTuple> RegMap;

// Save PC value for each instruction.
std::unordered_map<const MachineInstr*, std::pair<uint64_t,uint64_t>> InstAddrs;
std::unordered_map<const MachineInstr *, std::pair<uint64_t, uint64_t>>
InstAddrs;

// Singleton class for the CATargetInfo instance.
template <typename T> class Singleton {
Expand All @@ -53,34 +56,47 @@ class CATargetInfo {

public:
CATargetInfo() {}
virtual ~CATargetInfo() { RegMap.clear(); InstAddrs.clear(); }
virtual ~CATargetInfo() {
RegMap.clear();
InstAddrs.clear();
}

// Get register index in the RegMap.
virtual Optional<unsigned> getID(std::string RegName) const = 0;

// Get register unsigned (MCRegister) from the RegMap.
virtual Optional<unsigned> getRegister(std::string RegName,
const MachineInstr *MI) const = 0;

virtual unsigned getRegSize(std::string RegName) const = 0;

// Look up the RegAliasTuple stored in the RegMap under the given Id.
// The accessor itself is const, but callers receive a mutable reference,
// so constness is cast away on the entry before it is returned.
RegAliasTuple &getRegMap(unsigned Id) const {
  const RegAliasTuple &Entry = RegMap.at(Id);
  return const_cast<RegAliasTuple &>(Entry);
}

// Get the whole RegMap (every register Id with its RegAliasTuple).
std::unordered_map<unsigned, RegAliasTuple> getWholeRegMap() const {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be better to return a reference to the map instead of copying the entire map?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely, I missed this, thanks!

return RegMap;
}

// Get InstAddr from the InstAddrs map for the MI.
Optional<uint64_t> getInstAddr(const MachineInstr* MI) {
Optional<uint64_t> getInstAddr(const MachineInstr *MI) {
if (InstAddrs.count(MI) == 0)
return None;
return InstAddrs[MI].first;
}

// Get the instruction size from the InstAddrs map for the MI.
Optional<uint64_t> getInstSize(const MachineInstr* MI) {
Optional<uint64_t> getInstSize(const MachineInstr *MI) {
if (InstAddrs.count(MI) == 0)
return None;
return InstAddrs[MI].second;
}

// Set InstAddr in the InstAddrs map for the MI.
void setInstAddr(const MachineInstr* MI, uint64_t InstAddr, uint64_t InstSize = 0) {
void setInstAddr(const MachineInstr *MI, uint64_t InstAddr,
uint64_t InstSize = 0) {
InstAddrs[MI] = {InstAddr, InstSize};
}

Expand All @@ -99,6 +115,9 @@ class CATargetInfo {
// Return true if the register is Base Pointer Register.
virtual bool isBPRegister(std::string RegName) const = 0;

// Return true if the register can be used to forward parameters.
virtual bool isParamFwdRegister(std::string RegName) const = 0;

// Set target Triple of the CATargetInfo instance.
static void initializeCATargetInfo(Triple *Triple) {
if (!TT)
Expand All @@ -118,6 +137,9 @@ class X86CATargetInfo : public CATargetInfo {

Optional<unsigned> getID(std::string RegName) const override;

Optional<unsigned> getRegister(std::string RegName,
const MachineInstr *MI) const override;

unsigned getRegSize(std::string RegName) const override;

bool isRetValRegister(std::string RegName) const override;
Expand All @@ -130,6 +152,8 @@ class X86CATargetInfo : public CATargetInfo {

bool isBPRegister(std::string RegName) const override;

bool isParamFwdRegister(std::string RegName) const override;

// Define static instance getter for each target.
static X86CATargetInfo *instance() {
return CATargetInfo::Singleton<X86CATargetInfo>::get();
Expand Down
22 changes: 15 additions & 7 deletions llvm-15.0.3/llvm-crash-analyzer/lib/Analysis/TaintAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,10 @@ void crash_analyzer::TaintAnalysis::insertTaint(
StartCrashOrder = 0;
Node *sNode = new Node(MF->getCrashOrder(), &MI, DestTi, false);
std::shared_ptr<Node> startTaintNode(sNode);
// Set new node depth (crashNode has zero depth).
startTaintNode->Depth = crashNode->Depth + 1;
TaintDFG.addEdge(crashNode, startTaintNode, EdgeType::Dereference);
TaintDFG.updateLastTaintedNode(DestTi, startTaintNode);
TaintDFG.updateLastTaintedNode(DestTi.Op, startTaintNode);
}

printTaintList(TL);
Expand Down Expand Up @@ -708,8 +710,10 @@ void crash_analyzer::TaintAnalysis::addNewTaint(
if (addToTaintList(Ti, TL)) {
Node *sNode = new Node(MF->getCrashOrder(), &MI, Ti, false);
std::shared_ptr<Node> startTaintNode(sNode);
// Set new node depth (crashNode has zero depth).
startTaintNode->Depth = crashNode->Depth + 1;
TaintDFG.addEdge(crashNode, startTaintNode, EdgeType::Dereference);
TaintDFG.updateLastTaintedNode(Ti, startTaintNode);
TaintDFG.updateLastTaintedNode(Ti.Op, startTaintNode);
}
}

Expand Down Expand Up @@ -941,10 +945,12 @@ bool llvm::crash_analyzer::TaintAnalysis::propagateTaint(
if (CallMI)
constantNode->CallMI = CallMI;
std::shared_ptr<Node> constNode(constantNode);
auto &LastTaintedNodeForTheOp = TaintDFG.lastTaintedNode[Taint];
auto &LastTaintedNodeForTheOp = TaintDFG.lastTaintedNode[Taint.Op];
TaintDFG.addEdge(LastTaintedNodeForTheOp, constNode, EdgeType::Dereference);
// FIXME: The LastTaintedNode won't be used any more, no need for this line?
TaintDFG.updateLastTaintedNode(SrcTi, constNode);
TaintDFG.updateLastTaintedNode(SrcTi.Op, constNode);
// Set new node depth.
constNode->Depth = LastTaintedNodeForTheOp->Depth + 1;

// We have reached a terminating condition where
// dest is tainted and src is a constant operand.
Expand All @@ -959,9 +965,9 @@ bool llvm::crash_analyzer::TaintAnalysis::propagateTaint(
// TODO: Check if this should be a deref edge:
// if we propagate taint from a mem addr (e.g. rbx + 10)
// to its base reg (e.g. rbx).
assert(TaintDFG.lastTaintedNode.count(Taint) &&
assert(TaintDFG.lastTaintedNode.count(Taint.Op) &&
"Taint Op must be reached already");
auto &LastTaintedNodeForTheOp = TaintDFG.lastTaintedNode[Taint];
auto &LastTaintedNodeForTheOp = TaintDFG.lastTaintedNode[Taint.Op];

if (LastTaintedNodeForTheOp->TaintOp.Op->isReg() &&
LastTaintedNodeForTheOp->TaintOp.Offset &&
Expand All @@ -972,7 +978,9 @@ bool llvm::crash_analyzer::TaintAnalysis::propagateTaint(
EdgeType::Dereference);
else
TaintDFG.addEdge(LastTaintedNodeForTheOp, newTaintNode);
TaintDFG.updateLastTaintedNode(SrcTi, newTaintNode);
TaintDFG.updateLastTaintedNode(SrcTi.Op, newTaintNode);
// Set new node depth.
newTaintNode->Depth = LastTaintedNodeForTheOp->Depth + 1;

if (!BaseTaintFlag)
removeFromTaintList(Taint, TL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ void TaintDataFlowGraph::addEdge(std::shared_ptr<Node> src,
adjacencies[src.get()].push_back({dest.get(), e_type});
}

void TaintDataFlowGraph::updateLastTaintedNode(TaintInfo Op,
void TaintDataFlowGraph::updateLastTaintedNode(const MachineOperand *Op,
std::shared_ptr<Node> N) {
lastTaintedNode[Op] = N;
}
Expand Down Expand Up @@ -134,13 +134,19 @@ void TaintDataFlowGraph::findBlameFunction(Node *v) {
if (a->MI->getParent() == adjNode->MI->getParent() &&
!a->CallMI && !adjNode->CallMI) {
if (MDT->dominates(adjNode->MI, a->MI)) {
// Do not erase potential blame nodes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why didn't you terminate at the beginning of the loop (line 134) instead?
The terminating condition is repeated all over this loop, which makes it hard to understand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, I will move this condition to the beginning of the iteration.

if (a->TaintOp.DerefLevel == 0 && a->IsContant)
break;
BlameNodes.erase(BlameNodes.begin() + i);
break;
}
} else if (a->CallMI && !adjNode->CallMI) {
MDT = dominators[a->CallMI->getMF()];
if (a->CallMI->getParent() == adjNode->MI->getParent()) {
if (MDT->dominates(adjNode->MI, a->CallMI)) {
// Do not erase potential blame nodes.
if (a->TaintOp.DerefLevel == 0 && a->IsContant)
break;
BlameNodes.erase(BlameNodes.begin() + i);
break;
}
Expand All @@ -149,6 +155,9 @@ void TaintDataFlowGraph::findBlameFunction(Node *v) {
MDT = dominators[adjNode->CallMI->getMF()];
if (a->MI->getParent() == adjNode->CallMI->getParent()) {
if (MDT->dominates(adjNode->CallMI, a->MI)) {
// Do not erase potential blame nodes.
if (a->TaintOp.DerefLevel == 0 && a->IsContant)
break;
BlameNodes.erase(BlameNodes.begin() + i);
break;
}
Expand All @@ -159,6 +168,9 @@ void TaintDataFlowGraph::findBlameFunction(Node *v) {
a->CallMI->getParent() == adjNode->CallMI->getParent()) {
MDT = dominators[adjNode->CallMI->getMF()];
if (MDT->dominates(adjNode->CallMI, a->CallMI)) {
// Do not erase potential blame nodes.
if (a->TaintOp.DerefLevel == 0 && a->IsContant)
break;
BlameNodes.erase(BlameNodes.begin() + i);
break;
}
Expand Down Expand Up @@ -196,12 +208,26 @@ bool TaintDataFlowGraph::printBlameFunction(

StringRef BlameFn = "";
const MachineFunction *MF = nullptr;
auto &BlameNodes = blameNodes[MaxLevel];
auto &SortBlameNodes = blameNodes[MaxLevel];
llvm::SmallVector<StringRef, 8> BlameFns;
llvm::SmallVector<MachineFunction *, 8> MFs;

unsigned BlameLine = 0;
unsigned BlameColumn = 0;
// Sort blame Nodes by depth - descending.
std::sort(SortBlameNodes.begin(), SortBlameNodes.end(),
[](Node *n1, Node *n2) { return n1->Depth > n2->Depth; });
unsigned DepthLevel = 0;
llvm::SmallVector<Node *, 8> BlameNodes;
// Filter leaf nodes - consider zero DerefLevel and max depth.
for (auto &n : SortBlameNodes) {
if (n->TaintOp.DerefLevel != 0)
continue;
if (n->Depth < DepthLevel)
break;
DepthLevel = n->Depth;
BlameNodes.push_back(n);
}

for (auto &a : BlameNodes) {
// Only consider Node if its DerefLevel is zero.
Expand Down
36 changes: 36 additions & 0 deletions llvm-15.0.3/llvm-crash-analyzer/lib/Target/CATargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,39 @@ bool X86CATargetInfo::isBPRegister(std::string RegName) const {
return true;
return false;
}

// Return true if RegName is one of the register names that can be used to
// forward parameters. The accepted names are rdi, rsi, rdx, rcx, r8 and r9
// together with their narrower spellings (presumably the System V x86-64
// integer argument registers -- confirm against the targeted ABI).
// The comparison is exact, so RegName is expected in lower case.
bool X86CATargetInfo::isParamFwdRegister(std::string RegName) const {
  static const char *const ParamFwdRegNames[] = {
      "rdi", "edi", "di",  "dil", // first group
      "rsi", "esi", "si",  "sil", // second group
      "rdx", "edx", "dx",  "dl",  // third group
      "rcx", "ecx", "cx",  "cl",  // fourth group
      "r8",  "r8d", "r8w", "r8b", // fifth group
      "r9",  "r9d", "r9w", "r9b", // sixth group
  };

  for (const char *Candidate : ParamFwdRegNames) {
    if (RegName == Candidate)
      return true;
  }
  return false;
}

// Translate a register assembly name into the target's register number.
//
// The register info is obtained from the subtarget of the function that
// contains MI. Every register number known to that register info is then
// scanned, and the first one whose lower-cased asm name equals RegName is
// returned. RegName is compared exactly, so it must already be lower case.
//
// Returns None when the subtarget provides no register info or when no
// register carries the requested name.
Optional<unsigned> X86CATargetInfo::getRegister(std::string RegName,
                                                const MachineInstr *MI) const {
  const auto *TRI = MI->getMF()->getSubtarget().getRegisterInfo();
  if (!TRI)
    return None;

  // Bound the scan by the number of registers the target really defines.
  // A fixed upper limit (previously 1000) queries register numbers that do
  // not exist whenever the name is not found, which is an out-of-range
  // access into the register description table.
  const unsigned NumRegs = TRI->getNumRegs();
  for (unsigned Reg = 0; Reg < NumRegs; ++Reg) {
    if (TRI->getRegAsmName(Reg).lower() == RegName)
      return Reg;
  }
  return None;
}