最终目标:尝试使用LLVM生成与CFG相关的信息(例如拓扑排序)。
状态:我对LLVM还是很陌生,并且有点迷失-可以帮助我着手实现最终目标的任何信息或博客都很棒!
我的问题:在阅读Eli的代码和blog post之后,我首先获取了.ll文件并运行了代码,但没有结果。
这是.ll文件示例:
; ModuleID = 'CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp'
source_filename = "CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad" = type { i8* }
%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B" = type { i8* }
; Function Attrs: noinline optnone uwtable
define void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv() #0 {
%1 = alloca i8*, align 8
%2 = alloca [100 x i8], align 16
%3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad", align 8
%4 = bitcast [100 x i8]* %2 to i8*
call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
%5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
store i8* %5, i8** %1, align 8
%6 = load i8*, i8** %1, align 8
call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3, i8* %6)
call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3) #4
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*, i8*) unnamed_addr #2
; Function Attrs: nounwind
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*) unnamed_addr #3
; Function Attrs: noinline optnone uwtable
define void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv() #0 {
call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv()
ret void
}
; Function Attrs: noinline optnone uwtable
define internal void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv() #0 {
%1 = alloca i8*, align 8
%2 = alloca [100 x i8], align 16
%3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B", align 8
%4 = bitcast [100 x i8]* %2 to i8*
call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
%5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
store i8* %5, i8** %1, align 8
%6 = load i8*, i8** %1, align 8
call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3, i8* %6)
call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3) #4
ret void
}
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*, i8*) unnamed_addr #2
; Function Attrs: nounwind
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*) unnamed_addr #3
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (tags/RELEASE_500/final 375507)"}
这是生成排序信息的.cpp文件:
//------------------------------------------------------------------------------
// bb_toposort_sccs LLVM sample. Demonstrates:
//
// * How to implement DFS & topological sort over the control-flow graph (CFG)
// of a function.
// * How to use po_iterator for post-order iteration over basic blocks.
// * How to use scc_iterator for post-order iteration over strongly-connected
// components in the graph of basic blocks.
//
// Eli Bendersky (eliben@gmail.com)
// This code is in the public domain
//------------------------------------------------------------------------------
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Pass.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
using namespace llvm;
// Runs a topological sort on the basic blocks of the given function. Uses
// the simple recursive DFS from "Introduction to algorithms", with 3-coloring
// of vertices. The coloring enables detecting cycles in the graph with a simple
// test.
class TopoSorter {
public:
void runToposort(const Function &F) {
outs() << "Topological sort of " << F.getName() << ":\n";
// Initialize the color map by marking all the vertices white.
for (Function::const_iterator I = F.begin(), IE = F.end(); I != IE; ++I) {
ColorMap[&*I] = TopoSorter::WHITE;
}
// The BB graph has a single entry vertex from which the other BBs should
// be discoverable - the function entry block.
bool success = recursiveDFSToposort(&F.getEntryBlock());
if (success) {
// Now we have all the BBs inside SortedBBs in reverse topological order.
for (BBVector::const_reverse_iterator RI = SortedBBs.rbegin(),
RE = SortedBBs.rend();
RI != RE; ++RI) {
outs() << " " << (*RI)->getName() << "\n";
}
} else {
outs() << " Sorting failed\n";
}
}
private:
enum Color { WHITE, GREY, BLACK };
// Color marks per vertex (BB).
typedef DenseMap<const BasicBlock *, Color> BBColorMap;
// Collects vertices (BBs) in "finish" order. The first finished vertex is
// first, and so on.
typedef SmallVector<const BasicBlock *, 32> BBVector;
BBColorMap ColorMap;
BBVector SortedBBs;
// Helper function to recursively run topological sort from a given BB.
// Returns true if the sort succeeded and false otherwise; topological sort
// may fail if, for example, the graph is not a DAG (detected a cycle).
bool recursiveDFSToposort(const BasicBlock *BB) {
ColorMap[BB] = TopoSorter::GREY;
// For demonstration, using the lowest-level APIs here. A BB's successors
// are determined by looking at its terminator instruction.
const TerminatorInst *TInst = BB->getTerminator();
for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; ++I) {
BasicBlock *Succ = TInst->getSuccessor(I);
Color SuccColor = ColorMap[Succ];
if (SuccColor == TopoSorter::WHITE) {
if (!recursiveDFSToposort(Succ))
return false;
} else if (SuccColor == TopoSorter::GREY) {
// This detects a cycle because grey vertices are all ancestors of the
// currently explored vertex (in other words, they're "on the stack").
outs() << " Detected cycle: edge from " << BB->getName() << " to "
<< Succ->getName() << "\n";
return false;
}
}
// This BB is finished (fully explored), so we can add it to the vector.
ColorMap[BB] = TopoSorter::BLACK;
SortedBBs.push_back(BB);
return true;
}
};
class AnalyzeBBGraph : public FunctionPass {
public:
AnalyzeBBGraph(const std::string &AnalysisKind)
: FunctionPass(ID), AnalysisKind(AnalysisKind) {}
virtual bool runOnFunction(Function &F) {
if (AnalysisKind == "-topo") {
TopoSorter TS;
TS.runToposort(F);
} else if (AnalysisKind == "-po") {
// Use LLVM's post-order iterator to produce a reverse topological sort.
// Note that this doesn't detect cycles so if the graph is not a DAG, the
// result is not a true topological sort.
outs() << "Basic blocks of " << F.getName() << " in post-order:\n";
for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
IE = po_end(&F.getEntryBlock());
I != IE; ++I) {
outs() << " " << (*I)->getName() << "\n";
}
} else if (AnalysisKind == "-scc") {
// Use LLVM's Strongly Connected Components (SCCs) iterator to produce
// a reverse topological sort of SCCs.
outs() << "SCCs for " << F.getName() << " in post-order:\n";
for (scc_iterator<Function *> I = scc_begin(&F), IE = scc_end(&F);
I != IE; ++I) {
// Obtain the vector of BBs in this SCC and print it out.
const std::vector<BasicBlock *> &SCCBBs = *I;
outs() << " SCC: ";
for (std::vector<BasicBlock *>::const_iterator BBI = SCCBBs.begin(),
BBIE = SCCBBs.end();
BBI != BBIE; ++BBI) {
outs() << (*BBI)->getName() << " ";
}
outs() << "\n";
}
} else {
outs() << "Unknown analysis kind: " << AnalysisKind << "\n";
}
return false;
}
// The address of this member is used to uniquely identify the class. This is
// used by LLVM's own RTTI mechanism.
static char ID;
private:
std::string AnalysisKind;
};
char AnalyzeBBGraph::ID = 0;
int main(int argc, char **argv) {
if (argc < 3) {
// Using very basic command-line argument parsing here...
errs() << "Usage: " << argv[0] << " -[topo|po|scc] <IR file>\n";
return 1;
}
// Parse the input LLVM IR file into a module.
SMDiagnostic Err;
LLVMContext Context;
std::unique_ptr<Module> Mod(parseIRFile(argv[2], Err, Context));
if (!Mod) {
Err.print(argv[0], errs());
return 1;
}
// Create a pass manager and fill it with the passes we want to run.
legacy::PassManager PM;
PM.add(new AnalyzeBBGraph(std::string(argv[1])));
PM.run(*Mod);
return 0;
}
这是我尝试获取拓扑排序时的结果:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv:
Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv:
我尝试了其他方法,但发现所有的“ getName()”打印内容都不存在, 我的代码有问题吗?llvm IR有什么问题吗?
for(BasicBlock &BB : F){
outs() << "BB:" <<BB.getName() <<" has" <<BB.size() <<"instructions.\n";
for(Instruction &I : BB){
outs() << "details: "<<I <<"name: "<<I.getName() <<"\n";
for(Use &U : I.operands()){
Value *v = U.get();
outs() <<"value:"<< v<<"name:" << v->getName() <<"\n";
}
}
}
任何想法都值得赞赏!
答案 0 :(得分:0)
您可能需要在获取IR(.ll文件)后运行instruction namer,以便将名称分配给基本块和寄存器,因为目前,除了基本名称和寄存器外,它们没有其他名称。他们的数字分配(例如%1
等)。您可以使用以下命令运行它:
opt -instnamer foo.bc -o foo-named.bc
这3个函数定义中只有一个基本块,因此不会有任何有趣的事情,但这是一个好的开始。
在继续操作之前,您可能想解决的另一点是优化级别。您所有的函数都带有optnone
注释,这将禁止其他LLVM传递进行任何类型的优化。一种获取未优化IR的更灵活的方法,但可以进行进一步优化的方法是使用:
clang -emit-llvm -O1 -Xclang -disable-llvm-passes foo.c