为什么无法对控制流图(CFG)进行拓扑排序?

时间:2019-12-01 05:24:04

标签: c++ llvm llvm-c++-api control-flow-graph

最终目标:尝试使用LLVM生成与CFG相关的信息(例如拓扑排序)。

状态:我对LLVM还是很陌生,并且有点迷失-可以帮助我着手实现最终目标的任何信息或博客都很棒!

我的问题:在阅读Eli的代码和blog post之后,我首先获取了.ll文件并运行了代码,但没有结果。

这是.ll文件示例:

; ModuleID = 'CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp'
source_filename = "CWE15_External_Control_of_System_or_Configuration_Setting__w32_83a.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad" = type { i8* }
%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B" = type { i8* }

; Function Attrs: noinline optnone uwtable
define void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv() #0 {
  %1 = alloca i8*, align 8
  %2 = alloca [100 x i8], align 16
  %3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad", align 8
  %4 = bitcast [100 x i8]* %2 to i8*
  call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
  %5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
  store i8* %5, i8** %1, align 8
  %6 = load i8*, i8** %1, align 8
  call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3, i8* %6)
  call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"* %3) #4
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1

declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*, i8*) unnamed_addr #2

; Function Attrs: nounwind
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8369CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_badD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_bad"*) unnamed_addr #3

; Function Attrs: noinline optnone uwtable
define void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv() #0 {
  call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv()
  ret void
}

; Function Attrs: noinline optnone uwtable
define internal void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv() #0 {
  %1 = alloca i8*, align 8
  %2 = alloca [100 x i8], align 16
  %3 = alloca %"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B", align 8
  %4 = bitcast [100 x i8]* %2 to i8*
  call void @llvm.memset.p0i8.i64(i8* %4, i8 0, i64 100, i32 16, i1 false)
  %5 = getelementptr inbounds [100 x i8], [100 x i8]* %2, i32 0, i32 0
  store i8* %5, i8** %1, align 8
  %6 = load i8*, i8** %1, align 8
  call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3, i8* %6)
  call void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"* %3) #4
  ret void
}

declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BC1EPc(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*, i8*) unnamed_addr #2

; Function Attrs: nounwind
declare void @_ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_8373CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2BD1Ev(%"class.CWE15_External_Control_of_System_or_Configuration_Setting__w32_83::CWE15_External_Control_of_System_or_Configuration_Setting__w32_83_goodG2B"*) unnamed_addr #3

attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 5.0.0 (tags/RELEASE_500/final 375507)"}

这是生成排序信息的.cpp文件:

//------------------------------------------------------------------------------
// bb_toposort_sccs LLVM sample. Demonstrates:
//
// * How to implement DFS & topological sort over the control-flow graph (CFG)
//   of a function.
// * How to use po_iterator for post-order iteration over basic blocks.
// * How to use scc_iterator for post-order iteration over strongly-connected
//   components in the graph of basic blocks.
//
// Eli Bendersky (eliben@gmail.com)
// This code is in the public domain
//------------------------------------------------------------------------------
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Pass.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>

using namespace llvm;

// Runs a topological sort on the basic blocks of the given function. Uses
// the simple recursive DFS from "Introduction to algorithms", with 3-coloring
// of vertices. The coloring enables detecting cycles in the graph with a simple
// test.
class TopoSorter {
public:
  void runToposort(const Function &F) {
    outs() << "Topological sort of " << F.getName() << ":\n";
    // Initialize the color map by marking all the vertices white.
    for (Function::const_iterator I = F.begin(), IE = F.end(); I != IE; ++I) {
      ColorMap[&*I] = TopoSorter::WHITE;
    }

    // The BB graph has a single entry vertex from which the other BBs should
    // be discoverable - the function entry block.
    bool success = recursiveDFSToposort(&F.getEntryBlock());
    if (success) {
      // Now we have all the BBs inside SortedBBs in reverse topological order.
      for (BBVector::const_reverse_iterator RI = SortedBBs.rbegin(),
                                            RE = SortedBBs.rend();
           RI != RE; ++RI) {
        outs() << "  " << (*RI)->getName() << "\n";
      }
    } else {
      outs() << "  Sorting failed\n";
    }
  }

private:
  enum Color { WHITE, GREY, BLACK };
  // Color marks per vertex (BB).
  typedef DenseMap<const BasicBlock *, Color> BBColorMap;
  // Collects vertices (BBs) in "finish" order. The first finished vertex is
  // first, and so on.
  typedef SmallVector<const BasicBlock *, 32> BBVector;
  BBColorMap ColorMap;
  BBVector SortedBBs;

  // Helper function to recursively run topological sort from a given BB.
  // Returns true if the sort succeeded and false otherwise; topological sort
  // may fail if, for example, the graph is not a DAG (detected a cycle).
  bool recursiveDFSToposort(const BasicBlock *BB) {
    ColorMap[BB] = TopoSorter::GREY;
    // For demonstration, using the lowest-level APIs here. A BB's successors
    // are determined by looking at its terminator instruction.
    const TerminatorInst *TInst = BB->getTerminator();
    for (unsigned I = 0, NSucc = TInst->getNumSuccessors(); I < NSucc; ++I) {
      BasicBlock *Succ = TInst->getSuccessor(I);
      Color SuccColor = ColorMap[Succ];
      if (SuccColor == TopoSorter::WHITE) {
        if (!recursiveDFSToposort(Succ))
          return false;
      } else if (SuccColor == TopoSorter::GREY) {
        // This detects a cycle because grey vertices are all ancestors of the
        // currently explored vertex (in other words, they're "on the stack").
        outs() << "  Detected cycle: edge from " << BB->getName() << " to "
               << Succ->getName() << "\n";
        return false;
      }
    }
    // This BB is finished (fully explored), so we can add it to the vector.
    ColorMap[BB] = TopoSorter::BLACK;
    SortedBBs.push_back(BB);
    return true;
  }
};

class AnalyzeBBGraph : public FunctionPass {
public:
  AnalyzeBBGraph(const std::string &AnalysisKind)
      : FunctionPass(ID), AnalysisKind(AnalysisKind) {}

  virtual bool runOnFunction(Function &F) {
    if (AnalysisKind == "-topo") {
      TopoSorter TS;
      TS.runToposort(F);
    } else if (AnalysisKind == "-po") {
      // Use LLVM's post-order iterator to produce a reverse topological sort.
      // Note that this doesn't detect cycles so if the graph is not a DAG, the
      // result is not a true topological sort.
      outs() << "Basic blocks of " << F.getName() << " in post-order:\n";
      for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
                                     IE = po_end(&F.getEntryBlock());
           I != IE; ++I) {
        outs() << "  " << (*I)->getName() << "\n";
      }
    } else if (AnalysisKind == "-scc") {
      // Use LLVM's Strongly Connected Components (SCCs) iterator to produce
      // a reverse topological sort of SCCs.
      outs() << "SCCs for " << F.getName() << " in post-order:\n";
      for (scc_iterator<Function *> I = scc_begin(&F), IE = scc_end(&F);
           I != IE; ++I) {
        // Obtain the vector of BBs in this SCC and print it out.
        const std::vector<BasicBlock *> &SCCBBs = *I;
        outs() << "  SCC: ";
        for (std::vector<BasicBlock *>::const_iterator BBI = SCCBBs.begin(),
                                                       BBIE = SCCBBs.end();
             BBI != BBIE; ++BBI) {
          outs() << (*BBI)->getName() << "  ";
        }
        outs() << "\n";
      }
    } else {
      outs() << "Unknown analysis kind: " << AnalysisKind << "\n";
    }
    return false;
  }

  // The address of this member is used to uniquely identify the class. This is
  // used by LLVM's own RTTI mechanism.
  static char ID;

private:
  std::string AnalysisKind;
};

char AnalyzeBBGraph::ID = 0;

int main(int argc, char **argv) {
  if (argc < 3) {
    // Using very basic command-line argument parsing here...
    errs() << "Usage: " << argv[0] << " -[topo|po|scc] <IR file>\n";
    return 1;
  }

  // Parse the input LLVM IR file into a module.
  SMDiagnostic Err;
  LLVMContext Context;
  std::unique_ptr<Module> Mod(parseIRFile(argv[2], Err, Context));
  if (!Mod) {
    Err.print(argv[0], errs());
    return 1;
  }

  // Create a pass manager and fill it with the passes we want to run.
  legacy::PassManager PM;
  PM.add(new AnalyzeBBGraph(std::string(argv[1])));
  PM.run(*Mod);

  return 0;
}

这是我尝试获取拓扑排序时的结果:

Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_833badEv:

Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_834goodEv:

Topological sort of _ZN65CWE15_External_Control_of_System_or_Configuration_Setting__w32_83L7goodG2BEv:

我尝试了其他方法,但发现所有的“ getName()”打印内容都不存在, 我的代码有问题吗?llvm IR有什么问题吗?

for(BasicBlock &BB : F){
      outs() << "BB:" <<BB.getName() <<" has" <<BB.size() <<"instructions.\n"; 
      for(Instruction &I : BB){
        outs() << "details: "<<I <<"name: "<<I.getName() <<"\n";
        for(Use &U : I.operands()){
           Value *v = U.get();
           outs() <<"value:"<< v<<"name:" << v->getName() <<"\n";
        }
      }

    }

任何想法都值得赞赏!

1 个答案:

答案 0 :(得分:0)

您可能需要在获取IR(.ll文件)后运行instruction namer,以便将名称分配给基本块和寄存器,因为目前,除了基本名称和寄存器外,它们没有其他名称。他们的数字分配(例如%1等)。您可以使用以下命令运行它:

opt -instnamer foo.bc -o foo-named.bc

这3个函数定义中只有一个基本块,因此不会有任何有趣的事情,但这是一个好的开始。

在继续操作之前,您可能想解决的另一点是优化级别。您所有的函数都带有optnone注释,这将禁止其他LLVM传递进行任何类型的优化。一种获取未优化IR的更灵活的方法,但可以进行进一步优化的方法是使用:

clang -emit-llvm -O1 -Xclang -disable-llvm-passes foo.c