如何将C函数映射到LLVM IR?

时间:2017-07-17 10:51:47

标签: c parsing llvm llvm-ir

我有这样一个需求:我有一个C文件,并为它生成了LLVM IR。针对生成的LLVM IR中的每条指令,我会计算其执行需要多少个周期。现在我的问题是:如何回溯到C代码,并显示某个特定C代码块(比如一个函数)对应的周期数(这个数值实际上是我根据生成的LLVM IR代码计算出来的)?

我的c代码如下:

int arithmeticOperations(int x, int y) /* Branches on x > 10 and returns the product aa * ab * ac * ad. */
{
    int aa, ab, ac, ad; /* declared without initializers; each branch below assigns only two of the four */
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
        for(x = 1; x <= aa; ++x) /* reuses parameter x as the loop counter, running 1..aa */
        {
            y += x; /* y is accumulated here but not read again before the return */
        }
    }
    else
    {
        ac = x*y;
        ad = x/y;       
    }
    return aa * ab * ac * ad; /* NOTE(review): reads the two locals the taken branch never assigned (indeterminate values) */
}

void arithmeticOperationsPart2(int x, int y) /* Computes branch-dependent arithmetic on x and y into locals; returns nothing. */
{
    int aa, ab, ac, ad; /* assigned below but never read anywhere in this function */
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
    }
    else
    {
        ac = x*y;
        ad = x/y;       
    }
}

int main() /* Calls both sample functions once with the fixed arguments (35, 7). */
{
    arithmeticOperations(35, 7); /* the returned int is discarded */
    arithmeticOperationsPart2(35, 7);
} /* no explicit return statement */

我正在使用命令创建LLVM IR:

clang -Os -S -emit-llvm addition.c

此输出addition.ll文件如下:

; ModuleID = 'addition.c'
source_filename = "addition.c"
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc18.0.0"

; Function Attrs: norecurse nounwind optsize readnone uwtable
define i32 @arithmeticOperations(i32, i32) local_unnamed_addr #0 {
  %3 = icmp sgt i32 %0, 10
  br i1 %3, label %4, label %7

; <label>:4:                                      ; preds = %2
  %5 = add nsw i32 %1, %0
  %6 = sub nsw i32 %0, %1
  br label %10

; <label>:7:                                      ; preds = %2
  %8 = mul nsw i32 %1, %0
  %9 = sdiv i32 %0, %1
  br label %10

; <label>:10:                                     ; preds = %4, %7
  %11 = phi i32 [ undef, %7 ], [ %5, %4 ]
  %12 = phi i32 [ undef, %7 ], [ %6, %4 ]
  %13 = phi i32 [ %8, %7 ], [ undef, %4 ]
  %14 = phi i32 [ %9, %7 ], [ undef, %4 ]
  %15 = mul nsw i32 %12, %11
  %16 = mul nsw i32 %15, %13
  %17 = mul nsw i32 %16, %14
  ret i32 %17
}

; Function Attrs: norecurse nounwind optsize readnone uwtable
define void @arithmeticOperationsPart2(i32, i32) local_unnamed_addr #0 {
  ret void
}

; Function Attrs: norecurse nounwind optsize readnone uwtable
define i32 @main() local_unnamed_addr #0 {
  ret i32 0
}

attributes #0 = { norecurse nounwind optsize readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 5.0.0 (trunk 302984) (llvm/trunk 302983)"}

现在我想把LLVM IR代码按照生成它的C源代码进行筛选(具体来说,是按函数筛选)。

例如(目前我想过滤c函数arithmeticOperations):

  %3 = icmp sgt i32 %0, 10
  br i1 %3, label %4, label %7

; <label>:4:                                      ; preds = %2
  %5 = add nsw i32 %1, %0
  %6 = sub nsw i32 %0, %1
  br label %10

; <label>:7:                                      ; preds = %2
  %8 = mul nsw i32 %1, %0
  %9 = sdiv i32 %0, %1
  br label %10

; <label>:10:                                     ; preds = %4, %7
  %11 = phi i32 [ undef, %7 ], [ %5, %4 ]
  %12 = phi i32 [ undef, %7 ], [ %6, %4 ]
  %13 = phi i32 [ %8, %7 ], [ undef, %4 ]
  %14 = phi i32 [ %9, %7 ], [ undef, %4 ]
  %15 = mul nsw i32 %12, %11
  %16 = mul nsw i32 %15, %13
  %17 = mul nsw i32 %16, %14
  ret i32 %17

对应于c代码的以下部分:

int aa, ab, ac, ad;
    if(x>10)
    {
        aa = x+y;
        ab = x-y;
        for(x = 1; x <= aa; ++x)
        {
            y += x;
        }
    }
    else
    {
        ac = x*y;
        ad = x/y;       
    }
    return aa * ab * ac * ad;

1 个答案:

答案 0 :(得分:1)

您可以通过添加-g标志告诉clang发出调试信息:

clang -Os -S -emit-llvm -g addition.c

然后你会在ll文件中看到大量信息,说明每条指令对应原始源代码的哪一行。

例如,arithmeticOperations函数的开头会被转换成如下形式,其中以!dbg !<number>结尾的行引用了对应的调试信息条目:

; Function Attrs: nounwind optsize readnone uwtable
define i32 @arithmeticOperations(i32 %x, i32 %y) local_unnamed_addr #0 !dbg !7 {
entry:
  tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !12, metadata !18), !dbg !19
  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !13, metadata !18), !dbg !20
  %cmp = icmp sgt i32 %x, 10, !dbg !21
  br i1 %cmp, label %if.then, label %if.else, !dbg !23

在文件的末尾会有许多“DILocation”条目告诉你相应的源代码在哪里:

...
!19 = !DILocation(line: 1, column: 37, scope: !7)
!20 = !DILocation(line: 1, column: 30, scope: !7)
!21 = !DILocation(line: 4, column: 9, scope: !22)
!22 = distinct !DILexicalBlock(scope: !7, file: !1, line: 4, column: 8)
!23 = !DILocation(line: 4, column: 8, scope: !7)

因此,如果您对此行的来源感兴趣:

%cmp = icmp sgt i32 %x, 10, !dbg !21

您必须查找调试条目!21:

!21 = !DILocation(line: 4, column: 9, scope: !22)

事实上,第4行正是if所在的位置:

4:    if(x>10)

Clang的调试信息非常精确,甚至精确到了第9列的'>'运算符。