LLVM内在函数

时间:2014-12-16 18:55:15

标签: llvm

使用LLVM构建项目时,某些函数调用会被替换为内在函数(intrinsic)。这种替换是由前端(例如clang)完成的,还是由LLVM后端完成的?

网上的讨论表明,内在函数替换与优化选项有关。那么这是否意味着,如果不使用优化选项,就不会发生内在函数替换?还是说,实际上存在一些无法禁用的默认内在函数替换?

如果有办法禁用所有内在函数,我应该怎么做?

1 个答案:

答案 0 :(得分:13)

视情况而定。在源代码中直接写出的内在函数由前端直接生成;而像llvm.memset这样的内在函数,则是在IR级别的优化过程中被引入代码的(这类优化既不由前端也不由后端执行)。

这是一个(非常愚蠢的)例子:

/*
 * Deliberately silly example used as compiler input for the IR listings:
 * zero-fills a local 8-element array, then rewrites each element from its
 * right-hand neighbour plus argc, and returns element 0.
 */
int main(int argc, char** argv)
{
        int a[8];

        /* Plain element-by-element zeroing loop over all 8 elements. */
        for (int i = 0; i != 8; ++i)
                a[i] = 0;

        /*
         * Walks the array from the last index down to 0.
         * NOTE(review): on the first iteration (i == 7) the right-hand side
         * reads a[8], which is one element past the end of a[8]. This is
         * left untouched because the IR listings were generated from exactly
         * this source.
         */
        for (int i = 7; i >= 0; --i)
                a[i] = a[i+1] + argc;

        return a[0];
}

使用clang 3.5(clang -S -emit-llvm)进行编译,您将得到以下不含任何内在函数的IR:

; Unoptimized IR for main: every local lives in an alloca slot and is accessed
; through explicit load/store instructions. The listing contains no call
; instruction, hence no intrinsic.
; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i8**, align 8
  %a = alloca [8 x i32], align 16
  %i = alloca i32, align 4
  %i1 = alloca i32, align 4
  store i32 0, i32* %1
  ; %2 and %3 receive the incoming argc and argv.
  store i32 %argc, i32* %2, align 4
  store i8** %argv, i8*** %3, align 8
  ; Counter of the first loop starts at 0.
  store i32 0, i32* %i, align 4
  br label %4

; First loop, condition: continue while %i != 8.
; <label>:4                                       ; preds = %11, %0
  %5 = load i32* %i, align 4
  %6 = icmp ne i32 %5, 8
  br i1 %6, label %7, label %14

; First loop, body: store 0 into element %i of %a.
; <label>:7                                       ; preds = %4
  %8 = load i32* %i, align 4
  %9 = sext i32 %8 to i64
  %10 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %9
  store i32 0, i32* %10, align 4
  br label %11

; First loop, step: %i = %i + 1.
; <label>:11                                      ; preds = %7
  %12 = load i32* %i, align 4
  %13 = add nsw i32 %12, 1
  store i32 %13, i32* %i, align 4
  br label %4

; Counter of the second loop (%i1) starts at 7.
; <label>:14                                      ; preds = %4
  store i32 7, i32* %i1, align 4
  br label %15

; Second loop, condition: continue while %i1 >= 0.
; <label>:15                                      ; preds = %29, %14
  %16 = load i32* %i1, align 4
  %17 = icmp sge i32 %16, 0
  br i1 %17, label %18, label %32

; Second loop, body: load element %i1 + 1, add the stored argc (%2), and store
; the sum into element %i1.
; <label>:18                                      ; preds = %15
  %19 = load i32* %i1, align 4
  %20 = add nsw i32 %19, 1
  %21 = sext i32 %20 to i64
  %22 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %21
  %23 = load i32* %22, align 4
  %24 = load i32* %2, align 4
  %25 = add nsw i32 %23, %24
  %26 = load i32* %i1, align 4
  %27 = sext i32 %26 to i64
  %28 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 %27
  store i32 %25, i32* %28, align 4
  br label %29

; Second loop, step: %i1 = %i1 - 1.
; <label>:29                                      ; preds = %18
  %30 = load i32* %i1, align 4
  %31 = add nsw i32 %30, -1
  store i32 %31, i32* %i1, align 4
  br label %15

; Exit: return element 0 of %a.
; <label>:32                                      ; preds = %15
  %33 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
  %34 = load i32* %33, align 4
  ret i32 %34
}

再次使用clang -emit-llvm -O1进行编译,您会看到:

; IR for main after -O1: the zeroing loop is gone and a single call to the
; llvm.memset intrinsic appears in its place; only one loop block remains.
; Function Attrs: nounwind readnone uwtable
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
.preheader:
  %a = alloca [8 x i32], align 16
  %a6 = bitcast [8 x i32]* %a to i8*
  ; Zero-fill of %a through the intrinsic: byte value 0, i64 32 = 8 x 4 bytes.
  call void @llvm.memset.p0i8.i64(i8* %a6, i8 0, i64 32, i32 4, i1 false)
  br label %0

; Remaining loop: %indvars.iv starts at 7 and is decremented by 1 each
; iteration; element iv + 1 is loaded, %argc is added, and the sum is stored
; into element iv. The loop repeats while the truncated iv is > 0.
; <label>:0                                       ; preds = %.preheader, %0
  %indvars.iv = phi i64 [ 7, %.preheader ], [ %indvars.iv.next, %0 ]
  %1 = add nsw i64 %indvars.iv, 1
  %2 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 %1
  %3 = load i32* %2, align 4, !tbaa !1
  %4 = add nsw i32 %3, %argc
  %5 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 %indvars.iv
  store i32 %4, i32* %5, align 4, !tbaa !1
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %6 = trunc i64 %indvars.iv to i32
  %7 = icmp sgt i32 %6, 0
  br i1 %7, label %0, label %8

; Exit: return element 0 of %a.
; <label>:8                                       ; preds = %0
  %9 = getelementptr inbounds [8 x i32]* %a, i64 0, i64 0
  %10 = load i32* %9, align 16, !tbaa !1
  ret i32 %10
}

初始化循环被llvm.memset内在函数替换了。后端可以自行决定如何处理内在函数,但通常会将llvm.memset降级(lower)为libc库函数调用。

回答您的第一个问题:是的,如果您没有优化代码,那么您的IR就不会获得内在函数。

要防止在代码中引入内在函数,您只需找出在IR上运行的那个优化pass,然后不要运行它。以下是一个相关问题,说明如何找出在IR上运行了哪些pass:Where to find the optimization sequence for clang -OX?

对于-O1,我们得到:

  

prune-eh -inline-cost -always-inline -functionattrs -sroa -domtree   -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -instcombine -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa -loop-rotate -licm -loop-unswitch -instcombine -scalar-evolution -lcssa -indvars -loop-idiom -loop-deletion -loop-unroll -memdep -memcpyopt -sccp -instcombine -lazy-value-info -jump-threading -correlated-propagation -domtree -memdep -dse -adce -simplifycfg -instcombine -barrier -domtree -loops -loop-simplify -lcssa -branch-prob -block-freq -scalar-evolution -loop-vectorize -instcombine -simplifycfg -strip-dead-prototypes -verify

一个大胆的猜测:是instcombine引入了llvm.memset。我去掉instcombine,用opt在未经优化的IR上运行其余的pass,得到了这个:

; IR for main produced without instcombine: straight-line code with no
; branches and no call instruction (so no llvm.memset). There are no stores of
; zero; what remains is eight load / add %argc / store groups, one per
; destination index from 7 down to 0.
; Function Attrs: nounwind readnone uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
  %a = alloca [8 x i32], align 16
  ; First group loads index 8 of the 8-element array (one past the last
  ; element) and stores the sum to index 7.
  %1 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 8
  %2 = load i32* %1, align 4
  %3 = add nsw i32 %2, %argc
  %4 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 7
  store i32 %3, i32* %4, align 4
  %5 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 7
  %6 = load i32* %5, align 4
  %7 = add nsw i32 %6, %argc
  %8 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 6
  store i32 %7, i32* %8, align 4
  %9 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 6
  %10 = load i32* %9, align 4
  %11 = add nsw i32 %10, %argc
  %12 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 5
  store i32 %11, i32* %12, align 4
  %13 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 5
  %14 = load i32* %13, align 4
  %15 = add nsw i32 %14, %argc
  %16 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 4
  store i32 %15, i32* %16, align 4
  %17 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 4
  %18 = load i32* %17, align 4
  %19 = add nsw i32 %18, %argc
  %20 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 3
  store i32 %19, i32* %20, align 4
  %21 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 3
  %22 = load i32* %21, align 4
  %23 = add nsw i32 %22, %argc
  %24 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 2
  store i32 %23, i32* %24, align 4
  %25 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 2
  %26 = load i32* %25, align 4
  %27 = add nsw i32 %26, %argc
  %28 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 1
  store i32 %27, i32* %28, align 4
  %29 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 1
  %30 = load i32* %29, align 4
  %31 = add nsw i32 %30, %argc
  %32 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
  store i32 %31, i32* %32, align 4
  ; Return element 0 of %a.
  %33 = getelementptr inbounds [8 x i32]* %a, i32 0, i64 0
  %34 = load i32* %33, align 4
  ret i32 %34
}

没有内在函数。因此,为了防止代码中出现内在函数(至少是memset),不要在IR上运行instcombine。然而,instcombine是一个强大的优化pass,确实能大幅精简代码。

现在您有两个选择:

  1. 不要使用会引入内在函数的优化pass
  2. 编写您自己的LLVM优化pass,把内在函数转换回它们原本所替换的代码,并在优化之后、后端开始工作之前运行它
我希望这能以某种方式帮助你。干杯!