我有一段代码,想用不同的展开因子(unroll factor)对其中的循环进行展开,然后查看生成的 bitcode。为此,我执行了以下步骤:
1)我首先使用clang编译代码:
clang -O0 -S -emit-llvm trainingCode.cpp -o trainingCode.ll
2)然后我对生成的 bitcode 运行了若干个优化 pass(按照原文中 “this” 链接的建议):
opt -mem2reg -simplifycfg -loops -lcssa -loop-simplify -loop-rotate -inline -inline-threshold=1000000 trainingCode.ll -o trainingCode.bc > /dev/null
3)最后,我运行循环展开(loop-unroll)pass:
opt -loop-unroll -unroll-count=2 -unroll-allow-partial trainingCode.bc -o unrolledTrainingCode.bc > /dev/null
然后我用 1 到 4 的各个展开因子重复上述步骤。
对于下面这样一段简单的代码,展开结果完全符合我的预期:
#include <math.h>
// Small-body reproducer: a counted loop whose body is a few float operations
// on a fresh local, accumulated into `checksum`.
int main() {
// Declared volatile; presumably so the accumulation is not optimized away — confirm.
volatile float checksum = 0.0;
// i runs from 0 to 9 and is not modified in the body, so the loop executes 10 times.
for (int i = 0; i < 10; i++) {
// fff is re-initialized every iteration, so each iteration computes the same value.
float fff = 0.112345;
fff *= fff;
fff += 1.13;
checksum += fff/10000;
}
}
但是当我把循环体的复杂度/大小增加到下面这种程度时,就会出现非常奇怪的行为:
#include <math.h>
// Large-body reproducer: the same counted loop, but the body repeats the step
// `fff *= sqrt(fff) + fff;` ten times, each followed by adding a constant.
int main() {
// Declared volatile; presumably so the accumulation is not optimized away — confirm.
volatile float checksum = 0.0;
// i runs from 0 to 9 and is not modified in the body, so the loop executes 10 times.
for (int i = 0; i < 10; i++) {
// fff is re-initialized every iteration; only the final checksum line reads i.
float fff = 0.112345;
fff *= sqrt(fff) + fff;
fff += 1.13;
fff *= sqrt(fff) + fff;
fff += 17.16;
fff *= sqrt(fff) + fff;
fff += 15.13;
fff *= sqrt(fff) + fff;
fff += 21.13;
fff *= sqrt(fff) + fff;
fff += 81.13;
fff *= sqrt(fff) + fff;
fff += 11.13;
fff *= sqrt(fff) + fff;
fff += 81.13;
fff *= sqrt(fff) + fff;
fff += 11.13;
fff *= sqrt(fff) + fff;
fff += 91.13;
fff *= sqrt(fff) + fff;
fff += 11.13;
// NOTE(review): this line adds the loop index i, but the IR excerpt shown with
// this listing divides the truncated value directly, with no add of i visible —
// verify the excerpt was generated from this exact source.
checksum += (fff + i)/10000;
}
}
对于 1 到 2 的展开因子,一切正常;但如果我尝试使用大于 2 的展开因子,LLVM 就会把循环完全展开。任何循环体足够大的循环都会出现这种情况。例如,对于上面的代码,使用任何大于 2 的展开因子得到的 bitcode 如下(摘录):
; ModuleID = 'unrolledtrainingCode3.bc'
source_filename = "p1HighComplexity.cpp"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse uwtable
define i32 @main() #0 {
entry:
%checksum = alloca float, align 4
store volatile float 0.000000e+00, float* %checksum, align 4
br label %for.body
for.body: ; preds = %entry
%call.i = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add = fadd float %call.i, 0x3FBCC2A460000000
%mul = fmul float 0x3FBCC2A460000000, %add
%conv = fpext float %mul to double
%add1 = fadd double %conv, 1.130000e+00
%conv2 = fptrunc double %add1 to float
%call.i2 = call float @sqrtf(float %conv2) #2
%add4 = fadd float %call.i2, %conv2
%mul5 = fmul float %conv2, %add4
%conv6 = fpext float %mul5 to double
%add7 = fadd double %conv6, 1.716000e+01
%conv8 = fptrunc double %add7 to float
%call.i3 = call float @sqrtf(float %conv8) #2
%add10 = fadd float %call.i3, %conv8
%mul11 = fmul float %conv8, %add10
%conv12 = fpext float %mul11 to double
%add13 = fadd double %conv12, 1.513000e+01
%conv14 = fptrunc double %add13 to float
%call.i4 = call float @sqrtf(float %conv14) #2
%add16 = fadd float %call.i4, %conv14
%mul17 = fmul float %conv14, %add16
%conv18 = fpext float %mul17 to double
%add19 = fadd double %conv18, 2.113000e+01
%conv20 = fptrunc double %add19 to float
%call.i5 = call float @sqrtf(float %conv20) #2
%add22 = fadd float %call.i5, %conv20
%mul23 = fmul float %conv20, %add22
%conv24 = fpext float %mul23 to double
%add25 = fadd double %conv24, 0x40544851EB851EB8
%conv26 = fptrunc double %add25 to float
%call.i6 = call float @sqrtf(float %conv26) #2
%add28 = fadd float %call.i6, %conv26
%mul29 = fmul float %conv26, %add28
%conv30 = fpext float %mul29 to double
%add31 = fadd double %conv30, 1.113000e+01
%conv32 = fptrunc double %add31 to float
%call.i7 = call float @sqrtf(float %conv32) #2
%add34 = fadd float %call.i7, %conv32
%mul35 = fmul float %conv32, %add34
%conv36 = fpext float %mul35 to double
%add37 = fadd double %conv36, 0x40544851EB851EB8
%conv38 = fptrunc double %add37 to float
%call.i8 = call float @sqrtf(float %conv38) #2
%add40 = fadd float %call.i8, %conv38
%mul41 = fmul float %conv38, %add40
%conv42 = fpext float %mul41 to double
%add43 = fadd double %conv42, 1.113000e+01
%conv44 = fptrunc double %add43 to float
%call.i9 = call float @sqrtf(float %conv44) #2
%add46 = fadd float %call.i9, %conv44
%mul47 = fmul float %conv44, %add46
%conv48 = fpext float %mul47 to double
%add49 = fadd double %conv48, 0x4056C851EB851EB8
%conv50 = fptrunc double %add49 to float
%call.i10 = call float @sqrtf(float %conv50) #2
%add52 = fadd float %call.i10, %conv50
%mul53 = fmul float %conv50, %add52
%conv54 = fpext float %mul53 to double
%add55 = fadd double %conv54, 1.113000e+01
%conv56 = fptrunc double %add55 to float
%div = fdiv float %conv56, 1.000000e+04
%0 = load volatile float, float* %checksum, align 4
%add57 = fadd float %0, %div
store volatile float %add57, float* %checksum, align 4
%call.i.1 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.1 = fadd float %call.i.1, 0x3FBCC2A460000000
%mul.1 = fmul float 0x3FBCC2A460000000, %add.1
%conv.1 = fpext float %mul.1 to double
%add1.1 = fadd double %conv.1, 1.130000e+00
%conv2.1 = fptrunc double %add1.1 to float
%call.i2.1 = call float @sqrtf(float %conv2.1) #2
%add4.1 = fadd float %call.i2.1, %conv2.1
%mul5.1 = fmul float %conv2.1, %add4.1
%conv6.1 = fpext float %mul5.1 to double
%add7.1 = fadd double %conv6.1, 1.716000e+01
%conv8.1 = fptrunc double %add7.1 to float
%call.i3.1 = call float @sqrtf(float %conv8.1) #2
%add10.1 = fadd float %call.i3.1, %conv8.1
%mul11.1 = fmul float %conv8.1, %add10.1
%conv12.1 = fpext float %mul11.1 to double
%add13.1 = fadd double %conv12.1, 1.513000e+01
%conv14.1 = fptrunc double %add13.1 to float
%call.i4.1 = call float @sqrtf(float %conv14.1) #2
%add16.1 = fadd float %call.i4.1, %conv14.1
%mul17.1 = fmul float %conv14.1, %add16.1
%conv18.1 = fpext float %mul17.1 to double
%add19.1 = fadd double %conv18.1, 2.113000e+01
%conv20.1 = fptrunc double %add19.1 to float
%call.i5.1 = call float @sqrtf(float %conv20.1) #2
%add22.1 = fadd float %call.i5.1, %conv20.1
%mul23.1 = fmul float %conv20.1, %add22.1
%conv24.1 = fpext float %mul23.1 to double
%add25.1 = fadd double %conv24.1, 0x40544851EB851EB8
%conv26.1 = fptrunc double %add25.1 to float
%call.i6.1 = call float @sqrtf(float %conv26.1) #2
%add28.1 = fadd float %call.i6.1, %conv26.1
%mul29.1 = fmul float %conv26.1, %add28.1
%conv30.1 = fpext float %mul29.1 to double
%add31.1 = fadd double %conv30.1, 1.113000e+01
%conv32.1 = fptrunc double %add31.1 to float
%call.i7.1 = call float @sqrtf(float %conv32.1) #2
%add34.1 = fadd float %call.i7.1, %conv32.1
%mul35.1 = fmul float %conv32.1, %add34.1
%conv36.1 = fpext float %mul35.1 to double
%add37.1 = fadd double %conv36.1, 0x40544851EB851EB8
%conv38.1 = fptrunc double %add37.1 to float
%call.i8.1 = call float @sqrtf(float %conv38.1) #2
%add40.1 = fadd float %call.i8.1, %conv38.1
%mul41.1 = fmul float %conv38.1, %add40.1
%conv42.1 = fpext float %mul41.1 to double
%add43.1 = fadd double %conv42.1, 1.113000e+01
%conv44.1 = fptrunc double %add43.1 to float
%call.i9.1 = call float @sqrtf(float %conv44.1) #2
%add46.1 = fadd float %call.i9.1, %conv44.1
%mul47.1 = fmul float %conv44.1, %add46.1
%conv48.1 = fpext float %mul47.1 to double
%add49.1 = fadd double %conv48.1, 0x4056C851EB851EB8
%conv50.1 = fptrunc double %add49.1 to float
%call.i10.1 = call float @sqrtf(float %conv50.1) #2
%add52.1 = fadd float %call.i10.1, %conv50.1
%mul53.1 = fmul float %conv50.1, %add52.1
%conv54.1 = fpext float %mul53.1 to double
%add55.1 = fadd double %conv54.1, 1.113000e+01
%conv56.1 = fptrunc double %add55.1 to float
%div.1 = fdiv float %conv56.1, 1.000000e+04
%1 = load volatile float, float* %checksum, align 4
%add57.1 = fadd float %1, %div.1
store volatile float %add57.1, float* %checksum, align 4
%call.i.2 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.2 = fadd float %call.i.2, 0x3FBCC2A460000000
%mul.2 = fmul float 0x3FBCC2A460000000, %add.2
%conv.2 = fpext float %mul.2 to double
%add1.2 = fadd double %conv.2, 1.130000e+00
%conv2.2 = fptrunc double %add1.2 to float
%call.i2.2 = call float @sqrtf(float %conv2.2) #2
%add4.2 = fadd float %call.i2.2, %conv2.2
%mul5.2 = fmul float %conv2.2, %add4.2
%conv6.2 = fpext float %mul5.2 to double
%add7.2 = fadd double %conv6.2, 1.716000e+01
%conv8.2 = fptrunc double %add7.2 to float
%call.i3.2 = call float @sqrtf(float %conv8.2) #2
%add10.2 = fadd float %call.i3.2, %conv8.2
%mul11.2 = fmul float %conv8.2, %add10.2
%conv12.2 = fpext float %mul11.2 to double
%add13.2 = fadd double %conv12.2, 1.513000e+01
%conv14.2 = fptrunc double %add13.2 to float
%call.i4.2 = call float @sqrtf(float %conv14.2) #2
%add16.2 = fadd float %call.i4.2, %conv14.2
%mul17.2 = fmul float %conv14.2, %add16.2
%conv18.2 = fpext float %mul17.2 to double
%add19.2 = fadd double %conv18.2, 2.113000e+01
%conv20.2 = fptrunc double %add19.2 to float
%call.i5.2 = call float @sqrtf(float %conv20.2) #2
%add22.2 = fadd float %call.i5.2, %conv20.2
%mul23.2 = fmul float %conv20.2, %add22.2
%conv24.2 = fpext float %mul23.2 to double
%add25.2 = fadd double %conv24.2, 0x40544851EB851EB8
%conv26.2 = fptrunc double %add25.2 to float
%call.i6.2 = call float @sqrtf(float %conv26.2) #2
%add28.2 = fadd float %call.i6.2, %conv26.2
%mul29.2 = fmul float %conv26.2, %add28.2
%conv30.2 = fpext float %mul29.2 to double
%add31.2 = fadd double %conv30.2, 1.113000e+01
%conv32.2 = fptrunc double %add31.2 to float
%call.i7.2 = call float @sqrtf(float %conv32.2) #2
%add34.2 = fadd float %call.i7.2, %conv32.2
%mul35.2 = fmul float %conv32.2, %add34.2
%conv36.2 = fpext float %mul35.2 to double
%add37.2 = fadd double %conv36.2, 0x40544851EB851EB8
%conv38.2 = fptrunc double %add37.2 to float
%call.i8.2 = call float @sqrtf(float %conv38.2) #2
%add40.2 = fadd float %call.i8.2, %conv38.2
%mul41.2 = fmul float %conv38.2, %add40.2
%conv42.2 = fpext float %mul41.2 to double
%add43.2 = fadd double %conv42.2, 1.113000e+01
%conv44.2 = fptrunc double %add43.2 to float
%call.i9.2 = call float @sqrtf(float %conv44.2) #2
%add46.2 = fadd float %call.i9.2, %conv44.2
%mul47.2 = fmul float %conv44.2, %add46.2
%conv48.2 = fpext float %mul47.2 to double
%add49.2 = fadd double %conv48.2, 0x4056C851EB851EB8
%conv50.2 = fptrunc double %add49.2 to float
%call.i10.2 = call float @sqrtf(float %conv50.2) #2
%add52.2 = fadd float %call.i10.2, %conv50.2
%mul53.2 = fmul float %conv50.2, %add52.2
%conv54.2 = fpext float %mul53.2 to double
%add55.2 = fadd double %conv54.2, 1.113000e+01
%conv56.2 = fptrunc double %add55.2 to float
%div.2 = fdiv float %conv56.2, 1.000000e+04
%2 = load volatile float, float* %checksum, align 4
%add57.2 = fadd float %2, %div.2
store volatile float %add57.2, float* %checksum, align 4
%call.i.3 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.3 = fadd float %call.i.3, 0x3FBCC2A460000000
%mul.3 = fmul float 0x3FBCC2A460000000, %add.3
%conv.3 = fpext float %mul.3 to double
%add1.3 = fadd double %conv.3, 1.130000e+00
%conv2.3 = fptrunc double %add1.3 to float
%call.i2.3 = call float @sqrtf(float %conv2.3) #2
%add4.3 = fadd float %call.i2.3, %conv2.3
%mul5.3 = fmul float %conv2.3, %add4.3
%conv6.3 = fpext float %mul5.3 to double
%add7.3 = fadd double %conv6.3, 1.716000e+01
%conv8.3 = fptrunc double %add7.3 to float
%call.i3.3 = call float @sqrtf(float %conv8.3) #2
%add10.3 = fadd float %call.i3.3, %conv8.3
%mul11.3 = fmul float %conv8.3, %add10.3
%conv12.3 = fpext float %mul11.3 to double
%add13.3 = fadd double %conv12.3, 1.513000e+01
%conv14.3 = fptrunc double %add13.3 to float
%call.i4.3 = call float @sqrtf(float %conv14.3) #2
%add16.3 = fadd float %call.i4.3, %conv14.3
%mul17.3 = fmul float %conv14.3, %add16.3
%conv18.3 = fpext float %mul17.3 to double
%add19.3 = fadd double %conv18.3, 2.113000e+01
%conv20.3 = fptrunc double %add19.3 to float
%call.i5.3 = call float @sqrtf(float %conv20.3) #2
%add22.3 = fadd float %call.i5.3, %conv20.3
%mul23.3 = fmul float %conv20.3, %add22.3
%conv24.3 = fpext float %mul23.3 to double
%add25.3 = fadd double %conv24.3, 0x40544851EB851EB8
%conv26.3 = fptrunc double %add25.3 to float
%call.i6.3 = call float @sqrtf(float %conv26.3) #2
%add28.3 = fadd float %call.i6.3, %conv26.3
%mul29.3 = fmul float %conv26.3, %add28.3
%conv30.3 = fpext float %mul29.3 to double
%add31.3 = fadd double %conv30.3, 1.113000e+01
%conv32.3 = fptrunc double %add31.3 to float
%call.i7.3 = call float @sqrtf(float %conv32.3) #2
%add34.3 = fadd float %call.i7.3, %conv32.3
%mul35.3 = fmul float %conv32.3, %add34.3
%conv36.3 = fpext float %mul35.3 to double
%add37.3 = fadd double %conv36.3, 0x40544851EB851EB8
%conv38.3 = fptrunc double %add37.3 to float
%call.i8.3 = call float @sqrtf(float %conv38.3) #2
%add40.3 = fadd float %call.i8.3, %conv38.3
%mul41.3 = fmul float %conv38.3, %add40.3
%conv42.3 = fpext float %mul41.3 to double
%add43.3 = fadd double %conv42.3, 1.113000e+01
%conv44.3 = fptrunc double %add43.3 to float
%call.i9.3 = call float @sqrtf(float %conv44.3) #2
%add46.3 = fadd float %call.i9.3, %conv44.3
%mul47.3 = fmul float %conv44.3, %add46.3
%conv48.3 = fpext float %mul47.3 to double
%add49.3 = fadd double %conv48.3, 0x4056C851EB851EB8
%conv50.3 = fptrunc double %add49.3 to float
%call.i10.3 = call float @sqrtf(float %conv50.3) #2
%add52.3 = fadd float %call.i10.3, %conv50.3
%mul53.3 = fmul float %conv50.3, %add52.3
%conv54.3 = fpext float %mul53.3 to double
%add55.3 = fadd double %conv54.3, 1.113000e+01
%conv56.3 = fptrunc double %add55.3 to float
%div.3 = fdiv float %conv56.3, 1.000000e+04
%3 = load volatile float, float* %checksum, align 4
%add57.3 = fadd float %3, %div.3
store volatile float %add57.3, float* %checksum, align 4
%call.i.4 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.4 = fadd float %call.i.4, 0x3FBCC2A460000000
%mul.4 = fmul float 0x3FBCC2A460000000, %add.4
%conv.4 = fpext float %mul.4 to double
%add1.4 = fadd double %conv.4, 1.130000e+00
%conv2.4 = fptrunc double %add1.4 to float
%call.i2.4 = call float @sqrtf(float %conv2.4) #2
%add4.4 = fadd float %call.i2.4, %conv2.4
%mul5.4 = fmul float %conv2.4, %add4.4
%conv6.4 = fpext float %mul5.4 to double
%add7.4 = fadd double %conv6.4, 1.716000e+01
%conv8.4 = fptrunc double %add7.4 to float
%call.i3.4 = call float @sqrtf(float %conv8.4) #2
%add10.4 = fadd float %call.i3.4, %conv8.4
%mul11.4 = fmul float %conv8.4, %add10.4
%conv12.4 = fpext float %mul11.4 to double
%add13.4 = fadd double %conv12.4, 1.513000e+01
%conv14.4 = fptrunc double %add13.4 to float
%call.i4.4 = call float @sqrtf(float %conv14.4) #2
%add16.4 = fadd float %call.i4.4, %conv14.4
%mul17.4 = fmul float %conv14.4, %add16.4
%conv18.4 = fpext float %mul17.4 to double
%add19.4 = fadd double %conv18.4, 2.113000e+01
%conv20.4 = fptrunc double %add19.4 to float
%call.i5.4 = call float @sqrtf(float %conv20.4) #2
%add22.4 = fadd float %call.i5.4, %conv20.4
%mul23.4 = fmul float %conv20.4, %add22.4
%conv24.4 = fpext float %mul23.4 to double
%add25.4 = fadd double %conv24.4, 0x40544851EB851EB8
%conv26.4 = fptrunc double %add25.4 to float
%call.i6.4 = call float @sqrtf(float %conv26.4) #2
%add28.4 = fadd float %call.i6.4, %conv26.4
%mul29.4 = fmul float %conv26.4, %add28.4
%conv30.4 = fpext float %mul29.4 to double
%add31.4 = fadd double %conv30.4, 1.113000e+01
%conv32.4 = fptrunc double %add31.4 to float
%call.i7.4 = call float @sqrtf(float %conv32.4) #2
%add34.4 = fadd float %call.i7.4, %conv32.4
%mul35.4 = fmul float %conv32.4, %add34.4
%conv36.4 = fpext float %mul35.4 to double
%add37.4 = fadd double %conv36.4, 0x40544851EB851EB8
%conv38.4 = fptrunc double %add37.4 to float
%call.i8.4 = call float @sqrtf(float %conv38.4) #2
%add40.4 = fadd float %call.i8.4, %conv38.4
%mul41.4 = fmul float %conv38.4, %add40.4
%conv42.4 = fpext float %mul41.4 to double
%add43.4 = fadd double %conv42.4, 1.113000e+01
%conv44.4 = fptrunc double %add43.4 to float
%call.i9.4 = call float @sqrtf(float %conv44.4) #2
%add46.4 = fadd float %call.i9.4, %conv44.4
%mul47.4 = fmul float %conv44.4, %add46.4
%conv48.4 = fpext float %mul47.4 to double
%add49.4 = fadd double %conv48.4, 0x4056C851EB851EB8
%conv50.4 = fptrunc double %add49.4 to float
%call.i10.4 = call float @sqrtf(float %conv50.4) #2
%add52.4 = fadd float %call.i10.4, %conv50.4
%mul53.4 = fmul float %conv50.4, %add52.4
%conv54.4 = fpext float %mul53.4 to double
%add55.4 = fadd double %conv54.4, 1.113000e+01
%conv56.4 = fptrunc double %add55.4 to float
%div.4 = fdiv float %conv56.4, 1.000000e+04
%4 = load volatile float, float* %checksum, align 4
%add57.4 = fadd float %4, %div.4
store volatile float %add57.4, float* %checksum, align 4
%call.i.5 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.5 = fadd float %call.i.5, 0x3FBCC2A460000000
%mul.5 = fmul float 0x3FBCC2A460000000, %add.5
%conv.5 = fpext float %mul.5 to double
%add1.5 = fadd double %conv.5, 1.130000e+00
%conv2.5 = fptrunc double %add1.5 to float
%call.i2.5 = call float @sqrtf(float %conv2.5) #2
%add4.5 = fadd float %call.i2.5, %conv2.5
%mul5.5 = fmul float %conv2.5, %add4.5
%conv6.5 = fpext float %mul5.5 to double
%add7.5 = fadd double %conv6.5, 1.716000e+01
%conv8.5 = fptrunc double %add7.5 to float
%call.i3.5 = call float @sqrtf(float %conv8.5) #2
%add10.5 = fadd float %call.i3.5, %conv8.5
%mul11.5 = fmul float %conv8.5, %add10.5
%conv12.5 = fpext float %mul11.5 to double
%add13.5 = fadd double %conv12.5, 1.513000e+01
%conv14.5 = fptrunc double %add13.5 to float
%call.i4.5 = call float @sqrtf(float %conv14.5) #2
%add16.5 = fadd float %call.i4.5, %conv14.5
%mul17.5 = fmul float %conv14.5, %add16.5
%conv18.5 = fpext float %mul17.5 to double
%add19.5 = fadd double %conv18.5, 2.113000e+01
%conv20.5 = fptrunc double %add19.5 to float
%call.i5.5 = call float @sqrtf(float %conv20.5) #2
%add22.5 = fadd float %call.i5.5, %conv20.5
%mul23.5 = fmul float %conv20.5, %add22.5
%conv24.5 = fpext float %mul23.5 to double
%add25.5 = fadd double %conv24.5, 0x40544851EB851EB8
%conv26.5 = fptrunc double %add25.5 to float
%call.i6.5 = call float @sqrtf(float %conv26.5) #2
%add28.5 = fadd float %call.i6.5, %conv26.5
%mul29.5 = fmul float %conv26.5, %add28.5
%conv30.5 = fpext float %mul29.5 to double
%add31.5 = fadd double %conv30.5, 1.113000e+01
%conv32.5 = fptrunc double %add31.5 to float
%call.i7.5 = call float @sqrtf(float %conv32.5) #2
%add34.5 = fadd float %call.i7.5, %conv32.5
%mul35.5 = fmul float %conv32.5, %add34.5
%conv36.5 = fpext float %mul35.5 to double
%add37.5 = fadd double %conv36.5, 0x40544851EB851EB8
%conv38.5 = fptrunc double %add37.5 to float
%call.i8.5 = call float @sqrtf(float %conv38.5) #2
%add40.5 = fadd float %call.i8.5, %conv38.5
%mul41.5 = fmul float %conv38.5, %add40.5
%conv42.5 = fpext float %mul41.5 to double
%add43.5 = fadd double %conv42.5, 1.113000e+01
%conv44.5 = fptrunc double %add43.5 to float
%call.i9.5 = call float @sqrtf(float %conv44.5) #2
%add46.5 = fadd float %call.i9.5, %conv44.5
%mul47.5 = fmul float %conv44.5, %add46.5
%conv48.5 = fpext float %mul47.5 to double
%add49.5 = fadd double %conv48.5, 0x4056C851EB851EB8
%conv50.5 = fptrunc double %add49.5 to float
%call.i10.5 = call float @sqrtf(float %conv50.5) #2
%add52.5 = fadd float %call.i10.5, %conv50.5
%mul53.5 = fmul float %conv50.5, %add52.5
%conv54.5 = fpext float %mul53.5 to double
%add55.5 = fadd double %conv54.5, 1.113000e+01
%conv56.5 = fptrunc double %add55.5 to float
%div.5 = fdiv float %conv56.5, 1.000000e+04
%5 = load volatile float, float* %checksum, align 4
%add57.5 = fadd float %5, %div.5
store volatile float %add57.5, float* %checksum, align 4
%call.i.6 = call float @sqrtf(float 0x3FBCC2A460000000) #2
%add.6 = fadd float %call.i.6, 0x3FBCC2A460000000
%mul.6 = fmul float 0x3FBCC2A460000000, %add.6
%conv.6 = fpext float %mul.6 to double
%add1.6 = fadd double %conv.6, 1.130000e+00
%conv2.6 = fptrunc double %add1.6 to float
%call.i2.6 = call float @sqrtf(float %conv2.6) #2
%add4.6 = fadd float %call.i2.6, %conv2.6
%mul5.6 = fmul float %conv2.6, %add4.6
%conv6.6 = fpext float %mul5.6 to double
%add7.6 = fadd double %conv6.6, 1.716000e+01
%conv8.6 = fptrunc double %add7.6 to float
%call.i3.6 = call float @sqrtf(float %conv8.6) #2
%add10.6 = fadd float %call.i3.6, %conv8.6
%mul11.6 = fmul float %conv8.6, %add10.6
%conv12.6 = fpext float %mul11.6 to double
%add13.6 = fadd double %conv12.6, 1.513000e+01
%conv14.6 = fptrunc double %add13.6 to float
%call.i4.6 = call float @sqrtf(float %conv14.6) #2
%add16.6 = fadd float %call.i4.6, %conv14.6
%mul17.6 = fmul float %conv14.6, %add16.6
%conv18.6 = fpext float %mul17.6 to double
%add19.6 = fadd double %conv18.6, 2.113000e+01
%conv20.6 = fptrunc double %add19.6 to float
%call.i5.6 = call float @sqrtf(float %conv20.6) #2
%add22.6 = fadd float %call.i5.6, %conv20.6
%mul23.6 = fmul float %conv20.6, %add22.6
%conv24.6 = fpext float %mul23.6 to double
%add25.6 = fadd double %conv24.6, 0x40544851EB851EB8
%conv26.6 = fptrunc double %add25.6 to float
%call.i6.6 = call float @sqrtf(float %conv26.6) #2
%add28.6 = fadd float %call.i6.6, %conv26.6
%mul29.6 = fmul float %conv26.6, %add28.6
%conv30.6 = fpext float %mul29.6 to double
%add31.6 = fadd double %conv30.6, 1.113000e+01
%conv32.6 = fptrunc double %add31.6 to float
%call.i7.6 = call float @sqrtf(float %conv32.6) #2
%add34.6 = fadd float %call.i7.6, %conv32.6
%mul35.6 = fmul float %conv32.6, %add34.6
%conv36.6 = fpext float %mul35.6 to double
%add37.6 = fadd double %conv36.6, 0x40544851EB851EB8
%conv38.6 = fptrunc double %add37.6 to float
%call.i8.6 = call float @sqrtf(float %conv38.6) #2
%add40.6 = fadd float %call.i8.6, %conv38.6
%mul41.6 = fmul float %conv38.6, %add40.6
%conv42.6 = fpext float %mul41.6 to double
%add43.6 = fadd double %conv42.6, 1.113000e+01
%conv44.6 = fptrunc double %add43.6 to float
%call.i9.6 = call float @sqrtf(float %conv44.6) #2
%add46.6 = fadd float %call.i9.6, %conv44.6
%mul47.6 = fmul float %conv44.6, %add46.6
%conv48.6 = fpext float %mul47.6 to double
%add49.6 = fadd double %conv48.6, 0x4056C851EB851EB8
%conv50.6 = fptrunc double %add49.6 to float
%call.i10.6 = call float @sqrtf(float %conv50.6) #2
%add52.6 = fadd float %call.i10.6, %conv50.6
%mul53.6 = fmul float %conv50.6, %add52.6
%conv54.6 = fpext float %mul53.6 to double
%add55.6 = fadd double %conv54.6, 1.113000e+01
%conv56.6 = fptrunc double %add55.6 to float
%div.6 = fdiv float %conv56.6, 1.000000e+04
%6 = load volatile float, float* %checksum, align 4
%add57.6 = fadd float %6, %div.6
正如你所看到的,尽管我指定的展开因子为 3,循环还是被完全展开了。我希望有人能解释一下为什么会发生这种情况……
答案 0 :(得分:1)
LLVM具有不同的启发式方法,用于确定是否展开以及是否完全展开,因为完全展开循环通常比部分展开循环更有利,特别是如果行程计数是固定的并且在编译时已知,因为所有检查和分支可以消除。
我快速搜索了一下,只找到了源代码(source code),没有找到相关文档,但我认为完全展开是由另一个单独的选项来限制的:-unroll-full-max-count。