我有以下IR:
; ModuleID = 'vec.ir'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin15.3.0"
; @patch(i64 %0): loads five floats from constant absolute addresses, adds the
; first loaded value (%1) to each of the other four, stores every sum to its own
; constant absolute address, and calls @__tickValue after each store, passing a
; per-store constant together with the incoming argument %0.
define void @patch(i64) {
entry:
; %1 is the shared left-hand operand of all four fadd instructions below.
%1 = load float, float* inttoptr (i64 4388240000 to float*)
; The four right-hand operands are loaded from addresses that are not adjacent
; to one another (they are far more than 4 bytes apart).
%2 = load float, float* inttoptr (i64 4387644544 to float*)
%3 = fadd float %1, %2
%4 = load float, float* inttoptr (i64 4387729024 to float*)
%5 = fadd float %1, %4
%6 = load float, float* inttoptr (i64 4387730560 to float*)
%7 = fadd float %1, %6
%8 = load float, float* inttoptr (i64 4387513984 to float*)
%9 = fadd float %1, %8
; Each result is stored to a distinct, non-adjacent address, and every store is
; followed by a call to @__tickValue, which is only declared in this module.
; NOTE(review): scattered store addresses and intervening external calls are
; presumably what keeps the SLP vectorizer from merging the fadds - confirm
; with optimization remarks.
store float %3, float* inttoptr (i64 4371309760 to float*)
call void @__tickValue(i64 105553117467608, i64 %0)
store float %5, float* inttoptr (i64 4371851456 to float*)
call void @__tickValue(i64 105553117465688, i64 %0)
store float %7, float* inttoptr (i64 4371574976 to float*)
call void @__tickValue(i64 105553117465528, i64 %0)
store float %9, float* inttoptr (i64 4371576512 to float*)
call void @__tickValue(i64 105553117466648, i64 %0)
ret void
}
declare void @__tickValue(i64, i64)
当我运行/usr/local/opt/llvm/bin/opt -S -O3 vec.ir > vec-opt.ir
时,我得到:
; ModuleID = 'vec.ir'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin15.3.0"
; @patch as printed after the opt -O3 run quoted above. The instruction
; sequence is the same as in the input listing: the loads and stores now carry
; explicit alignment annotations and the calls are marked `tail`, but the four
; fadd instructions are still scalar (no vector types appear).
define void @patch(i64) {
entry:
; %1 remains the shared operand of every fadd.
%1 = load float, float* inttoptr (i64 4388240000 to float*), align 128
%2 = load float, float* inttoptr (i64 4387644544 to float*), align 128
%3 = fadd float %1, %2
%4 = load float, float* inttoptr (i64 4387729024 to float*), align 128
%5 = fadd float %1, %4
%6 = load float, float* inttoptr (i64 4387730560 to float*), align 128
%7 = fadd float %1, %6
%8 = load float, float* inttoptr (i64 4387513984 to float*), align 128
%9 = fadd float %1, %8
; Stores and @__tickValue calls remain interleaved in their original order.
store float %3, float* inttoptr (i64 4371309760 to float*), align 64
tail call void @__tickValue(i64 105553117467608, i64 %0)
store float %5, float* inttoptr (i64 4371851456 to float*), align 64
tail call void @__tickValue(i64 105553117465688, i64 %0)
store float %7, float* inttoptr (i64 4371574976 to float*), align 64
tail call void @__tickValue(i64 105553117465528, i64 %0)
store float %9, float* inttoptr (i64 4371576512 to float*), align 64
tail call void @__tickValue(i64 105553117466648, i64 %0)
ret void
}
declare void @__tickValue(i64, i64)
根据 http://llvm.org/docs/Vectorizers.html#the-slp-vectorizer 的说明,我原本预期这四条 fadd
指令会被合并成一条向量指令。
如何获取更多信息,以确定优化器为什么没有进行向量化?
答案 0 :(得分:2)
LLVM 内部有一个成本模型,会自动判断 SIMD 向量化是否有收益。您可以通过在编译命令行中添加一些标志(例如 Clang 的 -Rpass=、-Rpass-missed= 和 -Rpass-analysis=)
来获取诊断信息。您还可以尝试在代码中添加一些指令(例如 #pragma clang loop vectorize(enable))来“强制”进行向量化。
如果您是从 LLVM IR 而不是从源代码开始,仍然可以使用 opt 命令对应的命令行开关(例如 -pass-remarks=、-pass-remarks-missed= 和 -pass-remarks-analysis=)。