我目前正在对opencl代码(.cl)进行静态分析。 我使用clang(clang -S -emit-llvm foo.cl)来创建一个SSA形式的LLVM-IR,并将其作为llvm框架的输入。
在.ll文件中,有一个函数调用,如下所示,
%call = tail call i32 (i32, ...) bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
%cmp = icmp slt i32 %call, %num_elements
....
我想获取函数调用的函数名(get_global_id)。 我尝试用
获取功能Value * CallInst::getCalledValue ();
但由于IR文件没有get_global_id()函数的链接信息,它似乎返回NULL。(因为.cl文件中没有定义函数的实体)。在这种情况下,获取函数名称的微妙方法应该是什么?
以下是整个源代码
__kernel void convolution(__global int* input, __constant int* mask, __global int* output, int num_elements, int width, int mask_width)
{
int gid = get_global_id(0);
if (gid >= num_elements) return;
int tx = gid % width;
int ty = gid / width;
int offset = mask_width/2;
if (tx < offset || ty < offset || tx >= (width-offset) || ty >= (width-offset)) {
output[gid] = 0;
return;
}
int sum = 0;
int tmpx = tx - offset;
int tmpy = ty - offset;
for (int r = 0; r < mask_width; ++r) {
for (int c = 0; c < mask_width; ++c) {
sum += mask[r * mask_width + c] * input[(tmpy + r ) * width + tmpx + c];
}
}
output[gid] = sum;
}
和.ll文件
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define void @convolution(i32* nocapture readonly %input, i32* nocapture readonly %mask, i32* nocapture %output, i32 %num_elements, i32 %width, i32 %mask_width) #0 {
entry:
%call = tail call i32 (i32, ...) bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
%cmp = icmp slt i32 %call, %num_elements
br i1 %cmp, label %if.end, label %cleanup35
if.end: ; preds = %entry
%rem = srem i32 %call, %width
%div = sdiv i32 %call, %width
%div1 = sdiv i32 %mask_width, 2
%cmp2 = icmp slt i32 %rem, %div1
%cmp3 = icmp slt i32 %div, %div1
%or.cond = or i1 %cmp2, %cmp3
br i1 %or.cond, label %if.then9, label %lor.lhs.false4
lor.lhs.false4: ; preds = %if.end
%sub = sub nsw i32 %width, %div1
%cmp5 = icmp slt i32 %rem, %sub
%cmp8 = icmp slt i32 %div, %sub
%or.cond73 = and i1 %cmp5, %cmp8
br i1 %or.cond73, label %if.end10, label %if.then9
if.then9: ; preds = %lor.lhs.false4, %if.end
%idxprom = sext i32 %call to i64
%arrayidx = getelementptr inbounds i32, i32* %output, i64 %idxprom
store i32 0, i32* %arrayidx, align 4, !tbaa !7
br label %cleanup35
if.end10: ; preds = %lor.lhs.false4
%cmp1377 = icmp sgt i32 %mask_width, 0
br i1 %cmp1377, label %for.cond14.preheader.lr.ph, label %for.cond.cleanup
for.cond14.preheader.lr.ph: ; preds = %if.end10
%sub12 = sub i32 %div, %div1
%sub11 = sub i32 %rem, %div1
%0 = sext i32 %mask_width to i64
%1 = sext i32 %sub12 to i64
%2 = sext i32 %width to i64
%3 = sext i32 %sub11 to i64
%xtraiter = and i32 %mask_width, 1
%lcmp.mod = icmp eq i32 %xtraiter, 0
%4 = icmp eq i32 %mask_width, 1
br label %for.body17.lr.ph
for.body17.lr.ph: ; preds = %for.cond14.preheader.lr.ph, %for.cond.cleanup16
%indvars.iv84 = phi i64 [ 0, %for.cond14.preheader.lr.ph ], [ %indvars.iv.next85, %for.cond.cleanup16 ]
%sum.078 = phi i32 [ 0, %for.cond14.preheader.lr.ph ], [ %add27.lcssa, %for.cond.cleanup16 ]
%5 = mul nsw i64 %indvars.iv84, %0
%6 = add nsw i64 %1, %indvars.iv84
%7 = mul nsw i64 %6, %2
%8 = add nsw i64 %3, %7
br i1 %lcmp.mod, label %for.body17.lr.ph.split, label %for.body17.prol
for.body17.prol: ; preds = %for.body17.lr.ph
%arrayidx19.prol = getelementptr inbounds i32, i32* %mask, i64 %5
%9 = load i32, i32* %arrayidx19.prol, align 4, !tbaa !7
%10 = add i64 %3, %7
%arrayidx25.prol = getelementptr inbounds i32, i32* %input, i64 %10
%11 = load i32, i32* %arrayidx25.prol, align 4, !tbaa !7
%mul26.prol = mul nsw i32 %11, %9
%add27.prol = add nsw i32 %mul26.prol, %sum.078
br label %for.body17.lr.ph.split
for.body17.lr.ph.split: ; preds = %for.body17.lr.ph, %for.body17.prol
%add27.lcssa.unr = phi i32 [ undef, %for.body17.lr.ph ], [ %add27.prol, %for.body17.prol ]
%indvars.iv.unr = phi i64 [ 0, %for.body17.lr.ph ], [ 1, %for.body17.prol ]
%sum.175.unr = phi i32 [ %sum.078, %for.body17.lr.ph ], [ %add27.prol, %for.body17.prol ]
br i1 %4, label %for.cond.cleanup16, label %for.body17.lr.ph.split.split
for.body17.lr.ph.split.split: ; preds = %for.body17.lr.ph.split
br label %for.body17
for.cond.cleanup.loopexit: ; preds = %for.cond.cleanup16
%add27.lcssa.lcssa = phi i32 [ %add27.lcssa, %for.cond.cleanup16 ]
br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %if.end10
%sum.0.lcssa = phi i32 [ 0, %if.end10 ], [ %add27.lcssa.lcssa, %for.cond.cleanup.loopexit ]
%idxprom31 = sext i32 %call to i64
%arrayidx32 = getelementptr inbounds i32, i32* %output, i64 %idxprom31
store i32 %sum.0.lcssa, i32* %arrayidx32, align 4, !tbaa !7
br label %cleanup35
for.cond.cleanup16.unr-lcssa: ; preds = %for.body17
%add27.1.lcssa = phi i32 [ %add27.1, %for.body17 ]
br label %for.cond.cleanup16
for.cond.cleanup16: ; preds = %for.body17.lr.ph.split, %for.cond.cleanup16.unr-lcssa
%add27.lcssa = phi i32 [ %add27.lcssa.unr, %for.body17.lr.ph.split ], [ %add27.1.lcssa, %for.cond.cleanup16.unr-lcssa ]
%indvars.iv.next85 = add nuw nsw i64 %indvars.iv84, 1
%lftr.wideiv90 = trunc i64 %indvars.iv.next85 to i32
%exitcond91 = icmp eq i32 %lftr.wideiv90, %mask_width
br i1 %exitcond91, label %for.cond.cleanup.loopexit, label %for.body17.lr.ph
for.body17: ; preds = %for.body17, %for.body17.lr.ph.split.split
%indvars.iv = phi i64 [ %indvars.iv.unr, %for.body17.lr.ph.split.split ], [ %indvars.iv.next.1, %for.body17 ]
%sum.175 = phi i32 [ %sum.175.unr, %for.body17.lr.ph.split.split ], [ %add27.1, %for.body17 ]
%12 = add nsw i64 %indvars.iv, %5
%arrayidx19 = getelementptr inbounds i32, i32* %mask, i64 %12
%13 = load i32, i32* %arrayidx19, align 4, !tbaa !7
%14 = add nsw i64 %8, %indvars.iv
%arrayidx25 = getelementptr inbounds i32, i32* %input, i64 %14
%15 = load i32, i32* %arrayidx25, align 4, !tbaa !7
%mul26 = mul nsw i32 %15, %13
%add27 = add nsw i32 %mul26, %sum.175
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%16 = add nsw i64 %indvars.iv.next, %5
%arrayidx19.1 = getelementptr inbounds i32, i32* %mask, i64 %16
%17 = load i32, i32* %arrayidx19.1, align 4, !tbaa !7
%18 = add nsw i64 %8, %indvars.iv.next
%arrayidx25.1 = getelementptr inbounds i32, i32* %input, i64 %18
%19 = load i32, i32* %arrayidx25.1, align 4, !tbaa !7
%mul26.1 = mul nsw i32 %19, %17
%add27.1 = add nsw i32 %mul26.1, %add27
%indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
%lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
%exitcond.1 = icmp eq i32 %lftr.wideiv.1, %mask_width
br i1 %exitcond.1, label %for.cond.cleanup16.unr-lcssa, label %for.body17
cleanup35: ; preds = %if.then9, %for.cond.cleanup, %entry
ret void
}
declare i32 @get_global_id(...) #1
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
!opencl.kernels = !{!0}
!llvm.ident = !{!6}
!0 = !{void (i32*, i32*, i32*, i32, i32, i32)* @convolution, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none", !"none", !"none"}
!3 = !{!"kernel_arg_type", !"int*", !"int*", !"int*", !"int", !"int", !"int"}
!4 = !{!"kernel_arg_base_type", !"int*", !"int*", !"int*", !"int", !"int", !"int"}
!5 = !{!"kernel_arg_type_qual", !"", !"const", !"", !"", !"", !""}
!6 = !{!"clang version 3.8.0 (trunk 253973)"}
!7 = !{!8, !8, i64 0}
!8 = !{!"int", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C/C++ TBAA"}
在我的项目中,我正在使用它... ...
....
InstListType & inst_list = b.getInstList();
InstListType::iterator inst_it;
for(inst_it = inst_list.begin(); inst_it != inst_list.end() ; inst_it++)
{
Instruction & inst = *inst_it;
switch(inst.getOpcode())
{
case Instruction::Call:
CallInst* ci ;
ci = (CallInst*)&inst;
//Memory access error here since getCalledFunction returns NULL
std::cout<<ci->getCalledFunction()->getName().str()<<std::endl;
....
答案 0 :(得分:1)
IR中get_global_id
的签名是
declare i32 @get_global_id(...) #1
并且在调用网站上,有一个常量的强制转换表达式:
%call = tail call i32 (i32, ...) bitcast (i32 (...)* @get_global_id to i32 (i32, ...)*)(i32 0) #2
我认为这是因为C代码缺少get_global_id
的声明。如果您无法更改代码以添加具有正确签名的extern
声明(我不熟悉OpenCL,因此通常不会为这些声明声明),那么您可以做的只是条带在寻找名字之前被抛弃,如:
if (Function *Callee = dyn_cast<Function>(ci->getCalledValue()->stripPointerCasts())) {
// Callee->getName() should be "get_global_id"
}