我使用 LLVM C++ API 生成了一个非常简单的函数。下面是输出的 IR:
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; foo(i64) -> i64: returns its single argument unchanged, but routes the value
; through a stack slot (alloca + store + load) instead of returning %0 directly.
define i64 @"foo"(i64) {
; Reserve one i64-sized slot on the stack.
%2 = alloca i64
; Write the incoming argument (%0) into that slot.
store i64 %0, i64* %2
; Read the same value straight back out of the slot.
%3 = load i64, i64* %2
; Return the reloaded value.
ret i64 %3
}
我原本期望 LLVM 在将其编译为目标文件时会消除多余的 store/load,但实际观察到的结果并非如此。
如果使用 llvm::CodeGenOpt::None,得到的结果是:
$ objdump -M intel -d out.o | grep -A 10 foo
0000000000000000 <foo>:
0: 48 89 7c 24 f8 mov QWORD PTR [rsp-0x8],rdi
5: 48 8b 44 24 f8 mov rax,QWORD PTR [rsp-0x8]
a: c3 ret
如果使用 llvm::CodeGenOpt::Aggressive,得到的结果是:
$ objdump -M intel -d out.o | grep -A 10 foo
0000000000000000 <foo>:
0: 48 89 7c 24 f8 mov QWORD PTR [rsp-0x8],rdi
5: 48 89 f8 mov rax,rdi
8: c3 ret
我希望 LLVM 能把 mov QWORD PTR [rsp-0x8],rdi 这条指令也优化掉。
我是不是遗漏了某些选项?下面是生成目标文件的代码:
// Emits `module` as a native object file named "out.o" for the host machine.
// Every failure path stores a message in `error` and returns it; the success
// path returns `error` without having assigned to it.

// Make all targets linked into this LLVM build available to the registry.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargets();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();

// Resolve the target that matches the default (host) triple.
auto target_triple = llvm::sys::getDefaultTargetTriple();
string error;
auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error);
if (target == nullptr) {
  // lookupTarget was handed `error`, so it is returned as the diagnostic.
  return error;
}

// Describe the host CPU: its name plus every feature flag LLVM can detect.
auto cpu = llvm::sys::getHostCPUName();
llvm::SubtargetFeatures subtarget_features;
llvm::StringMap<bool> feature_map;
if (llvm::sys::getHostCPUFeatures(feature_map)) {
  for (auto &feature : feature_map) {
    // first() is the feature name, second says whether it is enabled.
    subtarget_features.AddFeature(feature.first(), feature.second);
  }
}
auto features = subtarget_features.getString();

llvm::TargetOptions target_options;
// Left empty on purpose: no relocation model is requested explicitly.
auto reloc_model = llvm::Optional<llvm::Reloc::Model>();

// NOTE(review): the CodeGenOpt level configures the backend code generator
// only. IR-level clean-ups (for example mem2reg, which promotes an alloca that
// is merely stored to and reloaded into a plain SSA value) are separate
// transform passes. Nothing in this block schedules them, so they have to be
// added to a pass manager and run on the module before the codegen passes if
// redundant stack traffic should disappear from the emitted code.
auto* target_machine = target->createTargetMachine(
    target_triple,
    cpu,
    features,
    target_options,
    reloc_model,
    llvm::CodeModel::Default,
    // llvm::CodeGenOpt::Default,
    llvm::CodeGenOpt::Aggressive);
if (target_machine == nullptr) {
  error = "Failed to create target machine";
  return error;
}

llvm::legacy::PassManager pass_manager;
// The module must use the data layout of the machine it is compiled for.
module.setDataLayout(target_machine->createDataLayout());
// Dump the IR to stderr for inspection before code generation.
module.print(llvm::errs(), nullptr);

auto file = "out.o";
error_code ec;
llvm::raw_fd_ostream out(llvm::StringRef(file), ec, llvm::sys::fs::F_None);
if (ec) {
  error = ec.message();
  return error;
}

// The original code heap-allocated a llvm::MachineModuleInfo here (through a
// reinterpret_cast of target_machine), never handed it to anything and never
// freed it. That object was a pure leak, and its null check could not fire
// because a throwing `new` never returns nullptr, so it has been removed.

// addPassesToEmitFile returns true when the target cannot emit this file type.
if (target_machine->addPassesToEmitFile(pass_manager, out, llvm::TargetMachine::CGFT_ObjectFile)) {
  error = "Failed to emit file";
  return error;
}

// Run the scheduled codegen passes; the object code is written to `out`.
pass_manager.run(module);
out.close();
return error;