R-用户,
我试图用暴力枚举法求解一个组合问题,以评估近似计算的质量。为此,我编写了一个小的 R 函数,对于给定的变量 r,它会输出一个极其丑陋的 Rcpp 函数,其中包含 r 层嵌套循环和一些 break 条件。但是,当 r 增大到 20 以上时,用 Rcpp::cppFunction 编译该函数需要花费非常长的时间……
有谁能解释为什么 Rcpp 在编译大量嵌套循环时会卡住?当我使用 g++ 将该函数作为普通的 C++ 程序编译时,不到一秒就能编译完成,并且运行完美(使用 cout 而不是 Rcout)。
我可能遗漏了一些显而易见的东西,因为当我删除除最内层之外的所有 break 条件时,它可以用 Rcpp 完美地编译。但是,当我把这最后一个 break 条件也删除时,编译就无法完成了……有什么建议吗?
P.S. 这是 r = 20 的示例程序,我仍在等待它编译完成。警告:代码很难看,但它是自动生成的。
# Compiles, through cppFunction(), an auto-generated C++ function make_tList_rcpp()
# with r = 20 hard-coded. The C++ source is passed as one single-quoted string.
#
# The generated function has 20 nested for loops, one per variable, from t20
# (outermost) down to t1 (innermost); tk runs from 0 to floor(r/k). Whenever
# 1*t1 + 2*t2 + ... + 20*t20 == r, the tuple t1,...,t20 is printed to Rcout as
# one comma-separated line. At the end of each loop body, that loop is left via
# break once the weighted sum of its own variable and all outer variables
# exceeds r. The function always returns 0.
cppFunction('
int make_tList_rcpp() {
int r = 20;
std::cout << std::endl;
for (int t20=0; t20 <= floor(r/20); t20++) {
for (int t19=0; t19 <= floor(r/19); t19++) {
for (int t18=0; t18 <= floor(r/18); t18++) {
for (int t17=0; t17 <= floor(r/17); t17++) {
for (int t16=0; t16 <= floor(r/16); t16++) {
for (int t15=0; t15 <= floor(r/15); t15++) {
for (int t14=0; t14 <= floor(r/14); t14++) {
for (int t13=0; t13 <= floor(r/13); t13++) {
for (int t12=0; t12 <= floor(r/12); t12++) {
for (int t11=0; t11 <= floor(r/11); t11++) {
for (int t10=0; t10 <= floor(r/10); t10++) {
for (int t9=0; t9 <= floor(r/9); t9++) {
for (int t8=0; t8 <= floor(r/8); t8++) {
for (int t7=0; t7 <= floor(r/7); t7++) {
for (int t6=0; t6 <= floor(r/6); t6++) {
for (int t5=0; t5 <= floor(r/5); t5++) {
for (int t4=0; t4 <= floor(r/4); t4++) {
for (int t3=0; t3 <= floor(r/3); t3++) {
for (int t2=0; t2 <= floor(r/2); t2++) {
for (int t1=0; t1 <= floor(r/1); t1++) {
if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18) > r) {
break;
}
}
if ((20*t20+19*t19) > r) {
break;
}
}
if ((20*t20) > r) {
break;
}
}
return(0);
}')
按照 @spacedman 的建议,以下是使用 sourceCpp 时的一些调试信息。正如针对 sourceCpp 建议的后续评论所述,它在 Linux 上似乎可以正常运行,因此这可能是一个与 Mac 相关的问题……:
> sourceCpp(file="foobar.cpp",verbose=TRUE, rebuild=TRUE)
Generated extern "C" functions
--------------------------------------------------------
#include <Rcpp.h>
// make_tList_rcpp
void make_tList_rcpp();
RcppExport SEXP sourceCpp_1_make_tList_rcpp() {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
make_tList_rcpp();
return R_NilValue;
END_RCPP
}
Generated R functions
-------------------------------------------------------
`.sourceCpp_1_DLLInfo` <- dyn.load('/private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a/sourceCpp_8.so')
make_tList_rcpp <- Rcpp:::sourceCppFunction(function() {}, TRUE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_make_tList_rcpp')
rm(`.sourceCpp_1_DLLInfo`)
Building shared library
--------------------------------------------------------
DIR: /private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a
/Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB -o 'sourceCpp_8.so' --preclean 'foobar.cpp'
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source" -fPIC -Wall -mtune=core2 -g -O2 -c foobar.cpp -o foobar.o
(这就是它挂起的地方......)
P.S. 这是 sessionInfo() 的输出:
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Rcpp_0.12.9
loaded via a namespace (and not attached):
[1] compiler_3.3.2 tools_3.3.2
答案 0 :(得分:6)
这段代码可以在除 macOS 之外的所有平台上编译并运行。
问题经过编辑后,我们有了一些有用的调试信息,特别是所使用的编译标志:
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source" -fPIC -Wall -mtune=core2 -g -O2 -c foobar.cpp -o foobar.o
在 macOS 上编译嵌套 for 循环出现问题的原因,与 clang 在 -O2 和 -Os 下优化嵌套循环的方式有关。具体而言,大约在 clang v3.0 出现的一个回归直接影响了其优化此类循环的能力。详见:
https://llvm.org/bugs/show_bug.cgi?id=16196
此问题看起来已在 clang 3.8 中修复。缺点是您必须手动升级到该版本的编译器,因为所有 macOS 计算机随附的 clang 版本都带有这个回归。您也可以直接在 macOS 上改用 gcc。无论采用哪种方式,下面这篇文章都有助于通过 Homebrew 和 ~/.R/Makevars 完成设置:
http://thecoatlessprofessor.com/programming/openmp-in-r-on-os-x/
您可能希望使用 sourceCpp() 代替 cppFunction(),因为后者更适合较简单的函数,而 sourceCpp() 适用于更复杂的情况。另外,我可能会避免指定 int 返回类型。
使用sourceCpp()
# In R
sourceCpp("path_to/example_comb.cpp")
文件: example_comb.cpp
#include <Rcpp.h>
// Enumerates every tuple (t1, ..., t20) of non-negative integers that satisfies
// 1*t1 + 2*t2 + ... + 20*t20 == r, with r fixed at 20, and prints each tuple
// to Rcpp::Rcout as one comma-separated line (t1 first, t20 last).
//
// The search is 20 nested loops, t20 outermost and t1 innermost. Takes no
// arguments and returns nothing. The code is auto-generated, hence the
// repetition and the flat layout.
// [[Rcpp::export]]
void make_tList_rcpp() {
int r = 20;
// Emits a newline on std::cout; the tuples themselves go to Rcpp::Rcout.
std::cout << std::endl;
// Loop k bounds tk by floor(r/k). Both operands are int, so r/k is already an
// integer quotient before floor() is applied.
for (int t20=0; t20 <= floor(r/20); t20++) {
for (int t19=0; t19 <= floor(r/19); t19++) {
for (int t18=0; t18 <= floor(r/18); t18++) {
for (int t17=0; t17 <= floor(r/17); t17++) {
for (int t16=0; t16 <= floor(r/16); t16++) {
for (int t15=0; t15 <= floor(r/15); t15++) {
for (int t14=0; t14 <= floor(r/14); t14++) {
for (int t13=0; t13 <= floor(r/13); t13++) {
for (int t12=0; t12 <= floor(r/12); t12++) {
for (int t11=0; t11 <= floor(r/11); t11++) {
for (int t10=0; t10 <= floor(r/10); t10++) {
for (int t9=0; t9 <= floor(r/9); t9++) {
for (int t8=0; t8 <= floor(r/8); t8++) {
for (int t7=0; t7 <= floor(r/7); t7++) {
for (int t6=0; t6 <= floor(r/6); t6++) {
for (int t5=0; t5 <= floor(r/5); t5++) {
for (int t4=0; t4 <= floor(r/4); t4++) {
for (int t3=0; t3 <= floor(r/3); t3++) {
for (int t2=0; t2 <= floor(r/2); t2++) {
for (int t1=0; t1 <= floor(r/1); t1++) {
// A full assignment whose weighted sum hits r exactly is a solution: print it.
if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
Rcpp::Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
}
// Pruning: every term is non-negative, so once the sum exceeds r a larger t1
// cannot bring it back down; leave the t1 loop.
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
break;
}
}
// The same pruning repeats at the end of each enclosing loop body, each time
// with the innermost remaining term dropped from the partial sum.
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18) > r) {
break;
}
}
if ((20*t20+19*t19) > r) {
break;
}
}
if ((20*t20) > r) {
break;
}
}
}
/*** R
# Runs automatically in R after compile.
# make_tList_rcpp() is exported with an empty parameter list, so it must be
# called without arguments; passing one would fail with an unused-argument error.
make_tList_rcpp()
*/
函数输出:
https://gist.github.com/coatless/aa51267dcda82b42622fdc8e6e566ab7