当两个大的正数相乘时,Rcpp返回一个很大的负数

时间:2016-07-26 02:15:48

标签: r rcpp

我正在编写一个计算曲线下面积(AUC)的函数。当我把两个部分相乘来计算分子时,结果超过了2^31,于是计算中实际使用的是类似-2013386137这样的值。

以下是cpp块

#include <Rcpp.h>
using namespace Rcpp;


// Returns the elements of `x` in ascending order as a new vector.
// The input is copied first, so the caller's vector is not reordered.
// [[Rcpp::export]]
NumericVector sort_rcpp(NumericVector x) {
  // Work on a plain std::vector copy and sort that copy in place.
  std::vector<double> sorted(x.begin(), x.end());
  std::sort(sorted.begin(), sorted.end());
  return NumericVector(sorted.begin(), sorted.end());
}

// Maps every element of `x` to its 1-based position in the sorted copy of `x`.
// Positions come from `match`, so equal values all receive the position of
// their first occurrence in the sorted vector.
// [[Rcpp::export]]
IntegerVector rank(NumericVector x) {
  const NumericVector ordered = sort_rcpp(x);
  return match(x, ordered);
}

// Computes the area under the curve from the ranks of `predicted`:
//   (sum of ranks where actual == 1  -  NPos * (NPos + 1) / 2) / (NPos * NNeg)
//
// actual    - labels; entries equal to 1 are counted as positives.
// predicted - scores that are ranked via rank().
//
// NOTE: this is the version that exhibits the overflow being asked about.
// The counts and the rank sum are `int`, so the products below are evaluated
// in `int` arithmetic before being widened to a floating-point type.
// [[Rcpp::export]]
double auc_(NumericVector actual, NumericVector predicted) {

  double n = actual.size();

  IntegerVector Ranks = rank(predicted);
  // Number of positives (actual == 1); everything else is counted as negative.
  int NPos = sum(actual == 1);
  int NNeg = (actual.size() - NPos);

  // `int` accumulator for the ranks of the positives; it can also exceed the
  // `int` range when the input is large enough.
  int sumranks = 0;

  for(int i = 0; i < n; ++i) {
    if (actual[i] == 1){
      sumranks = sumranks + Ranks[i];
    }
  }

  // Both right-hand sides are computed entirely in `int` and only then
  // converted. With the 100000-element example (about 35% positives),
  // NPos*NNeg is roughly 2.3e9, which is above the 2^31 - 1 limit of a
  // 32-bit int, so p2 receives a wrapped (negative) value.
  double p1 = (sumranks - NPos*( NPos + 1 ) / 2);
  long double p2 = NPos*NNeg;

  double auc =  p1 / p2;
  return auc ;

}

下面是能重现该问题的测试示例

# Simulate N binary labels (about 35% ones) and N uniform scores.
N <- 100000
Actual <- as.numeric(runif(N) > 0.65)
Predicted <- runif(N)

# Lower-case copies of the same vectors, kept from the original example.
actual <- Actual
predicted <- Predicted

# Call the compiled function on the simulated data.
auc_(Actual, Predicted)

我也把它放在R包中

# Install the package that contains the same function.
devtools::install_github("JackStat/ModelMetrics")

# Simulate N binary labels (about 35% ones) and N uniform scores.
N <- 100000
Actual <- as.numeric(runif(N) > 0.65)
Predicted <- runif(N)

# Lower-case copies of the same vectors, kept from the original example.
actual <- Actual
predicted <- Predicted

# Call the packaged function on the simulated data.
ModelMetrics::auc(Actual, Predicted)

1 个答案:

答案 0 :(得分:5)

您在函数内部使用了int,导致整数溢出。改用double之后,结果就正常了:

R> sourceCpp("/tmp/jackstat.cpp")

R> N <- 100000

R> Actual <- as.numeric(runif(N) > .65)

R> Predicted <- as.numeric(runif(N))

R> auc1(Actual, Predicted)   # your function
[1] -0.558932

R> auc2(Actual, Predicted)   # my variant using double
[1] 0.499922
R> 

完整更正的文件如下:

#include <Rcpp.h>

using namespace Rcpp;

// Returns the elements of `x` in ascending order as a new vector.
// The input is copied first, so the caller's vector is not reordered.
// [[Rcpp::export]]
NumericVector sort_rcpp(NumericVector x) {
  // Work on a plain std::vector copy and sort that copy in place.
  std::vector<double> sorted(x.begin(), x.end());
  std::sort(sorted.begin(), sorted.end());
  return NumericVector(sorted.begin(), sorted.end());
}

// Maps every element of `x` to its 1-based position in the sorted copy of `x`.
// Positions come from `match`, so equal values all receive the position of
// their first occurrence in the sorted vector.
// [[Rcpp::export]]
IntegerVector rank(NumericVector x) {
  const NumericVector ordered = sort_rcpp(x);
  return match(x, ordered);
}

// Original `int`-based AUC computation, kept unchanged so it can be compared
// against auc2:
//   (sum of ranks where actual == 1  -  NPos * (NPos + 1) / 2) / (NPos * NNeg)
//
// actual    - labels; entries equal to 1 are counted as positives.
// predicted - scores that are ranked via rank().
//
// NOTE: the counts and the rank sum are `int`, so the products below are
// evaluated in `int` arithmetic and overflow for large inputs.
// [[Rcpp::export]]
double auc1(NumericVector actual, NumericVector predicted) {

  double n = actual.size();

  IntegerVector Ranks = rank(predicted);
  // Number of positives (actual == 1); everything else is counted as negative.
  int NPos = sum(actual == 1);
  int NNeg = (actual.size() - NPos);

  // `int` accumulator for the ranks of the positives; it can also exceed the
  // `int` range when the input is large enough.
  int sumranks = 0;

  for(int i = 0; i < n; ++i) {
    if (actual[i] == 1){
      sumranks = sumranks + Ranks[i];
    }
  }

  // Both right-hand sides are computed entirely in `int` and only then
  // converted. With the 100000-element example (about 35% positives),
  // NPos*NNeg is roughly 2.3e9, which is above the 2^31 - 1 limit of a
  // 32-bit int, so p2 receives a wrapped (negative) value.
  double p1 = (sumranks - NPos*( NPos + 1 ) / 2);
  long double p2 = NPos*NNeg;

  double auc =  p1 / p2;
  return auc ;

}

// AUC computed from the ranks of `predicted`, with every intermediate held in
// floating point so that large counts do not overflow:
//   (sum of ranks where actual == 1  -  P * (P + 1) / 2) / (P * N)
// where P is the number of entries with actual == 1 and N is the remainder.
//
// actual    - labels; entries equal to 1 are counted as positives.
// predicted - scores that are ranked via rank(); equal scores share the
//             position of their first occurrence in the sorted vector.
// [[Rcpp::export]]
double auc2(NumericVector actual, NumericVector predicted) {

  const R_xlen_t total = actual.size();
  IntegerVector ranks = rank(predicted);

  // Class counts are stored as double so their product stays exact well
  // beyond the 32-bit int range.
  const double positives = sum(actual == 1);
  const double negatives = total - positives;

  // Accumulate the ranks that belong to the positive class.
  double positiveRankSum = 0;
  for (R_xlen_t idx = 0; idx < total; ++idx) {
    if (actual[idx] == 1) {
      positiveRankSum += ranks[idx];
    }
  }

  const double numerator = positiveRankSum - positives * (positives + 1) / 2;
  const long double denominator = positives * negatives;

  // The division is carried out in long double and narrowed on return.
  return numerator / denominator;
}

/*** R
# Simulate N binary labels (about 35% ones) and N uniform scores.
N <- 100000
Actual <- as.numeric(runif(N) > 0.65)
Predicted <- runif(N)

# Run the int-based function and the double-based variant on the same data.
auc1(Actual, Predicted)
auc2(Actual, Predicted)
*/