Question

我正在研究Kaiser聚类系数。原始代码是用MATLAB编写的，可以在作者的网站（页面底部的最后一个链接）上找到。

首先，我（重新）在纯R中实现原始MATLAB函数。代码粘贴在下面（kaiser_r.R）。

# Stand-in for MATLAB's find(): returns the positions of the nonzero
# entries of `adj` (linear positions when `adj` is a matrix).
rfind <- function(adj) {
  positions <- seq_along(adj)
  positions[adj != 0]
}

# Main function
# Kaiser clustering coefficient of a graph.
#
# Args:
#   adj: square adjacency matrix; a nonzero entry adj[a, b] is treated as a
#        link from node a to node b.
# Returns:
#   The sum of the per-node ratios n_e / (l_n * (l_n - 1)) divided by the
#   number of nodes that have at least two neighbours. NaN (0 / 0) when no
#   node has two neighbours, including for a 0-row matrix.
cc_kaiser <- function(adj) {
  n_count <- nrow(adj)
  w <- rep(0, n_count)
  # Number of nodes with at least two neighbors
  n_neigh <- 0
  # seq_len() rather than 1:n_count so a 0-row matrix yields no iterations
  # (1:0 would visit the indices 1 and 0).
  for (i in seq_len(n_count)) {
    # Neighbours of i: nodes linked to i in either direction.
    n <- rfind(adj[i, ] + t(adj[, i]))
    l_n <- length(n)
    # Nodes with fewer than two neighbours contribute nothing to the result,
    # so there is no need to inspect their neighbourhood.
    if (l_n < 2) next
    n_e <- 0
    for (j in seq_len(l_n)) {
      # Nodes that neighbour n[j] links to.
      n_v <- rfind(adj[n[j], ])
      # |n| + |n_v| - |union(n, n_v)| is the size of their intersection,
      # i.e. how many of i's neighbours n[j] links to.
      n_e <- n_e + l_n + length(n_v) - length(union(n, n_v))
    }
    w[i] <- n_e / (l_n * (l_n - 1))
    n_neigh <- n_neigh + 1
  }
  sum(w) / n_neigh
}

我用以下方法测试这个函数：

> A <- matrix(c(0,1,1,0,1,0,1,1,1,1,0,0,0,1,0,0), 4, 4)
> cc_kaiser(A)
[1] 0.7777778

结果是正确的（我用MATLAB测试过）。然后我尝试用Rcpp实现相同的功能。这是我的尝试（kaiser_rcpp.cpp）：

#include <RcppArmadillo.h>
using namespace Rcpp;

// [[Rcpp::depends(RcppArmadillo)]]
// Rcpp/Armadillo port of the R function cc_kaiser: computes a clustering
// coefficient from the adjacency matrix A and returns it as a double.
// NOTE(review): this version stores and computes the per-node ratios with
// integer types, so fractional ratios are truncated (see the notes below).
// [[Rcpp::export]]
double kaiser(arma::mat A) {
  int n_count = A.n_rows;
  // Per-node ratio, one slot per row of A.
  // NOTE(review): the element type is int, so any fractional value assigned
  // to w[i] is truncated.
  std::vector<int> w(n_count);
  // Number of nodes with at least two neighbours
  int n_neigh = 0;

  for(int i = 0; i < n_count; i++) {
    // Row i plus transposed column i: links of node i in either direction
    arma::rowvec bla = A.row(i) + A.col(i).t();
    // Indices of the strictly positive entries, i.e. the neighbours of node i
    arma::uvec n = unique(find(bla > 0));
    int n_e = 0;
    int l_n = n.n_elem;
    for(int j = 0; j < l_n; j++) {
      // Row of the j-th neighbour and the indices of its positive entries
      arma::colvec vec = A.row(n(j)).t();
      arma::uvec n_v = unique(find(vec > 0));
      // Convert both index sets to Rcpp vectors to use Rcpp's union_()
      IntegerVector uni = union_(as<IntegerVector>(wrap(n)), as<IntegerVector>(wrap(n_v)));
      // |n| + |n_v| - |union| is the number of indices shared by n and n_v
      n_e = n_e + l_n + n_v.n_elem - uni.size();
    }
    if(l_n > 1) {
      // NOTE(review): both operands are int, so this is integer division
      w[i] =  n_e / (l_n * (l_n - 1));
      n_neigh = n_neigh + 1;
    }
  }
  // Sum of the per-node values, averaged over the nodes counted in n_neigh
  double s = std::accumulate(w.begin(), w.end(), 0.0);
  double cl = s / n_neigh;

  return(cl);
}

当我运行kaiser_rcpp.cpp时，得到了不同的值：

> kaiser(A)
[1] 0.6666667

我请求帮助。我不知道我的Rcpp代码中哪里出错了。

Answer 1

首先，w应该是双精度（double）向量，而不是整数向量。

其次，w[i] = n_e / (l_n * (l_n - 1)); 这一行是错的。您需要将其替换为 w[i] = (double)n_e / (l_n * (l_n - 1));。由于n_e和(l_n * (l_n - 1))都是整数，原来的写法执行的是整数除法（例如3 / 2 = 1）。

完整代码：

#include <RcppArmadillo.h>
using namespace Rcpp;

// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::export]]
double kaiser(arma::mat A) {
  int n_count = A.n_rows;
  std::vector<double> w(n_count);                        // CHANGE HERE
  int n_neigh = 0;

  for(int i = 0; i < n_count; i++) {
    arma::rowvec bla = A.row(i) + A.col(i).t();
    arma::uvec n = unique(find(bla > 0));
    int n_e = 0;
    int l_n = n.n_elem;
    for(int j = 0; j < l_n; j++) {
      arma::colvec vec = A.row(n(j)).t();
      arma::uvec n_v = unique(find(vec > 0));
      IntegerVector uni = union_(as<IntegerVector>(wrap(n)), as<IntegerVector>(wrap(n_v)));
      n_e = n_e + l_n + n_v.n_elem - uni.size();
      // Rcout << n_e << std::endl;
    }
    if(l_n > 1) {
      w[i] =  (double)n_e / (l_n * (l_n - 1));          // CHANGE HERE
      n_neigh++;                                        // (CHANGE HERE)
    }
  }
  double s = std::accumulate(w.begin(), w.end(), 0.0);
  double cl = s / n_neigh;

  // Rcout << as<NumericVector>(wrap(w)) << std::endl;

  return(cl);
}

Rcpp中的Kaiser聚类系数

1 个答案: