我正在尝试使用RcppArmadillo编写MCMC过程,该过程涉及计算约30,000 x 30,000稀疏矩阵的对数行列式。似乎Armadillo中的log_det()目前不支持sp_mat,所以我正在执行以下操作:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
using namespace arma;
double eigen_ldet(sp_mat arma_mat) {
Eigen::SparseMatrix<double> eigen_s = Rcpp::as<Eigen::SparseMatrix<double>>(Rcpp::wrap(arma_mat));
Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
solver.compute(eigen_s);
double det = solver.logAbsDeterminant();
return det;
}
我觉得这真的很烂而且很慢。任何帮助将不胜感激。
编辑:下面是一个用于复现问题的模拟示例(mock-up):
library(Matrix)
#' Build the mock-up test matrix kron(I_j, A) + kron(B, I_i)
#'
#' `A` (i x i) and `B` (j x j) are random matrices that are row-normalised,
#' get their diagonal zeroed, and are then turned into `I - rho * A` and
#' `I - alp * B`.
#'
#' @param i Size of the first weight matrix.
#' @param j Size of the second weight matrix.
#' @param rho Scaling applied to the first weight matrix.
#' @param alp Scaling applied to the second weight matrix.
#' @return A sparse (i * j) x (i * j) matrix from the Matrix package.
m_mat <- function(i = 1688, j = 18, rho = 0.5, alp = 0.5) {
  # The i x i draw comes first so results for a given seed do not change.
  w1 <- matrix(runif(i^2), nrow = i, ncol = i)
  w2 <- matrix(runif(j^2), nrow = j, ncol = j)
  w1 <- w1 / rowSums(w1)
  w2 <- w2 / rowSums(w2)
  diag(w1) <- 0
  diag(w2) <- 0
  w1 <- diag(i) - rho * w1
  w2 <- diag(j) - alp * w2

  # Permuting rows and columns of kron(I_i, w2) so that they are ordered by
  # the j-index first is the same matrix as kron(w2, I_i); build it directly
  # instead of going through cbind/order/transpose twice. Diagonal() avoids
  # allocating dense identity matrices.
  kronecker(Diagonal(j), w1) + kronecker(w2, Diagonal(i))
}
编辑2:这是第二个模拟示例,其中 w1 是稀疏的:
#' Build the mock-up test matrix with a sparse first weight matrix
#'
#' Row `h` of the first weight matrix gets up to `nb` neighbours drawn from a
#' normal distribution centred on `h`; draws outside `1..i` fall back to `h`
#' itself. Each row is normalised by its neighbour count before the diagonal
#' is zeroed. The result is kron(I_j, I - rho * W1) + kron(I - alp * W2, I_i).
#'
#' @param i Size of the sparse first weight matrix.
#' @param j Size of the dense second weight matrix.
#' @param nb Number of neighbour draws per row (at least 1).
#' @param range Standard deviation of the neighbour draws.
#' @param rho Scaling applied to the first weight matrix.
#' @param alp Scaling applied to the second weight matrix.
#' @return A sparse (i * j) x (i * j) matrix from the Matrix package.
m_mat2 <- function(i = 1688, j = 18, nb = 4, range = 10, rho = 0.5,
                   alp = 0.5) {
  stopifnot(i >= 1, j >= 1, nb >= 1)

  # Draw the neighbours row by row, in row order, so that the random number
  # stream is consumed exactly as a plain loop over the rows would.
  rows <- seq_len(i)
  neighbours <- lapply(rows, function(h) {
    cand <- as.integer(rnorm(nb, h, range))
    unique(ifelse(cand > 0 & cand <= i, cand, h))
  })
  n_nb <- lengths(neighbours)

  # Assemble the row-normalised matrix from triplets in one go rather than
  # assigning into a sparse matrix once per row.
  w1 <- sparseMatrix(
    i = rep(rows, n_nb),
    j = unlist(neighbours),
    x = rep(1 / n_nb, n_nb),
    dims = c(i, i)
  )

  w2 <- matrix(runif(j^2), nrow = j, ncol = j)
  w2 <- w2 / rowSums(w2)
  diag(w1) <- 0
  diag(w2) <- 0
  w1 <- Diagonal(i) - rho * w1
  w2 <- diag(j) - alp * w2

  # kron(w2, I_i) equals the row/column permutation of kron(I_i, w2) that
  # orders indices by the j-index first, so it is built directly.
  kronecker(Diagonal(j), w1) + kronecker(w2, Diagonal(i))
}
实际情况中的稀疏 w1 应该更加不规则,但是(用以上代码生成的矩阵)计算其行列式所需的时间与使用实际 w1 时大致相同。
答案 0 :(得分:0)
根据我的实验,从 Armadillo 矩阵到 Eigen 矩阵的转换非常快,大部分时间都花在 solver.compute() 中。
我不知道是否存在更快的算法来计算稀疏矩阵的对数行列式,但我找到了一种至少适用于你的模拟示例的近似方法:只使用(稠密的)对角块(关于如何把矩阵的其余部分也纳入计算,原文此处有一个参考链接,但链接已丢失)。如果近似解已经足够,这种方法效果很好而且速度很快:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppEigen)]]
#include <RcppArmadillo.h>
#include <RcppEigen.h>
#include <Rcpp/Benchmark/Timer.h>
using namespace arma;
// [[Rcpp::export]]
double arma_sldet(sp_mat arma_mat, int blocks, int size) {
double ldet = 0.0;
double val = 0.0;
double sign = 0.0;
for (int i = 0; i < blocks; ++i) {
int begin = i * size;
int end = (i + 1) * size - 1;
sp_mat sblock = arma_mat.submat(begin, begin, end, end);
mat dblock(sblock);
log_det(val, sign, dblock);
ldet += val;
}
return ldet;
}
// Exact log|det| of a sparse matrix via Eigen's SparseLU, with a timing
// breakdown of the individual stages.
//
// arma_mat: the sparse matrix, taken by const reference to avoid a copy.
// Returns an R list with
//   log_det: log of the absolute value of the determinant,
//   timer:   the Rcpp::Timer readings for the steps "start", "conversion",
//            "solver" and "log_det".
// Raises an R error (via Rcpp::stop) when the factorization does not succeed,
// instead of reporting a value read from an invalid factorization.
// [[Rcpp::export]]
Rcpp::List eigen_ldet(const sp_mat& arma_mat) {
  Rcpp::Timer timer;
  timer.step("start");

  // Armadillo -> R object -> Eigen sparse matrix.
  const Eigen::SparseMatrix<double> eigen_s =
      Rcpp::as<Eigen::SparseMatrix<double>>(Rcpp::wrap(arma_mat));
  timer.step("conversion");

  Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
  solver.compute(eigen_s);
  if (solver.info() != Eigen::Success) {
    Rcpp::stop("SparseLU factorization failed: " + solver.lastErrorMessage());
  }
  timer.step("solver");

  const double log_abs_det = solver.logAbsDeterminant();
  timer.step("log_det");

  Rcpp::NumericVector res(timer);
  return Rcpp::List::create(Rcpp::Named("log_det") = log_abs_det,
                            Rcpp::Named("timer") = res);
}
/*** R
library(Matrix)
#' Build the mock-up test matrix kron(I_j, A) + kron(B, I_i)
#'
#' `A` (i x i) and `B` (j x j) are random matrices that are row-normalised,
#' get their diagonal zeroed, and are then turned into `I - rho * A` and
#' `I - alp * B`.
#'
#' @param i Size of the first weight matrix.
#' @param j Size of the second weight matrix.
#' @param rho Scaling applied to the first weight matrix.
#' @param alp Scaling applied to the second weight matrix.
#' @return A sparse (i * j) x (i * j) matrix from the Matrix package.
m_mat <- function(i = 1688, j = 18, rho = 0.5, alp = 0.5) {
  # The i x i draw comes first so results for a given seed do not change.
  w1 <- matrix(runif(i^2), nrow = i, ncol = i)
  w2 <- matrix(runif(j^2), nrow = j, ncol = j)
  w1 <- w1 / rowSums(w1)
  w2 <- w2 / rowSums(w2)
  diag(w1) <- 0
  diag(w2) <- 0
  w1 <- diag(i) - rho * w1
  w2 <- diag(j) - alp * w2

  # Permuting rows and columns of kron(I_i, w2) so that they are ordered by
  # the j-index first is the same matrix as kron(w2, I_i); build it directly
  # instead of going through cbind/order/transpose twice. Diagonal() avoids
  # allocating dense identity matrices.
  kronecker(Diagonal(j), w1) + kronecker(w2, Diagonal(i))
}
# Small model (i = 200): run both the exact SparseLU computation and the
# block-diagonal approximation (18 blocks of size 200) and time each call.
m <- m_mat(i = 200)
system.time(eigen <- eigen_ldet(m))
system.time(arma <- arma_sldet(m, 18, 200))
# Differences between consecutive timer readings, divided by 1e6
# (presumably nanoseconds -> milliseconds; confirm against Rcpp::Timer).
diff(eigen$timer)/1000000
# Compare the exact log-determinant with the approximation.
all.equal(eigen$log_det, arma)
# Full-size model (default arguments): only the approximation is run.
m <- m_mat()
#eigen_ldet(m) # takes too long ...
system.time(arma <- arma_sldet(m, 18, 1688))
*/
较小模型的结果:
> m <- m_mat(i = 200)
> system.time(eigen <- eigen_ldet(m))
user system elapsed
3.703 0.049 3.751
> system.time(arma <- arma_sldet(m, 18, 200))
user system elapsed
0.059 0.012 0.019
> diff(eigen$timer)/1000000
conversion solver log_det
5.208586 3738.131168 0.582578
> all.equal(eigen$log_det, arma)
[1] "Mean relative difference: 0.002874847"
近似解与精确解非常接近,而且速度快得多。从计时结果中还可以看到精确解法各步骤的耗时分布。
完整模型的结果:
> m <- m_mat()
> #eigen_ldet(m) # takes to long ...
> system.time(arma <- arma_sldet(m, 18, 1688))
user system elapsed
5.965 2.529 2.578
如果只考虑对角线元素,还可以得到速度更快的近似:
// Diagonal-only approximation of the log-determinant: returns the sum of the
// natural logarithms of the main-diagonal entries of arma_mat. All
// off-diagonal entries are ignored.
// [[Rcpp::export]]
double arma_sldet_diag(sp_mat arma_mat) {
  // Copy the main diagonal into a dense column vector.
  const vec diag_entries(arma_mat.diag());
  // Element-wise logarithm, then accumulate.
  const vec log_entries = log(diag_entries);
  return sum(log_entries);
}
答案 1 :(得分:0)
如果您的计算机有足够的内存(例如 32 GB 以上),并且安装了高性能的 LAPACK 实现(例如 OpenBLAS 或 Intel MKL),那么一种简单粗暴的方法是将稀疏矩阵转换为稠密矩阵,然后在稠密矩阵上计算对数行列式。
示例:
// Random 30000 x 30000 sparse test matrix (third argument 0.01 is presumably
// the density of non-zeros; confirm against the sprandu documentation).
sp_mat X = sprandu(30000,30000,0.01);
// Densify and take the log-determinant of the dense matrix. The dense copy
// alone holds 30000 * 30000 doubles (about 7.2 GB). The result is stored as a
// complex number (cx_double).
cx_double log_result = log_det( mat(X) );
虽然这显然需要占用大量内存,但优点是它避免了耗时的稀疏求解器/分解。 OpenBLAS或MKL也将利用多个内核。