# Shuffled 0/1 indicator vector: 1000 ones mixed into 29000 zeros.
tmp_var <- sample(c(rep(1, 1000), rep(0, 29000)))

# Seed the matrix with tmp_var itself, then append one column per lag
# (1 through 1000); slots vacated by the shift are filled with 0.
spM <- Matrix::cbind2(tmp_var)
for (i in seq_len(1000)) {
  spM <- Matrix::cbind2(spM, dplyr::lag(tmp_var, n = i, default = 0))
}

# Reported in-memory size: 11 844 184 bytes for dgCMatrix
object.size(spM)

X <- xgb.DMatrix(spM)

# Reported size: 448 bytes for xgb.DMatrix ???
object.size(X)

# 7.9 MB on disk, which is much smaller than the object.size() of the matrix
xgb.DMatrix.save(X, "X")
如何计算 xgb.DMatrix 类对象 X 的实际大小?
附注(P.S.):
# Write the same data to disk in three formats to compare file sizes.

# MatrixMarket text file: 10.1 MB (plain text, not a binary format)
Matrix::writeMM(obj = spM, file = "spM.txt")

# RDS file: 2.3 MB (binary format; NOTE(review): saveRDS also compresses by
# default, which presumably accounts for much of the saving -- confirm)
saveRDS(object = spM, file = "spM.rds")

# xgboost binary buffer: 7.9 MB, much larger than spM.rds
xgb.DMatrix.save(X, "X")