这是for循环的可重现示例。由于我想做3000次迭代并且我的矩阵比这个可重复的例子大,所以计算机会崩溃。关于我该怎么办的任何想法?
我已经读过R中没有推荐循环,而是网站建议使用向量并应用函数,但是我无法用这些函数构建我的公式......
矩阵:
public List<Interface1<Implement>> temp = new List<Interface1<Implement>>();
循环功能:
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
结果提取:
require (vegan)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
答案 0 :(得分:0)
不熟悉vegan
包,我对你能给你的建议有点限制。在大多数情况下,您已经很好地构建了for
循环,如下所示,通过将其转换为函数并通过lapply
运行它,您获得的收益并不高。
我认为最好的办法是并行化您的代码。在下面的示例中,如果将for
循环转换为函数并使用parLapply
,如果包含集群构建时间,则可以减少几秒钟。如果排除群集构建时间,则在我的7个内核上快5倍。计算时间的变化将根据您可以运行的核心数量而变化。但我认为这可能是你现在最好的选择。
library(parallel)
library(vegan)
row.names <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
sp1 <- c(4,83,1,2,4,3,1,5,7,2,4)
sp2 <- c(5,0,2,3,10,5,0,2,4,3,1)
sp3 <- c(7,2,4,8,7,2,4,83,1,5,7)
sp4 <- c(0,2,4,2,4,12,1,5,7,2,4)
Site <- c('A','B','C','D','E','F','F','G','G','H','H')
Year <- c('2003','2010','2011','2010','2001','2005','2009','2003','2007','2004','2010')
Obs <- c(1,1,1,4,9,6,8,2,5,2,3)
ID <- c('A2003','B2010','C2011','D2010','E2001','F2005','F2009','G2003','G2007','H2004','H2010')
df<- data.frame(row.names, sp1, sp2, sp3, sp4, Site, Year, Obs, ID)
rownames(df) <- df[,1]
df[,1] <- NULL
df
df.1 <- subset(df, Obs == 1)
df.more <- subset(df, Obs >= 2)
df.1
df.more
more.fun <- function(df.more, df.1)
{
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
start.orig <- Sys.time()
set.seed(pi)
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.more <- do.call(rbind, lapply(split(df.more, df.more$Site),
function(df.more) df.more[sample(nrow(df.more), 1,replace=FALSE) , ])
)
rnd.df <- rbind(df.1,rnd.more)
rnd.df.bc <- as.matrix(vegdist(rnd.df[1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
end.orig <- Sys.time()
start.apply <- Sys.time()
fn = out <- lapply(1:3000, function(i) more.fun(df.more, df.1))
end.apply <- Sys.time()
start.parallel <- Sys.time()
cl <- makeCluster(7)
clusterEvalQ(cl, library(vegan))
clusterExport(cl, c("df.more", "df.1", "more.fun"))
start.parallel.apply <- Sys.time()
out <- parLapply(cl, 1:3000, function(i) more.fun(df.more, df.1))
end.parallel <- Sys.time()
#* Compare times
end.orig - start.orig
end.apply - start.apply
end.parallel - start.parallel
end.parallel - start.parallel.apply
(这里的时间比较非常粗糙)
答案 1 :(得分:0)
预先计算样本索引:
idx <- lapply(1:iterations, function(x) {
tapply(1:nrow(df.more), as.character(df.more$Site), function(y) {
if(length(y) == 1) y else sample(y, 1)
})
})
idx <- lapply(idx, function(ids) c(1:nrow(df.1), ids + nrow(df.1)))
预先计算占位符data.frame to index
rnd.df <- rbind(df.1, df.more)
现在您只需索引预先计算的对象,而无需在每个循环中计算它们:
iterations <- 3000
out <- vector("list", iterations)
for(i in 1:iterations){
rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
out[[i]] <- list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
基准:
f1 = my method
f2 = OPs code
> microbenchmark(f1(), f2(), times=5L)
Unit: seconds
expr min lq mean median uq max neval
f1() 2.21069 4.877017 4.666875 5.27416 5.444411 5.528096 5
f2() 13.54813 13.554965 19.500247 14.51089 27.074520 28.812732 5
cl <- makeCluster(3)
registerDoSNOW(cl)
out <- foreach(i = 1:iterations, .packages=c('vegan')) %do%
{
rnd.df.bc <- as.matrix(vegdist(rnd.df[idx[[i]] ,1:4], method="bray"))
rnd.df.bc[lower.tri(rnd.df.bc,diag=TRUE)] <- NA
triang <- rnd.df.bc[!is.na(rnd.df.bc)]
mean.bc <- mean(triang)
list(rnd = rnd.df, bc = rnd.df.bc, ave = mean.bc)
}
stopCluster(cl)