以前曾有人问过这个问题,但据我所知没有正确的答案(或我能理解的答案)。
如果外部循环的长度不同,如何在嵌套foreach
循环中删除重复的结果?
可复制的示例:
library(RCurl)
library(doParallel)
library(foreach)
library(data.table)
library(dplyr)
dt <- getURL("https://raw.githubusercontent.com/Zhaoju-
Deng/farm/master/dt.csv")
dt <-fread(dt, drop = 1)
makeCluster(detectCores())
n_cow <- dt%>%group_by(farm_id)%>%summarise(n_cow =
length(unique(cow_id))) # number of cows per farm
n_cow <- n_cow[order(n_cow$farm_id), ] # order number of cows, max =463
y <- foreach(i =1:5, .combine = "cbind")%do%{
foreach(j = unique(dt$cow_id[which(dt$farm_id==i)]),.combine ="c")%dopar%{
mean(dt$scc[which(dt$cow_id ==j)], na.rm = T) # calculate the mean of SCC per cow on each farm
}
}
y <- as.data.frame(y)
y <- y[order(y$result.1), ]# 463 rows, duplicated rows for farms with less than 463 cows
y <- foreach(i =1:5, .combine = "c")%do%{
foreach(j = unique(dt$cow_id[which(dt$farm_id==i)]),.combine
="c")%dopar%{
mean(dt$scc[which(dt$cow_id ==j)], na.rm = T)
}
} # this gives me an distinct vector, but I prefer to the data.frame with each farm as a row or column
sessionInfo()
R version 3.4.4 (2018-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 16.04.5 LTS
doParallel_1.0.11
foreach_1.4.4
我也尝试过使用iterators包,但结果也重复。