使用 %dopar% 的 foreach 循环在 R 中不返回任何变量

时间:2015-11-03 07:24:01

标签: r foreach parallel-processing

require(plyr)
library(reshape)
library(iterators)
library(parallel)
library(foreach)
library(doParallel)    

# Cosine similarity between two equally sized numeric inputs.
#
# x, y: numeric vectors (or same-shaped numeric data frames) to compare.
# Returns the dot product of x and y divided by the product of their
# Euclidean norms. The result is NaN when either input has zero norm,
# because the division is then 0 / 0.
getCosine <- function(x, y) {
  dot_product <- sum(x * y)
  norm_x <- sqrt(sum(x * x))
  norm_y <- sqrt(sum(y * y))
  dot_product / (norm_x * norm_y)
}

# Read the visit log (strings kept as character, not factor) and keep only
# the rows that recorded at least one product view.
visitordata <- subset(
  read.csv(
    file = "~/Hotels.csv",
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE
  ),
  subset = Product.Views > 0
)

# Preview the first rows of the filtered log.
head(visitordata)

   Visitor_ID       Products   Product.Views 
2 1001863689_3519696751   CZ1XQZ             2 
3 1001863689_3519696751   CZR3CN             1
4 1001863689_3519696751   CZTNKN             3
5 121021834007_98749174   CZ2LB0             1
6 11029477426_678878300   CZTNKN             1
7 21029477426_678878300   CZVDHR             1

# Pivot the long visit log to wide form: rows are keyed by Visitor_ID and
# each product becomes its own Product.Views.<product> column.
ColumnBasedData <- reshape(
  visitordata,
  idvar = "Visitor_ID",
  timevar = "Products",
  direction = "wide"
)

# A visitor/product pair that never appeared in the log is NA after the
# pivot; treat it as zero views.
ColumnBasedData[is.na(ColumnBasedData)] <- 0

# Drop the id column so only the per-product view counts remain.
# Plain `<-` is used: this is top-level code, so `<<-` buys nothing and could
# bind in an enclosing environment instead. `drop = FALSE` keeps `x` a data
# frame even when a single product column is left, so ncol(x) stays valid.
x <- ColumnBasedData[, !(names(ColumnBasedData) %in% "Visitor_ID"), drop = FALSE]
head(x)

  Product.Views.CZ1XQZ Product.Views.CZR3CN Product.Views.CZTNKN Product.Views.CZVDHR Product.Views.CZ36D3 Product.Views.CZE0EN
2                     1                    1                    1                    0                    0                    0
6                     0                    0                    1                    1                    0                    0
9                     0                    0                    0                    0                    1                    1
24                    0                    0                    0                    0                    0                    0
37                    0                    0                    0                    0                    0                    0
40                    0                    0                    0                    0                    0                    0

# Build the product-by-product cosine similarity matrix in parallel and
# write it to ~/cosine.csv.
n_items <- ncol(x)
item_names <- colnames(x)

# Leave one core free, but never ask for fewer than one worker:
# detectCores() can return 1 (or NA on some platforms).
n_workers <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(n_workers)
doParallel::registerDoParallel(cl)

# Every %dopar% task runs in a separate worker process. Assigning into
# dataframe_y inside the body would only change that worker's private copy,
# and a `for` loop evaluates to NULL, so nothing would come back to the
# master. Instead each task RETURNS row i of the matrix and foreach stacks
# the rows with rbind.
# `finally` shuts the cluster down even if a task fails.
# NOTE(review): crossprod(as.matrix(x)) scaled by the column norms would
# give the same matrix without a cluster; worth benchmarking.
cosine_rows <- tryCatch(
  foreach(i = seq_len(n_items), .combine = rbind) %dopar% {
    row_i <- numeric(n_items)
    for (j in seq_len(n_items)) {
      # x[[i]] extracts the column as a plain vector (x[i] would pass a
      # one-column data frame, which is slower).
      row_i[j] <- getCosine(x[[i]], x[[j]])
    }
    row_i
  },
  finally = stopCluster(cl)
)

# Re-wrap with matrix() so the single-column case (where foreach returns a
# bare number) still yields a 1 x 1 matrix, and attach the product names.
holder <- matrix(
  cosine_rows,
  nrow = n_items,
  ncol = n_items,
  dimnames = list(item_names, item_names)
)
dataframe_y <- as.data.frame(holder)

write.csv(dataframe_y, file = "~/cosine.csv")

使用 %do% 时可以正常工作,但使用 %dopar% 时不行。使用 %dopar% 时,dataframe_y 返回 null。有什么想法吗?

编辑:补充了所加载的库、函数和数据示例。我要处理的是大数据,所以想使用并行处理。如果不使用并行处理,脚本需要一天以上才能运行完。

1 个答案:

答案 0 :(得分:0)

非常感谢大家。嵌套的 foreach 解决了我的问题。我所做的修改见下方。

 # Nested foreach: the inner loop cbinds the cosines for one i into a 1 x n
 # row and the outer loop rbinds those rows into the full n x n matrix.
 # The body simply returns the cosine. Assigning into dataframe_y on a worker
 # only touches that worker's private copy and forces the whole data frame to
 # be shipped to every worker for no benefit.
 # The result is not named `ls`, to avoid shadowing base::ls().
 cosine_matrix <-
   foreach(i = seq_len(ncol(x)), .combine = rbind) %:%
   foreach(j = seq_len(ncol(x)), .combine = cbind) %dopar% {
     # x[[i]] extracts the column as a plain numeric vector.
     getCosine(x[[i]], x[[j]])
   }

# Attach the product names; matrix() also covers the single-column case,
# where foreach returns a bare number instead of a matrix.
holder <- matrix(
  cosine_matrix,
  nrow = ncol(x),
  ncol = ncol(x),
  dimnames = list(colnames(x), colnames(x))
)
# Plain `<-`: this is top-level code, so `<<-` is unnecessary.
dataframe_y <- as.data.frame(holder)