R doParallel中的并行处理foreach保存数据

时间:2016-06-19 18:29:47

标签: r parallel-foreach

我在让并行处理部分运行起来方面取得了进展,但保存包含风区长度(fetch距离)的向量这一步无法正常工作。我得到的错误是

df_Test_Fetch <- data.frame(x_lake_length)
Error in data.frame(x_lake_length) : object 'x_lake_length' not found
write.table(df_Test_Fetch,file="C:/tempTest_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
Error in is.data.frame(x) : object 'df_Test_Fetch' not found

我尝试修改下面的代码,以便将foreach步骤的结果输出到x_lake_length,但它并没有像我希望的那样输出向量。如何才能将实际结果保存到csv文件中?我使用的是一台装有R x64 3.3.0的Windows 8计算机。

提前谢谢你 仁

这是完整的代码。

 # make sure there is no prexisting data
 # NOTE: rm() emits a warning when x_lake_length does not exist yet.
rm(x_lake_length)

# Libraries ---------------------------------------------------------------
# Install pacman if it cannot be loaded, then use it to load the packages
# used by this script.
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lakemorpho,rgdal,maptools,sp,doParallel,foreach,
               doParallel)

# HPC ---------------------------------------------------------------------
# Use all detected cores except two, start a cluster of that size, seed the
# workers' random number streams, and register the cluster for %dopar%.
cores_2_use <- detectCores() - 2
cl          <- makeCluster(cores_2_use, useXDR = F)
clusterSetRNGStream(cl, 9956)
registerDoParallel(cl, cores_2_use)

# Data --------------------------------------------------------------------

ogrDrivers()

# Directory of the "vectors" example data inside the installed rgdal package.
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
ogrListLayers(dsn)
ogrInfo(dsn=dsn, layer="trin_inca_pl03")
# Temporarily switch into dsn so the shapefile can be named by a relative
# path, then restore the previous working directory.
owd <- getwd()
setwd(dsn)
ogrInfo(dsn="trin_inca_pl03.shp", layer="trin_inca_pl03")
setwd(owd)
# Read the layer into x; the analysis below iterates over its rows.
x <- readOGR(dsn=dsn, layer="trin_inca_pl03")
summary(x)

# Analysis ----------------------------------------------------------------
# myfun(x, i): builds a lakeMorphoClass object from row i of x and stores the
# lakeMaxLength() result in element i of a vector named x_lake_length.
# NOTE: that x_lake_length is a local variable of myfun, and the last
# expression evaluated is Sys.sleep(0.1), so the vector is not the value the
# function returns.
myfun <- function(x,i){tmp<-lakeMorphoClass(x[i,],NULL,NULL,NULL)
x_lake_length<-vector("numeric",length = nrow(x))
x_lake_length[i]<-lakeMaxLength(tmp,200)
print(i)
Sys.sleep(0.1)}

# Run myfun once per row of x on the registered cluster.
# NOTE: the value of this foreach call is not assigned to any name, so no
# top-level object called x_lake_length is created here.
foreach(i = 1:nrow(x),.combine=cbind,.packages=c("lakemorpho","rgdal"))  %dopar% (
  myfun(x,i)
)
options(digits=10)
# x_lake_length was never created at top level, so the next line stops with
# "object 'x_lake_length' not found", and write.table() then fails because
# df_Test_Fetch does not exist either.
df_Test_Fetch <- data.frame(x_lake_length)
write.table(df_Test_Fetch,file="C:/temp/Test_Fetch.csv",row.names=TRUE,col.names=TRUE, sep=",")
print(proc.time())

1 个答案:

答案 0 :(得分:0)

我认为这就是你想要的,虽然我不能100%肯定地理解这个主题。

我所做的是在您的并行化函数中添加return(),并在调用foreach时将返回对象的值赋给x_lake_length。但我只是猜测这就是您想要做的,所以如果我错了请纠正我。

# Make sure there is no preexisting result. Guarding with exists() avoids the
# warning rm() raises when the object is not there (e.g. on a fresh session).
if (exists("x_lake_length", inherits = FALSE)) rm(x_lake_length)

# Libraries ---------------------------------------------------------------
# requireNamespace() only checks that pacman is installed; p_load() is called
# through pacman:: so pacman itself never needs to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(lakemorpho, rgdal, maptools, sp, doParallel, foreach)

# HPC ---------------------------------------------------------------------
# Leave two cores free for the rest of the system, but always keep at least
# one worker: detectCores() may return NA or a value <= 2, and makeCluster()
# fails when asked for fewer than one worker.
cores_2_use <- max(1L, detectCores() - 2L, na.rm = TRUE)
cl          <- makeCluster(cores_2_use, useXDR = FALSE)
# Seed the workers' random number streams so runs are reproducible.
clusterSetRNGStream(cl, 9956)
# Register the cluster as the backend used by %dopar%.
registerDoParallel(cl, cores_2_use)

# Data --------------------------------------------------------------------

ogrDrivers()

# Directory of the "vectors" example data inside the installed rgdal package.
dsn <- system.file("vectors", package = "rgdal")[1]
# the line below is commented out but when I run the script on my data the line below is what I use instead of the one above
# then making the name changes as needed
# dsn<-setwd("J:\\Elodea\\ByHUC6\\")
ogrListLayers(dsn)
ogrInfo(dsn = dsn, layer = "trin_inca_pl03")
# Address the shapefile by its full path instead of changing the working
# directory and changing it back.
ogrInfo(dsn = file.path(dsn, "trin_inca_pl03.shp"), layer = "trin_inca_pl03")
# Read the layer into x; the analysis iterates over its rows.
x <- readOGR(dsn = dsn, layer = "trin_inca_pl03")
summary(x)

# Analysis ----------------------------------------------------------------  
# Compute the maximum lake length for one lake.
#
# Args:
#   x: the spatial object read with readOGR(); one row per lake.
#   i: index of the row (lake) to process.
# Returns:
#   The result of lakeMaxLength() for lake i. Only this lake's value is
#   returned (expected to be a single number), so that foreach can
#   concatenate the per-lake results into one plain vector.
myfun <- function(x, i) {
  tmp <- lakeMorphoClass(x[i, ], NULL, NULL, NULL)
  return(lakeMaxLength(tmp, 200))
}

# One task per lake. seq_len() yields no tasks for an empty layer, whereas
# 1:nrow(x) would iterate over c(1, 0). Combining with c() gives a vector with
# one entry per lake; returning a full-length vector from every task and
# combining with cbind would instead build an n x n matrix that is zero
# everywhere except the diagonal.
x_lake_length <- foreach(
  i = seq_len(nrow(x)),
  .combine = c,
  .packages = c("lakemorpho", "rgdal")
) %dopar% {
  myfun(x, i)
}

# All parallel work is done: shut the worker processes down.
stopCluster(cl)

options(digits = 10)
# One row per lake, in the same order as the rows of x.
df_Test_Fetch <- data.frame(x_lake_length)
write.table(df_Test_Fetch, file = "C:/temp/Test_Fetch.csv", row.names = TRUE,
            col.names = TRUE, sep = ",")
print(proc.time())