R并行编程:{:任务1失败 - "无法找到功能"%>%""

时间:2016-12-13 09:04:14

标签: r

我尝试通过修改我的脚本在R中进行并行编程。在我的脚本上,我做了两个并行编程。第一个完成但第二个是错误,而脚本结构是相同的。以下是我的代码:

library(rvest)
library(RMySQL)
library(curl)
library(gdata)
library(doMC)
library(foreach)
library(doParallel)
library(raster)


trim <- function (x) gsub("^\\s+|\\s+$", "", x)

setwd('/home/chandra/R/IlmuOne/MisterAladin')

no_cores <- detectCores() 
cl<-makeCluster(no_cores)
registerDoParallel(cl)

MasterData = read.xls("Master Hotels - FINAL.xlsx", sheet = 1, header = TRUE)
MasterData$url_agoda = as.character(MasterData$url_agoda)

today = as.Date(format(Sys.time(), "%Y-%m-%d"))+2

ntasks <- nrow(MasterData)

#This section perfomed well
foreach(i=1:ntasks) %dopar% {    
  url = MasterData$url_agoda[i]        
  if (trim(url)!='-' & trim(url)!='')
  {
    from = gregexpr(pattern ='=',url)[[1]][1]
    piece1 = substr(url,1,from)
    from = gregexpr(pattern ='&los=',url)[[1]][1]
    piece2 = substr(url,from,nchar(url))
    MasterData$url_agoda[i] = paste0(piece1,today,piece2)  
  }   
}

con <-  dbConnect(RMySQL::MySQL(), username = "root", password = "master",host = "localhost", dbname = "mister_aladin")



#Tried first 10 data
#Below section was error and always return error: Error in { : task 1 failed - "could not find function "%>%""
foreach(a=1:10, .packages='foreach') %dopar% {


  hotel_id = MasterData$id[a]
  vendor = 'Agoda'
  url = MasterData$url_agoda[a]    

  if (url!='-')
  {

    tryCatch({                  
      hotel <- curl(url) %>%
        read_html() %>%        
        html_nodes(xpath='//*[@id="room-grouping"]') %>%
        html_table(fill = TRUE)
      hotel <- hotel[[1]]

      hotel$hotel_id= hotel_id
      hotel$vendor= vendor

      colnames(hotel)[1] = 'TheSpace'
      colnames(hotel)[4] = 'PricePerNight'

      room = '-'
      hotel$NormalPrice = 0
      hotel$FinalPrice = 0

      for(i in 1:nrow(hotel))
      {

        if (i==1 | (!grepl('See photos',hotel$TheSpace[i]) & hotel$TheSpace[i]!='') )  
        {
          room = hotel$TheSpace[i]
        }
        hotel$TheSpace[i] = room

        #Normal Price
        if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
        {
          split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
          NormalPrice = trim(split[2])
          hotel$NormalPrice[i] = NormalPrice            
          NormalPrice = as.integer(gsub(",","",NormalPrice))
          hotel$NormalPrice[i] = NormalPrice          
        }        

        #Final Price
        if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
        {
          split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
          FinalPrice = trim(split[6])
          hotel$FinalPrice[i] = FinalPrice
          FinalPrice = as.integer(gsub(",","",FinalPrice))
          hotel$FinalPrice[i] = FinalPrice
        }

        hotel$NormalPrice[is.na(hotel$NormalPrice)] <- 0
        hotel$FinalPrice[is.na(hotel$FinalPrice)] <- 0

      }

      hotel = hotel[which(hotel$FinalPrice!=0),c("TheSpace","NormalPrice","FinalPrice")]
      colnames(hotel) = c('room','normal_price','final_price')

      hotel$log = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
      hotel$hotel_id = hotel_id
      hotel$vendor = vendor


      Push = hotel[,c('hotel_id','room','normal_price','final_price','vendor','log')]

      #print(paste0('Agoda: push one record, hotel id ',hotel_id,'!'))
      #cat(paste(paste0('Agoda: push one record, hotel id ',hotel_id,'!'),'\n'))      
      dbWriteTable(conn=con,name='prices_',value=as.data.frame(Push), append = TRUE, row.names = F)


    },
    error = function(e) {
      Sys.sleep(2)
      e
    })

  }

}

dbDisconnect(con)
stopImplicitCluster()

每次我运行脚本时,它总是给我错误:{:任务1失败 - &#34;无法找到功能&#34;%&gt;%&#34;&#34;

我已经检查了这个论坛上的每个帖子并尝试应用它但没有人工作。

请告知任何解决方案

2 个答案:

答案 0 :(得分:4)

您必须使用.packages = c("magrittr", ...)并包含在foreach循环中运行代码所必需的所有包。但是,.packages = "foreach"没有帮助。

请参阅,您可以想象在.packages中定义的所有程序包都在每个并行工作程序中进行了备份/加载。

答案 1 :(得分:2)

%>%运算符需要包magrittr。但是在这种情况下,在脚本开头加载它是不够的 - 需要为每个节点加载它。您可以将此行添加到群集的创建中以实现此目的:

cl<-makeCluster(no_cores)
registerDoParallel(cl)
clusterCall(cl, function() library(magrittr))