我尝试通过修改我的脚本在R中进行并行编程。在我的脚本上,我做了两个并行编程。第一个完成但第二个是错误,而脚本结构是相同的。以下是我的代码:
library(rvest)
library(RMySQL)
library(curl)
library(gdata)
library(doMC)
library(foreach)
library(doParallel)
library(raster)
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
setwd('/home/chandra/R/IlmuOne/MisterAladin')
no_cores <- detectCores()
cl<-makeCluster(no_cores)
registerDoParallel(cl)
MasterData = read.xls("Master Hotels - FINAL.xlsx", sheet = 1, header = TRUE)
MasterData$url_agoda = as.character(MasterData$url_agoda)
today = as.Date(format(Sys.time(), "%Y-%m-%d"))+2
ntasks <- nrow(MasterData)
#This section perfomed well
foreach(i=1:ntasks) %dopar% {
url = MasterData$url_agoda[i]
if (trim(url)!='-' & trim(url)!='')
{
from = gregexpr(pattern ='=',url)[[1]][1]
piece1 = substr(url,1,from)
from = gregexpr(pattern ='&los=',url)[[1]][1]
piece2 = substr(url,from,nchar(url))
MasterData$url_agoda[i] = paste0(piece1,today,piece2)
}
}
con <- dbConnect(RMySQL::MySQL(), username = "root", password = "master",host = "localhost", dbname = "mister_aladin")
#Tried first 10 data
#Below section was error and always return error: Error in { : task 1 failed - "could not find function "%>%""
foreach(a=1:10, .packages='foreach') %dopar% {
hotel_id = MasterData$id[a]
vendor = 'Agoda'
url = MasterData$url_agoda[a]
if (url!='-')
{
tryCatch({
hotel <- curl(url) %>%
read_html() %>%
html_nodes(xpath='//*[@id="room-grouping"]') %>%
html_table(fill = TRUE)
hotel <- hotel[[1]]
hotel$hotel_id= hotel_id
hotel$vendor= vendor
colnames(hotel)[1] = 'TheSpace'
colnames(hotel)[4] = 'PricePerNight'
room = '-'
hotel$NormalPrice = 0
hotel$FinalPrice = 0
for(i in 1:nrow(hotel))
{
if (i==1 | (!grepl('See photos',hotel$TheSpace[i]) & hotel$TheSpace[i]!='') )
{
room = hotel$TheSpace[i]
}
hotel$TheSpace[i] = room
#Normal Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
NormalPrice = trim(split[2])
hotel$NormalPrice[i] = NormalPrice
NormalPrice = as.integer(gsub(",","",NormalPrice))
hotel$NormalPrice[i] = NormalPrice
}
#Final Price
if (gregexpr(pattern ='IDR',hotel$PricePerNight[i])[[1]][1][1]==1)
{
split = strsplit(hotel$PricePerNight[i],'\n')[[1]]
FinalPrice = trim(split[6])
hotel$FinalPrice[i] = FinalPrice
FinalPrice = as.integer(gsub(",","",FinalPrice))
hotel$FinalPrice[i] = FinalPrice
}
hotel$NormalPrice[is.na(hotel$NormalPrice)] <- 0
hotel$FinalPrice[is.na(hotel$FinalPrice)] <- 0
}
hotel = hotel[which(hotel$FinalPrice!=0),c("TheSpace","NormalPrice","FinalPrice")]
colnames(hotel) = c('room','normal_price','final_price')
hotel$log = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
hotel$hotel_id = hotel_id
hotel$vendor = vendor
Push = hotel[,c('hotel_id','room','normal_price','final_price','vendor','log')]
#print(paste0('Agoda: push one record, hotel id ',hotel_id,'!'))
#cat(paste(paste0('Agoda: push one record, hotel id ',hotel_id,'!'),'\n'))
dbWriteTable(conn=con,name='prices_',value=as.data.frame(Push), append = TRUE, row.names = F)
},
error = function(e) {
Sys.sleep(2)
e
})
}
}
dbDisconnect(con)
stopImplicitCluster()
每次我运行脚本时,它总是给我错误:{:任务1失败 - &#34;无法找到功能&#34;%&gt;%&#34;&#34;
我已经检查了这个论坛上的每个帖子并尝试应用它但没有人工作。
请告知任何解决方案
答案 0 :(得分:4)
您必须使用.packages = c("magrittr", ...)
并包含在foreach
循环中运行代码所必需的所有包。但是,.packages = "foreach"
没有帮助。
请参阅,您可以想象在.packages
中定义的所有程序包都在每个并行工作程序中进行了备份/加载。
答案 1 :(得分:2)
%>%
运算符需要包magrittr
。但是在这种情况下,在脚本开头加载它是不够的 - 需要为每个节点加载它。您可以将此行添加到群集的创建中以实现此目的:
cl<-makeCluster(no_cores)
registerDoParallel(cl)
clusterCall(cl, function() library(magrittr))