我想同时在R中的2000页上进行Web剪贴。我使用3个群集,一个群集后我想关闭浏览器。 我已经尝试过下面的代码,但没有关闭任何内容。
links <- c("https://www.cora.ro/alimente/fructe/piersici-P-1815045",
"https://www.cora.ro/alimente/fructe/banane-P-1810719",
"https://www.cora.ro/alimente/fructe/nectarine-P-1815059",
"https://www.cora.ro/alimente/fructe/banane-dole-P-2493619",
"https://www.cora.ro/alimente/fructe/lamai-la-kg-P-1832454",
"https://www.cora.ro/alimente/fructe/pepene-verde-P-1810582")
cl <- makeCluster(3)
registerDoParallel(cl)
clusterEvalQ(cl,{
library(RSelenium)
library(rvest)
library(xml2)
firefoxProxyProfile <- makeFirefoxProfile(list(
"network.proxy.ssl" = "proxy",
"network.proxy.ssl_port" = 8080L,
"network.proxy.type" = 1L
))
rmdSel <- remoteDriver(remoteServerAddr = "127.0.0.1",
port = 4444L,
browserName = "firefox",
extraCapabilities = firefoxProxyProfile)
})
resp <- foreach(x=1:4, .errorhandling = "pass") %dopar%{
rmdSel$open()
rmdSel$navigate(links[x])
name <- read_html(rmdSel$getPageSource()[[1]]) %>%
html_nodes("span.title") %>%
html_text("href")
}
clusterEvalQ(cl, {
rmdSel$close()
})
stopImplicitCluster()