I am trying to scrape some data from a website that spans multiple pages (the code is meant to scrape all of the pages, which are separated by a "Next" button on the site), but when I run my code I get the error "Error in summary.connection(connection) : invalid connection".
Any ideas on what is going wrong?
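For context: R raises "invalid connection" from summary.connection() when something tries to use a connection object that has already been closed or destroyed; functions that inspect a connection internally via summary() fail the same way. The snippet below is only a generic, hypothetical illustration of that situation, not the scraping code in question:

con <- textConnection("some text")  # create and open a text connection
close(con)                          # the connection object is now destroyed
summary(con)                        # Error in summary.connection(con) : invalid connection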
Answer 0 (score: 0)
Using your code inside a lapply loop over the first few pages works for me.
library(rvest)

baseUrl <- "https://www.imdb.com/search/name/?gender=male,female&ref_=,%20desc&start="

do.call(rbind, lapply(0:5, function(i) {
  # step through the pages 50 results at a time: start = 1, 51, 101, ...
  url <- paste0(baseUrl, i * 50 + 1)
  sourceCode <- read_html(url)  # read the source of the current page
  # scrape actor/actress name:
  # the actors' names sit inside elements with class "lister-item-header";
  # extract all such nodes from the source code
  actorNodes <- html_nodes(sourceCode, ".lister-item-header")
  # extract the lower-level <a> nodes
  actorAreas <- html_nodes(actorNodes, "a")
  # extract the text between <a href=...> and </a>
  actor <- html_text(actorAreas)
  # clean up the name by removing stray \n characters
  actor <- gsub("\n", "", actor)
  # scrape movie name:
  # the movie title the actor is noted for sits inside elements with
  # classes "text-muted text-small"; extract all such nodes from the source code
  movieNodes <- html_nodes(sourceCode, ".text-muted.text-small")
  # extract the lower-level <a> nodes
  movieAreas <- html_nodes(movieNodes, "a")
  # extract the text between <a href=...> and </a>
  movies <- html_text(movieAreas)
  # scrape actor/actress rank (popularity on IMDb):
  # the rank also sits inside the "lister-item-header" elements;
  # extract all such nodes from the source code
  rankNodes <- html_nodes(sourceCode, ".lister-item-header")
  # extract the lower-level rank spans
  rankAreas <- html_nodes(rankNodes, ".lister-item-index.unbold.text-primary")
  # extract the text inside those elements (e.g. "1.")
  rank <- html_text(rankAreas)
  # clean up the rank by removing the period and making it numeric
  rank <- gsub("\\.", "", rank)
  rank <- as.numeric(rank)
  # create a data.frame with the data scraped from the current URL:
  actorData <- data.frame(actor = actor, movie = movies, rank = rank)
  actorData
})) -> result

result
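If the loop runs cleanly, result is a single data frame stacked from the six pages. A quick sanity check might look like this (a sketch; the row count assumes the site returns 50 names per page, i.e. the same block size the start= offsets above step through):

nrow(result)     # expected to be about 300 (6 pages x 50 names per page)
head(result, 3)  # first few rows: actor, movie, rank
str(result)      # rank should be numeric after the as.numeric() conversion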