我不知道我做错了什么,或者我应该做些什么。运行最终的抓取程序后,我并没有得到 MyData.csv 文件。另外,我需要帮助构建一个更复杂的爬虫:对于每一个列表页,它都能进入页面中每个地区、每套房屋等的详情页,并抓取它们的详细信息(例如页面 sreality.cz/detail/prodej/byt/3+kk/…)。我还想获取的其他字段是:详细描述,以及下方第一个表格中的部分信息。
library(rvest)
library(xml2)
library(stringr)
library(tibble)
library(dplyr)
# Base search URL; the page number is appended to the `strana` query
# parameter to build each results-page URL.
url <- 'https://www.sreality.cz/hledani/prodej/byty?strana='
# NOTE(review): `main_page` is never used anywhere below — this line fetches
# the listing page once at load time for no visible purpose; confirm the rest
# of the script does not rely on it before removing.
main_page <- read_html('https://www.sreality.cz/hledani/prodej/byty?strana=')
# Character vector of URLs for result pages 1..30.
list.of.pages <- str_c(url, 1:30)
# Advert title of every listing on a results page.
# `url` is a parsed html document (xml2::read_html output), not a URL
# string, despite the parameter name.
name <- function(url) {
  nodes <- html_nodes(url, css = ".name.ng-binding")
  html_text(nodes)
}
# Locality text of every listing on a results page.
# `url` is a parsed html document (xml2::read_html output), not a URL
# string, despite the parameter name.
locality <- function(url) {
  nodes <- html_nodes(url, css = ".locality.ng-binding")
  html_text(nodes)
}
# Listed price string of every listing on a results page.
# `url` is a parsed html document (xml2::read_html output), not a URL
# string, despite the parameter name.
normprice <- function(url) {
  nodes <- html_nodes(url, css = ".norm-price.ng-binding")
  html_text(nodes)
}
# Detail-page link of every listing on a results page.
#
# `url` is a parsed html document (xml2::read_html output), not a URL
# string, despite the parameter name.
#
# Fix: the "href" attributes on ".title" anchors are site-relative
# (e.g. "/detail/prodej/byt/..."); resolve them against the site root so
# the returned values are directly usable as crawlable absolute URLs.
sreality_url <- function(url) {
  hrefs <- html_nodes(url, css = ".title") %>%
    html_attr("href")
  xml2::url_absolute(hrefs, "https://www.sreality.cz")
}
# Build a one-row-per-listing tibble for a single, already-parsed
# search-results page.
#
# html: a parsed document as returned by xml2::read_html(), NOT a URL
#       string — the selector helpers above operate on parsed documents.
#
# Returns a tibble with columns adtext, loc, price, URL.
#
# Fixes vs the original: `URL = sreality_url` stored the *function object*
# instead of the scraped hrefs vector, and the `select()` call's result was
# discarded (dead code) — both corrected below.
get.data.table <- function(html) {
  adtext <- name(html)
  loc <- locality(html)
  price <- normprice(html)
  hrefs <- sreality_url(html)
  # NOTE(review): tibble() errors if the four selectors return vectors of
  # different lengths (e.g. an advert missing a locality node) — verify the
  # selectors match exactly one node per listing.
  tibble(adtext = adtext,
         loc = loc,
         price = price,
         URL = hrefs)
}
# Scrape every search-results page in `urls` and write the combined
# listings to MyData.csv. Called for its side effect (the CSV file).
#
# urls: character vector of search-results page URLs.
#
# Fixes vs the original:
#   * the `urls` argument was ignored in favour of the global
#     `list.of.pages`;
#   * each element is a URL *string*, so it must be parsed with
#     read_html() before the CSS helpers can run on it — the original
#     passed raw strings straight to get.data.table();
#   * row.names = FALSE keeps the CSV free of a spurious index column.
#
# NOTE(review): the ".ng-binding" classes suggest the listing data is
# rendered client-side by JavaScript (Angular); if so, rvest only sees the
# empty page skeleton and the CSV will have no rows — inspect read_html()
# output to confirm, and consider the site's JSON API or a headless
# browser instead.
scrape.all <- function(urls) {
  urls %>%
    lapply(function(u) get.data.table(read_html(u))) %>%
    bind_rows() %>%
    write.csv(file = 'MyData.csv', row.names = FALSE)
}