下面是代码。奇怪的是,昨天我还可以毫无问题地运行它,但今天它总是返回 character(0)。查了一下,发现问题出在 html_nodes 那一行。
我尝试用其他节点替换“.photo-cards li article”,还是不行。
有人遇到同样的问题并解决了吗?预先感谢您的帮助!
library(tidyverse)
library(rvest)
# Result pages to scrape: pages 1 through 11 of the Sacramento, CA listings.
links <- sprintf("https://www.zillow.com/sacramento-ca/%d_p", 1:11)

# Scrape one results page into a tibble with one row per listing card.
#
# url:           address of a single results page.
# card_selector: CSS selector matching one listing card. The default was
#                picked with http://selectorgadget.com/ and targets the
#                <article> elements inside the "photo-cards" list.
#
# Returns a tibble with the numeric columns price, beds, baths and
# house_area. A value that cannot be found in a card's text is NA. When the
# selector matches nothing, a warning is raised and the tibble has zero rows.
scrape_listing_page <- function(url,
                                card_selector = ".photo-cards li article") {
  houses <- read_html(url) %>%
    html_nodes(card_selector)

  # An empty node set would otherwise flow through silently and only show up
  # later as character(0) / an empty result, so report it where it happens.
  if (length(houses) == 0) {
    warning(
      "No listing cards matched '", card_selector, "' on ", url,
      "; the page markup may have changed or the request may have been ",
      "blocked.",
      call. = FALSE
    )
  }

  price <- houses %>%
    html_node(".list-card-price") %>%
    html_text() %>%
    readr::parse_number()

  # Text of each card's .list-card-info element; beds, baths and area are
  # pulled out of this string with look-ahead regular expressions.
  params <- houses %>%
    html_node(".list-card-info") %>%
    html_text()

  # number of bedrooms; looking ahead for "bd" accepts both "bd" and "bds"
  beds <- params %>%
    str_extract("\\d+(?=\\s*bd)") %>%
    as.numeric()

  # number of bathrooms; the optional fractional part keeps "2.5 ba" from
  # being read as 5
  baths <- params %>%
    str_extract("\\d+(?:\\.\\d+)?(?=\\s*ba)") %>%
    as.numeric()

  # total square footage; every thousands separator has to go before the
  # conversion, otherwise as.numeric() yields NA
  house_area <- params %>%
    str_extract("[0-9,]+(?=\\s*sqft)") %>%
    str_replace_all(",", "") %>%
    as.numeric()

  tibble(price = price, beds = beds, baths = baths, house_area = house_area)
}

# One tibble per page, stacked; page_no records the position of the page
# in `links`.
results <- map(links, scrape_listing_page) %>%
  bind_rows(.id = "page_no")