我想从 TripAdvisor 抓取一些酒店点评。下面的代码可以抓取巨人堤道（Giant's Causeway）所有页面上的全部评论。然而，当我加入评分（rating）、标题（quote）和日期（date）这三项时，只能得到第一页的评论。重申一下：如果不加这三项，我可以拿到从第 1 页到第 60 页等所有页面的评论。有人可以帮助我吗？
#Load library
library(tm)
library(stringr)
library(rvest)
# Scrape the id and text of every Giant's Causeway review on TripAdvisor,
# one results page at a time, and collect them in the data frame `df`
# (one row per review; columns `id` and `review`).

# Number of result pages to request and the offset step between pages
# (the "-or<offset>-" URL segment advances by this amount per page).
n_pages <- 500L
page_step <- 10L

# Shared URL pieces. The first page has no "-or<offset>-" segment.
# NOTE(review): the first page is requested from .co.uk and all later pages
# from .com, exactly as before -- confirm whether the mix is intentional.
listing_path <- "/Attraction_Review-g209948-d189773-Reviews-"
listing_slug <- "Giant_s_Causeway-Bushmills_County_Antrim_Northern_Ireland.html"

# Download one results page and return its reviews as a data frame with the
# character columns `id` and `review` (one row per matched review bubble).
scrape_review_page <- function(url) {
  reviews <- url %>%
    read_html() %>%
    html_nodes("#REVIEWS .innerBubble")

  id <- reviews %>%
    html_node(".quote a") %>%
    html_attr("id")

  review <- reviews %>%
    html_node(".entry .partial_entry") %>%
    html_text()

  # Disabled extractors for the review title, bubble rating and review date,
  # kept for reference. To use them, uncomment the code and add the resulting
  # vectors as extra columns of the data frame returned below.
  # quote <- reviews %>%
  #   html_node(".quote span") %>%
  #   html_text()
  # rating <- reviews %>%
  #   html_node(".rating span") %>%
  #   html_attr("class") %>%
  #   gsub("ui_bubble_rating bubble_", "", .) %>%
  #   gsub("0", "", .) %>%
  #   as.integer()
  # date <- reviews %>%
  #   html_node(".rating .ratingDate") %>%
  #   html_attr("title") %>%
  #   strptime("%d %b %Y") %>%
  #   as.POSIXct()

  data.frame(id = id, review = review, stringsAsFactors = FALSE)
}

# Collect each page in a preallocated list and bind once at the end instead
# of growing `df` with rbind() on every iteration.
pages <- vector("list", n_pages)

for (i in seq_len(n_pages)) {
  # Integer arithmetic keeps the offset free of scientific notation when it
  # is pasted into the URL.
  offset <- (i - 1L) * page_step
  url <- if (offset == 0L) {
    paste0("https://www.tripadvisor.co.uk", listing_path, listing_slug)
  } else {
    paste0(
      "https://www.tripadvisor.com", listing_path, "or", offset, "-",
      listing_slug, "#REVIEWS"
    )
  }

  # A failed request ends the crawl with a warning rather than an error, so
  # the pages fetched so far are still assembled into `df` below.
  page <- tryCatch(
    scrape_review_page(url),
    error = function(e) {
      warning("Stopping at ", url, ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(page)) {
    break
  }
  pages[[i]] <- page
}

# Drop the slots that were never filled (only present after an early stop).
pages <- Filter(Negate(is.null), pages)

df <- if (length(pages) > 0) {
  do.call(rbind, pages)
} else {
  data.frame(id = character(), review = character(), stringsAsFactors = FALSE)
}