我试图提取tripadvisor酒店评论

时间:2017-09-18 11:15:24

标签: r

这里我试图从tripadvisor网站上提取酒店评论。我得到了我的输出,但我还需要一个细节,即review picture

我还需要每条评论中的价值(Value)评分、氛围(Atmosphere)评分等所有分项评分。

这是在浏览器"检查元素"(inspect)中看到的HTML代码:

inspect code

这是我的代码:

library(rvest)

# Build the vector of review-page URLs for one TripAdvisor listing.
# The first entry is the listing URL itself; every further page is the same
# URL with "-or<offset>-" inserted right after the "Reviews" path segment.
url <- "https://www.tripadvisor.in/Restaurant_Review-g297584-d4058953-Reviews-Corbyn_s_Delight-Port_Blair_South_Andaman_Island_Andaman_and_Nicobar_Islands.html"

# Offsets of the additional pages: 10, 20, 30, 40.
morepglist <- seq(10, 47, 10)

pickhotel <- url

# URL of the first page.
urllinkmain <- pickhotel
# Offsets as numbers.
morepg <- as.numeric(morepglist)

# Split the URL once around the literal "Reviews-" marker and reuse both halves.
urlparts <- strsplit(urllinkmain, "Reviews-", fixed = TRUE)[[1]]
urllinkpre <- paste0(urlparts[1], "Reviews")
urllinkpost <- urlparts[2]

# Vectorised construction: sprintf() returns character(0) when there are no
# offsets, so an empty `morepg` yields just the first page instead of the
# bogus iterations that `1:length(morepg)` would produce.
urllink <- c(
  urllinkmain,
  sprintf("%s-or%d-%s", urllinkpre, as.integer(morepg), urllinkpost)
)
head(urllink)
# Scrape every page listed in `urllink` and collect one row per review.
# Each page is downloaded and parsed once; all fields are read from that
# single parsed document. Per-page data frames are gathered in a list and
# bound together after the loop instead of growing a data frame with rbind().
page_results <- vector("list", length(urllink))
for (i in seq_along(urllink)) {
  webpage <- read_html(urllink[i])

  # One node per review body, and one node per reviewer badge block.
  reviews <- webpage %>%
    html_nodes("#REVIEWS .innerBubble")

  usercount <- webpage %>%
    html_nodes("#REVIEWS .memberBadgingNoText")

  id <- reviews %>%
    html_node(".quote a") %>%
    html_attr("id")

  quote <- reviews %>%
    html_node(".quote span") %>%
    html_text()

  # The bubble score is encoded in the element's class attribute
  # ("ui_bubble_rating bubble_NN"), so read "class" -- "li" is a tag name,
  # not an attribute, and always yielded NA.
  # NOTE(review): assumes the sub-rating bubble is nested under an element
  # with class "recommend-answer" -- confirm against the page markup.
  # html_node() keeps only the first match per review, so only the first
  # sub-rating of each review is captured here.
  value_rating <- reviews %>%
    html_node(".recommend-answer .ui_bubble_rating") %>%
    html_attr("class") %>%
    gsub("ui_bubble_rating bubble_", "", .) %>%
    gsub("0", "", .) %>%
    as.integer()

  # Overall rating: strip the class prefix, then the zero ("40" -> 4).
  rating <- reviews %>%
    html_node(".rating span") %>%
    html_attr("class") %>%
    gsub("ui_bubble_rating bubble_", "", .) %>%
    gsub("0", "", .) %>%
    as.integer()

  # Review date taken from the title attribute, e.g. "18 Sep 2017".
  # "%b" is parsed according to the current locale.
  date <- reviews %>%
    html_node(".rating .ratingDate") %>%
    html_attr("title") %>%
    strptime("%d %b %Y") %>%
    as.POSIXct()

  review <- reviews %>%
    html_node(".entry .partial_entry") %>%
    html_text()

  # Page-level values (length one); data.frame() recycles them across rows.
  count <- webpage %>%
    html_node(".see_all_count") %>%
    html_text()

  things <- webpage %>%
    html_node("span .header_popularity") %>%
    html_text()

  Noofcontribution <- usercount %>%
    html_node(".badgetext") %>%
    html_text()

  Noofusefulvotes <- usercount %>%
    html_node(".badgetext:last-child") %>%
    html_text()

  Restaurant_Name <- webpage %>%
    html_node(".heading_title") %>%
    html_text()

  page_results[[i]] <- data.frame(
    Restaurant_Name, id, quote, value_rating, rating, date, review,
    count, things, Noofcontribution, Noofusefulvotes,
    stringsAsFactors = FALSE
  )
}

# Bind all pages at once; fall back to an empty data frame when no page
# was scraped so REview_Data is always a data.frame.
REview_Data <- if (length(page_results) > 0) {
  do.call(rbind, page_results)
} else {
  data.frame()
}

REview_Data
write.csv(REview_Data,"F:/R_Studio/Restaurant3.csv")

我无法得到我的输出..

我已经尽力了,但不知道如何从那个特定位置提取气泡(bubble)评分值。对于总体评分,我已经成功地通过 span 元素的 class 属性提取到了……

但是我不能......

1 个答案:

答案 0 :(得分:1)

下面的写法对我来说运行正常。

# Export the scraped reviews as a comma-separated file.
# With row names being written (the default), col.names = NA adds a blank
# header cell above them so the header lines up with the data columns.
# qmethod = "double" escapes embedded double quotes by doubling them.
write.table(
  x = REview_Data,
  file = "C:/your_path_here/foo.csv",
  sep = ",",
  col.names = NA,
  qmethod = "double"
)