需要从 booking.com、expedia.com、cleartrip.com 和 Agoda 等网站上抓取所有酒店的数据(特定日期的酒店价格)。
答案 0 :(得分:0)
以下是我迄今为止尝试过的一个例子,目标是抓取酒店名称和价格:
import time

# The parsing calls below (find_all, class_=...) are Beautiful Soup 4 API, and
# the callable class lives in the bs4 package. A bare "import BeautifulSoup"
# binds a module, which cannot be called to parse a document.
from bs4 import BeautifulSoup
from selenium import webdriver

# Scrape hotel names, room prices and review titles from a Cleartrip hotel
# search-results page. The page is loaded in a real Chrome instance through
# Selenium and the resulting HTML is parsed with Beautiful Soup.

# Alternative (disabled): build the URL for a city read from stdin, checking
# in today and out tomorrow, with dates formatted dd/mm/YYYY and "/" encoded
# as %2F.
#city = str(raw_input())
#now = time.time()
#then = now + 24*3600
#tommorow = time.strftime("%d %m %Y",time.localtime(then)).replace(" ","%2F")
#today = time.strftime("%d %m %Y",time.localtime(now)).replace(" ","%2F")
#url = "http://www.cleartrip.com/hotels/results?city="+city+"&chk_in="+today+"&chk_out="+tommorow+"&adults1=1&children1=0&num_rooms=1"
url = "http://www.cleartrip.com/hotels/results?city=Gurgaon&state=&country=&area=&poi=&hotelName=&dest_code=&chk_in=28%2F02%2F2015&chk_out=01%2F03%2F2015&adults1=1&children1=0&num_rooms=1"

browser = webdriver.Chrome()
try:
    browser.get(url)
    # NOTE(review): page_source is read right after get() returns; if the
    # results are filled in by JavaScript later, an explicit wait may be
    # needed here - confirm against the live page.
    content = browser.page_source
finally:
    # Always shut the browser down, even when loading the page fails, so no
    # Chrome process is left running.
    browser.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(content, "html.parser")

hotelNames = soup.find_all('a', class_="hotelDetails")
roomRents = soup.find_all('span', class_="INR")
hotelRatings = soup.find_all('li', class_="review")

names = [name.get('title') for name in hotelNames]
rents = [span.get("data-pr") for span in roomRents]

# The review text is taken from the "title" attribute of the second <span>
# inside each review item; items with fewer than two spans yield None
# instead of raising IndexError.
reviews = []
for rating in hotelRatings:
    spans = rating.find_all("span")
    reviews.append(spans[1].get("title") if len(spans) > 1 else None)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(names)
print(rents)
print(reviews)