import re

import scrapy
import html5lib


class TourSpider(scrapy.Spider):
    """Spider that scrapes the tour cards on the traveldglobe.com tour page.

    For every card matched by ``.col-sm-4.js-cards`` the spider prints and
    yields the title, accommodation, difficulty, tour type, location,
    holiday length and price.

    NOTE(review): the CSS classes and the positional order of the ``.tag``
    entries are taken from the original code, not verified against the
    live page -- confirm before relying on the output.
    """

    name = "tour"

    def start_requests(self):
        """Yield the initial request for the tour listing page."""
        urls = [
            'http://www.traveldglobe.com/tour',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract one record per tour card found in *response*.

        Each record is printed field by field (title, accommodation,
        difficulty, tour type, location, holiday, price) and then yielded
        as a dict so Scrapy pipelines and feed exports can consume it.
        A field that cannot be found is reported as ``None`` instead of
        aborting the whole page.
        """
        # Select the card elements themselves (no ``::text``) so that nested
        # CSS queries can be run on each card.
        for category in response.css(".col-sm-4.js-cards"):
            title = category.css("div img::attr(alt)").extract_first()
            info = self._texts(category, ".tag::text")
            info2 = self._texts(category, ".content::text")
            price = self._texts(category, ".offer::text")

            # A list of pairs keeps the print order stable on every Python
            # version; the ".tag" entries are read by position.
            fields = [
                ("title", title),
                ("accomodation_available", self._labelled_value(info, 0)),
                ("difficulty", self._labelled_value(info, 1)),
                ("tour_type", self._labelled_value(info, 2)),
                ("location", self._labelled_value(info, 3)),
                ("holiday", self._holiday(info2)),
                ("price", self._price(price)),
            ]
            for _, value in fields:
                print(value)
            yield dict(fields)

    @staticmethod
    def _texts(node, query):
        """Return the non-blank text fragments matched by *query* under *node*."""
        # Whitespace-only text nodes would otherwise shift positional lookups.
        return [text for text in node.css(query).extract() if text.strip()]

    @staticmethod
    def _labelled_value(texts, index):
        """Return the value after the first ':' in ``texts[index]``.

        Returns ``None`` when the entry is missing, and the whole stripped
        text when it contains no colon.
        """
        if index >= len(texts):
            return None
        _, sep, value = texts[index].partition(":")
        return value.strip() if sep else texts[index].strip()

    @staticmethod
    def _holiday(texts):
        """Build the "<days> days/ <nights> Night" string from the first text.

        Whole numbers are matched, so multi-digit durations stay intact.
        Returns ``None`` when fewer than two numbers are present.
        """
        if not texts:
            return None
        numbers = re.findall(r"\d+", texts[0])
        if len(numbers) < 2:
            return None
        return numbers[0] + " days/ " + numbers[1] + " Night"

    @staticmethod
    def _price(texts):
        """Return "Rs." followed by every digit of the first text, or ``None``."""
        if not texts:
            return None
        digits = "".join(ch for ch in texts[0] if ch.isdigit())
        return "Rs." + digits if digits else None
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy


class Webscrap2Item(scrapy.Item):
    """Scrapy item declaring the fields collected for one scraped entry.

    NOTE(review): nothing in view populates this item, so the meaning of
    each field is inferred from its name only -- confirm against the code
    that fills it.
    """

    categorys = scrapy.Field()
    title = scrapy.Field()
    info = scrapy.Field()
    info2 = scrapy.Field()
    price = scrapy.Field()