使用 Python 的 Scrapy 进行网页抓取

时间:2019-06-25 07:18:42

标签: python web-scraping request

大家好,你好吗?

我不知道自己哪里做错了,但数据没有按照我想要的顺序(从上到下,从第一个链接开始)获取。我认为这是规则(rules)的问题。

也有一些不受欢迎的东西,例如邮件,电话和图表

from scrapy.item import Field,Item
from scrapy.spiders import CrawlSpider,Rule
from scrapy.linkextractors import LinkExtractor
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose

class PropietiItem(Item):
    """Container for the data scraped from one property listing page.

    Every attribute is a plain Scrapy ``Field`` with no processors attached.
    The field names mix Spanish and English; they are kept unchanged because
    the spider populates them by these exact names.
    """

    titulo = Field()
    tipo = Field()
    reference = Field()
    agente = Field()
    company = Field()
    orn = Field()
    brn = Field()
    precio = Field()
    tipodepropiedad = Field()
    trakheesi = Field()
    bedrooms = Field()
    bathrooms = Field()
    furnishings = Field()
    area = Field()
    amenities = Field()
    fecha = Field()
    descripcion = Field()
    trendsandprice = Field()
    averagerent = Field()
    averagesize = Field()
    script = Field()
    grafico = Field()
    telefono = Field()
    # NOTE(review): the spider's parse_items registers no XPath for `mail`,
    # so this field stays empty unless something else fills it.
    mail = Field()






class PropietiCrawler(CrawlSpider):
    """Crawl propertyfinder.ae search results and scrape each listing page.

    The crawl starts from the first search-results page. Links whose URL
    matches ``page=`` are followed to reach further result pages, and links
    whose URL matches ``rent/`` are handed to :meth:`parse_items`.

    Scrapy downloads pages concurrently, so items are emitted in the order
    responses arrive, not in the order the listings appear on the site.
    """

    name = "MiPrimerCrawler"
    start_urls = ['https://www.propertyfinder.ae/en/search?c=2&l=1&ob=nd&page=1&rp=y']
    allowed_domains = ['propertyfinder.ae']

    rules = (
        # No callback: a Rule without one only follows the matched links.
        Rule(LinkExtractor(allow=r'page=')),
        # The callback is looked up by name on the spider instance, so
        # parse_items must be a method of this class.
        Rule(LinkExtractor(allow=r'rent/'), callback='parse_items'),
    )

    # (item field, absolute XPath) pairs, applied in this order.
    # NOTE(review): absolute paths break as soon as the page layout shifts;
    # class- or id-based selectors would be sturdier, but need the live markup.
    _FIELD_XPATHS = (
        ('titulo', '/html/body/main/div[1]/div/div[2]/div[2]/div[1]/div/h1/text()'),
        ('tipo', '/html/body/main/div[1]/div/div[2]/div[2]/div[1]/div/div/h2/text()'),
        ('reference', '/html/body/main/div[1]/div/div[2]/div[2]/div[1]/div/div/div/strong/text()'),
        ('agente', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div[2]/div[1]/div[2]/a/text()'),
        ('company', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div[2]/div[2]/div[2]/text()'),
        ('orn', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()'),
        ('brn', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[2]/div[1]/div[1]/div[2]/div[4]/div/text()'),
        ('precio', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[1]/div[2]/div/div/span[1]/text()'),
        ('tipodepropiedad', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[2]/div[2]/text()'),
        ('trakheesi', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[4]/div[2]/text()'),
        ('bedrooms', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[5]/div[2]/text()'),
        ('bathrooms', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[6]/div[2]/text()'),
        ('furnishings', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[7]/div[2]/text()'),
        ('area', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[1]/div/div/div[8]/div[2]/text()'),
        ('amenities', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[2]/div/div[1]/text()'),
        ('fecha', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[2]/div[2]/div/div[2]/text()'),
        ('descripcion', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[4]/text()'),
        ('trendsandprice', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[8]/div/div[2]/h3/text()'),
        ('averagerent', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[8]/div/div[3]/div[1]/div[1]/div[2]/strong/text()'),
        ('averagesize', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[8]/div/div[3]/div[1]/div[2]/div[2]/strong/text()'),
        ('script', '/html/body/script[5]/text()'),
        ('grafico', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[1]/div[8]/div/div[2]/div/div/div/svg/g[12]/g/text()'),
        ('telefono', '/html/body/main/div[1]/div/div[2]/div[2]/div[3]/div[2]/div[1]/div[2]/div[2]/div[1]/div/div/div/span/span[2]/text()'),
    )

    def parse_items(self, response):
        """Build one ``PropietiItem`` from a listing page.

        Args:
            response: The downloaded listing page passed in by the ``rent/``
                rule.

        Yields:
            The loaded item, with each field filled from the text nodes
            matched by its XPath in ``_FIELD_XPATHS``.
        """
        loader = ItemLoader(PropietiItem(), response)
        for field_name, xpath in self._FIELD_XPATHS:
            loader.add_xpath(field_name, xpath)
        yield loader.load_item()

我的想法是:进入列表页中的每一个房源链接(所有房源,直到第 20 页),获取一些信息,例如代理商名称、电话、邮件、价格、房屋、面积以及下方的图表。你们是最棒的,我喜欢这个论坛以及人们之间的相互帮助。

0 个答案:

没有答案