如何在scrapy的itemloader中传递列表?

时间:2018-09-11 10:00:49

标签: python scrapy

我无法获取图片网址。如何通过 ItemLoader 填充 image_url 字段?如果不使用 ItemLoader,而是直接生成(yield)一个包含 image_url 的字典,则一切正常。

def parse_property(self, response):
    """Build a ``PropertiesItem`` from one property detail page.

    Args:
        response: The Scrapy response of the property page being parsed.

    Returns:
        The item produced by ``ItemLoader.load_item()``, holding the page
        URL, the listing details, the filtered image URLs and the price.
    """
    loader = ItemLoader(item=PropertiesItem(), response=response)
    loader.add_value('url', response.url)
    loader.add_xpath('title', '//div[@class="property-title"]/h1/text()',
                     MapCompose(str.strip, str.title))

    # Every field below is a plain text node that only needs whitespace
    # stripped, so the (field, xpath) pairs are kept in a table and loaded
    # in a single loop.
    stripped_fields = (
        ('offering', '//span[@class="property-badge"]/text()'),
        ('area', '//ul[@class="property-main-features"]/li[contains(text(), "Area")]/span/text()'),
        ('rooms', '//ul[@class="property-main-features"]/li[contains(text(), "Rooms")]/span/text()'),
        ('bed_room', '//ul[@class="property-main-features"]/li[contains(text(), "Bedroom")]/span/text()'),
        ('bath_room', '//ul[@class="property-main-features"]/li[contains(text(), "Bathroom")]/span/text()'),
        ('ground_floor', '//*/strong[contains(text(), "Ground Floor")]/following-sibling::text()'),
        ('first_floor', '//*/strong[contains(text(), "1st Floor")]/following-sibling::text()'),
        ('top_floor', '//*/strong[contains(text(), "Top Floor")]/following-sibling::text()'),
        ('facing', '//div[@class="additional-details"]/p[contains(text(), "Facing")]/text()'),
        ('location', '//div[@class="additional-details"]/p[contains(text(), "Location")]/text()'),
        ('building_age', '//div[@class="additional-details"]/p[contains(text(), "Building Age")]/span/text()'),
        ('parking', '//div[@class="additional-details"]/p[contains(text(), "Parking")]/text()'),
        ('floors', '//div[@class="additional-details"]/p[contains(text(), "floors")]/text()'),
        ('area1', '//div[@class="additional-details"]/p[contains(text(), "Area")]/text()'),
    )
    for field, xpath in stripped_fields:
        loader.add_xpath(field, xpath, MapCompose(str.strip))

    # NOTE(review): 'description' is intentionally not loaded yet. The
    # candidate XPaths that were tried are listed here; they would need
    # add_xpath (add_value stores the XPath string itself as the value):
    #   //div[@class="description-div"]/ul/li/text()
    #   //div[@class="description-div"]/p/text()
    #   //div[@class="description-div"]/descendant::*/text()

    # The site host is prepended to each <img src>, so the src values are
    # assumed to be site-relative paths -- TODO confirm. The query string is
    # dropped and only URLs containing "original" are kept.
    sources = response.xpath('//div[@class="col-md-12"]/descendant::img/@src').extract()
    absolute = ("https://www.epropertynepal.com" + src for src in sources)
    without_query = (link.split('?')[0] for link in absolute)
    image_url = [link for link in without_query if 'original' in link]
    # Pass the local list straight to the loader; add_value accepts an
    # iterable of values. Assumes PropertiesItem declares an 'image_url'
    # field -- verify against the item definition.
    loader.add_value('image_url', image_url)

    loader.add_xpath('price', '//div[@class="property-pricing"]/div[1]/text()')

    return loader.load_item()

1 个答案:

答案 0 :(得分:0)

# Drop the query string from each URL, then keep only those that contain
# "original".
images = [link.partition('?')[0] for link in image]
image_url = [link for link in images if 'original' in link]

# add_value takes the whole list at once; no per-URL call is needed.
l.add_value('images', image_url)

这样就可以正常工作了。