Scrapy不创建JSON文件

时间:2019-09-17 08:31:44

标签: python scrapy

完成蜘蛛程序后,我无法从API下载数据;错误日志文件中也没有显示任何内容。有人可以提供一些提示吗?

import json
import scrapy
class SpidyQuotesSpider(scrapy.Spider):
        """Spider that pages through the OLX search API and yields one item per car listing."""
        name = 'spidyquotes'
        # %s is filled in with the 1-based page number.
        quotes_base_url = 'https://www.olx.co.id/api/relevance/search?category=198&facet_limit=100&location=1000001&location_facet_limit=20&page=%s'
        start_urls = [quotes_base_url % 1]
        download_delay = 1.5
        def parse(self, response):
            """Parse one JSON API page: yield a dict per listing, then request the next page."""
            data = json.loads(response.body)
            for item in data.get('data', []):
                yield {
                    'car_id': item.get('id'),
                    'car_name' : item.get('title'),
                    # NOTE(review): the dotted string is looked up as ONE literal key;
                    # on a nested JSON payload this always returns None — verify schema.
                    'price': item.get('price.value.currency.display'),
                    'user_id': item.get('user_id')
                 #   'user_name':
                }
            # NOTE(review): per the answer below, the payload carries no 'has_next'
            # key, so this subscript raises KeyError and pagination never happens.
            if data['has_next']:
                next_page = data['page'] + 1
                yield scrapy.Request(self.quotes_base_url % next_page)

1 个答案:

答案 0 :(得分:1)

没有data['has_next'],只有data['metadata']['next_page_url'],因此您可以使用

        # Follow the pagination URL the API itself supplies instead of
        # building the next-page URL by hand; it is falsy on the last page.
        url = data['metadata']['next_page_url']
        if url:
            yield scrapy.Request(url)

或使其更安全

        metadata = data.get('metadata')
        if metadata:
           url = metadata.get('next_page_url')
           if url:
               yield scrapy.Request(url)

或者您可以使用try/except

        # Debugging aid only: the broad Exception catch swallows ANY failure
        # (missing key, bad request construction) and merely prints it.
        try:
            yield scrapy.Request(data['metadata']['next_page_url'])
        except Exception as ex:
            print("Ex:", ex)

无需创建项目即可运行的完整代码

import json
import scrapy

class MySpider(scrapy.Spider):
    """Spider that pages through the OLX search API and yields one item per car listing."""

    name = 'spidyquotes'
    # %s is replaced with the 1-based page number of the search results.
    quotes_base_url = 'https://www.olx.co.id/api/relevance/search?category=198&facet_limit=100&location=1000001&location_facet_limit=20&page=%s'
    start_urls = [quotes_base_url % 1]
    download_delay = 1.5

    @staticmethod
    def _deep_get(mapping, dotted_key):
        """Return mapping[dotted_key] when that literal key exists (backward
        compatible with the old flat lookup); otherwise walk the dot-separated
        path through nested dicts. Returns None if any step is missing."""
        if not isinstance(mapping, dict):
            return None
        if dotted_key in mapping:
            return mapping[dotted_key]
        current = mapping
        for part in dotted_key.split('.'):
            if not isinstance(current, dict):
                return None
            current = current.get(part)
        return current

    def parse(self, response):
        """Parse one JSON API page: yield a dict per listing, then follow the next page."""
        data = json.loads(response.body)
        for item in data.get('data', []):
            yield {
                'car_id': item.get('id'),
                'car_name': item.get('title'),
                # Fix: item.get('price.value.currency.display') looked up one
                # literal key containing dots, which always returns None for a
                # nested JSON payload; traverse the path instead.
                'price': self._deep_get(item, 'price.value.currency.display'),
                'user_id': item.get('user_id'),
            }

        # The payload has no 'has_next' flag; follow the pagination URL the
        # API itself supplies (absent/falsy on the last page).
        metadata = data.get('metadata')
        if metadata:
            url = metadata.get('next_page_url')
            if url:
                yield scrapy.Request(url)

# --- it runs without project and saves in `output.csv` ---

from scrapy.crawler import CrawlerProcess

# Standalone runner: no Scrapy project needed; results land in output.csv.
process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/5.0',
    # Feed export settings — format may be csv, json or xml.
    'FEED_FORMAT': 'csv',
    'FEED_URI': 'output.csv',
})
process.crawl(MySpider)
process.start()