循环中发出的请求不会按顺序执行

时间:2019-09-15 06:49:18

标签: python web-scraping scrapy

代码运行时没有任何错误,但是请求的输出是异步的。

我期望的输出是:对于 after_timeline 函数的 for 循环中的每个 URL,先调用后续的函数,返回后再进一步调用其他函数,依此类推。但是现在结果是不规则的,这意味着第一个 URL 的结果会首先显示,并且还缺少一些结果。请帮帮我,谢谢!

def scroll_page(self, response):
        """Request the timeline page linked from ``response``.

        Takes the first ``href`` matched by ``//div[@class='cr d cg']/a``,
        resolves it against the response URL and yields a request whose
        callback is ``self.after_timeline``.  Nothing is yielded when the
        page contains no such link.
        """
        timeline_url = response.xpath("//div[@class='cr d cg']/a/@href").extract_first()
        if timeline_url is None:
            # No link to follow.
            # NOTE(review): joining a missing href presumably resolves back to
            # the current page URL, so a request here would be pointless.
            return
        new_timeline = response.urljoin(timeline_url)
        yield scrapy.Request(url=new_timeline, callback=self.after_timeline)

    def after_timeline(self, response):
        """Queue one request per 'Full Story' link found in ``response``.

        Each ``href`` of an element whose text contains ``Full Story`` is
        appended to the mobile Facebook host and requested with
        ``self.after_story`` as the callback.
        """
        story_paths = response.xpath("//*[contains(text(),'Full Story')]/@href").extract()
        host = "https://mobile.facebook.com"
        for story_path in story_paths:
            story_request = scrapy.Request(
                url=host + story_path,
                callback=self.after_story,
            )
            yield story_request

    def after_story(self, response):
        """Record the story's like text and follow its ``_45m8`` link.

        The text of the first ``div`` with class ``_1g06`` is stored in the
        module-level ``no_likes``.  If the page has an ``a`` with class
        ``_45m8``, its ``href`` is appended to the mobile Facebook host and
        requested with ``self.friends`` as the callback; otherwise nothing is
        yielded.
        """
        friend_page = response.xpath("//a[@class='_45m8']/@href").extract_first()
        # NOTE(review): every story response overwrites this single global, so
        # a later callback cannot assume the value belongs to "its" story.
        global no_likes
        no_likes = response.xpath("//div[@class='_1g06']/text()").extract_first()
        #https://static.xx.fbcdn.net/rsrc.php/v3/yv/r/dOJFaVZihS_.png
        if friend_page is None:
            # Without the link, concatenating onto the host would raise
            # TypeError; skip this story instead.
            return
        new_friend_url = "https://mobile.facebook.com" + friend_page
        yield scrapy.Request(url=new_friend_url, callback=self.friends)


    def friends(self, response):
        """Print the listed names, then follow the page's 'See more' link.

        For every ``h3`` with class ``bd`` the text of its first ``a`` is
        printed, followed by the response URL.  If an element whose text
        contains ``See more`` has a parent with an ``href``, that link is
        requested with ``?limit=10`` replaced by ``?limit=200`` and
        ``self.fetch_fr`` as the callback.
        """
        # Query each matched <h3> directly rather than serialising it and
        # re-parsing the markup.
        for heading in response.xpath("//h3[@class='bd']"):
            name = heading.xpath(".//a/text()").extract_first()
            print(name)
        print(response.url)

        url = response.xpath("//*[contains(text(),'See more')]/parent::*/@href").extract_first()
        if url is not None:
            # Ask for a larger page than the default link provides.
            url = url.replace("?limit=10", '?limit=200')
            kaint_url = "https://mobile.facebook.com" + url
            yield scrapy.Request(url=kaint_url, callback=self.fetch_fr)

    def fetch_fr(self, response):
        """Print the name of every entry on the page, then the page URL.

        For every ``h3`` with class ``bd`` the text of its first ``a`` is
        printed; the response URL is printed last.  Returns ``None``.
        """
        # Query each matched <h3> directly rather than serialising it and
        # re-parsing the markup.
        for heading in response.xpath("//h3[@class='bd']"):
            name = heading.xpath(".//a/text()").extract_first()
            print(name)
        print(response.url)

0 个答案:

没有答案