它运行时没有任何错误,但是请求输出是异步的。
我想要的输出是:对于 after_timeline 函数的 for 循环中的每个 URL,依次调用后续的函数,返回后再进一步调用下一个函数,依此类推。但是现在结果是不规则的,这意味着第一个 URL 的结果会首先显示,并且还缺少一些结果。请帮帮我,好吗?
def scroll_page(self, response):
    """Follow the timeline link found on the current page.

    Takes the first ``href`` of an ``<a>`` directly under a ``<div>`` whose
    class attribute is exactly ``cr d cg``, resolves it against the response
    URL and schedules it with ``after_timeline`` as the callback.

    Yields:
        At most one ``scrapy.Request``; nothing when no link is found.
    """
    timeline_href = response.xpath(
        "//div[@class='cr d cg']/a/@href"
    ).extract_first()
    if not timeline_href:
        # urljoin() with a None/empty target resolves to the base URL, so
        # without this guard the current page itself would be re-queued and
        # handed to after_timeline.
        return

    yield scrapy.Request(
        url=response.urljoin(timeline_href),
        callback=self.after_timeline,
    )
def after_timeline(self, response):
    """Schedule one request per "Full Story" link on the timeline page.

    Collects the ``href`` of every element whose text contains
    ``Full Story``, prefixes each with the mobile site host and yields a
    request handled by ``after_story``. Requests are yielded in document
    order.
    """
    host = "https://mobile.facebook.com"
    story_hrefs = response.xpath(
        "//*[contains(text(),'Full Story')]/@href"
    ).extract()
    story_urls = (host + href for href in story_hrefs)
    for story_url in story_urls:
        yield scrapy.Request(url=story_url, callback=self.after_story)
def after_story(self, response):
    """Store the story's ``_1g06`` text and follow its ``_45m8`` link.

    Writes the first text node of ``div._1g06`` into the module-level
    ``no_likes`` (presumably the like count, judging by the name), then
    schedules the first ``a._45m8`` link with ``friends`` as the callback.

    Yields:
        At most one ``scrapy.Request``; nothing when the link is missing.
    """
    global no_likes
    # NOTE(review): this global is overwritten by every story that passes
    # through this callback, so a later callback cannot rely on it still
    # holding the value of "its" story. Consider carrying the value on the
    # request instead.
    no_likes = response.xpath("//div[@class='_1g06']/text()").extract_first()
    # https://static.xx.fbcdn.net/rsrc.php/v3/yv/r/dOJFaVZihS_.png

    friend_page = response.xpath("//a[@class='_45m8']/@href").extract_first()
    if friend_page is None:
        # Concatenating the host with None would raise TypeError.
        return

    yield scrapy.Request(
        url="https://mobile.facebook.com" + friend_page,
        callback=self.friends,
    )
def friends(self, response):
    """Print the names on this page and follow the "See more" link.

    For every ``<h3 class="bd">`` the text of its first link is printed,
    followed once by the page URL. If an element containing ``See more``
    has a parent with an ``href``, that link is requested with its
    ``?limit=10`` query replaced by ``?limit=200`` and handled by
    ``fetch_fr``.

    Yields:
        At most one ``scrapy.Request``; nothing when there is no
        "See more" link.
    """
    # Query the nested selectors directly instead of serializing each <h3>
    # and re-parsing it.
    for heading in response.xpath("//h3[@class='bd']"):
        # No trailing comma here: it would turn fbid into a 1-tuple.
        # NOTE(review): fbid is extracted but not consumed yet.
        fbid = heading.xpath(".//a/@href").extract_first()
        name = heading.xpath(".//a/text()").extract_first()
        print(name)
    print(response.url)

    more_href = response.xpath(
        "//*[contains(text(),'See more')]/parent::*/@href"
    ).extract_first()
    if more_href is None:
        return

    # Ask for a larger page than the default link offers.
    more_href = more_href.replace("?limit=10", "?limit=200")
    yield scrapy.Request(
        url="https://mobile.facebook.com" + more_href,
        callback=self.fetch_fr,
    )
def fetch_fr(self, response):
    """Print the names listed on an expanded ("See more") page.

    For every ``<h3 class="bd">`` the text of its first link is printed,
    followed once by the page URL. Nothing is returned or scheduled.
    """
    # Query the nested selectors directly instead of serializing each <h3>
    # and re-parsing it.
    for heading in response.xpath("//h3[@class='bd']"):
        # No trailing comma here: it would turn fbid into a 1-tuple.
        # NOTE(review): fbid is extracted but not consumed yet.
        fbid = heading.xpath(".//a/@href").extract_first()
        name = heading.xpath(".//a/text()").extract_first()
        print(name)
    print(response.url)