导航正在通过POST填充表单进行 第一次抓取结果没问题。
{
def start_requests(self):
for search in self.searchs:
for u in self.start_urls:
self.cursor.execute("UPDATE motcleazloop SET nb=nb+1")
self.conn.commit()
frmdata = {"q": search}
return [FormRequest (url=u, method="POST", formdata=frmdata, callback=self.parse_o)]
def parse_o(self,response):
#count the number of forms in the response
print 'number of forms:'
print len(response.css('form'))
sel = Selector(response)
#Call item function
item = SniffyduckItem()
#Extract items to a array
item['urls'] = sel.xpath('//*[@id=\'links\']/*/*/*/*/@href').extract()
# Process item
yield item
# Call next job of the robot
yield self.loop_a(response)
def loop_a(self,response):
# print response.body
nbforms = len(response.css('form'))
if nbforms == 3:
print "4"
yield FormRequest.from_response (response, formnumber=2, callback=self.parse_o)
elif nbforms == 4:
print "3"
yield FormRequest.from_response (response, formnumber=3, callback=self.parse_o)
else:
# if only 2 forms
exit(0)
}
错误:
2018-02-24 10:50:06 [scrapy.core.scraper]错误:蜘蛛必须返回 Request,BaseItem,dict或None,得到了' list'在
答案 0 :(得分:0)
返回FormRequest而不是[FormRequest],你应该删除[]