到目前为止,我已经完成了以下代码。 正如我看到的那样,最初的帖子已经完成,但是在规则中定义的链接上没有成功。
class MySpider(CrawlSpider):
name = "myspider"
rules = [
Rule(
# Define links to be followed and parsed
SgmlLinkExtractor(allow=['xxx/xxx']),
callback='my_parse',
follow=True),
]
def start_requests(self):
print "start_requests"
return [FormRequest(
"http://domain/postUrl",
formdata={
'data1': '111',
'data2': '222',
},
callback=self.after_post
)]
def after_post(self, response):
print "after_post"
return
def my_parse(self, response):
print "my_parse"
return
提前多多感谢