我尝试使用 Scrapy 的 FormRequest 方法自动登录一个表单。但是,我使用的网站并没有使用简单的 HTML 表单,而是使用一个包含多个 "div" 的 "fieldset"(其中两个分别是用户名和密码)。我想要定位这些字段并输入我自己的文本。以下是我查阅文档后写出的代码:
import scrapy
from loginform import fill_login_form
from scrapy.http import FormRequest
class PollSpider(scrapy.Spider):
    """Spider that fetches the page in ``start_urls`` and submits a login form.

    ``parse`` builds the login request; ``after_login`` inspects the reply.
    """

    # NOTE(review): genres, login_user and login_pass are not read by any
    # method visible in this class.
    genres = [""]
    login_user = "drexel"
    login_pass = "dragons"
    name = "poll"
    start_urls = ["http://www.pollstarpro.com/home"]

    def parse(self, response):
        """Print the <input> children of every <div>, then submit the form.

        Returns:
            A FormRequest built from ``response``; its response is handed to
            ``after_login``.
        """
        divs = response.xpath('//div')
        for inp in divs.xpath('input'):
            # Debug output to discover which input fields the page exposes.
            print(inp)
        return scrapy.FormRequest.from_response(
            response,
            formname='fieldset',
            formdata={'ctl11$userNameText': 'drexel', 'ctl11$passwordText': 'fuck'},
            callback=self.after_login
        )

    def after_login(self, response):
        """Print whether the text "Invalid" occurs in the login response body."""
        if "Invalid" in response.body:
            print("fucked up")
        else:
            print("success!")
            #scrape away!
这不仅不起作用,而且即使输入的密码不正确,我的控制台也会打印 "success!"。任何帮助将不胜感激!
答案 0 :(得分:1)
我认为这个问题出在 body 属性上。试试下面的代码,我还顺便修复了其他一些错误:
import scrapy
from loginform import fill_login_form
from scrapy.http import FormRequest
from scrapy.selector import Selector #FIXED
class PollSpider(scrapy.Spider):
    """Spider that fetches the page in ``start_urls`` and submits a login form.

    ``parse`` builds the login request; ``after_login`` inspects the decoded
    text of the reply.
    """

    # NOTE(review): genres, login_user and login_pass are not read by any
    # method visible in this class; the submitted credentials are hard-coded
    # in ``parse`` instead -- confirm which values are intended.
    genres = [""]
    login_user = "drexel"
    login_pass = "dragons"
    name = "poll"
    start_urls = ["http://www.pollstarpro.com/home"]

    def parse(self, response):
        """Print every <input> that is a direct child of a <div>, then log in.

        Returns:
            A FormRequest built from ``response``; its response is handed to
            ``after_login``.
        """
        selector = Selector(response)
        for inp in selector.xpath('//div/input'):
            # Debug output to discover which input fields the page exposes.
            # (`in` is a reserved word, so the loop variable must be printed
            # by its real name.)
            print(inp)
        return scrapy.FormRequest.from_response(
            response,
            # NOTE(review): formname matches a <form> by its ``name``
            # attribute; 'fieldset' looks like an element type rather than a
            # form name -- verify against the page markup.
            formname='fieldset',
            formdata={'ctl11$userNameText': 'drexel', 'ctl11$passwordText': 'fuck'},
            callback=self.after_login
        )

    def after_login(self, response):
        """Print "failed" if the decoded response text contains "Invalid"."""
        # response.text is the decoded body; it replaces the deprecated
        # body_as_unicode() and avoids comparing str against raw bytes.
        if u"Invalid" in response.text:
            print("failed")
        else:
            print("success!")
            #scrape away!