我以前使用 SplashFormRequest 登录某个站点。但是,该站点的开发人员修改了登录表单,增加了更多的 JavaScript,现在登录不再成功,而我无法弄清楚自己哪里做错了。下面是我的爬虫代码,以及该网站使用的 JavaScript。
class MySpider(scrapy.Spider):
    """Spider that tries to log in by submitting the site's login form via Splash.

    Flow: ``start_requests`` renders each start URL through Splash, ``parse``
    submits the login form found in the rendered page, and ``after_login``
    inspects the resulting page.

    NOTE(review): the site's ``login()`` script sends the credentials with an
    AJAX POST to ``/ajaxik.php`` rather than a native form submit, so the
    form submission in ``parse`` may not authenticate -- confirm.
    """

    name = "lost"
    start_urls = ["mysite",]  # main login form (changed by the site's developers)

    def start_requests(self):
        """Yield one Splash request per start URL, handled by ``parse``."""
        for url in self.start_urls:
            yield SplashRequest(
                url,
                self.parse,
                args={'wait': 1},
            )

    def parse(self, response):
        """Return a form request that fills in the ``mail`` and ``pass`` fields."""
        return SplashFormRequest.from_response(
            response,
            formdata={'mail': 'mymail', 'pass': 'mypasswd'},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print the page returned after the login attempt and log a success marker."""
        # Use the decoded text: ``response.body`` is bytes, and mixing bytes
        # with str in ``+`` or ``in`` raises TypeError on Python 3.
        body = response.text
        print('This is body ' + body + ' The end of body')
        # Going to film list
        if "Username" in body:
            # NOTE(review): logged at error level, presumably so the marker
            # stands out in the crawl log -- confirm before lowering it.
            self.logger.error("##Success##")
该网站使用的 JavaScript:
// Once the DOM is ready, make the Enter key in the mail or password field
// trigger login().
$(function ()
{
    var ENTER_KEY = 13;
    var $credentialInputs = $('input[name="mail"],input[name="pass"]');

    $credentialInputs.keydown(function (event)
    {
        // Ignore every key except Enter.
        if (event.keyCode != ENTER_KEY)
        {
            return;
        }
        login();
    });
});
/**
 * Read the login form and, if it passes a basic length check, POST the
 * credentials to /ajaxik.php. On a successful login a notification is shown
 * and the page navigates to "/" after one second; on a reported error a
 * generic login-error notification is shown.
 */
function login()
{
    // Keep these local: assigning them without `var` would create implicit
    // globals (and throw a ReferenceError in strict mode).
    var mail = $('input[name="mail"]').val();
    var pass = $('input[name="pass"]').val();
    var rem = $('input[name="rem"]:checked').length ? 1 : 0;

    // Require a non-empty mail and a password longer than 5 characters.
    if(!(mail.length && pass.length > 5))
    {
        return;
    }

    metrikaEvents('LOGIN');
    console.log('OK!');
    $.ajax({
        type: "POST",
        url: "/ajaxik.php",
        dataType : "json",
        // NOTE(review): jQuery URL-encodes `data` itself, so these values end
        // up encoded twice; presumably the server expects that -- confirm
        // before changing.
        data:
        {
            act:'users',
            type:'login',
            mail:encodeURIComponent(mail),
            pass:encodeURIComponent(pass),
            rem:encodeURIComponent(rem)
        },
        success: function(msg)
        {
            // Responses whose result is not 'ok' are ignored.
            if(msg.result != 'ok')
            {
                return;
            }
            if(msg.error)
            {
                // Every error code maps to the same generic message.
                ntfctn(lf_config.errors.user.login_error,'error');
            }
            else if(msg.success)
            {
                ntfctn(msg.name+lf_config.notifications['user_login'],'information');
                // Pass a function, not a string, so nothing is eval'd.
                setTimeout(function () { goTo("/",false); },1000);
            }
        }
    });
}
/**
 * Toggle an input between masked ("password") and plain ("text") display and
 * flip the eye icon immediately before it between "closed" and "opened".
 *
 * @param {string} t  Value of the input's `name` attribute.
 */
function loginTogglePass(t)
{
    // Look the elements up once instead of re-querying the DOM per statement.
    var $input = $('input[name="'+t+'"]');
    // Both branches must target the same `div.eye-icon` sibling; a selector
    // with a space in the class name ('div.eye- icon') matches nothing.
    var $icon = $input.prev('div.eye-icon');

    if($input.attr('type') == 'password')
    {
        $input.attr('type','text');
        $icon.removeClass('closed').addClass('opened');
    }
    else
    {
        $input.attr('type','password');
        $icon.removeClass('opened').addClass('closed');
    }
}
我看到这段 JavaScript 监听的是回车键(Enter,keyCode 13)。但点击按钮也应该有效。谁能给我指出正确的方向?谢谢。
答案 0 :(得分:3)
这个想法借鉴自以下主题:
Scrapy + splash: can't select element
不再通过 formdata 提交表单来登录,而是使用 Splash 的 Lua 脚本逐个选取页面元素、填入账号和密码并点击提交按钮:
class MySpider(scrapy.Spider):
name = "lost"
allowed_domains = ["mydomain"]
start_urls = ['myurl']
req = 10
series = {}
def start_requests(self):
    """Log in through a Splash Lua script and pass the result to ``after_login``.

    For each URL in ``start_urls`` the script loads the page, types the
    credentials into the ``mail`` and ``pass`` inputs, clicks the submit
    button and returns the rendered HTML plus a PNG screenshot.
    """
    script = """
    function main(splash)
        -- Request arguments are only data for an `execute` script; the
        -- user agent has to be applied explicitly before loading the page.
        splash:set_user_agent(splash.args.ua)

        local url = splash.args.url
        assert(splash:go(url))
        assert(splash:wait(10))
        splash:set_viewport_full()

        -- splash:select returns nil when nothing matches; fail with a clear
        -- message instead of an "attempt to index a nil value" error.
        local mail_input = assert(splash:select('input[name=mail]'), 'mail input not found')
        mail_input:send_text("email")
        local pass_input = assert(splash:select('input[name=pass]'), 'password input not found')
        pass_input:send_text("password")
        assert(splash:wait(5))

        local submit_button = assert(splash:select('input[class^=primary-btn]'), 'submit button not found')
        submit_button:click()
        assert(splash:wait(10))

        return {
            html = splash:html(),
            png = splash:png(),
        }
    end
    """
    # Use the spider's own start_urls instead of repeating the URL literal.
    for url in self.start_urls:
        yield SplashRequest(
            url,
            callback=self.after_login,  # handle the logged-in page
            endpoint='execute',
            args={
                'lua_source': script,
                'ua': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36"
            }
        )
def parse(self, response):
script = response.body