我想登录一个网站,然后抓取一些详细信息,但是无法使用 scrapy 登录。这是我的代码:
from scrapy import Spider
from scrapy.http import FormRequest
from lxml import html
from scrapy.utils.response import open_in_browser
class QuotesSpider(Spider):
    """Spider that fetches the login page and submits its form with credentials."""

    name = 'quotes'
    start_urls = ('https://app.thecoachingmanual.com/login',)

    def parse(self, response):
        """Build the login request from the form found in the login page response.

        The form fields present in ``response`` are overridden with the
        credentials below; the response to the submission is handed to
        ``scrape_pages``.
        """
        credentials = {
            'emailAddress': 'email@gmail.com',
            'password': 'MyPassword',
        }
        login_request = FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.scrape_pages,
        )
        return login_request

    def scrape_pages(self, response):
        """Callback for the login submission; opens the received page in a browser."""
        # Debug aid: shows the page received after submitting the form.
        open_in_browser(response)
        # TODO: add the scraping logic for the pages reached after logging in.
答案 0 :(得分:0)
您的代码完全错误
打开 https://app.thecoachingmanual.com/login,打开 DevTools,单击 Network 标签,然后勾选 Preserve log。
查看登录请求被发送到哪个 URL,并分析 Form Data 以确认发送了哪些字段。
from scrapy.http import FormRequest
import logging
class QuotesSpider(Spider):
    """Spider that logs in by POSTing the credentials directly to the login API URL."""

    name = 'quotes'

    # Headers sent with the login request.
    # NOTE(review): 'accept-encoding' advertises 'br'; presumably the response
    # can only be decoded if brotli support is installed -- confirm.
    headers = {
        'origin': 'https://app.thecoachingmanual.com',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'en-US,en;q=0.9',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        # The body below is a JSON string, yet it is declared as form-urlencoded.
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'referer': 'https://app.thecoachingmanual.com/',
        'authority': 'api.thecoachingmanual.com',
        'dnt': '1',
    }

    def start_requests(self):
        """Yield the login POST request; its response is handled by ``login``."""
        data = '{"emailAddress":"test@gmail.com","password":"test"}'
        # ``formdata`` must be a dict or an iterable of (key, value) pairs, so
        # passing this string there fails while the request is being built.
        # The payload is sent unchanged as the raw request body instead, and
        # the method is set explicitly because it only defaults to POST when
        # ``formdata`` is given.
        yield FormRequest(
            'https://api.thecoachingmanual.com/commandapi/auth/login',
            method='POST',
            body=data,
            headers=self.headers,
            callback=self.login,
        )

    def login(self, response):
        """Log the status code and body text of the login response."""
        logging.info(response.status)
        logging.info(response.text)