我可以成功登录,但之后从其他 URL 获取信息时,网站仍提示我需要登录。以下是登录部分的代码:
def login(username, password, session=None):
    """Log in to quera.ir and return the response to the login POST.

    The authentication cookies live on the session used for the login, not
    on the returned response. To stay logged in for later requests, create
    a ``requests.Session`` yourself, pass it as ``session``, and reuse that
    same object afterwards (e.g. as the ``request`` argument of a scraper).

    Args:
        username: Value sent as the ``login`` form field.
        password: Value sent as the ``password`` form field.
        session: Optional ``requests.Session`` to log in with. When omitted
            a fresh session is created, which is not reachable by the
            caller after this function returns.

    Returns:
        The response object of the login POST request.

    Raises:
        KeyError: If no CSRF cookie was set by the login page.
    """
    login_url = 'https://quera.ir/accounts/login'
    client = session if session is not None else requests.Session()

    # Send a browser-like user-agent on every request made with this session.
    client.headers.update({
        'user-agent': (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) snap Chromium/76.0.3809.100 '
            'Chrome/76.0.3809.100 Safari/537.36'
        ),
    })

    # Fetch the login page first so the server sets the CSRF cookie.
    client.get(login_url)

    # Prefer the cookie name the original code used; fall back to Django's
    # default name, since the form field below is Django's.
    csrftoken = (client.cookies.get('csrf_token')
                 or client.cookies.get('csrftoken'))
    if csrftoken is None:
        raise KeyError('csrf_token')

    payload = {
        'csrfmiddlewaretoken': csrftoken,
        'login': username,
        'password': password,
        'next': '/dashboard/',
    }
    r = client.post(login_url, data=payload, headers={'Referer': login_url})

    # NOTE: printed unconditionally; the response is not checked for success.
    print(username, 'has logged in')
    return r
这是抓取部分:
def get_problemsets(request, url, start=1, stop=1):
    """Collect the numeric ids listed on a range of paginated table pages.

    For every page number from ``start`` to ``stop`` (inclusive) the page
    at ``url + str(page)`` is fetched and the integer found in each row's
    ``td.collapsing`` cell is collected.

    Args:
        request: Object whose ``get(url)`` returns a response exposing
            ``.content`` (presumably a logged-in ``requests.Session`` --
            confirm against the caller).
        url: Base URL to which the page number is appended.
        start: First page number to fetch.
        stop: Last page number to fetch (inclusive).

    Returns:
        A list of the integer ids, in page and row order. An empty list is
        returned when ``stop < start``.

    Raises:
        AttributeError: If a page has no matching table (``find`` returns
            ``None``), e.g. when the page is not the expected listing.
    """
    ids = []
    for page in range(start, stop + 1):
        # Build the URL from the base each time; appending to the previous
        # page's URL would produce "...1", "...12", "...123", and so on.
        page_url = url + str(page)
        r = request.get(page_url)
        soup = BeautifulSoup(r.content, 'lxml')
        table = soup.find(
            'table', {'class': 'ui striped center aligned unstackable table'})
        # Drop the first row (headers) and the last row (pagination).
        rows = table.find_all('tr')[1:-1]
        for row in rows:
            ids.append(int(row.find('td', class_='collapsing').text))
    return ids