I am trying to crawl some info from a Discuz!(v3.3) website.(http://bbs.guitarera.com/)
I write a file A to login, and save the cookies as a file. Then I write a file B to login with the cookies saved by file A.
file A worked well.with the respond html, I can search my username(1016zym), means it is working. But then I run file B, which never worked. Can't stay logged by the saved cookies.
Wonder why?
File A: login with username and password
import requests
try:
import cookielib
except:
import http.cookiejar as cookielib
import re
from bs4 import BeautifulSoup
agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.61 Mobile Safari/537.36'
headers = {
"Host":"bbs.guitarera.com",
"Referer":"http://bbs.guitarera.com/forum.php",
'User-Agent':agent
}
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
session.cookies.load(ignore_discard = True)
print("Cookie loaded")
except:
print("Cookie not loaded")
def login(account,password):
formhash = getFormhash()
print("formhash:",formhash)
postUrl = "http://bbs.guitarera.com/member.php?mod=logging&action=login&loginsubmit=yes&handlekey=login"
postData = {
'fastloginfield':"username",
'username':account,
'password':password,
'quickforward':'yes',
'handlekey':'ls',
'formhash':formhash
}
loginPage = session.post(postUrl,data=postData,headers=headers,allow_redirects=True)
soup = BeautifulSoup(loginPage.text)
#print(soup.prettify())
session.cookies.save()
print(soup.find(text=re.compile("1016zym")))# could found means successful logged in
return ""
def getFormhash():
url = "http://bbs.guitarera.com/member.php?mod=register"
page = session.get(url,headers=headers).text
soup = BeautifulSoup(page)
# print(soup.prettify())
k = soup.find_all(type="hidden")
r = soup.find_all("formhash")
pattern = r'name="formhash" value="(.*?)"'
formhash = re.findall(pattern, page)
if len(formhash) < 1:
print("formhash acquire failed")
return formhash[0]
if __name__ == '__main__':
print("prepare to log in")
account = '1016zym'
password = "qwer"# should be md5(password)
login(account,password)
file B: trying to stay logged with cookies
import requests
try:
import cookielib
except:
import http.cookiejar as cookielib
import re
from bs4 import BeautifulSoup
agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.61 Mobile Safari/537.36'
headers = {
"Host":"bbs.guitarera.com",
"Referer":"http://bbs.guitarera.com/forum.php",
'User-Agent':agent
}
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='cookies')
try:
session.cookies.load(ignore_discard = True)
print("Cookie loaded")
except:
print("Cookie not loaded")
def isLogin():
url = "http://bbs.guitarera.com/forum.php"
t = session.post(url,headers=headers,allow_redirects=False)
soup = BeautifulSoup(t.text)
# print(soup.prettify())
print(soup.find(text=re.compile("1016zym")))
return
if __name__ == '__main__':
print("prepare")
isLogin()
答案 0 :(得分:1)
通过一些调试解决了这个问题。
在File A
,
session.cookies.save()
应该是
session.cookies.save(ignore_discard = True,ignore_expires=True)
这将保存文件中的所有Cookie。save()
只会保存不丢弃而不会过期的Cookie。我碰巧需要记录。
在File B
session.cookies.load(ignore_discard = True)
应该是
session.cookies.load(ignore_discard = True,ignore_expires=True)
通过这两步,您可以准确地存储/使用您获得的cookie。