当我访问' https://baike.baidu.com/wikitag/taglist?tagId=75953'在chrome上,通过提琴手,我发现浏览器会发送一个帖子请求到https://baike.baidu.com//wikitag/api/getlemmas'。 所以我试图发送一个' POST'请求将表单数据添加到网址:' https://baike.baidu.com//wikitag/api/getlemmas'并从其响应中获取JSON数据'请求。 我通过Fiddler获取所有标题和表单数据,并尝试发送相同的' POST' python3使用请求包请求。 但即使我发送了' POST'请求具有相同的标题和表单数据,我得到一个空主体的请求(状态:200)。 由邮递员发送的同一请求'也没关系,但是通过python3我无论如何都失败了。
# -*- coding:UTF-8 -*-
import requests
def disease_json():
host = 'https://baike.baidu.com'
target = host + '/wikitag/api/getlemmas'
cookies={
'BAIDUID':'EEE35ACB030447144E615B191397065B:FG=1;PSTM=1523192637;BIDUPSID=B34DD366905D15BB907C1667346970AE;Hm_lvt_55b574651fcae74b0a9f1cf9c8d7c93a=1522304864,1522305101,1523192946,1523253565;PSINO=2;H_PS_PSSID=1990_1438_26082_21 125_22074;BDORZ=B490B5EBF6F3CD402E515D22BCDA1598'
}
headers = {
'Accept':'application/json, text/javascript, */*; q=0.01',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.8',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'X-Requested-With':'XMLHttpRequest',
'Content-Length':'91',
'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
'Referer':'https://baike.baidu.com/wikitag/taglist?tagId=75953',
'Origin':'https://baike.baidu.com',
'Connection':'keep-alive',
'Host':'baike.baidu.com',
}
forms = {
'limit': '24',
'timeout': '3000',
'filterTags':'[]',
'tagID': '75953',
'fromLemma': 'false',
'contentLength': '40',
'page': '0',
}
req=requests.post(url=target,data=forms,verify=False,headers=headers)
print(req.text)
"""
html = json.loads(req.text)
for each in html['lemmaList']:
print('lemmaCroppedTitle:',each['lemmaCroppedTitle'])
print(req.text)
"""
def main():
disease_json()
if __name__ == '__main__':
main()
以下是浏览器发送的correct request:
答案 0 :(得分:0)
修改了content-type
和您的request payload
。还为有效负载转换添加了方法encode_multipart_data
以与multipart-form-data
import sys
import requests
def encode_multipart_data(fields):
boundary = '------WebKitFormBoundary7MA4YWxkTrZu0gW'
CRLF = '\r\n'
L = []
for key, value in fields.items():
L.append(boundary)
L.append('Content-Disposition: form-data; name="%s"\r\n' % key)
L.append(value)
L.append(boundary + "--")
body = CRLF.join(L)
return body
def disease_json():
host = 'https://baike.baidu.com'
target = host + '/wikitag/api/getlemmas'
headers = {
'Accept':'application/json, text/javascript, */*; q=0.01',
'Accept-Encoding':'gzip, deflate, br',
'Accept-Language':'zh-CN,zh;q=0.8',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'X-Requested-With':'XMLHttpRequest',
# changed content-type
'content-type': "multipart/form-data; boundary=----WebKitFormBoundary7MA4YWxkTrZu0gW",
'Referer':'https://baike.baidu.com/wikitag/taglist?tagId=75953',
'Origin':'https://baike.baidu.com',
'Connection':'keep-alive',
'Host':'baike.baidu.com'
}
forms = {
'limit': '24',
'timeout': '3000',
'filterTags': '[]',
'tagId': '75953',
'fromLemma': 'false',
'contentLength': '40',
'page': '0',
}
payload = encode_multipart_data(forms)
resp = requests.post(url=target, data=payload, headers=headers)
print(resp.text)
if __name__ == '__main__':
disease_json()
答案 1 :(得分:0)
这种方式也可以解决问题。
import requests
import http.cookiejar
import json
url = "https://baike.baidu.com/wikitag/api/getlemmas"
payload = "limit=24&timeout=3000&filtetTags=%5B%5D&tagId=75953&fromLemma=false&contentLegth=40&page=0"
headers = {
'Content-Type': "application/x-www-form-urlencoded",
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181\
Safari/537.36"
}
def get_cookies():
session = requests.Session()
session.cookies = http.cookiejar.LWPCookieJar("cookie")
response = session.post(url, headers=headers, data=payload, allow_redirects=False,verify=False)
session.cookies.save(ignore_discard=True, ignore_expires=True)
return response
def disease_json(times=-1):
times += 1
response = get_cookies()
if response.status_code == 302:
session = requests.session()
session.cookies = http.cookiejar.LWPCookieJar(filename='cookie')
session.cookies.load(ignore_discard=True)
url = response.headers['Location']
response = session.post(url, headers=headers, data=payload, allow_redirects=False)
json_data = response.text
print(json.loads(json_data))
print(times)
if __name__ == '__main__':
disease_json()