Python3 使用 requests 发送 POST 请求成功,但响应体为空(浏览器中一切正常)

时间:2018-04-11 06:00:34

标签: python-3.x python-requests http-post postman

当我在 Chrome 中访问 'https://baike.baidu.com/wikitag/taglist?tagId=75953' 时,通过 Fiddler 抓包发现浏览器会向 'https://baike.baidu.com//wikitag/api/getlemmas' 发送一个 POST 请求。于是我尝试向该网址 'https://baike.baidu.com//wikitag/api/getlemmas' 发送带有表单数据的 POST 请求,并从响应中获取 JSON 数据。我通过 Fiddler 获取了所有请求头和表单数据,并尝试在 Python3 中使用 requests 包发送相同的 POST 请求。但即使我发送的 POST 请求带有相同的请求头和表单数据,得到的响应体仍然是空的(状态码:200)。用 Postman 发送同样的请求没有问题,但用 Python3 无论如何都失败。

# -*- coding:UTF-8 -*-
import requests

def disease_json():
    """POST the tag-list form to the Baidu Baike ``getlemmas`` endpoint.

    Sends a URL-encoded form to ``/wikitag/api/getlemmas`` with
    browser-like headers and prints the raw response body.

    Changes relative to the first draft of this function:

    * The form key is ``tagId`` (matching the ``tagId=75953`` query
      parameter of the referring page), not ``tagID``.
    * The hard-coded ``Content-Length`` header is gone; ``requests``
      computes it from the encoded body.
    * ``br`` is no longer advertised in ``Accept-Encoding``, because
      ``requests`` can only decode Brotli when an optional brotli package
      is installed; otherwise ``req.text`` would be undecoded bytes.
    * The ``cookies`` dict that was built but never passed to the request
      has been removed, so the request is still sent without cookies.
      Pass ``cookies=...`` to ``requests.post`` if the endpoint turns out
      to need them.

    Returns:
        None. The response body is printed to stdout.
    """
    host = 'https://baike.baidu.com'
    target = host + '/wikitag/api/getlemmas'

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Referer': 'https://baike.baidu.com/wikitag/taglist?tagId=75953',
        'Origin': 'https://baike.baidu.com',
        'Connection': 'keep-alive',
        'Host': 'baike.baidu.com',
    }
    forms = {
        'limit': '24',
        'timeout': '3000',
        'filterTags': '[]',
        'tagId': '75953',
        'fromLemma': 'false',
        'contentLength': '40',
        'page': '0',
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    # It is kept because the request may be routed through a debugging
    # proxy; drop it when talking to the server directly.
    req = requests.post(url=target, data=forms, verify=False, headers=headers)
    print(req.text)

    # To consume the result, parse it with req.json(). The original
    # author's notes iterate over html['lemmaList'] and read
    # each['lemmaCroppedTitle']; that schema is not verified here.
def main():
    """Script entry point: issue the single request via ``disease_json``."""
    disease_json()



# Run the request only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

以下是浏览器发送的正确请求:

2 个答案:

答案 0 :(得分:0)

修改了 Content-Type 和请求负载(request payload),并添加了 encode_multipart_data 方法,用于把负载转换成与 multipart/form-data 一致的格式:

import sys
import requests

def encode_multipart_data(fields):
    """Serialize a mapping of form fields as a ``multipart/form-data`` body.

    Each field becomes one part: the delimiter line, a
    ``Content-Disposition`` header, a blank line, then the value. The
    body ends with the closing delimiter (delimiter followed by ``--``)
    and has no trailing CRLF.

    Args:
        fields: Mapping of field name to value. Values are converted with
            ``str()``, so numbers can be passed directly.

    Returns:
        The encoded body as a ``str``. An empty mapping yields only the
        closing delimiter.
    """
    # Delimiter as it appears in the body: "--" followed by the boundary
    # token declared in the request's Content-Type header.
    boundary = '------WebKitFormBoundary7MA4YWxkTrZu0gW'
    CRLF = '\r\n'

    parts = []
    for key, value in fields.items():
        parts.append(boundary)
        # The trailing CRLF here, plus the CRLF inserted by join(), yields
        # the mandatory blank line between part headers and part content.
        parts.append('Content-Disposition: form-data; name="{}"\r\n'.format(key))
        parts.append(str(value))

    parts.append(boundary + "--")
    return CRLF.join(parts)

def disease_json():
    """POST the tag-list form as ``multipart/form-data`` and print the reply.

    Builds the body with ``encode_multipart_data`` and sends it to
    ``/wikitag/api/getlemmas`` with browser-like headers. The boundary
    token in the ``content-type`` header must stay in sync with the
    delimiter used by ``encode_multipart_data``.

    ``br`` is not advertised in ``Accept-Encoding``: ``requests`` only
    decodes Brotli when an optional brotli package is installed, and an
    undecoded body would make ``resp.text`` unreadable.

    Returns:
        None. The response body is printed to stdout.
    """
    host = 'https://baike.baidu.com'
    target = host + '/wikitag/api/getlemmas'

    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',

        # Multipart body; the boundary token is the body delimiter without
        # its leading "--".
        'content-type': "multipart/form-data; boundary=----WebKitFormBoundary7MA4YWxkTrZu0gW",
        'Referer': 'https://baike.baidu.com/wikitag/taglist?tagId=75953',
        'Origin': 'https://baike.baidu.com',
        'Connection': 'keep-alive',
        'Host': 'baike.baidu.com',
    }
    forms = {
        'limit': '24',
        'timeout': '3000',
        'filterTags': '[]',
        'tagId': '75953',
        'fromLemma': 'false',
        'contentLength': '40',
        'page': '0',
    }
    payload = encode_multipart_data(forms)

    resp = requests.post(url=target, data=payload, headers=headers)
    print(resp.text)



# Run the request only when executed as a script, not when imported.
if __name__ == '__main__':
    disease_json()

答案 1 :(得分:0)

这种方式也可以解决问题。

import requests
import http.cookiejar
import json

# Endpoint that the tag-list page calls to fetch its entries.
url = "https://baike.baidu.com/wikitag/api/getlemmas"

# Pre-encoded application/x-www-form-urlencoded body ("%5B%5D" is "[]").
# Key names follow the form used elsewhere in this thread: "filterTags"
# and "contentLength" (earlier spelled "filtetTags" / "contentLegth").
payload = (
    "limit=24&timeout=3000&filterTags=%5B%5D&tagId=75953"
    "&fromLemma=false&contentLength=40&page=0"
)

headers = {
    'Content-Type': "application/x-www-form-urlencoded",
    # Adjacent literals are concatenated by the parser; unlike a backslash
    # continuation inside the string, this does not embed the indentation
    # of the following source line in the header value.
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
    ),
}


def get_cookies():
    """Send the initial POST and persist the cookies it sets.

    Posts the module-level ``payload`` to ``url`` without following
    redirects, then saves the session's cookie jar to the file ``cookie``
    in the current working directory (LWP format), including session and
    expired cookies.

    Returns:
        The ``requests.Response`` of the initial, non-redirected POST.
    """
    # The context manager closes the session's connection pool on exit.
    # The response body is already fully read at that point because the
    # request is not streamed.
    with requests.Session() as session:
        session.cookies = http.cookiejar.LWPCookieJar("cookie")
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept as-is; remove it unless a debugging proxy requires it.
        response = session.post(
            url,
            headers=headers,
            data=payload,
            allow_redirects=False,
            verify=False,
        )
        session.cookies.save(ignore_discard=True, ignore_expires=True)
    return response


def disease_json(times=-1):
    """Fetch the lemma list, replaying the POST once if the server redirects.

    Calls ``get_cookies`` for the initial request. If the answer is a 302,
    the cookies saved to the ``cookie`` file are loaded into a fresh
    session and the same form is POSTed to the redirect target (again
    without following redirects). The final response body is parsed as
    JSON and printed, followed by the incremented ``times`` counter.

    Args:
        times: Counter that is incremented once and printed. With the
            default of -1 the printed value is 0.

    Returns:
        None. Output goes to stdout.

    Raises:
        ValueError: If the final response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
    """
    from urllib.parse import urljoin

    times += 1
    response = get_cookies()
    if response.status_code == 302:
        # A Location header may be relative; resolve it against the URL of
        # the request that produced the redirect before re-posting.
        redirect_url = urljoin(response.url, response.headers['Location'])
        # The context manager closes the second session once the replayed
        # request has completed.
        with requests.Session() as session:
            session.cookies = http.cookiejar.LWPCookieJar(filename='cookie')
            session.cookies.load(ignore_discard=True)
            response = session.post(
                redirect_url,
                headers=headers,
                data=payload,
                allow_redirects=False,
            )
    json_data = response.text
    print(json.loads(json_data))
    print(times)


# Run the request only when executed as a script, not when imported.
if __name__ == '__main__':
    disease_json()