我做错了什么?我以前抓取过网站,但这个网站的 POST 请求有效载荷(payload)对我来说很难处理。
from requests import session
from bs4 import BeautifulSoup as bs
# Form fields posted to the page.
# NOTE(review): p_instance and p_page_submission_id are hard-coded here and
# look session-specific -- presumably they have to be read from a freshly
# loaded copy of the form before posting; confirm against the site.
payload = {
    "p_flow_id": 838,
    "p_flow_step_id": 4,
    "p_instance": 1282563643133,
    "p_page_submission_id": 4990502617229,
    "p_request": "",
}

# Both requests go to the same address.
# NOTE(review): the POST to this address has been reported to return 404;
# the URL itself probably needs correcting -- verify the form's real action.
form_url = 'http://nid.usace.army.mil/cm_apex/f?p=838:4:0::NO/wwv_flow.accept'

# A single Session is used for both calls so that any cookies set by the
# POST are sent again with the follow-up GET.
with session() as c:
    posted = c.post(form_url, data=payload)
    # Fail loudly on a 4xx/5xx instead of silently parsing an error page.
    posted.raise_for_status()

    page = c.get(form_url)
    page.raise_for_status()

    html = page.content
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results can differ between machines.
    soup = bs(html, 'html.parser')
答案 0 :(得分:0)
您的第一个请求(POST)返回了 404 错误,打印响应的状态码即可确认:
# Diagnostic version of the scraping script: it prints the status code of the
# POST so the failing request is visible, then fetches and prints the page.
from requests import session  # needed for session() below
from bs4 import BeautifulSoup as bs

# Form fields sent with the POST request.
payload = {
    "p_flow_id": 838,
    "p_flow_step_id": 4,
    "p_instance": 1282563643133,
    "p_page_submission_id": 4990502617229,
    "p_request": "",
}

with session() as c:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('in')
    r1 = c.post('http://nid.usace.army.mil/cm_apex/f?p=838:4:0::NO/wwv_flow.accept', data=payload)
    # Deliberately not raising here: the point is to show the status code.
    print(r1.status_code)  # 404
    url = c.get('http://nid.usace.army.mil/cm_apex/f?p=838:4:0::NO/wwv_flow.accept')
    html = url.content
    soup = bs(html, 'lxml')
    print(soup)