我试图从这个网站(here)抓取数据。
使用 BeautifulSoup 时,我得到的是一个空列表 [ ]。问题在于我要抓取的数据在网页源代码(View Page Source)中并不存在。在开发者工具(Developer Tools)> Network 中,响应的 Content-Type 是 JSON。所以我尝试使用以下代码:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
# Target of the POST further down: the jobsearch.ftl page URL, including its
# full query string.
url = 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290'
# Session object used to issue the request.
s = requests.Session()
# Cookies sent along with the request.
# NOTE(review): the '^%^' sequences look like Windows cmd caret-escaping left
# over from a copied curl command -- confirm against the real cookie values.
cookies = {
'locale': 'en-GB',
'_gcl_au': '1.1.79711829.1614933155',
'_ga': 'GA1.2.693390019.1614933178',
'__atssc': 'google^%^3B1',
'_gid': 'GA1.2.1213481278.1618077337',
'__atuvc': '1^%^7C10^%^2C0^%^7C11^%^2C9^%^7C12^%^2C14^%^7C13^%^2C28^%^7C14',
'__atuvs': '6071e67dc413e3d6001',
}
# Headers sent along with the request.
headers = {
'Connection': 'keep-alive',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache',
# NOTE(review): this value looks truncated and caret-escaped -- confirm.
'sec-ch-ua': '^\\^Google',
'tzname': 'Asia/Calcutta',
'sec-ch-ua-mobile': '?0',
'tz': 'GMT+05:30',
# Declares the request body as JSON.
'Content-Type': 'application/json',
'Accept': 'application/json, text/javascript, */*; q=0.01',
# Placeholder; must be replaced with a real User-Agent string.
'User-Agent': '###MY USER AGENT HERE####',
'X-Requested-With': 'XMLHttpRequest',
'Origin': 'https://ngc.taleo.net',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290',
'Accept-Language': 'en-US,en;q=0.9',
}
# Query parameters as (name, value) pairs.
# NOTE: the POST call further down does not pass `params`, so these are unused.
params = (
('lang', 'en_GB'),
('portal', '34140031600'),
)
# Request body, passed to requests as a raw string.
# NOTE(review): the '^' characters look like Windows cmd escaping left over
# from a copied curl command -- confirm. As written the string begins with
# '^', so it is not valid JSON text.
data = '^{^\\^multilineEnabled^\\^:true,^\\^sortingSelection^\\^:^{^\\^sortBySelectionParam^\\^:^\\^3^\\^,^\\^ascendingSortingOrder^\\^:^\\^false^\\^^},^\\^fieldData^\\^:^{^\\^fields^\\^:^{^\\^KEYWORD^\\^:^\\^^\\^,^\\^LOCATION^\\^:^\\^756140022608^\\^,^\\^JOB_TITLE^\\^:^\\^^\\^^},^\\^valid^\\^:true^},^\\^filterSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^POSTING_DATE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_TYPE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SCHEDULE^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^advancedSearchFiltersSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^ORGANIZATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_NUMBER^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^URGENT_JOB^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^EMPLOYEE_STATUS^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^STUDY_LEVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^WILL_TRAVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SHIFT^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^pageNo^\\^:1^}'
# POST to `url` and decode the body as JSON in the same expression; .json()
# raises a JSON decode error when the response body is not JSON. No HTTP
# status check happens before decoding.
response = s.post(url, headers=headers, cookies=cookies, data=data).json()
# `response` already holds the decoded result of .json(), not the Response
# object, so the two disabled lines below would not work on it as written.
#res_json = json.loads(response)
#print(response.status_code)
但在 response 这一行,我收到了如下错误:raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
非常感谢您对此的任何帮助!
不幸的是,我目前只能使用 requests 或其他流行的 Python 库。
提前致谢..
答案 0 :(得分:0)
您需要以 JSON 形式发送数据:在 Python 的 requests 模块中可以使用 json=data 参数。您还需要把数据写成字典,并把请求发送到 REST 接口(searchjobs),而不是 jobsearch.ftl 页面:
import requests
# Post the job-search criteria to the careersection REST endpoint and print
# the decoded JSON response.


def _empty_filters(filter_ids):
    """Return one filter entry with no selected values per id, in order."""
    return [{"id": filter_id, "selectedValues": []} for filter_id in filter_ids]


r = requests.post(
    "https://ngc.taleo.net/careersection/rest/jobboard/searchjobs",
    params={
        "lang": "en_GB",
        "location": "756140022608",
        "radiusType": "K",
        "searchExpanded": "true",
        "radius": "1",
        "portal": "34140031600",
    },
    headers={
        "tzname": "Asia/Calcutta",
        "tz": "GMT+05:30",
    },
    # json= serialises the dict and sets the Content-Type header to
    # application/json, so no hand-built body string is needed.
    json={
        "multilineEnabled": True,
        "sortingSelection": {
            "sortBySelectionParam": "3",
            # Deliberately the string "false", not a boolean.
            "ascendingSortingOrder": "false",
        },
        "fieldData": {
            "fields": {
                "KEYWORD": "",
                "LOCATION": "756140022608",
                "JOB_TITLE": "",
            },
            "valid": True,
        },
        "filterSelectionParam": {
            "searchFilterSelections": _empty_filters([
                "POSTING_DATE",
                "LOCATION",
                "JOB_FIELD",
                "JOB_TYPE",
                "JOB_SCHEDULE",
            ]),
        },
        "advancedSearchFiltersSelectionParam": {
            "searchFilterSelections": _empty_filters([
                "ORGANIZATION",
                "LOCATION",
                "JOB_FIELD",
                "JOB_NUMBER",
                "URGENT_JOB",
                "EMPLOYEE_STATUS",
                "STUDY_LEVEL",
                "WILL_TRAVEL",
                "JOB_SHIFT",
            ]),
        },
        "pageNo": 1,
    },
    # requests applies no timeout by default; without one a stalled server
    # would block this script indefinitely.
    timeout=30,
)
# Raise a clear HTTPError on a 4xx/5xx reply instead of letting a non-JSON
# error page surface as an opaque JSONDecodeError in r.json().
r.raise_for_status()
print(r.json())