POST 请求返回 500 错误:使用 Python requests 进行网页抓取

时间:2019-12-21 21:25:49

标签: python-3.x web-scraping python-requests http-post

过去,我使用 requests 库和这里所用的方法抓取过许多站点,但出于某种原因,尽管我可以在浏览器中访问这些信息,该站点却仍然返回 500 错误。

以下是 Progressive 招聘网站(job board)上所有 IT 职位的链接:https://progressive.taleo.net/careersection/2/jobsearch.ftl?lang=en#

这是我的代码:

from bs4 import BeautifulSoup
import requests
import json


# REST endpoint that receives the job-search POST request.
url = "https://progressive.taleo.net/careersection/rest/jobboard/searchjobs?lang=en&portal=101430233"

# Request headers: JSON body, JSON-preferring Accept, and a desktop Chrome
# User-Agent string.
headers = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Content-Type': 'application/json',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
}


# headers = {
#   'Accept': 'application/json, text/javascript, */*; q=0.01',
#   'Accept-Encoding': 'gzip, deflate, br',
#   'Accept-Language': 'en-US,en;q=0.9',
#   'Cache-Control': 'no-cache, no-store',
#   'Connection': 'Keep-Alive',
#   'Content-Encoding': 'gzip',
#   'Content-Type': 'application/json',
#   'Origin': 'https://progressive.taleo.net',
#   'Referer': 'https://progressive.taleo.net/careersection/2/jobsearch.ftl?lang=en',
#   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36',
#   'Host': 'progressive.taleo.net',
#   'Sec-Fetch-Mode': 'cors',
#   'Sec-Fetch-Site': 'same-origin',
# }

# Request body, sent verbatim as a JSON string: sort option "3" (descending),
# empty keyword fields, two LOCATION ids and one JOB_FIELD id selected,
# first page of results.
data = '{"multilineEnabled":false,"sortingSelection":{"sortBySelectionParam":"3","ascendingSortingOrder":"false"},"fieldData":{"fields":{"KEYWORD":"","JOB_TITLE":"","JOB_NUMBER":""},"valid":true},"filterSelectionParam":{"searchFilterSelections":[{"id":"POSTING_DATE","selectedValues":[]},{"id":"LOCATION","selectedValues":["213860137526","209460137526"]},{"id":"JOB_FIELD","selectedValues":["4460472188"]},{"id":"JOB_SCHEDULE","selectedValues":[]}]},"advancedSearchFiltersSelectionParam":{"searchFilterSelections":[{"id":"ORGANIZATION","selectedValues":[]},{"id":"LOCATION","selectedValues":[]},{"id":"JOB_FIELD","selectedValues":[]},{"id":"URGENT_JOB","selectedValues":[]},{"id":"EMPLOYEE_STATUS","selectedValues":[]}]},"pageNo":1}'

def test_post(url):
    """POST the module-level ``data`` payload to *url* and print diagnostics.

    Sends the request with the module-level ``headers`` and prints the
    response headers followed by the HTTP status code. Nothing is returned.
    """
    response = requests.post(url, data=data, headers=headers)
    for detail in (response.headers, response.status_code):
        print(detail)
    # Decoding of the response body is left disabled:
    # clean = json.loads(response)
    # print(clean)

test_post(url)

使用 Chrome 开发者工具可以查看 XHR 请求,并看到浏览器向我代码中的 url 发送了一个 POST 请求,该请求应返回一个包含所有职位空缺的 JSON 对象。然而,我不断收到 500 错误,除了“Request Failed”(请求失败)之类的提示外,没有任何能说明原因的具体信息。

如果有人可以阐明这个问题,我将不胜感激。我希望能够从网站上抓取所有职位。

P.S. 我保留了被注释掉的那部分代码,因为我还尝试过加入其他请求头,但似乎都不起作用。

1 个答案:

答案 0 :(得分:2)

您似乎需要设置 tz(时区)请求头:

import requests

# REST endpoint that receives the job-search POST request.
url = "https://progressive.taleo.net/careersection/rest/jobboard/searchjobs?lang=en&portal=101430233"

headers = {
    'Content-Type': 'application/json',
    # Time-zone header; the endpoint appears to require it (requests sent
    # without it were reported to come back as HTTP 500).
    'tz': 'GMT+00:00',
}

# Request body, sent verbatim as a JSON string: sort option "3" (descending),
# empty keyword fields, two LOCATION ids and one JOB_FIELD id selected,
# first page of results.
data = '{"multilineEnabled":false,"sortingSelection":{"sortBySelectionParam":"3","ascendingSortingOrder":"false"},"fieldData":{"fields":{"KEYWORD":"","JOB_TITLE":"","JOB_NUMBER":""},"valid":true},"filterSelectionParam":{"searchFilterSelections":[{"id":"POSTING_DATE","selectedValues":[]},{"id":"LOCATION","selectedValues":["213860137526","209460137526"]},{"id":"JOB_FIELD","selectedValues":["4460472188"]},{"id":"JOB_SCHEDULE","selectedValues":[]}]},"advancedSearchFiltersSelectionParam":{"searchFilterSelections":[{"id":"ORGANIZATION","selectedValues":[]},{"id":"LOCATION","selectedValues":[]},{"id":"JOB_FIELD","selectedValues":[]},{"id":"URGENT_JOB","selectedValues":[]},{"id":"EMPLOYEE_STATUS","selectedValues":[]}]},"pageNo":1}'


def test_post(url, timeout=30):
    """POST the module-level search payload to *url* and return the parsed JSON.

    Sends ``data`` with the module-level ``headers``, prints the response
    headers and HTTP status code, then decodes, prints and returns the body.

    Args:
        url: Endpoint to send the POST request to.
        timeout: Seconds to wait for the server. requests applies no timeout
            unless one is given, so without it a stalled server would block
            the script indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    response = requests.post(url, headers=headers, data=data, timeout=timeout)
    print(response.headers)
    print(response.status_code)
    # Surface an HTTP failure as the HTTP error itself instead of letting
    # .json() fail on an error body that may not be JSON.
    response.raise_for_status()
    clean = response.json()
    print(clean)
    return clean

test_post(url)

输出:

{'Date': 'Sun, 22 Dec 2019 00:48:53 GMT', 'Server': 'Taleo Web Server 8', 'Cache-Control': 'no-cache, no-store', 'Pragma': 'no-cache', 'Expires': '-1', 'P3P': 'CP="CAO PSA OUR"', 'Content-Encoding': 'gzip', 'Vary': 'Accept-Encoding', 'X-Content-Type-Options': 'nosniff', 'Set-Cookie': 'locale=en; path=/careersection/', 'X-XSS-Protection': '1', 'X-UA-Compatible': 'IE=edge', 'Keep-Alive': 'timeout=5, max=100', 'Connection': 'Keep-Alive', 'Transfer-Encoding': 'chunked', 'Content-Type': 'application/json'}
200
{'requisitionList': [{'hotJob': True, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '810924', 'contestNo': '161749', 'column': ['Big Data Lead Developer', '["Multiple Locations"]', 'Dec 6, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '834485', 'contestNo': '164658', 'column': ['DevOps Engineer Senior or Lead - Big Data', '["United States-Ohio-Mayfield Village"]', 'Dec 20, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '834269', 'contestNo': '164625', 'column': ['IT Systems Analyst Senior/Lead – Enterprise Services Operations Center', '["Multiple Locations"]', 'Dec 20, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '832904', 'contestNo': '164433', 'column': ['Data Warehouse Developer', '["Multiple Locations"]', 'Dec 19, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '827706', 'contestNo': '163768', 'column': ['Quality Assurance Analyst', '["Multiple Locations"]', 'Dec 18, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '831205', 'contestNo': '164213', 'column': ['BI Developer to BI Developer Senior', '["United States-Ohio-Mayfield Heights"]', 'Dec 18, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '832344', 'contestNo': '164360', 'column': ['Software Developer Senior / Lead', '["United 
States-Ohio-Mayfield Village"]', 'Dec 18, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '830766', 'contestNo': '164139', 'column': ['Developer QA Analyst', '["United States-Ohio-Mayfield Heights"]', 'Dec 17, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '833006', 'contestNo': '164460', 'column': ['Business Systems Analyst Senior or Lead - ASWP', '["United States-Ohio-Mayfield Village"]', 'Dec 17, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '831704', 'contestNo': '164255', 'column': ['Software Developer Lead', '["United States-Ohio-Mayfield Village"]', 'Dec 17, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '807764', 'contestNo': '161418', 'column': ['Business Systems Analyst (Sr or Lead) - CRM', '["Multiple Locations"]', 'Dec 16, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '832230', 'contestNo': '164349', 'column': ['Software Developer / Senior / Lead', '["Multiple Locations"]', 'Dec 16, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '831664', 'contestNo': '164247', 'column': ['Business Systems Analyst Intermediate to Senior - Mobile Native Apps', '["Multiple Locations"]', 'Dec 15, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 
'jobId': '825524', 'contestNo': '163476', 'column': ['Software Developer / Senior', '["Multiple Locations"]', 'Dec 14, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '831264', 'contestNo': '164215', 'column': ['BI Developer to BI Developer Senior', '["United States-Ohio-Mayfield Heights"]', 'Dec 13, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '831484', 'contestNo': '164236', 'column': ['Full Stack Developer - Enterprise Resource Organization', '["Multiple Locations"]', 'Dec 13, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '832004', 'contestNo': '164304', 'column': ['Network IT Systems Engineer Senior for WAN/Carrier', '["Multiple Locations"]', 'Dec 11, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '830304', 'contestNo': '164085', 'column': ['Business Systems Analyst Sr.', '["Multiple Locations"]', 'Dec 9, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '829325', 'contestNo': '163974', 'column': ['Quality Assurance Analyst Senior or Lead', '["Multiple Locations"]', 'Nov 27, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '829125', 'contestNo': '163961', 'column': ['Software Developer / Senior', '["Multiple Locations"]', 'Nov 26, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 
'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '827944', 'contestNo': '163802', 'column': ['Quality Assurance Analyst Senior', '["United States-Ohio-Mayfield Village"]', 'Nov 25, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '827686', 'contestNo': '163774', 'column': ['Quality Assurance Test Lead', '["United States-Ohio-Mayfield Village"]', 'Nov 25, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '826384', 'contestNo': '163601', 'column': ['Software Developer Senior / Lead', '["Multiple Locations"]', 'Nov 25, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '825644', 'contestNo': '163496', 'column': ['IT DevOps Engineer Lead', '["Multiple Locations"]', 'Nov 13, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}, {'hotJob': False, 'addedToJobCart': False, 'draft': False, 'alreadyAppliedOn': False, 'toReApply': False, 'jobId': '824252', 'contestNo': '163292', 'column': ['Systems Test Engineer Intermediate to Senior', '["Multiple Locations"]', 'Nov 8, 2019'], 'linkedColumn': 0, 'locationsColumns': [1]}], 'facetResults': [{'id': 'POSTING_DATE', 'facetValueResults': [{'id': '1', 'text': 'Today', 'quantity': ''}, {'id': '2', 'text': 'Yesterday', 'quantity': ''}, {'id': '3', 'text': 'Last 7 Days', 'quantity': ''}, {'id': '4', 'text': 'Last 14 Days', 'quantity': ''}, {'id': '5', 'text': 'Last 21 Days', 'quantity': ''}, {'id': '6', 'text': 'Last 28 Days', 'quantity': ''}]}, {'id': 'LOCATION', 'facetValueResults': [{'id': '209460137526', 'text': 'Mayfield Village', 'quantity': '23', 'level': 3}, {'id': '213860137526', 'text': 'Mayfield Heights', 'quantity': '3', 'level': 3}], 'levelList': [{'level': 1, 'name': 
'Country'}, {'level': 2, 'name': 'State'}, {'level': 3, 'name': 'City'}, {'level': 4, 'name': 'Work Location'}]}, {'id': 'JOB_FIELD', 'facetValueResults': [{'id': '4460472188', 'text': 'Information Technology', 'quantity': '26', 'level': 1}], 'levelList': [{'level': 1, 'name': 'Job Category'}]}, {'id': 'JOB_SCHEDULE', 'facetValueResults': [{'id': '1', 'text': 'Full-time', 'quantity': '26'}]}, {'id': 'JOB_LOCALE', 'facetValueResults': [{'id': 'en', 'text': 'English (26)', 'quantity': '26'}]}], 'pagingData': {'currentPageNo': 1, 'pageSize': 25, 'totalCount': 26}, 'queryString': 'f=LOCATION(213860137526,209460137526)|JOB_FIELD(4460472188)&s=3|D&a=null&multiline=false', 'careerSectionUnAvailable': False, 'supportedLanguages': []}