之前我被问过这个问题,但仍然没有得到解决方案,我在网上搜索就像没有人遇到过同样的问题。我有安装错误的xlwt吗?或者json有什么不对劲? 有人能帮助我吗?我是python的新手。 非常感谢! KeyError:' content'
# -*- coding:utf-8 -*-
import requests
import time
import random
import xlwt
import json
def post_request(url=None, para={}, headers={}):
print 'Downloading: ' + str(para['pn'])
req = requests.post(url, data=para, headers=headers)
return req
if __name__ == '__main__':
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=0'
headers = {
'Host':'www.lagou.com',
'Referer':'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90?labelWords=&fromSearch=true&suginput',
'User-Agent':'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Mobile Safari/537.36'
}
pn = 1
end = 31
for x in range(pn, end, 1):
para = {
'first': 'true',
'kd': 'Python',
'pn': pn
}
req = post_request(url, para, headers)
path = 'd://PyCharmProject//lagou_scraping//Python_search_result//'
f = open(path + para['kd'] + '-' + str(para['pn']) + '.json', 'wb')
f.write(req.content)
f.close()
time.sleep(random.randint(3, 8))
pn = pn + 1
def read_json(path):
f = open(path)
dictory = json.load(f, encoding='utf-8')
return dictory
if __name__ == '__main__':
xls = xlwt.Workbook()
sheet = xls.add_sheet('native')
sheet.write(0, 0, 'positionName')
sheet.write(0, 1, 'salary')
sheet.write(0, 2, 'education')
sheet.write(0, 3, 'workYear')
sheet.write(0, 4, 'city')
sheet.write(0, 5, 'companyShortName')
sheet.write(0, 6, 'companySize')
sheet.write(0, 7, 'financeStage')
sheet.write(0, 8, 'industryField')
sheet.write(0, 9, 'jobNature')
sheet.write(0, 10, 'companyLogo')
sheetPosition = {'row':1, 'col':0}
pn = 1
for x in range(pn, 31, 1):
path = 'd://PyCharmProject//lagou_scraping//Python_search_result//Python-' + str(pn) + '.json'
dictory = read_json(path)
row = sheetPosition['row']
col = sheetPosition['col']
pn = pn + 1
for x in dictory['content']['positionResult']['result']:
sheet.write(row, col, x['positionName']) # 0
col = col + 1
sheet.write(row, col, x['salary']) # 1
col = col + 1
sheet.write(row, col, x['education']) # 2
col = col + 1
sheet.write(row, col, x['workYear']) # 3
col = col + 1
sheet.write(row, col, x['city']) # 4
col = col + 1
sheet.write(row, col, x['companyShortName']) # 5
col = col + 1
sheet.write(row, col, x['companySize']) # 6
col = col + 1
sheet.write(row, col, x['financeStage']) # 7
col = col + 1
sheet.write(row, col, x['industryField']) # 8
col = col + 1
sheet.write(row, col, x['jobNature']) # 9
col = col + 1
sheet.write(row, col, x['companyLogo']) # 10
col = col + 1
col = 0
row = row + 1
sheetPosition = {'row': row, 'col': col}
xls.save('d://PyCharmProject//lagou_scraping//lagou_python.xls')
答案 0 :(得分:0)
好像你的错误来自这里:
f = open(path + para['kd'] + '-' + str(para['pn']) + '.json', 'wb')
f.write(req.content) # <-- Here is the probable error.
f.close()
也许您必须先检查状态代码或尝试阻止 抓住错误:
#using status code 200
#some server may return different status code
#other than 200, check how the server return on success
if(req.status_code == 200):
#now you're ok to resume with your code above
else:
#since you're in the for loop, skip this iteration
pass
我希望这会有所帮助。