我有一个文件,其中每一行都是一个JSON对象,我希望按update_time对这些行进行排序。
示例JSON文件:
{ "page": { "url": "url1", "update_time": "1415387875"}, "other_key": {} }
{ "page": { "url": "url2", "update_time": "1415381963"}, "other_key": {} }
{ "page": { "url": "url3", "update_time": "1415384938"}, "other_key": {} }
想要输出:
{ "page": { "url": "url1", "update_time": "1415387875"}, "other_key": {} }
{ "page": { "url": "url3", "update_time": "1415384938"}, "other_key": {} }
{ "page": { "url": "url2", "update_time": "1415381963"}, "other_key": {} }
我的代码:
#!/bin/env python
#coding: utf8
import sys
import os
import json
import operator
# Load one JSON object per line from standard input.
lines = []
while True:
    line = sys.stdin.readline()
    if not line:
        # readline() returns an empty string only at end of input.
        break
    line = line.strip()
    json_obj = json.loads(line)
    lines.append(json_obj)

# Sort newest first. NOTE: the update_time values in the sample are strings,
# so they are compared as strings here, and a record without
# page.update_time raises KeyError.
lines = sorted(lines, key=lambda k: k['page']['update_time'], reverse=True)

# Output the result. The parenthesized single-argument form prints the same
# text on Python 2 and is also valid on Python 3.
for line in lines:
    print(line)
代码适用于示例JSON文件,但如果某个JSON对象没有'update_time',则会引发KeyError异常。是否有不依赖异常处理的方法可以做到这一点?
答案 0 :(得分:21)
编写一个使用 try...except 处理 KeyError 的函数,然后将其作为 key 参数传入,以代替 lambda。
def extract_time(json):
    """Return a record's ``page.update_time`` as an int, for use as a sort key.

    Args:
        json: One decoded JSON record (a dict). The parameter name shadows
            the ``json`` module inside this function only.

    Returns:
        The update time as an integer, or 0 when the ``page`` or
        ``update_time`` key is missing, ``page`` is not a mapping, or the
        value cannot be converted to an integer.
    """
    try:
        # Also convert to int since update_time will be a string. When
        # comparing strings, "10" is smaller than "2".
        return int(json['page']['update_time'])
    except (KeyError, TypeError, ValueError):
        # KeyError: key missing; TypeError: null value or non-dict page;
        # ValueError: non-numeric string. All are treated as "no timestamp".
        return 0
# lines.sort() sorts the list in place, so it avoids the extra copy that
# lines = sorted(lines) would create.
lines.sort(key=extract_time, reverse=True)
答案 1 :(得分:13)
您可以使用带默认值的 dict.get():
# Convert the key to int so the default 0 and the string timestamps compare
# numerically; mixing int and str sort keys raises TypeError on Python 3.
lines = sorted(lines, key=lambda k: int(k['page'].get('update_time', 0)), reverse=True)
示例:
>>> lines = [
... {"page": {"url": "url1", "update_time": "1415387875"}, "other_key": {}},
... {"page": {"url": "url2", "update_time": "1415381963"}, "other_key": {}},
... {"page": {"url": "url3", "update_time": "1415384938"}, "other_key": {}},
... {"page": {"url": "url4"}, "other_key": {}},
... {"page": {"url": "url5"}, "other_key": {}}
... ]
>>> lines = sorted(lines, key=lambda k: k['page'].get('update_time', 0), reverse=True)
>>> for line in lines:
... print line
...
{'other_key': {}, 'page': {'url': 'url1', 'update_time': '1415387875'}}
{'other_key': {}, 'page': {'url': 'url3', 'update_time': '1415384938'}}
{'other_key': {}, 'page': {'url': 'url2', 'update_time': '1415381963'}}
{'other_key': {}, 'page': {'url': 'url4'}}
{'other_key': {}, 'page': {'url': 'url5'}}
尽管如此,我仍然会遵循 Ferdinand 建议的 EAFP 原则——这样还可以处理缺少 page 键的情况。与其逐一检查各种边界情况,不如先让它失败,再处理异常,这样更简单。
答案 2 :(得分:8)
# Sort records newest first. The key is converted to int so the default 0
# and the string timestamps compare numerically; mixing int and str sort
# keys raises TypeError on Python 3.
lines = sorted(lines, key=lambda k: int(k['page'].get('update_time', 0)), reverse=True)
答案 3 :(得分:2)
def get_sortest_key(a: dict, o: dict):
    """Move every entry of ``a`` into ``o`` in ascending order of value.

    Entries are written to ``o`` smallest value first; entries with equal
    values keep the relative order they had in ``a``. ``a`` is emptied as a
    side effect, as it was by the original pop-as-you-go implementation.

    Args:
        a: Dict whose values are mutually comparable. Empty on return.
        o: Destination dict, updated in place.

    Returns:
        None.
    """
    # sorted() is stable, so ties keep their order from ``a`` -- the same
    # result as repeatedly extracting the first minimum, but without the
    # quadratic rescans, the one-recursion-per-entry depth, or the KeyError
    # the old code raised when ``a`` was empty.
    for key, value in sorted(a.items(), key=lambda item: item[1]):
        o[key] = value
    a.clear()
def call(o):
    """Sort a fixed sample dict by value into ``o`` and print ``o``.

    Args:
        o: Dict that receives the sample entries in ascending order of
            value; it is mutated in place.
    """
    a = {'a': 9, 'b': 1, 'c': 3, 'k': 3, 'l': -1, 's': 100}
    # get_sortest_key works through its side effect on ``o`` and returns
    # None, so its result is not bound to a name.
    get_sortest_key(a, o)
    print(o)
# Run the demo: ``o`` starts empty and is passed to call(), which prints it.
o={}
call(o)