所以我使用的是Python 2.7,使用json
模块对以下数据结构进行编码:
'layer1': {
'layer2': {
'layer3_1': [ long_list_of_stuff ],
'layer3_2': 'string'
}
}
我的问题是我使用漂亮的打印方式打印出来,如下所示:
json.dumps(data_structure, indent=2)
哪个很好,除了我要缩进所有内容,除了"layer3_1"
中的内容 - 这是一个列出坐标的大量字典,因此,每个字符集都设置了一个值,这样就可以创建一个包含数千行的文件,示例如下:
{
"layer1": {
"layer2": {
"layer3_1": [
{
"x": 1,
"y": 7
},
{
"x": 0,
"y": 4
},
{
"x": 5,
"y": 3
},
{
"x": 6,
"y": 9
}
],
"layer3_2": "string"
}
}
}
我真正想要的是类似于以下内容:
{
"layer1": {
"layer2": {
"layer3_1": [{"x":1,"y":7},{"x":0,"y":4},{"x":5,"y":3},{"x":6,"y":9}],
"layer3_2": "string"
}
}
}
我听说可以扩展json
模块:是否可以将其设置为仅在"layer3_1"
对象内部时关闭缩进?如果是的话,有人请告诉我怎么做?
答案 0 :(得分:13)
<强>更新强>
以下是我原来答案的一个版本,已多次修订。不同于原版,我发布的只是为了展示如何在J.F.Sebastian的answer中获得第一个想法,并且像他一样,返回了对象的非缩进字符串表示。最新更新版本返回单独格式化的Python对象JSON。
根据OP的评论之一,每个坐标dict
的键将按排序顺序显示,但前提是在初始sort_keys=True
来电驱动中指定了json.dumps()
关键字参数该过程,它不再将对象的类型更改为沿途的字符串。换句话说,现在维护“包裹”对象的实际类型。
我认为不理解我的帖子的原始意图会导致一些人失败 - 因此,主要是因为这个原因,我已经“修复”并多次改进了我的答案。当前版本是我原来的答案与@Erik Allik在answer中使用的一些想法的混合,加上其他用户在本答复下面的评论中显示的有用反馈。
以下代码在Python 2.7.14和3.6.5中似乎都没有改变。
from _ctypes import PyObj_FromPtr
import json
import re
class NoIndent(object):
""" Value wrapper. """
def __init__(self, value):
self.value = value
class MyEncoder(json.JSONEncoder):
FORMAT_SPEC = '@@{}@@'
regex = re.compile(FORMAT_SPEC.format(r'(\d+)'))
def __init__(self, **kwargs):
# Save copy of any keyword argument values needed for use here.
self.__sort_keys = kwargs.get('sort_keys', None)
super(MyEncoder, self).__init__(**kwargs)
def default(self, obj):
return (self.FORMAT_SPEC.format(id(obj)) if isinstance(obj, NoIndent)
else super(MyEncoder, self).default(obj))
def encode(self, obj):
format_spec = self.FORMAT_SPEC # Local var to expedite access.
json_repr = super(MyEncoder, self).encode(obj) # Default JSON.
# Replace any marked-up object ids in the JSON repr with the
# value returned from the json.dumps() of the corresponding
# wrapped Python object.
for match in self.regex.finditer(json_repr):
# see https://stackoverflow.com/a/15012814/355230
id = int(match.group(1))
no_indent = PyObj_FromPtr(id)
json_obj_repr = json.dumps(no_indent.value, sort_keys=self.__sort_keys)
# Replace the matched id string with json formatted representation
# of the corresponding Python object.
json_repr = json_repr.replace(
'"{}"'.format(format_spec.format(id)), json_obj_repr)
return json_repr
if __name__ == '__main__':
from string import ascii_lowercase as letters
data_structure = {
'layer1': {
'layer2': {
'layer3_1': NoIndent([{"x":1,"y":7}, {"x":0,"y":4}, {"x":5,"y":3},
{"x":6,"y":9},
{k: v for v, k in enumerate(letters)}]),
'layer3_2': 'string',
'layer3_3': NoIndent([{"x":2,"y":8,"z":3}, {"x":1,"y":5,"z":4},
{"x":6,"y":9,"z":8}]),
'layer3_4': NoIndent(list(range(20))),
}
}
}
print(json.dumps(data_structure, cls=MyEncoder, sort_keys=True, indent=2))
输出:
{
"layer1": {
"layer2": {
"layer3_1": [{"x": 1, "y": 7}, {"x": 0, "y": 4}, {"x": 5, "y": 3}, {"x": 6, "y": 9}, {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25}],
"layer3_2": "string",
"layer3_3": [{"x": 2, "y": 8, "z": 3}, {"x": 1, "y": 5, "z": 4}, {"x": 6, "y": 9, "z": 8}],
"layer3_4": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
}
}
}
答案 1 :(得分:10)
bodge,但是一旦你从dumps()获得了字符串,你可以在它上面执行正则表达式替换,如果你确定它的内容的格式。有点像:
s = json.dumps(data_structure, indent=2)
s = re.sub('\s*{\s*"(.)": (\d+),\s*"(.)": (\d+)\s*}(,?)\s*', r'{"\1":\2,"\3":\4}\5', s)
答案 2 :(得分:8)
以下解决方案似乎在Python 2.7.x上正常运行。它使用从Custom JSON encoder in Python 2.7 to insert plain JavaScript code获取的变通方法,以避免使用基于UUID的替换方案在输出中以自定义编码对象结束为JSON字符串。
class NoIndent(object):
def __init__(self, value):
self.value = value
class NoIndentEncoder(json.JSONEncoder):
def __init__(self, *args, **kwargs):
super(NoIndentEncoder, self).__init__(*args, **kwargs)
self.kwargs = dict(kwargs)
del self.kwargs['indent']
self._replacement_map = {}
def default(self, o):
if isinstance(o, NoIndent):
key = uuid.uuid4().hex
self._replacement_map[key] = json.dumps(o.value, **self.kwargs)
return "@@%s@@" % (key,)
else:
return super(NoIndentEncoder, self).default(o)
def encode(self, o):
result = super(NoIndentEncoder, self).encode(o)
for k, v in self._replacement_map.iteritems():
result = result.replace('"@@%s@@"' % (k,), v)
return result
然后这个
obj = {
"layer1": {
"layer2": {
"layer3_2": "string",
"layer3_1": NoIndent([{"y": 7, "x": 1}, {"y": 4, "x": 0}, {"y": 3, "x": 5}, {"y": 9, "x": 6}])
}
}
}
print json.dumps(obj, indent=2, cls=NoIndentEncoder)
产生以下输出:
{
"layer1": {
"layer2": {
"layer3_2": "string",
"layer3_1": [{"y": 7, "x": 1}, {"y": 4, "x": 0}, {"y": 3, "x": 5}, {"y": 9, "x": 6}]
}
}
}
它还正确传递了所有选项(indent
除外),例如sort_keys=True
直到嵌套json.dumps
来电。
obj = {
"layer1": {
"layer2": {
"layer3_1": NoIndent([{"y": 7, "x": 1, }, {"y": 4, "x": 0}, {"y": 3, "x": 5, }, {"y": 9, "x": 6}]),
"layer3_2": "string",
}
}
}
print json.dumps(obj, indent=2, sort_keys=True, cls=NoIndentEncoder)
正确输出:
{
"layer1": {
"layer2": {
"layer3_1": [{"x": 1, "y": 7}, {"x": 0, "y": 4}, {"x": 5, "y": 3}, {"x": 6, "y": 9}],
"layer3_2": "string"
}
}
}
它也可以与例如collections.OrderedDict
:
obj = {
"layer1": {
"layer2": {
"layer3_2": "string",
"layer3_3": NoIndent(OrderedDict([("b", 1), ("a", 2)]))
}
}
}
print json.dumps(obj, indent=2, cls=NoIndentEncoder)
<强>输出强>:
{
"layer1": {
"layer2": {
"layer3_3": {"b": 1, "a": 2},
"layer3_2": "string"
}
}
}
答案 3 :(得分:4)
这产生了OP的预期结果:
import json
class MyJSONEncoder(json.JSONEncoder):
def iterencode(self, o, _one_shot=False):
list_lvl = 0
for s in super(MyJSONEncoder, self).iterencode(o, _one_shot=_one_shot):
if s.startswith('['):
list_lvl += 1
s = s.replace('\n', '').rstrip()
elif 0 < list_lvl:
s = s.replace('\n', '').rstrip()
if s and s[-1] == ',':
s = s[:-1] + self.item_separator
elif s and s[-1] == ':':
s = s[:-1] + self.key_separator
if s.endswith(']'):
list_lvl -= 1
yield s
o = {
"layer1":{
"layer2":{
"layer3_1":[{"y":7,"x":1},{"y":4,"x":0},{"y":3,"x":5},{"y":9,"x":6}],
"layer3_2":"string",
"layer3_3":["aaa\nbbb","ccc\nddd",{"aaa\nbbb":"ccc\nddd"}],
"layer3_4":"aaa\nbbb",
}
}
}
jsonstr = json.dumps(o, indent=2, separators=(',', ':'), sort_keys=True,
cls=MyJSONEncoder)
print(jsonstr)
o2 = json.loads(jsonstr)
print('identical objects: {}'.format((o == o2)))
答案 4 :(得分:2)
你可以尝试:
标记不应使用NoIndentList
替换它们的缩进列表:
class NoIndentList(list):
pass
覆盖json.Encoder.default method以生成NoIndentList
的非缩进字符串表示形式。
您可以将其强制转回列表并调用json.dumps()而不使用indent
来获取单行
上述方法似乎对json模块无效:
import json
import sys
class NoIndent(object):
def __init__(self, value):
self.value = value
def default(o, encoder=json.JSONEncoder()):
if isinstance(o, NoIndent):
return json.dumps(o.value)
return encoder.default(o)
L = [dict(x=x, y=y) for x in range(1) for y in range(2)]
obj = [NoIndent(L), L]
json.dump(obj, sys.stdout, default=default, indent=4)
它产生无效输出(列表被序列化为字符串):
[
"[{\"y\": 0, \"x\": 0}, {\"y\": 1, \"x\": 0}]",
[
{
"y": 0,
"x": 0
},
{
"y": 1,
"x": 0
}
]
]
如果您可以使用yaml
,则该方法有效:
import sys
import yaml
class NoIndentList(list):
pass
def noindent_list_presenter(dumper, data):
return dumper.represent_sequence(u'tag:yaml.org,2002:seq', data,
flow_style=True)
yaml.add_representer(NoIndentList, noindent_list_presenter)
obj = [
[dict(x=x, y=y) for x in range(2) for y in range(1)],
[dict(x=x, y=y) for x in range(1) for y in range(2)],
]
obj[0] = NoIndentList(obj[0])
yaml.dump(obj, stream=sys.stdout, indent=4)
它产生:
- [{x: 0, y: 0}, {x: 1, y: 0}]
- - {x: 0, y: 0}
- {x: 0, y: 1}
即,第一个列表是使用[]
序列化的,所有项目都在一行上,第二个列表每个项目使用一行。
答案 5 :(得分:1)
作为旁注,本网站有一个内置的JavaScript,当行短于70个字符时,它将避免JSON字符串中的换行符:
http://www.csvjson.com/json_beautifier
(使用JSON-js的修改版本实现)
选择&#34;内联短阵列&#34;
非常适合快速查看复制缓冲区中的数据。
答案 6 :(得分:0)
如果你有太多不同类型的对象有助于JSON尝试JSONEncoder方法和太多不同的类型来使用正则表达式,那么这是一个后处理解决方案。此函数在指定级别后折叠空白,而无需知道数据本身的细节。
def collapse_json(text, indent=12):
"""Compacts a string of json data by collapsing whitespace after the
specified indent level
NOTE: will not produce correct results when indent level is not a multiple
of the json indent level
"""
initial = " " * indent
out = [] # final json output
sublevel = [] # accumulation list for sublevel entries
pending = None # holder for consecutive entries at exact indent level
for line in text.splitlines():
if line.startswith(initial):
if line[indent] == " ":
# found a line indented further than the indent level, so add
# it to the sublevel list
if pending:
# the first item in the sublevel will be the pending item
# that was the previous line in the json
sublevel.append(pending)
pending = None
item = line.strip()
sublevel.append(item)
if item.endswith(","):
sublevel.append(" ")
elif sublevel:
# found a line at the exact indent level *and* we have sublevel
# items. This means the sublevel items have come to an end
sublevel.append(line.strip())
out.append("".join(sublevel))
sublevel = []
else:
# found a line at the exact indent level but no items indented
# further, so possibly start a new sub-level
if pending:
# if there is already a pending item, it means that
# consecutive entries in the json had the exact same
# indentation and that last pending item was not the start
# of a new sublevel.
out.append(pending)
pending = line.rstrip()
else:
if pending:
# it's possible that an item will be pending but not added to
# the output yet, so make sure it's not forgotten.
out.append(pending)
pending = None
if sublevel:
out.append("".join(sublevel))
out.append(line)
return "\n".join(out)
例如,使用此结构作为json.dumps的输入,缩进级别为4:
text = json.dumps({"zero": ["first", {"second": 2, "third": 3, "fourth": 4, "items": [[1,2,3,4], [5,6,7,8], 9, 10, [11, [12, [13, [14, 15]]]]]}]}, indent=4)
这是各种缩进级别的函数输出:
>>> print collapse_json(text, indent=0)
{"zero": ["first", {"items": [[1, 2, 3, 4], [5, 6, 7, 8], 9, 10, [11, [12, [13, [14, 15]]]]], "second": 2, "fourth": 4, "third": 3}]}
>>> print collapse_json(text, indent=4)
{
"zero": ["first", {"items": [[1, 2, 3, 4], [5, 6, 7, 8], 9, 10, [11, [12, [13, [14, 15]]]]], "second": 2, "fourth": 4, "third": 3}]
}
>>> print collapse_json(text, indent=8)
{
"zero": [
"first",
{"items": [[1, 2, 3, 4], [5, 6, 7, 8], 9, 10, [11, [12, [13, [14, 15]]]]], "second": 2, "fourth": 4, "third": 3}
]
}
>>> print collapse_json(text, indent=12)
{
"zero": [
"first",
{
"items": [[1, 2, 3, 4], [5, 6, 7, 8], 9, 10, [11, [12, [13, [14, 15]]]]],
"second": 2,
"fourth": 4,
"third": 3
}
]
}
>>> print collapse_json(text, indent=16)
{
"zero": [
"first",
{
"items": [
[1, 2, 3, 4],
[5, 6, 7, 8],
9,
10,
[11, [12, [13, [14, 15]]]]
],
"second": 2,
"fourth": 4,
"third": 3
}
]
}
答案 7 :(得分:0)
实际上,YAML比JSON更好。
我无法让NoIndentEncoder工作...但是我可以在JSON字符串上使用正则表达式...
def collapse_json(text, list_length=5):
for length in range(list_length):
re_pattern = r'\[' + (r'\s*(.+)\s*,' * length)[:-1] + r'\]'
re_repl = r'[' + ''.join(r'\{}, '.format(i+1) for i in range(length))[:-2] + r']'
text = re.sub(re_pattern, re_repl, text)
return text
问题是,如何在嵌套列表上执行此操作?
之前:
[
0,
"any",
[
2,
3
]
]
之后:
[0, "any", [2, 3]]
答案 8 :(得分:0)
最佳性能代码(10MB 文本花费 1 秒):
import json
def dumps_json(data, indent=2, depth=2):
assert depth > 0
space = ' '*indent
s = json.dumps(data, indent=indent)
lines = s.splitlines()
N = len(lines)
# determine which lines to be shortened
is_over_depth_line = lambda i: i in range(N) and lines[i].startswith(space*(depth+1))
is_open_bracket_line = lambda i: not is_over_depth_line(i) and is_over_depth_line(i+1)
is_close_bracket_line = lambda i: not is_over_depth_line(i) and is_over_depth_line(i-1)
#
def shorten_line(line_index):
if not is_open_bracket_line(line_index):
return lines[line_index]
# shorten over-depth lines
start = line_index
end = start
while not is_close_bracket_line(end):
end += 1
has_trailing_comma = lines[end][-1] == ','
_lines = [lines[start][-1], *lines[start+1:end], lines[end].replace(',','')]
d = json.dumps(json.loads(' '.join(_lines)))
return lines[line_index][:-1] + d + (',' if has_trailing_comma else '')
#
s = '\n'.join([
shorten_line(i)
for i in range(N) if not is_over_depth_line(i) and not is_close_bracket_line(i)
])
#
return s
更新: 这是我的解释:
首先我们使用 json.dumps 来获取已缩进的 json 字符串。 示例:
>>> print(json.dumps({'0':{'1a':{'2a':None,'2b':None},'1b':{'2':None}}}, indent=2))
[0] {
[1] "0": {
[2] "1a": {
[3] "2a": null,
[4] "2b": null
[5] },
[6] "1b": {
[7] "2": null
[8] }
[9] }
[10] }
如果我们设置 indent=2
和 depth = 2
,那么深度线以 6 个空格开头
我们有 4 种类型的线路:
我们将尝试将一系列行(类型 2 + 3 + 4)合并为一行。 示例:
[2] "1a": {
[3] "2a": null,
[4] "2b": null
[5] },
将合并为:
[2] "1a": {"2a": null, "2b": null},
注意:右括号行可能有尾随逗号
答案 9 :(得分:-1)
这个解决方案不像其他解决方案那么优雅和通用,你不会从中学到很多东西,但它快速而简单。
def custom_print(data_structure, indent):
for key, value in data_structure.items():
print "\n%s%s:" % (' '*indent,str(key)),
if isinstance(value, dict):
custom_print(value, indent+1)
else:
print "%s" % (str(value)),
用法和输出:
>>> custom_print(data_structure,1)
layer1:
layer2:
layer3_2: string
layer3_1: [{'y': 7, 'x': 1}, {'y': 4, 'x': 0}, {'y': 3, 'x': 5}, {'y': 9, 'x': 6}]