我正在使用 Google Gson 库来解析 JSON 文件,这些文件是 IPython 笔记本文件。是否可以获取 JSON 对象或数组开始或结束处的行号?
// Open the notebook file for token-by-token (streaming) parsing.
JsonReader reader = new JsonReader(new FileReader(notebookFile));
// NOTE(review): gson is not used in the visible part of this snippet.
Gson gson = new GsonBuilder().create();
// Read the file in streaming mode, starting with the top-level JSON object.
reader.beginObject();
while (reader.hasNext()) {
String name = reader.nextName();
if (name.equals("cells")) {
// Open question: can the line number of this name be determined here?
reader.beginArray();
.....
}
....
}
笔记本的一部分:
"metadata": {
"name": "5-Scatterplots"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"store = pd.HDFStore('/Volumes/FreshBooks/data/store.h5')\n",
"may07 = store['may07']\n",
"may08 = store['may08']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
答案 0 :(得分:1)
您可以使用 https://pypi.org/project/json-cfg/ 在 Python 中完成此操作。
下面是一种递归策略,用于打印每个键和每个值的行号。
import jsoncfg
from jsoncfg.config_classes import ConfigJSONObject, ConfigJSONArray, ConfigJSONScalar
def recursivePrint(element):
    """Print the source line of every key and scalar value below a node.

    Descends depth-first through the json-cfg node tree rooted at
    ``element``. Nodes that are neither an object, an array nor a scalar
    produce no output.

    Args:
        element: A json-cfg node, e.g. the result of ``jsoncfg.load_config``.
    """
    if isinstance(element, ConfigJSONObject):
        # Iterating an object node yields two-item tuples; only the key is
        # needed because the child node is looked up through element[key].
        for key, _ in element:
            # The line reported for a key is the line of the node stored under it.
            print(f"key \"{key}\" at line {jsoncfg.node_location(element[key]).line}")
            recursivePrint(element[key])
        return

    if isinstance(element, ConfigJSONArray):
        for item in element:
            recursivePrint(item)
        return

    if not isinstance(element, ConfigJSONScalar):
        return

    # Calling a scalar node yields the value it wraps.
    value = element()
    if isinstance(value, str):
        # Surrounding whitespace is trimmed before printing.
        value = value.strip()
    print(f"value \"{value}\" at line {jsoncfg.node_location(element).line}")
# Load example.json as a json-cfg node tree, then print the line of every key and value in it.
parsed = jsoncfg.load_config("example.json")
recursivePrint(parsed)
结果截图
答案 1 :(得分:0)
完全披露:我是以下软件包的维护者。
现在有一个新的 Python 包可以解决这个用例:https://github.com/open-alchemy/json-source-map
安装:pip install json_source_map
例如,在您的情况下:
from json_source_map import calculate
# JSON document to analyse. The text starts with a newline (so the opening
# brace sits on 0-based line 1) and is indented two spaces per nesting level.
# Every line, column and position reported by calculate() depends on this
# exact whitespace, so it must not be reformatted.
source = '''
{
  "metadata": {
    "name": "5-Scatterplots"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
  "worksheets": [
    {
      "cells": [
        {
          "cell_type": "code",
          "collapsed": false,
          "input": [
            "import pandas as pd\\n",
            "store = pd.HDFStore('/Volumes/FreshBooks/data/store.h5')\\n",
            "may07 = store['may07']\\n",
            "may08 = store['may08']"
          ],
          "language": "python",
          "metadata": {},
          "outputs": [],
          "prompt_number": 1
        }
      ]
    }
  ]
}
'''
# Print the mapping from each JSON pointer ("" is the document root) to the
# start/end locations of its value and, for object members, of its key.
print(calculate(source))
打印:
{
"": Entry(
value_start=Location(line=1, column=0, position=1),
value_end=Location(line=27, column=1, position=568),
key_start=None,
key_end=None,
),
"/metadata": Entry(
value_start=Location(line=2, column=14, position=17),
value_end=Location(line=4, column=3, position=51),
key_start=Location(line=2, column=2, position=5),
key_end=Location(line=2, column=12, position=15),
),
"/metadata/name": Entry(
value_start=Location(line=3, column=12, position=31),
value_end=Location(line=3, column=28, position=47),
key_start=Location(line=3, column=4, position=23),
key_end=Location(line=3, column=10, position=29),
),
"/nbformat": Entry(
value_start=Location(line=5, column=14, position=67),
value_end=Location(line=5, column=15, position=68),
key_start=Location(line=5, column=2, position=55),
key_end=Location(line=5, column=12, position=65),
),
"/nbformat_minor": Entry(
value_start=Location(line=6, column=20, position=90),
value_end=Location(line=6, column=21, position=91),
key_start=Location(line=6, column=2, position=72),
key_end=Location(line=6, column=18, position=88),
),
"/worksheets": Entry(
value_start=Location(line=7, column=16, position=109),
value_end=Location(line=26, column=3, position=566),
key_start=Location(line=7, column=2, position=95),
key_end=Location(line=7, column=14, position=107),
),
"/worksheets/0": Entry(
value_start=Location(line=8, column=4, position=115),
value_end=Location(line=25, column=5, position=562),
key_start=None,
key_end=None,
),
"/worksheets/0/cells": Entry(
value_start=Location(line=9, column=15, position=132),
value_end=Location(line=24, column=7, position=556),
key_start=Location(line=9, column=6, position=123),
key_end=Location(line=9, column=13, position=130),
),
"/worksheets/0/cells/0": Entry(
value_start=Location(line=10, column=8, position=142),
value_end=Location(line=23, column=9, position=548),
key_start=None,
key_end=None,
),
"/worksheets/0/cells/0/cell_type": Entry(
value_start=Location(line=11, column=23, position=167),
value_end=Location(line=11, column=29, position=173),
key_start=Location(line=11, column=10, position=154),
key_end=Location(line=11, column=21, position=165),
),
"/worksheets/0/cells/0/collapsed": Entry(
value_start=Location(line=12, column=23, position=198),
value_end=Location(line=12, column=28, position=203),
key_start=Location(line=12, column=10, position=185),
key_end=Location(line=12, column=21, position=196),
),
"/worksheets/0/cells/0/input": Entry(
value_start=Location(line=13, column=19, position=224),
value_end=Location(line=18, column=11, position=425),
key_start=Location(line=13, column=10, position=215),
key_end=Location(line=13, column=17, position=222),
),
"/worksheets/0/cells/0/input/0": Entry(
value_start=Location(line=14, column=12, position=238),
value_end=Location(line=14, column=35, position=261),
key_start=None,
key_end=None,
),
"/worksheets/0/cells/0/input/1": Entry(
value_start=Location(line=15, column=12, position=275),
value_end=Location(line=15, column=72, position=335),
key_start=None,
key_end=None,
),
"/worksheets/0/cells/0/input/2": Entry(
value_start=Location(line=16, column=12, position=349),
value_end=Location(line=16, column=38, position=375),
key_start=None,
key_end=None,
),
"/worksheets/0/cells/0/input/3": Entry(
value_start=Location(line=17, column=12, position=389),
value_end=Location(line=17, column=36, position=413),
key_start=None,
key_end=None,
),
"/worksheets/0/cells/0/language": Entry(
value_start=Location(line=19, column=22, position=449),
value_end=Location(line=19, column=30, position=457),
key_start=Location(line=19, column=10, position=437),
key_end=Location(line=19, column=20, position=447),
),
"/worksheets/0/cells/0/metadata": Entry(
value_start=Location(line=20, column=22, position=481),
value_end=Location(line=20, column=24, position=483),
key_start=Location(line=20, column=10, position=469),
key_end=Location(line=20, column=20, position=479),
),
"/worksheets/0/cells/0/outputs": Entry(
value_start=Location(line=21, column=21, position=506),
value_end=Location(line=21, column=23, position=508),
key_start=Location(line=21, column=10, position=495),
key_end=Location(line=21, column=19, position=504),
),
"/worksheets/0/cells/0/prompt_number": Entry(
value_start=Location(line=22, column=27, position=537),
value_end=Location(line=22, column=28, position=538),
key_start=Location(line=22, column=10, position=520),
key_end=Location(line=22, column=25, position=535),
),
}
这会告诉您 JSON 文档中每个键和每个值的开始与结束位置,包括行号、列号和字符偏移(行号和列号均从 0 开始计数)。