我正在尝试扁平化深度嵌套的json文件。
我有22个json文件,我想收集到一个熊猫数据框中。我设法用json_normalize将它们展平到第二层,但是我无法进一步解析它。有时json具有5个以上的级别。
我想提取_id
,actType
和位于“孩子”不同级别的所有文本数据。以下是Json文件的示例。非常感谢您的帮助!
{
"_id": "test1",
"actType": "FINDING",
"entries": [{
"text": "U Ergebnis:",
"isDocumentationNode": false,
"children": [{
"text": "U3: Standartext",
"isDocumentationNode": true,
"children": []
}, {
"text": "Brückner durchgeführt o.p.B.",
"isDocumentationNode": true,
"children": []
}, {
"text": "Normale körperliche und altersgerecht Entwicklung",
"isDocumentationNode": true,
"children": [{
"text": "J1/2",
"isDocumentationNode": false,
"children": [{
"text": "Schule:",
"isDocumentationNode": true,
"children": [{
"text": "Ziel Abitur",
"isDocumentationNode": true,
"children": [{
"text": "läuft",
"isDocumentationNode": true,
"children": []
}, {
"text": "gefährdet",
"isDocumentationNode": true,
"children": []
}, {
"text": "läuft",
"isDocumentationNode": true,
"children": []
}, {
"text": "gefährdet",
"isDocumentationNode": true,
"children": []
}
]
}
]
}
]
}
]
}
]
}
]
}
import pandas as pd
# load file
df = pd.read_json('test.json')
# display(df)
_id actType entries
0 test1 FINDING {'text': 'U Ergebnis:', 'isDocumentationNode': False, 'children': [{'text': 'U3: Standartext', 'isDocumentationNode': True, 'children': []}, {'text': 'Brückner durchgeführt o.p.B.', 'isDocumentationNode': True, 'children': []}, {'text': 'Normale körperliche und altersgerecht Entwicklung', 'isDocumentationNode': True, 'children': [{'text': 'J1/2', 'isDocumentationNode': False, 'children': [{'text': 'Schule:', 'isDocumentationNode': True, 'children': [{'text': 'Ziel Abitur', 'isDocumentationNode': True, 'children': [{'text': 'läuft', 'isDocumentationNode': True, 'children': []}, {'text': 'gefährdet', 'isDocumentationNode': True, 'children': []}, {'text': 'läuft', 'isDocumentationNode': True, 'children': []}, {'text': 'gefährdet', 'isDocumentationNode': True, 'children': []}]}]}]}]}]}
dict
列中嵌套'entries'
,但是我需要一个平坦,宽阔的数据框,所有键都作为列。答案 0 :(得分:0)
flatten_json
函数
flatten_json
函数。pandas.DataFrame.rename
重命名任何列。import json
import pandas as pd
# list of files
files = ['test1.json', 'test2.json']
# list to add dataframe from each file
df_list = list()
# iterate through files
for file in files:
with open(file, 'r', encoding='utf-8') as f:
# read with json
data = json.loads(f.read())
# flatten_json into a dataframe and add to the dataframe list
df_list.append(pd.DataFrame.from_dict(flatten_json(data), orient='index').T)
# concat all dataframes together
df = pd.concat(df_list).reset_index(drop=True)
# display(df)
_id actType entries_0_text entries_0_isDocumentationNode entries_0_children_0_text entries_0_children_0_isDocumentationNode entries_0_children_1_text entries_0_children_1_isDocumentationNode entries_0_children_2_text entries_0_children_2_isDocumentationNode entries_0_children_2_children_0_text entries_0_children_2_children_0_isDocumentationNode entries_0_children_2_children_0_children_0_text entries_0_children_2_children_0_children_0_isDocumentationNode entries_0_children_2_children_0_children_0_children_0_text entries_0_children_2_children_0_children_0_children_0_isDocumentationNode entries_0_children_2_children_0_children_0_children_0_children_0_text entries_0_children_2_children_0_children_0_children_0_children_0_isDocumentationNode entries_0_children_2_children_0_children_0_children_0_children_1_text entries_0_children_2_children_0_children_0_children_0_children_1_isDocumentationNode entries_0_children_2_children_0_children_0_children_0_children_2_text entries_0_children_2_children_0_children_0_children_0_children_2_isDocumentationNode entries_0_children_2_children_0_children_0_children_0_children_3_text entries_0_children_2_children_0_children_0_children_0_children_3_isDocumentationNode
0 test1 FINDING U Ergebnis: False U3: Standartext True Brückner durchgeführt o.p.B. True Normale körperliche und altersgerecht Entwicklung True J1/2 False Schule: True Ziel Abitur True läuft True gefährdet True läuft True gefährdet True
1 test2 FINDING U Ergebnis: False U3: Standartext True Brückner durchgeführt o.p.B. True Normale körperliche und altersgerecht Entwicklung True J1/2 False Schule: True Ziel Abitur True läuft True gefährdet True NaN NaN NaN NaN
test1.json
{
"_id": "test1",
"actType": "FINDING",
"entries": [{
"text": "U Ergebnis:",
"isDocumentationNode": false,
"children": [{
"text": "U3: Standartext",
"isDocumentationNode": true,
"children": []
}, {
"text": "Brückner durchgeführt o.p.B.",
"isDocumentationNode": true,
"children": []
}, {
"text": "Normale körperliche und altersgerecht Entwicklung",
"isDocumentationNode": true,
"children": [{
"text": "J1/2",
"isDocumentationNode": false,
"children": [{
"text": "Schule:",
"isDocumentationNode": true,
"children": [{
"text": "Ziel Abitur",
"isDocumentationNode": true,
"children": [{
"text": "läuft",
"isDocumentationNode": true,
"children": []
}, {
"text": "gefährdet",
"isDocumentationNode": true,
"children": []
}, {
"text": "läuft",
"isDocumentationNode": true,
"children": []
}, {
"text": "gefährdet",
"isDocumentationNode": true,
"children": []
}
]
}
]
}
]
}
]
}
]
}
]
}
test2.json
{
"_id": "test2",
"actType": "FINDING",
"entries": [{
"text": "U Ergebnis:",
"isDocumentationNode": false,
"children": [{
"text": "U3: Standartext",
"isDocumentationNode": true,
"children": []
}, {
"text": "Brückner durchgeführt o.p.B.",
"isDocumentationNode": true,
"children": []
}, {
"text": "Normale körperliche und altersgerecht Entwicklung",
"isDocumentationNode": true,
"children": [{
"text": "J1/2",
"isDocumentationNode": false,
"children": [{
"text": "Schule:",
"isDocumentationNode": true,
"children": [{
"text": "Ziel Abitur",
"isDocumentationNode": true,
"children": [{
"text": "läuft",
"isDocumentationNode": true,
"children": []
}, {
"text": "gefährdet",
"isDocumentationNode": true,
"children": []
}
]
}
]
}
]
}
]
}
]
}
]
}