我有一个json格式的输入,其中包含表信息。现在,我想从{tables->行-> content->“ DESCRIPTION”}中提取信息。如何从每个内容中提取内容?
input.json
{"tables": [
{
"bbox": [
186,
858,
1158,
1218
],
"column_types": [
"table_column_description",
"table_column_other",
"table_column_quantity",
"table_column_quantity",
"table_column_quantity",
"table_column_quantity",
"table_column_rate",
"table_column_amount_total"
],
"page": 0,
"rows": [
{
"cells": [
{
"bbox": [
186,
858,
408,
918
],
"content": "DESCRIPTION",
"value": "DESCRIPTION",
"value_type": "text"
},
{
"bbox": [
414,
858,
480,
918
],
"content": "M4A\nCode",
"value": "M4A\nCode",
"value_type": "text"
},
{
"bbox": [
558,
858,
648,
918
],
"content": "AMOUNT\n(RM)",
"value": "AMOUNT\n(RM)",
"value_type": "text"
},
{
"bbox": [
648,
858,
750,
918
],
"content": "DISCOUNT\n0RM)",
"value": "DISCOUNT\n0RM)",
"value_type": "text"
},
{
"bbox": [
744,
858,
870,
918
],
"content": "GROSS AMOUNT\n(RM)",
"value": "GROSS AMOUNT\n(RM)",
"value_type": "text"
},
{
"bbox": [
876,
858,
954,
918
],
"content": "GST\n(RM)",
"value": "GST\n(RM)",
"value_type": "text"
},
{
"bbox": [
954,
858,
1008,
918
],
"content": "TAX\nCODE",
"value": "TAX\nCODE",
"value_type": "text"
},
{
"bbox": [
1068,
858,
1158,
918
],
"content": "PAYABLE\n(PRM)",
"value": "PAYABLE\n(PRM)",
"value_type": "text"
}
],
"type": "header"
},
{
"cells": [
{
"bbox": [
186,
972,
408,
1008
],
"content": "EKOOI -CONSULTATION",
"value": "EKOOI -CONSULTATION",
"value_type": "text"
},
{
"bbox": [
414,
972,
480,
1008
],
"content": "",
"value": "",
"value_type": "text"
},
{
"bbox": [
558,
972,
648,
1008
],
"content": "90.00",
"value": "90.00",
"value_type": "number"
},
{
"bbox": [
648,
972,
750,
1008
],
"content": "0.00",
"value": "0.00",
"value_type": "number"
},
{
"bbox": [
744,
972,
870,
1008
],
"content": "90.00",
"value": "90.00",
"value_type": "number"
},
{
"bbox": [
876,
972,
954,
1008
],
"content": "5.40",
"value": "5.40",
"value_type": "number"
},
{
"bbox": [
954,
972,
1008,
1008
],
"content": "SR",
"value": null,
"value_type": null
},
{
"bbox": [
1068,
972,
1158,
1008
],
"content": "95.40",
"value": "95.40",
"value_type": "number"
}
],
"type": "data"
},
{
"cells": [
{
"bbox": [
186,
1008,
408,
1044
],
"content": "EKOOI - PROCEDURE FEE",
"value": "EKOOI - PROCEDURE FEE",
"value_type": "text"
},
{
"bbox": [
414,
1008,
480,
1044
],
"content": "",
"value": "",
"value_type": "text"
},
{
"bbox": [
558,
1008,
648,
1044
],
"content": "260.00",
"value": "260.00",
"value_type": "number"
},
{
"bbox": [
648,
1008,
750,
1044
],
"content": "(26.00)",
"value": null,
"value_type": null
},
{
"bbox": [
744,
1008,
870,
1044
],
"content": "234.00",
"value": "234.00",
"value_type": "number"
},
{
"bbox": [
876,
1008,
954,
1044
],
"content": "14.04",
"value": "14.04",
"value_type": "number"
},
{
"bbox": [
954,
1008,
1008,
1044
],
"content": "SR",
"value": null,
"value_type": null
},
{
"bbox": [
1068,
1008,
1158,
1044
],
"content": "248.04",
"value": "248.04",
"value_type": "number"
}
],
"type": "data"
},
{
"cells": [
{
"bbox": [
186,
1038,
408,
1068
],
"content": "EKOOI -TREATMENT FEE",
"value": "EKOOI -TREATMENT FEE",
"value_type": "text"
},
{
"bbox": [
414,
1038,
480,
1068
],
"content": "",
"value": "",
"value_type": "text"
},
{
"bbox": [
558,
1038,
648,
1068
],
"content": "125.00",
"value": "125.00",
"value_type": "number"
},
{
"bbox": [
648,
1038,
750,
1068
],
"content": "0.00",
"value": "0.00",
"value_type": "number"
},
{
"bbox": [
744,
1038,
870,
1068
],
"content": "125.00",
"value": "125.00",
"value_type": "number"
},
{
"bbox": [
876,
1038,
954,
1068
],
"content": "7.50",
"value": "7.50",
"value_type": "number"
},
{
"bbox": [
954,
1038,
1008,
1068
],
"content": "SR",
"value": null,
"value_type": null
},
{
"bbox": [
1068,
1038,
1158,
1068
],
"content": "132.50",
"value": "132.50",
"value_type": "number"
}
],
"type": "data"
},
{
"cells": [
{
"bbox": [
186,
1062,
408,
1170
],
"content": "COLONOSCOPY PLUS/MINUS\nIBCOPIO/DESTRUCTION OF\nLKSOON- TREATMENT FEE",
"value": "COLONOSCOPY PLUS/MINUS IBCOPIO/DESTRUCTION OF LKSOON- TREATMENT FEE",
"value_type": "text"
},
{
"bbox": [
414,
1062,
480,
1170
],
"content": "H2000",
"value": "H2000",
"value_type": "text"
},
{
"bbox": [
558,
1062,
648,
1170
],
"content": "1,125.00\n125.00\n--o.",
"value": null,
"value_type": null
},
{
"bbox": [
648,
1062,
750,
1170
],
"content": "(112.50)\n0.00\n-.--",
"value": null,
"value_type": null
},
{
"bbox": [
744,
1062,
870,
1170
],
"content": "1,012.50\n125.00\n-----",
"value": null,
"value_type": null
},
{
"bbox": [
876,
1062,
954,
1170
],
"content": "60.75\n7.50\n.--",
"value": null,
"value_type": null
},
{
"bbox": [
954,
1062,
1008,
1170
],
"content": "5R\n-",
"value": "5",
"value_type": "number"
},
{
"bbox": [
1068,
1062,
1158,
1170
],
"content": "1,073.25\n132.50\n---.0-",
"value": null,
"value_type": null
}
],
"type": "data"
},
{
"cells": [
{
"bbox": [
186,
1158,
408,
1194
],
"content": "OGDS MITH BIOPSY",
"value": "OGDS MITH BIOPSY",
"value_type": "text"
},
{
"bbox": [
414,
1158,
480,
1194
],
"content": "G6500",
"value": "G6500",
"value_type": "text"
},
{
"bbox": [
558,
1158,
648,
1194
],
"content": "405.00",
"value": "405.00",
"value_type": "number"
},
{
"bbox": [
648,
1158,
750,
1194
],
"content": "40.50)\nT0.2v",
"value": null,
"value_type": null
},
{
"bbox": [
744,
1158,
870,
1194
],
"content": "364.50",
"value": "364.50",
"value_type": "number"
},
{
"bbox": [
876,
1158,
954,
1194
],
"content": "21.87",
"value": "21.87",
"value_type": "number"
},
{
"bbox": [
954,
1158,
1008,
1194
],
"content": "SR",
"value": null,
"value_type": null
},
{
"bbox": [
1068,
1158,
1158,
1194
],
"content": "386.37",
"value": "386.37",
"value_type": "number"
}
],
"type": "data"
},
{
"cells": [
{
"bbox": [
186,
1182,
408,
1218
],
"content": "EKOOI - TREATMENT FEE",
"value": "EKOOI - TREATMENT FEE",
"value_type": "text"
},
{
"bbox": [
414,
1182,
480,
1218
],
"content": "",
"value": "",
"value_type": "text"
},
{
"bbox": [
558,
1182,
648,
1218
],
"content": "125.00",
"value": "125.00",
"value_type": "number"
},
{
"bbox": [
648,
1182,
750,
1218
],
"content": "0.00",
"value": "0.00",
"value_type": "number"
},
{
"bbox": [
744,
1182,
870,
1218
],
"content": "125.00",
"value": "125.00",
"value_type": "number"
},
{
"bbox": [
876,
1182,
954,
1218
],
"content": "7.50",
"value": "7.50",
"value_type": "number"
},
{
"bbox": [
954,
1182,
1008,
1218
],
"content": "SR",
"value": null,
"value_type": null
},
{
"bbox": [
1068,
1182,
1158,
1218
],
"content": "132.50",
"value": "132.50",
"value_type": "number"
}
],
"type": "data"
}
]
}
]`
在-> Excel或csv中的预期输出
说明M4A \ n代码AMOUNT \ n(RM)折扣\ n0RM)总金额\ n(RM) EKOOI-咨询90.00
import json
f = open('input_json_file.json')
data = json.load(f)
print(data['tables'][0]["rows"][0]["cells"][0]["content"])
print(data['tables'][0]["rows"][0]["cells"][1]["content"])
答案 0 :(得分:0)
此代码将json转换为CSV。
import json
import sys
import os
from collections import OrderedDict
def createOp(ipJsonFile, opCsvFile):
jsonData = None;
with open(ipJsonFile) as jFH:
jsonData = json.load(jFH, object_pairs_hook=OrderedDict)
oFH = open(opCsvFile,"w");
for row in jsonData["tables"][0]["rows"]:
#print "id:",id, row;
line = "";
for col in row["cells"]:
#print col["value"];
val = col["value"];
if val != None:
val = val.replace("\n","-");
line += str(val) + ",";
#print line;
oFH.write(line + "\n")
oFH.close();
def main(args):
createOp(args[1], args[2]);
if __name__ == '__main__':
if len(sys.argv) < 3:
print "json_to_csv.py <ip json file> <op csv file>";
sys.exit(-1);
main(sys.argv)
像这样运行此代码
python json_to_csv.py <ip json> <op csv file>
我更改了代码(仅1行)以在Python3上运行
import json
import sys
import os
from collections import OrderedDict
def createOp(ipJsonFile, opCsvFile):
jsonData = None;
with open(ipJsonFile) as jFH:
jsonData = json.load(jFH, object_pairs_hook=OrderedDict)
oFH = open(opCsvFile,"w");
for row in jsonData["tables"][0]["rows"]:
#print "id:",id, row;
line = "";
for col in row["cells"]:
#print col["value"];
val = col["value"];
if val != None:
val = val.replace("\n","-");
line += str(val) + ",";
#print line;
oFH.write(line + "\n")
oFH.close();
def main(args):
createOp(args[1], args[2]);
if __name__ == '__main__':
if len(sys.argv) < 3:
print("json_to_csv.py <ip json file> <op csv file>");
sys.exit(-1);
main(sys.argv)
它正常工作。
答案 1 :(得分:0)
您只需要从json文件中提取一些字段,然后将其转换为csv文件。但是由于您的某些字段包含特殊字符(\n
),因此我强烈建议您使用csv模块来编写csv文件。
代码可以很简单:
import json
import csv
with open('input.json') as jsonfile, open('output.csv', 'w', encoding='utf-8',
'newline'='') as csvfile:
js = json.load(jsonfile)
wr = csv.writer(csvfile)
for table in js['tables']:
for row in table['rows']:
wr.writerow([cell['content'] for cell in row['cells']])
如果您的Excel的本地化版本使用的分隔符不是逗号(,
),例如分号(;
),则只需声明是否使用csv.writer
:< / p>
wr = csv.writer(csvfile, delimiter=';')
答案 2 :(得分:0)
可以使用菜单选项将任何格式正确的JSON导入excel单元中。 (可以使用VBA,但不是必需的)。 使用“数据”选项卡打开对存储JSON的文件的查询。 电源查询编辑器窗口应打开,并带有导入的名称和值。 JSON格式的值通常显示为包含单词“ Record”的单元格。通过根据需要使用上下文菜单进行转换,或使用展开图标进行向下钻取以转换为列。 选择“关闭并加载”以将转换后的数据打开到新的工作表中。