我正在尝试从csv文件创建一个json文件。我还想在csv文件中对某些字段进行分组,并将它们组合在json文件中, 以下是我到目前为止的代码,但我不清楚如何将它们分组。
from csv import DictReader
import json
# Path of the input file; despite the "json_" prefix it holds the CSV path.
json_input_file="test.csv"
# Path of the JSON file that the conversion writes.
json_output_file="test.json"
# read csv for json conversion
def read_csv(file, json_file):
    """Convert the CSV file at *file* into a JSON file at *json_file*.

    Every CSV row becomes one dict keyed by the header names, and the
    resulting list of dicts is handed to ``write_json``.

    Args:
        file: Path of the CSV file to read.
        json_file: Path of the JSON file to create.
    """
    # newline="" lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to a reader.
    with open(file, newline="") as csvfile:
        # The module is imported as ``from csv import DictReader``, so the
        # class must be referenced directly rather than as ``csv.DictReader``.
        reader = DictReader(csvfile)
        # `or []` keeps a completely empty file from failing on None.
        titles = reader.fieldnames or []
        csv_rows = [{title: row[title] for title in titles} for row in reader]
    write_json(csv_rows, json_file)
# write json file
def write_json(data, json_file):
    """Write *data* to *json_file* as pretty-printed, UTF-8 encoded JSON.

    Args:
        data: Any JSON-serializable object (here: a list of row dicts).
        json_file: Path of the file to create or overwrite.
    """
    # json.dumps() has no ``encoding`` keyword on Python 3; the encoding
    # belongs on the file object. ensure_ascii=False emits non-ASCII
    # characters verbatim, so the file must be opened as UTF-8 explicitly.
    with open(json_file, "w", encoding="utf-8") as out:
        json.dump(
            data,
            out,
            sort_keys=False,
            indent=4,
            separators=(",", ": "),
            ensure_ascii=False,
        )
# Run the conversion with the module-level input and output paths.
read_csv(json_input_file, json_output_file)
我的csv文件如下所示:
brand_x, x_type, x_color, brand_y, y_type, y_color
x_code1, type1, green, y_code1, type200, orange
x_code1, type1, red, y_code1, type200, pink
x_code1, type1, black, y_code1, type200, yellow
x_code2, type20, blue, y_code2, type201, blue
x_code2, type20, red, y_code3, type202, black
x_code3, type1, white, y_code3, type202, black
x_code3, type1, blue, y_code3, type202, blue
我想按品牌和类型对颜色进行分组。例如,将 brand_x 为 x_code1、x_type 为 type1 的所有颜色归为一组,其余依此类推。
以下是我正在寻找的json输出:
[
{
"brand_x": "x_code1",
"brand_y": "y_code1",
"x_type": "type1",
"y_type":"type200",
"x_type1_color": [
{
"x_color": "green"
},
{
"x_color": "red"
},
{
"x_color": "black"
}
],
"y_type200_color":[
{
"y_color":"orange"
},
{
"y_color": "pink"
},
{
"y_color": "yellow"
}
]
}
]
答案 0 :(得分:0)
pandas 似乎非常适合这种情况。这是一个近似的解决方案。
我没有尝试完全匹配您的输出,因为您似乎使用了一些自定义映射,例如 y_type200_color,它似乎只是 "y_type": "type200" 和 y_color 这两列的组合。我也认为下面这种格式更整洁。
编辑:通过展开 for 循环使解决方案略显整洁。
import pandas as pd
import tempfile
import csv
import os
import json
###############
# CSV Setup #
###############
raw_string = """brand_x,x_type,x_color,brand_y,y_type,y_color
x_code1,type1,green,y_code1,type200,orange
x_code1,type1,red,y_code1,type200,pink
x_code1,type1,black,y_code1,type200,yellow
x_code2,type20,blue,y_code2,type201,blue
x_code2,type20,red,y_code3,type202,black
x_code3,type1,white,y_code3,type202,black
x_code3,type1,blue,y_code3,type202,blue"""
# One record per line; splitlines() (unlike split()) does not break a
# record apart if a field ever contains a space.
raw_data = [line.split(',') for line in raw_string.splitlines()]
# Write through the temporary file's own handle instead of reopening it by
# name while it is still open (which is not portable). newline="" is what
# the csv module expects for files handed to a writer. delete=False keeps
# the file on disk after the handle closes so pandas can read it by name.
with tempfile.NamedTemporaryFile(mode="w", newline="", delete=False) as tmp:
    csv.writer(tmp).writerows(raw_data)

try:
    ##############
    #  Solution  #
    ##############
    # make a pandas data frame from csv
    df = pd.read_csv(tmp.name)
    # what columns will you use as index
    index_columns = ["brand_x", "x_type"]
    df = df.set_index(index_columns)
    # select rows by index
    df = df.loc[("x_code1", "type1")]
    # reset index so that it will be included in our output
    df = df.reset_index()
    # Map every column to its distinct values. dict.fromkeys() removes
    # duplicates while keeping first-seen (CSV) order, so the output is
    # deterministic; a set would reorder the values from run to run.
    output = dict()
    for k, v in df.to_dict("list").items():
        v = list(dict.fromkeys(v))
        # A column with a single distinct value is emitted as a scalar.
        output[k] = v[0] if len(v) == 1 else v
    print(json.dumps(output, indent=4))
finally:
    ##############
    #  Clean up  #
    ##############
    # Remove the temporary CSV even if the solution above raised.
    os.remove(tmp.name)
输出:
{
"brand_x": "x_code1",
"x_color": [
"red",
"green",
"black"
],
"brand_y": "y_code1",
"x_type": "type1",
"y_color": [
"pink",
"orange",
"yellow"
],
"y_type": "type200"
}
答案 1 :(得分:0)
我实施了一些Alter的代码,但做了一些重大改动:
import json
import io
import pandas as pd
# NOTE: this name shadows the stdlib ``csv`` module, which this snippet
# does not import; it is kept so existing references keep working.
csv = """brand_x,x_type,x_color,brand_y,y_type,y_color
x_code1,type1,green,y_code1,type200,orange
x_code1,type1,red,y_code1,type200,pink
x_code1,type1,black,y_code1,type200,yellow
x_code2,type20,blue,y_code2,type201,blue
x_code2,type20,red,y_code3,type202,black
x_code3,type1,white,y_code3,type202,black
x_code3,type1,blue,y_code3,type202,blue"""
df = pd.read_csv(io.StringIO(csv))
# Group on every column that is not a colour column, so each group is one
# distinct (brand_x, x_type, brand_y, y_type) combination.
group_columns = [name for name in df.columns if not name.endswith("color")]
# Collected group dicts, in the same order they are printed.
records = []
for _, df_temp in df.groupby(by=group_columns):
    a = {}
    for k, v in df_temp.to_dict("list").items():
        # dict.fromkeys() de-duplicates while keeping CSV order; a set would
        # make the colour order vary between runs.
        distinct = list(dict.fromkeys(v))
        # A single distinct value is emitted as a scalar, several as a list.
        a[k] = distinct[0] if len(distinct) == 1 else distinct
    records.append(a)
    print(json.dumps(a, indent=4))
打印:
{
"y_type": "type200",
"brand_y": "y_code1",
"x_type": "type1",
"y_color": [
"pink",
"orange",
"yellow"
],
"brand_x": "x_code1",
"x_color": [
"red",
"green",
"black"
]
}
{
"y_type": "type201",
"brand_y": "y_code2",
"x_type": "type20",
"y_color": "blue",
"brand_x": "x_code2",
"x_color": "blue"
}
{
"y_type": "type202",
"brand_y": "y_code3",
"x_type": "type20",
"y_color": "black",
"brand_x": "x_code2",
"x_color": "red"
}
{
"y_type": "type202",
"brand_y": "y_code3",
"x_type": "type1",
"y_color": [
"black",
"blue"
],
"brand_x": "x_code3",
"x_color": [
"white",
"blue"
]
}