我的CSV文件如下:
destination_name,destination_code,destination_arrival_scheduled,destination_arrival_actual,origin_name,origin_code,origin_departure_scheduled,origin_departure_actual,train_date,train_code
Pune,Pun,11:50:00,11:52:05,Delhi,Del,10:30:00,10:30:18,17-Jan-17,D220
Mumbai,mum,11:50:00,11:52:05,Delhi,Del,10:30:00,10:30:18,17-Jan-17,D320
Delhi,del,11:50:00,11:52:05,Indore,Ind,10:30:00,10:30:18,17-Jan-17,D320
我需要生成如下所示的JSON:
[
{
"origin": {
"code": "Del",
"name": "Delhi",
"departure": {
"scheduled": "10:30:00",
"diff": "0:00:18",
"actual": "10:30:18"
}
},
"destination": {
"arrival": {
"scheduled": "11:50:00",
"diff": "0:02:05",
"actual": "11:52:05"
},
"code": "Pun",
"name": "Pune"
},
"train": {
"date": "17-Jan-17",
"code": "D220"
}
}
下面是我的Python代码:
import csv,itertools,json,re
# Open the source CSV for reading (the handle is never closed in this snippet).
csvfile =open("C:\\Users\\Amit\\train_data.csv","r")
# Intended path for the JSON output; not used in the code shown here.
json_file = "C:\\Users\\Amit\\file.json";
# Accumulates one entry per CSV row.
data = []
# Not used in the code shown here.
obj ={}
# NOTE(review): this dict is created once, before the loop, so every
# data.append(train) below stores a reference to this same object.
train={}
# DictReader yields one mapping per CSV row, keyed by the header names.
for row in csv.DictReader(csvfile):
# Overwrites 'date' on the single shared dict with the current row's value.
train['date']=row['train_date']
#print(train)
# Appends the same dict object again, so every list entry ends up showing the
# last row's value. Creating a fresh dict inside the loop would avoid this.
data.append(train)
print(data)
data 中的元素数量是正确的,但所有元素都被更新成了最后一条记录的值。请问如何把每一行对应的 train{} 都正确地保存到 data[] 中?
答案 0 :(得分:0)
受 pandas.io.json.json_normalize 的启发(它从嵌套的字典数组中提取数据并展平成表格),
我认为我们可以做类似的事情,只是方向相反:把扁平的列还原为嵌套的字典:
# Inline copy of the CSV shown in the question, so the example needs no file.
# The backslash right after the opening quotes keeps the string from starting
# with an empty line.
data = '''\
destination_name,destination_code,destination_arrival_scheduled,destination_arrival_actual,origin_name,origin_code,origin_departure_scheduled,origin_departure_actual,train_date,train_code
Pune,Pun,11:50:00,11:52:05,Delhi,Del,10:30:00,10:30:18,17-Jan-17,D220
Mumbai,mum,11:50:00,11:52:05,Delhi,Del,10:30:00,10:30:18,17-Jan-17,D320
Delhi,del,11:50:00,11:52:05,Indore,Ind,10:30:00,10:30:18,17-Jan-17,D320'''
import io
import json
from functools import reduce

import pandas as pd
# Columns that pandas should parse as datetimes while reading the CSV.
dates = [
    'destination_arrival_actual',
    'destination_arrival_scheduled',
    'origin_departure_actual',
    'origin_departure_scheduled',
    'train_date',
]

# Read the CSV text into a DataFrame, parsing the columns listed in `dates`.
# io.StringIO is used because pd.compat.StringIO is no longer available in
# current pandas releases.
df = pd.read_csv(io.StringIO(data), parse_dates=dates)

# New columns: delay in whole seconds (actual minus scheduled).
df['destination_arrival_diff'] = (
    df['destination_arrival_actual'] - df['destination_arrival_scheduled']
).dt.total_seconds().round()
df['origin_departure_diff'] = (
    df['origin_departure_actual'] - df['origin_departure_scheduled']
).dt.total_seconds().round()
# Define a merge function
# https://stackoverflow.com/a/29847323/7386332
def merge(d1, d2):
    """Recursively merge ``d2`` into ``d1`` in place.

    For every key of ``d2``: when both ``d1[key]`` and ``d2[key]`` are
    dicts, they are merged recursively; otherwise ``d1[key]`` is set to
    ``d2[key]``, replacing any existing value.

    Args:
        d1: Dictionary that receives the merged content (mutated).
        d2: Dictionary whose content is merged in (not mutated).

    Returns:
        None. The result is available through ``d1``.
    """
    for key, incoming in d2.items():
        existing = d1.get(key)
        if key in d1 and isinstance(existing, dict) and isinstance(incoming, dict):
            # Both sides are nested mappings: descend instead of overwriting.
            merge(existing, incoming)
        else:
            d1[key] = incoming
# Loop through rows and add dictionaries to array output
# https://stackoverflow.com/a/30135649/7386332
# Separator that marks nesting levels inside a column name.
splitkey = "_"
output = []
for _, row in df.iterrows():
    record = {}
    for column in row.index:
        # Build the nested dict from the innermost key outwards, e.g.
        # 'origin_departure_actual' -> {'origin': {'departure': {'actual': value}}}.
        # `reduce` comes from functools; it is not a builtin on Python 3.
        nested = reduce(
            lambda res, cur: {cur: res},
            reversed(column.split(splitkey)),
            str(row[column]),
        )
        # Fold this column's nested dict into the row's record.
        merge(record, nested)
    output.append(record)
print(json.dumps(output, indent=2))
返回:
[
{
"destination": {
"name": "Pune",
"code": "Pun",
"arrival": {
"actual": "2017-12-10 11:52:05",
"scheduled": "2017-12-10 11:50:00",
"diff": "125.0"
}
},
"origin": {
"name": "Delhi",
"code": "Del",
"departure": {
"actual": "2017-12-10 10:30:18",
"scheduled": "2017-12-10 10:30:00",
"diff": "18.0"
}
},
"train": {
"code": "D220",
"date": "2017-01-17 00:00:00"
}
},
{
"destination": {
"name": "Mumbai",
"code": "mum",
"arrival": {
"actual": "2017-12-10
...
}