我需要帮助。以下是我目前为止写出的代码:
# Convert new_df.csv into new_df.json, shaped as {"new_df": [row, row, ...]}:
# one JSON object per CSV data row, keyed by the names in the CSV header row.
import csv
import json

csv_path = 'new_df.csv'
json_path = csv_path.replace('.csv', '.json')
root_key = csv_path.replace('.csv', '')

# `with` closes both files even if a row fails to serialise. newline='' is
# what the csv module expects, so quoted fields and \r\n endings parse cleanly.
with open(csv_path, 'r', newline='') as csvfile, open(json_path, 'w') as jsonfile:
    # DictReader reads the header row itself; no manual split(',') that would
    # break on quoted column names or leave a stray '\r' on the last one.
    reader = csv.DictReader(csvfile)

    jsonfile.write('{' + json.dumps(root_key) + ': [\n')  # write json parent of data list

    # Emit the separator *before* every row except the first. This avoids
    # pre-counting lines (a third open of the file) and can never leave a
    # trailing comma when the CSV contains blank lines that DictReader skips.
    separator = ''
    for row in reader:
        jsonfile.write(separator)
        json.dump(row, jsonfile)
        separator = ',\n'

    # Same layout as before: a newline after the last row, none if no rows.
    jsonfile.write('\n]}' if separator else ']}')
ID Arrival Departure ArrivalDate DepatureDate
1001 New York Holland 2009-09-23 2012-07-23
1301 Florida Germany 2010-10-23 2012-10-11
1401 New York Holland 2009-09-23 2009-09-25
1301 New York Beijing 2009-09-23 2010-09-21
1201 New York Holland 2008-01-01 2009-09-23
1001 Virginia New York 2008-01-01 2009-09-22
1021 New York Holland 2009-09-23 2009-09-25
1001 New York Holland 2009-09-24 2012-07-23
1021 New York Holland 2009-09-26 2012-07-23
1001 New York Holland 2009-09-25 2012-07-23
….... ......... ........ .............. ...........
1001 New York Holland 2012-07-23 2012-07-23
1401 New York Holland 2009-09-25 2009-09-25
1301 New York Beijing 2010-09-21 2010-09-21
1201 New York Holland 2009-09-23 2009-09-23
1001 Virginia New York 2009-09-22 2009-09-22
1021 New York Holland 2009-09-25 2009-09-25
1001 New York Holland 2012-07-23 2012-07-23
1021 New York Holland 2012-07-23 2012-07-23
1001 New York Holland 2012-07-23 2012-07-23
我想遍历 ArrivalDate,
并把日期相同的行归并到一起,格式如下所示,其后是预期输出:
即
{ "ArrivalDate": { "Arrival": ID1,...,IDn,
"Departure": ID1,...,IDn
} }
(我需要查看 DepatureDate 是否与 ArrivalDate 相同:如果相同,就把该行的 ID 追加到“Departure”下;否则只列在“Arrival”下。)
{
“2009-09-23”:
{ “New York”: 1001, 1401, 1301, 1021,
“Holland” : 1021,
“Beijing”: 1301,
}
{ “2010-10-23”:
{“Florida”: 1301,
}
{ “2008-01-01”:
{“New York”: 1201,
“Virginia”: 1001,
}
{“2009-09-24”:
{“New York”: 1001
}
{“2009-09-26”:
{“New York”: 1021
}
{“2009-09-25”:
{“New York”: 1001
“Holland”: 1401
}
{“ 2012-07-23”:
{ “New York”: 1001,
“Holland”: 1001,
答案 0 :(得分:2)
import json
# Build json_dict as {arrival date 'YYYY-MM-DD': {city: [flight IDs as str]}}
# from the DataFrame `df` (defined elsewhere).
#
# For each ArrivalDate group:
#   * rows whose DepatureDate equals the arrival date are grouped by 'Arrival'
#   * all remaining rows are grouped by 'Departure'
# and the IDs of each city group are collected as strings.
json_dict = {}
for arrival_date, data in df.groupby('ArrivalDate'):
    # NOTE(review): .strftime assumes ArrivalDate holds datetime values - confirm.
    date_key = arrival_date.strftime('%Y-%m-%d')
    # setdefault keeps earlier entries if two group keys format to the same day.
    cities = json_dict.setdefault(date_key, {})

    same_day = data.DepatureDate == arrival_date
    # Grouping an empty frame yields no groups, so no explicit .empty check.
    for column, rows in (('Arrival', data[same_day]),
                         ('Departure', data[~same_day])):
        for city, flights in rows.groupby(column):
            # Iterate the ID values directly: going through .to_dict() would
            # collapse rows that share an index label and silently drop IDs.
            # extend() merges instead of overwriting when the same city shows
            # up in both the same-day and the other-day group.
            cities.setdefault(city, []).extend(str(v) for v in flights.ID)
假设您想要 JSON 格式的输出,
可以这样打印(其后是打印结果):
# Pretty-print the collected mapping as JSON: keys sorted, 4-space indent.
print(
    json.dumps(
        json_dict,
        sort_keys=True,
        indent=4,
    )
)
{
"2008-01-01": {
"Holland": [
"1201"
],
"New York": [
"1001"
]
},
"2009-09-22": {
"Virginia": [
"1001"
]
},
"2009-09-23": {
"Beijing": [
"1301"
],
"Holland": [
"1001",
"1401",
"1021"
],
"New York": [
"1201"
]
},
"2009-09-24": {
"Holland": [
"1001"
]
},
"2009-09-25": {
"Holland": [
"1001"
],
"New York": [
"1021",
"1001",
"1401"
]
},
"2009-09-26": {
"Holland": [
"1021"
]
},
"2010-09-21": {
"New York": [
"1301"
]
},
"2010-10-23": {
"Germany": [
"1301"
]
},
"2012-07-23": {
"New York": [
"1001",
"1001",
"1021",
"1001"
]
}
}