如何删除
" IndexError:列表索引超出范围"错误
import yaml
import pandas as pd
import numpy as np
import glob
'''Get All the file_names into List'''
all_files=glob.glob('D:\\Dbda\\Project\\Data_Cleaning\\Test_Data\\*.yaml')
file_count=0
all_stats_list=[]
all_score_list=[]
'''Get Only Statistics Data into Stat DataFrame'''
for file_name in all_files:
with open(file_name, 'r') as f:
file_count += 1
print(file_count)
df_stats = pd.io.json.json_normalize(yaml.load(f))
all_stats_list.append(df_stats)
print(all_stats_list)
df_all_stats=pd.concat(all_stats_list)
df_all_stats=df_all_stats[df_all_stats.columns.difference(['innings'])]
df_all_stats.drop(columns=['meta.created','meta.data_version','meta.revision'],inplace=True)
'''df_all_stats.columns=['city', 'date', 'gender', 'match_type',
'outcome.by.runs', 'outcome.winner', 'overs',
'player_of_match', 'teams', 'decision',
'winner', 'umpires', 'venue']
'''
df_all_stats.to_csv('final1.0Stats.csv')
'''Get only Innings_Score Data into Innings_Score Dataframe'''
file_count=0
for file_name in all_files:
file_count += 1
print(file_count)
d = yaml.load(open(file_name))
for i in d['innings']:
df_scores = pd.DataFrame(i[list(i.keys())[0]])
df_scores = pd.io.json.json_normalize(pd.DataFrame(df_scores.deliveries.tolist()).stack().tolist()).assign(team=df_scores.team,date=df_scores['info.dates'])
all_score_list.append(df_scores)
df_all_scores=pd.concat(all_score_list)
df_all_scores.to_csv('final1.0Scores.csv')
我想在dataframe" df_all_scores"中添加date_column,其中数据列将从其他数据帧" df_all_stats"中获取。 匹配文件中的两个局都应该有一个列日期。 为了优化,我正在考虑合并这些或循环,然后从" df_all_stats"将日期列放入" df_all_scores"。
CSV文件在 https://drive.google.com/open?id=13MfFmlQFPo1Dl0ixA1mAkTliJOfqqOYH