def load_data():
data = pd.read_csv('data-WDI/WDI_Data.csv')
print('filling na..')
data.fillna(data.groupby(['Country Name', 'Indicator Name']).transform('mean'), inplace=True)
data.set_index(['Country Name', 'Indicator Name'], inplace=True)
data = data.to_panel()
# save only data for the years, the rest is redundant
years = [str(year) for year in range(1960, 2016)]
data = data[years]
return prepare_test_and_train(data)
该程序可以归结为缺失值并挂起。如果我没有将它们归咎于它们,它的运行速度相对较快。它为什么挂?群体应该花那么长时间吗?