我试图绘制犯罪趋势,所以我从Mongo DB中读取数据,然后在python中进行处理:
reader.py
def read_from_csv_and_push_to_db(path):
data = pd.read_csv(path)
payload = json.loads(data.to_json(orient='records'))
db_connection.write_to_mongo('Course', 'crimes', payload)
def read_from_csv_and_push_to_db(path):
data = pd.read_csv(path)
payload = json.loads(data.to_json(orient='records'))
db_connection.write_to_mongo('Course', 'crimes', payload)
def process_data(data):
df = pd.DataFrame(data)
print(df.head(5))
df['time_hour'] = df['Date'].apply(lambda x: arrow.get(x, "MM/DD/YYYY HH:mm:ss A").format("HH"))
df['month'] = df['Date'].apply(lambda x: arrow.get(x, "MM/DD/YYYY HH:mm:ss A").format("MM"))
df['day'] = df['Date'].apply(lambda x: arrow.get(x, "MM/DD/YYYY HH:mm:ss A").format("DD"))
df['year'] = df['Date'].apply(lambda x: arrow.get(x, "MM/DD/YYYY HH:mm:ss A").format("YYYY"))
return df
# read from mongo and processing
data = db_connection.read_from_mongo('Course', 'crimes', 1000)
df_final = process_data(data)
接下来,我在trends.py
中使用此df_final来绘制所有内容
import matplotlib.pyplot as plt
def plot_trend_crimes_per_month():
df_final.resample('M').size().plot(legend=False)
plt.title('Number of crimes per month in US 2012-2017')
plt.xlabel('Month')
plt.ylabel('Number of crimes')
plt.savefig('img/number_of_crimes.png', format='png')
这是错误:
File "/Users/ilya/PycharmProjects/coursework/trends.py", line 20, in <module>
plot_trend_crimes_per_month()
File "/Users/ilya/PycharmProjects/coursework/trends.py", line 14, in plot_trend_crimes_per_month
df_final.resample('M').size().plot(legend=False)
File "/Users/ilya/PycharmProjects/coursework/venv/lib/python2.7/site-packages/pandas/core/generic.py", line 8155, in resample
base=base, key=on, level=level)
File "/Users/ilya/PycharmProjects/coursework/venv/lib/python2.7/site-packages/pandas/core/resample.py", line 1250, in resample
return tg._get_resampler(obj, kind=kind)
File "/Users/ilya/PycharmProjects/coursework/venv/lib/python2.7/site-packages/pandas/core/resample.py", line 1380, in _get_resampler
"but got an instance of %r" % type(ax).__name__)
TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'RangeIndex'
我试图将所有内容都转换为日期时间,但徒劳无功。 数据集看起来像这样
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
3,10508693,HZ250496,05/03/2016 11:40:00 PM,013XX S SAWYER AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,True,True,1022,10.0,24.0,29.0,08B,1154907.0,1893681.0,2016,05/10/2016 03:56:50 PM,41.864073157,-87.706818608,"(41.864073157, -87.706818608)"
89,10508695,HZ250409,05/03/2016 09:40:00 PM,061XX S DREXEL AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,RESIDENCE,False,True,313,3.0,20.0,42.0,08B,1183066.0,1864330.0,2016,05/10/2016 03:56:50 PM,41.782921527,-87.60436317,"(41.782921527, -87.60436317)"
感谢您的帮助!