fig, ax = plt.subplots()
hist, bins, patch_lst = ax.hist(post_records[:,1], bins=range(31)) # thought that bins could be a sequence, wanted to create 31 bins for 31 total days in a month
ax.plot(hist, bins)
ax.set_title(r'Histogram of Time')
plt.show() # shows nothing
def create_dataframe_of_datetime_objects_and_visualize():
datetime_lst = [1521071920000000000, 1521071901000000000, 1521071844000000000, 1521071741000000000, 1521071534000000000] # to get this variable I loaded my original dataframe with 1980000, sliced the first 5 entries, then printed out the 'datetime_obj_col'. I can't exactly remember what this format is called, I think it's unix time.
id_lst = [974013, 974072, 327212, 123890, 438201]
for each in range(len(datetime_lst)):
datetime_lst[each] = pd.to_datetime(datetime_lst[each], errors='coerce')
datetime_lst[each] = datetime_lst[each].strftime("%d-%b-%y %H:%M:%S")
datetime_lst[each] = pd.to_datetime(datetime_lst[each], errors='coerce', dayfirst=True, format="%d-%b-%y %H:%M:%S")
datetime_lst = pd.Series(datetime_lst)
df = pd.DataFrame({'tweet_id':id_lst, 'datetime_obj_col': datetime_lst})
gb_var = df.groupby(df["datetime_obj_col"].dt.month)
gb_var_count = gb_var.count()
回溯(最近通话最近): 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ core \ groupby \ groupby.py”中的第918行 结果= self._python_apply_general(f) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ core \ groupby \ groupby.py”,第936行,在_python_apply_general中 自轴) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ core \ groupby \ groupby.py”中的第2273行 res = f(群) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ core \ groupby \ groupby.py”,第541行,在f中 返回self.plot(* args,** kwargs) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py”,第2941行,在致电中 sort_columns = sort_columns,** kwds) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py”,行1977,在plot_frame中 ** kwds) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py“,第1804行,在_plot中 plot_obj.generate() 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py“,第266行,在generate中 self._post_plot_logic_common(ax,self.data) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py”,第405行,在_post_plot_logic_common中 self._apply_axis_properties(ax.yaxis,fontsize = self.fontsize) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ pandas \ plot ting_core.py”,第478行,在_apply_axis_properties中 标签= axis.get_majorticklabels()+ axis.get_minorticklabels() 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ matplotlib \ axis.py“,第1245行,在get_majorticklabels中 刻度= self.get_major_ticks() 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ matplotlib \ axis.py“,第1396行,在get_major_ticks中 numticks = len(self.get_major_locator()()) 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ matplotlib \ dates.py”,第1249行,在致电中 self.refresh() 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ matplotlib \ dates.py”,第1269行,刷新 dmin,dmax = self.viewlim_to_dt() 文件“ C:\ Users \ biney \ AppData \ Roaming \ Python \ Python36 \ site-packages \ matplotlib \ dates.py”,第1026行,在viewlim_to_dt中 .format(vmin)) ValueError:视图限制最小值0.0小于1,并且是无效的Matplotlib d 吃了价值。如果将非datetime值传递给h的轴,通常会发生这种情况 作为日期时间单位
这是称为创建日期时间对象的函数的代码。它将打开一个csv文件“ tweet_time_info_preprocessed.csv”,该文件只有三列:'tweet_id“” tweet_created_at_date
“和” tweet_created_at_hour.
然后我有 def create_datetime_objects():
with open("post_time_info_preprocessed.csv", 'r', encoding='utf8') as time_csv:
mycsv = csv.reader(time_csv)
progress = 0
for row in mycsv:
progress +=1
if progress == 1: #header row
if progress % 10000 == 0:
each_post_datetime_lst = []
time_str = str(row[1]) + " " + str(row[2])
a_date_object = pd.to_datetime(time_str, dayfirst=True, format="%d-%b-%y %H:%M:%S")
numpy_arr_of_tweets_and_datetimes = np.array(tweets_and_datetime_objs)
np.save(np_save_path, numpy_arr_of_tweets_and_datetimes)
因此,我将数据帧的5行切开并将其存储到def visualize_objects_histogram():
print("Visualizing timeplot as histogram")
post_records= np.load("tweets_and_datetime_objects.npy")
df = pd.DataFrame(data=post_records, columns=['post_id', 'datetime_obj_col'])
df_sliced = df[0:5]
fig, ax = plt.subplots()
hist, bins, patch_lst = ax.hist(df_sliced['datetime_obj_col'], bins=range(5))
ax.plot(hist, bins)
ax.set_title('Histogram of Time')
我将尝试这篇文章中的内容,看看是否有帮助:Can Pandas plot a histogram of dates?
编辑2: 先前的stackoverflow向我介绍了两种有用的方法,但是它们没有用。我将显示...功能更改为以下内容
tweet_id datetime_obj_col
0 974072352958042112 2018-03-14 23:58:40
1 974072272578166784 2018-03-14 23:58:21
2 974072032177598464 2018-03-14 23:57:24
3 974071601313533953 2018-03-14 23:55:41
4 974070732777914368 2018-03-14 23:52:14
def visualize_datetime_objects_with_pandas():
tweets_and_datetime_objects = np.load("tweets_and_datetime_objects.npy") # contains python datetime objects
print("with pandas")
df = pd.DataFrame(data=tweets_and_datetime_objects, columns=['tweet_id', 'datetimeobj'])
pandas_freq_dict = df['datetimeobj'].value_counts().to_dict()
plt.plot(pandas_freq_dict.keys(), pandas_freq_dict.values())
#df = df.set_index('datetimeobj')
# changing the index of this dataframe to a time index
#df['datetimeobj'].plot(kind='line', style=['--'])
