如何让pandas DataFrame直方图按月份或星期的时间顺序排列?

时间:2018-05-01 12:25:48

标签: pandas datetime dataframe histogram pandas-groupby

我希望有一组函数,可以针对DataFrame中的某个变量,按天、月或小时分组绘制直方图。但当我尝试这样做时,得到的图表在水平轴上按字母顺序列出星期或月份,而它们本应按时间顺序排列。该怎么做?

我的MWE代码如下,适用于Jupyter笔记本:

import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
plt.rcParams["figure.figsize"] = (17, 10)

# Number of observations recorded for each month of 2017; every observation
# carries a score of 1.
month_counts = {
    "2017-01": 3,
    "2017-02": 1,
    "2017-03": 4,
    "2017-04": 1,
    "2017-05": 5,
    "2017-06": 9,
    "2017-07": 2,
    "2017-08": 6,
    "2017-09": 5,
    "2017-10": 3,
    "2017-11": 5,
    "2017-12": 1,
}

# Expand the tallies into one [month, score] row per observation, keeping the
# months in chronological order.
rows = [[month, 1] for month, n in month_counts.items() for _ in range(n)]

df = pd.DataFrame(rows, columns=["datetime", "score"])

# Parse the "YYYY-MM" strings into timestamps.
df["datetime"] = pd.to_datetime(df["datetime"])

# Optional helper columns read by the hour/day histogram functions:
#   df["hour"]         = df["datetime"].dt.hour
#   df["weekday_name"] = df["datetime"].dt.day_name()
# (`.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` replaces it.)

# Full month name as produced by strftime("%B"), e.g. "January".
df["month_name"] = df["datetime"].dt.strftime("%B")

# Promote the timestamps to the index and drop the now-redundant column.
df.set_index("datetime", inplace=True)

df.head()

# ----------

def histogram_hour_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall in each hour.

    Rows are grouped by the `hour` column, the non-null entries of `variable`
    are counted per group, and the counts are drawn with `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and an `hour` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `hour` column is
        missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "hour" not in df.columns:
        log.error("field hour not found in DataFrame")
        return False
    counts = df.groupby(by = "hour")[variable].count()
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

def histogram_day_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall on each weekday.

    Rows are grouped by the `weekday_name` column, the non-null entries of
    `variable` are counted per group, and the counts are drawn with
    `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and a `weekday_name` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `weekday_name` column
        is missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "weekday_name" not in df.columns:
        log.error("field weekday_name not found in DataFrame")
        return False
    # NOTE: groupby sorts its keys, so string day names come out in
    # alphabetical order rather than Monday-to-Sunday order.
    counts = df.groupby(by = "weekday_name")[variable].count()
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

def histogram_month_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall in each month.

    Rows are grouped by the `month_name` column, the non-null entries of
    `variable` are counted per group, and the counts are drawn with
    `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and a `month_name` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `month_name` column
        is missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "month_name" not in df.columns:
        log.error("field month_name not found in DataFrame")
        return False
    # NOTE: groupby sorts its keys, so string month names come out in
    # alphabetical order (April, August, ...) rather than calendar order.
    counts = df.groupby(by = "month_name")[variable].count()
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

# ----------

# Draw the per-month histogram of the "score" column.
histogram_month_counts(df=df, variable="score")

2 个答案:

答案 0 :(得分:2)

您可以先 import calendar,然后用 calendar.month_name 对 groupby-count 语句的结果进行 reindex,如下所示:

来自calendar documentation

注意

  

calendar.month_name:一个数组,表示当前语言环境下一年中的各个月份。它遵循一月的月份编号为 1 的惯例,因此长度为 13,且 month_name[0] 为空字符串。

import matplotlib.pyplot as plt
import pandas as pd
import calendar
%matplotlib inline
plt.rcParams["figure.figsize"] = (17, 10)

# Number of observations recorded for each month of 2017; every observation
# carries a score of 1.
month_counts = {
    "2017-01": 3,
    "2017-02": 1,
    "2017-03": 4,
    "2017-04": 1,
    "2017-05": 5,
    "2017-06": 9,
    "2017-07": 2,
    "2017-08": 6,
    "2017-09": 5,
    "2017-10": 3,
    "2017-11": 5,
    "2017-12": 1,
}

# Expand the tallies into one [month, score] row per observation, keeping the
# months in chronological order.
rows = [[month, 1] for month, n in month_counts.items() for _ in range(n)]

df = pd.DataFrame(rows, columns=["datetime", "score"])

# Parse the "YYYY-MM" strings into timestamps.
df["datetime"] = pd.to_datetime(df["datetime"])

# Optional helper columns read by the hour/day histogram functions:
#   df["hour"]         = df["datetime"].dt.hour
#   df["weekday_name"] = df["datetime"].dt.day_name()
# (`.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` replaces it.)

# Full month name as produced by strftime("%B"), e.g. "January".
df["month_name"] = df["datetime"].dt.strftime("%B")

# Promote the timestamps to the index and drop the now-redundant column.
df.set_index("datetime", inplace=True)

df.head()

# ----------

def histogram_hour_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall in each hour.

    Rows are grouped by the `hour` column, the non-null entries of `variable`
    are counted per group, and the counts are drawn with `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and an `hour` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `hour` column is
        missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "hour" not in df.columns:
        log.error("field hour not found in DataFrame")
        return False
    counts = df.groupby(by = "hour")[variable].count()
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

def histogram_day_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall on each weekday.

    Rows are grouped by the `weekday_name` column, the non-null entries of
    `variable` are counted per group, and the counts are drawn with
    `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and a `weekday_name` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `weekday_name` column
        is missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "weekday_name" not in df.columns:
        log.error("field weekday_name not found in DataFrame")
        return False
    # NOTE: groupby sorts its keys, so string day names come out in
    # alphabetical order rather than Monday-to-Sunday order.
    counts = df.groupby(by = "weekday_name")[variable].count()
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

def histogram_month_counts(df, variable):
    """
    Plot a bar chart of how many values of `variable` fall in each month,
    with the bars in calendar order.

    Rows are grouped by the `month_name` column, the non-null entries of
    `variable` are counted per group, the result is reindexed to
    `calendar.month_name[1:]`, and the counts are drawn with `Series.plot`.

    Parameters:
        df: DataFrame with a DatetimeIndex and a `month_name` column.
        variable: name of the column whose entries are counted.

    Returns:
        False if the index is not a DatetimeIndex or the `month_name` column
        is missing (an error is logged in both cases); None after plotting.
    """
    # The snippet never defines `log`, so the error paths used to die with a
    # NameError instead of returning False. Create the logger locally.
    import logging

    log = logging.getLogger(__name__)

    # isinstance also accepts timezone-aware and non-nanosecond datetime
    # indexes, which comparing the dtype against fixed strings rejected.
    if not isinstance(df.index, pd.DatetimeIndex):
        log.error("index is not datetime")
        return False
    if "month_name" not in df.columns:
        log.error("field month_name not found in DataFrame")
        return False
    # groupby sorts the month names alphabetically; reindexing against
    # calendar.month_name restores January..December order. Index 0 of
    # month_name is an empty string, hence the [1:] slice. fill_value=0 keeps
    # months with no rows as integer zero counts instead of NaN.
    counts = (
        df.groupby(by = "month_name")[variable]
        .count()
        .reindex(calendar.month_name[1:], fill_value = 0)
    )
    counts.plot(kind = "bar", width = 1, rot = 0, alpha = 0.7)

# ----------

# Draw the per-month histogram of the "score" column in calendar order.
histogram_month_counts(df=df, variable="score")

输出:

enter image description here

答案 1 :(得分:0)

counts = df.groupby(by="month_name")[variable].count()

您正在按month_name进行分组,这可能也是排序发生的地方。

我认为您需要按索引排序,因为这是您的日期时间数据。