Question

我正在尝试从python数据框中绘制时间序列。代码如下。

import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, YearLocator, MonthLocator
plt.style.use('ggplot')


def plot(df, filename, heading=None):

    fig, ax = plt.subplots(figsize=(8, 4))

    min_date = None
    max_date = None
    for col_name in df.columns.values:

        # plot the column
        col = df[col_name]
        col = col[col.notnull()] # drop NAs
        dates = [zzz.to_timestamp().date() for zzz in col.index]
        ax.plot_date(x=dates, y=col, fmt='-', label=col_name,
            tz=None, xdate=True, ydate=False, linewidth=1.5)

        # establish the date range for the data
        if min_date:
            min_date = min(min_date, min(dates))
        else:
            min_date = min(dates)
        if max_date:
            max_date = max(max_date, max(dates))
        else:
            max_date = max(dates)

    # give a bit of space at each end of the plot - aesthetics
    span = max_date - min_date
    extra = int(span.days * 0.03) * datetime.timedelta(days=1)
    ax.set_xlim([min_date - extra, max_date + extra])

    # format the x tick marks
    ax.xaxis.set_major_formatter(DateFormatter('%Y'))
    ax.xaxis.set_minor_formatter(DateFormatter('\n%b'))
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_minor_locator(MonthLocator(bymonthday=1, interval=2))

    # grid, legend and yLabel
    ax.grid(True)
    ax.legend(loc='best', prop={'size':'x-small'})
    ax.set_ylabel('Percent')

    # heading
    if heading:
        fig.suptitle(heading, fontsize=12)
    fig.tight_layout(pad=1.5)

    # footnote
    fig.text(0.99, 0.01, 'nse-timeseries-plot', ha='right',
        va='bottom', fontsize=8, color='#999999')

    # save to file
    fig.savefig(filename, dpi=125)


    url = "https://www.google.com/finance/historical?cid=207437&startdate=Jan%201%2C%201971&enddate=Jul%201%2C%202017&start={0}&num=30"
    how_many_pages=138
    start=0

    for i in range(how_many_pages):
        new_url = url.format(start)
        page = requests.get(new_url)
        soup = BeautifulSoup(page.content, "lxml")
        table = soup.find_all('table', class_='gf-table historical_price')[0]

        columns_header = [th.getText() for th in table.findAll('tr')[0].findAll('th')]
        data_rows=table.findAll('tr')[1:]
        data=[[td.getText() for td in data_rows[i].findAll(['td'])] for i in range(len(data_rows))]

        if start == 0:
            final_df = pd.DataFrame(data, columns=columns_header)
        else:
            df = pd.DataFrame(data, columns=columns_header)
            final_df = pd.concat([final_df, df],axis=0)
        start += 30
        final_df.to_csv('nse_data.csv', sep='\t', encoding='utf-8')


    plot(final_df,'nsetsplot')

当我运行代码时，我收到错误 AttributeError: 'numpy.int64' object has no attribute 'to_timestamp'

当我做的时候

dates = [zzz.to_timestamp().date() for zzz in col.index]

我在Windows 7（x86_64）上使用Anaconda 64位

Answer 1

显然，您的DataFrame的索引不是pandas.PeriodIndex。相反，索引显示保持整数。您发布的代码要求数据框的索引为PeriodIndex。 E.g。

In [36]: df
Out[36]: 
                a         b
2012-01  1.457900  7.084201
2012-02  1.775861  6.448277
2012-03  1.069051  7.861898

In [37]: df.index
Out[37]: PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')

当索引是正确的类型时，以下代码（类似于您发布的代码中的行）有效：

In [39]: dates = [zzz.to_timestamp().date() for zzz in df.index]

In [40]: dates
Out[40]: 
[datetime.date(2012, 1, 1),
 datetime.date(2012, 2, 1),
 datetime.date(2012, 3, 1)]

Answer 2

这可能是由于从excel导入数据框而导致的excel格式问题。我有一个类似的问题：日期在excel中看起来很好，但是在导入的数据框中却显示为整数（excel中日期的整数表示）。这为我解决了问题：我在excel中选择整个日期列，然后将日期格式应用于该列。之后，当我将其导入为数据框时，日期将作为日期显示出来。

AttributeError：＆＃39; numpy.int64＆＃39;对象没有属性＆＃39; to_timestamp＆＃39;

2 个答案: