重采样功能在Linux下运行,但不适用于Windows

时间:2015-04-01 08:37:59

标签: python pandas time-series

我使用下面的python代码来处理timeserias数据,这个文件在linux系统下运行良好,但是寡妇无法运行:

featgen.py代码如下:

import os
import sys
import cPickle as pickle
import numpy as np
import pandas as pd
from pandas import DataFrame
from pprint import pprint
import csv

import talib
from talib import abstract
from talib import common
from talib import func

from featsel import Feature_Sel


class Feature_Gen():

    ###############################################################################
    def __init__(self, csv_path = './data/ZJIFMI201210-201410.csv', pkl_path = './data/data.pkl', resample_time = "10min"):
        self.csv_path = csv_path
        self.resample_time = resample_time
        self.pkl_path = os.path.join("data", "data_{}.pkl".format(resample_time))

    ###############################################################################
    def feature_gen(self):
        if os.path.exists(self.pkl_path):
            print 'read data from:', self.pkl_path
            data = pd.read_pickle(self.pkl_path)
        else:
            print 'read data from:', self.csv_path
            lines = sum(1 for _ in csv.reader(open(self.csv_path)))
            rs_num = 10000
            col_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adjust']
            rs = pd.read_csv(
                self.csv_path,
                header = None,
                index_col = 0,
                names = col_names,
                parse_dates = {'Timestamp':['Date', 'Time']},
                # skiprows = lines-rs_num
            )
            # print rs.head(10)
            # print rs.tail(10)

            #############################################################################
            ## resample
            if self.resample_time is None:
                data = rs.ix[:, 0:5]
                # print data.shape # OHLCV
                save_pkl_path = os.path.split(self.pkl_path)[0]+'/data.pkl'
            else:
                ***tt1 = rs.Close.resample(self.resample_time, how = 'ohlc')***
                Volume = rs.Volume.resample(self.resample_time, how = 'sum')
                tt1['volume'] = Volume
                data = tt1.dropna()
                # print data.shape # ohlcv
                print 'resample_time:', self.resample_time
                save_pkl_path = os.path.split(self.pkl_path)[0]+'/data_'+self.resample_time+'.pkl'
                print save_pkl_path

            ##################################### Feature Selection #####################################
            ###################################################################### need to discuss
            '''you can switch the feature selection function here'''
            data = Feature_Sel(data).feature_sel5()

            ###############################################################################
            # data = data.dropna()
            # data = data.fillna(method="bfill")

            ###############################################################################
            with open(save_pkl_path, "wb") as fp:
                pickle.dump(data, fp)

        ###############################################################################
        # import pylab as pl
        # pl.plot(data.values[:, 3])
        # pl.savefig("./data/Close.png")

        return data


if __name__ == '__main__':

    '''First, search the *pkl file, if not exist, search the *csv file.'''
    '''Default Parameters: csv_path = './data/ZJIFMI201210-201410.csv', pkl_path = './data/data.pkl', resample_time = None'''
    data = Feature_Gen().feature_gen()
    print data

在Windows下运行代码时,它返回错误:

  

文件“C:\ Anaconda \ sigming-task1-DEV \ featgen.py”,第57行,   feature_gen       tt1 = rs.Close.resample(self.resample_time,how ='ohlc')文件“C:\ Anaconda \ lib \ site-packages \ pandas \ core \ generic.py”,第3032行,   重采样       return sampler.resample(self)。 finalize (self)文件“C:\ Anaconda \ lib \ site-packages \ pandas \ tseries \ resample.py”,第105行,   在重新例子中       提出TypeError('仅对DatetimeIndex,TimedeltaIndex或PeriodIndex'有效)TypeError:仅对DatetimeIndex,TimedeltaIndex有效   或PeriodIndex

有什么好的建议吗?

0 个答案:

没有答案