featgen.py代码如下:
import os
import sys
import cPickle as pickle
import numpy as np
import pandas as pd
from pandas import DataFrame
from pprint import pprint
import csv
import talib
from talib import abstract
from talib import common
from talib import func
from featsel import Feature_Sel
class Feature_Gen():
    """Build the feature table from a bar-data CSV file, with a pickle cache.

    The CSV is read without a header row using the columns listed in
    ``_CSV_COLUMNS``. The bars are optionally resampled to a coarser
    interval, passed through ``Feature_Sel(...).feature_sel5()`` and the
    result is pickled so later runs can skip the CSV entirely.
    """

    # Column names assigned to the header-less source CSV.
    _CSV_COLUMNS = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adjust']

    ###############################################################################
    def __init__(self, csv_path = './data/ZJIFMI201210-201410.csv', pkl_path = './data/data.pkl', resample_time = "10min"):
        """Store the input location and derive the cache location.

        Args:
            csv_path: path of the source CSV file.
            pkl_path: accepted for backward compatibility; the cache path is
                derived from ``resample_time`` and this value is not used.
            resample_time: pandas resample rule (e.g. "10min"), or None to
                keep the bars at their original interval.
        """
        self.csv_path = csv_path
        self.resample_time = resample_time
        # Use one cache file per resample rule. The un-resampled case maps to
        # "data.pkl" so that the path checked on load is the same path that
        # feature_gen() writes to (previously it looked for "data_None.pkl"
        # but saved "data.pkl", so that cache was never hit).
        if resample_time is None:
            pkl_name = "data.pkl"
        else:
            pkl_name = "data_{}.pkl".format(resample_time)
        self.pkl_path = os.path.join("data", pkl_name)

    ###############################################################################
    @staticmethod
    def _ensure_datetime_index(frame):
        """Return ``frame`` with its index converted to a DatetimeIndex.

        ``resample`` only works on a DatetimeIndex, TimedeltaIndex or
        PeriodIndex. When ``read_csv`` cannot parse the combined Date/Time
        column the index is left as plain strings, so it is converted
        explicitly here. The frame is modified in place and returned.

        Raises:
            TypeError: if the index still is not a DatetimeIndex after the
                conversion attempt.
        """
        if not isinstance(frame.index, pd.DatetimeIndex):
            frame.index = pd.to_datetime(frame.index)
        # Some pandas versions hand back the input unchanged instead of
        # raising when parsing fails, so verify the result explicitly.
        if not isinstance(frame.index, pd.DatetimeIndex):
            raise TypeError(
                "could not convert the 'Date'/'Time' columns to a DatetimeIndex; "
                "check the date/time format of the input CSV"
            )
        return frame

    ###############################################################################
    def feature_gen(self):
        """Return the feature table, loading it from the cache when possible.

        If the pickle cache exists it is loaded and returned as-is.
        Otherwise the CSV is parsed, optionally resampled to OHLC bars plus
        summed volume, run through feature selection, written to the cache
        and returned.

        Raises:
            TypeError: if resampling is requested and the CSV timestamps
                cannot be converted to a DatetimeIndex.
        """
        if os.path.exists(self.pkl_path):
            print('read data from: {}'.format(self.pkl_path))
            return pd.read_pickle(self.pkl_path)

        print('read data from: {}'.format(self.csv_path))
        rs = pd.read_csv(
            self.csv_path,
            header = None,
            index_col = 0,
            names = self._CSV_COLUMNS,
            # 'Date' and 'Time' are merged into one 'Timestamp' column,
            # which index_col = 0 then selects as the index.
            parse_dates = {'Timestamp': ['Date', 'Time']},
        )

        #############################################################################
        ## resample
        if self.resample_time is None:
            # First five data columns: Open, High, Low, Close, Volume.
            data = rs.iloc[:, 0:5]
        else:
            rs = self._ensure_datetime_index(rs)
            # NOTE: the ``how=`` keyword is kept for the old pandas release
            # this Python 2 script runs on; newer pandas spells this
            # ``.resample(rule).ohlc()`` / ``.resample(rule).sum()``.
            bars = rs.Close.resample(self.resample_time, how = 'ohlc')
            bars['volume'] = rs.Volume.resample(self.resample_time, how = 'sum')
            # Drop intervals that contain no trades.
            data = bars.dropna()
            print('resample_time: {}'.format(self.resample_time))
            print(self.pkl_path)

        ##################################### Feature Selection #####################################
        # You can switch the feature selection function here (still under discussion).
        data = Feature_Sel(data).feature_sel5()

        ###############################################################################
        with open(self.pkl_path, "wb") as fp:
            pickle.dump(data, fp)
        return data
if __name__ == '__main__':
    # Feature_Gen first looks for the cached *.pkl file; only when it does
    # not exist is the *.csv file parsed.
    # Default parameters: csv_path = './data/ZJIFMI201210-201410.csv',
    # pkl_path = './data/data.pkl', resample_time = "10min".
    data = Feature_Gen().feature_gen()
    print(data)
在Windows下运行代码时,它返回错误:
  File "C:\Anaconda\sigming-task1-DEV\featgen.py", line 57, in feature_gen
    tt1 = rs.Close.resample(self.resample_time, how = 'ohlc')
  File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 3032, in resample
    return sampler.resample(self).__finalize__(self)
  File "C:\Anaconda\lib\site-packages\pandas\tseries\resample.py", line 105, in resample
    raise TypeError('Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex')
TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex
有什么好的建议吗?