函数coinT()
使用ADF测试和Hurst指数测试两个时间序列是否静止。时间序列存储在1511x6 CSV文件中,但是为了测试,函数stock()
仅返回第5列的向量。共有50个文件。似乎该程序使用了太多内存,因为它在运行约30秒后使PC崩溃。它适用于15个文件,但在较大的集合(> 50)上崩溃。
有人可以帮我找出使用这么多内存的内容吗?我尝试将计算分成多个函数并删除对象,但它没有多大帮助。
import numpy as np
import pandas as pd
import statsmodels.tsa.stattools as ts
import csv
import timeit
from numpy import log, polyfit, sqrt, std, subtract
from pandas.stats.api import ols
import os
src = 'C:/Users/PC/Desktop/Magistr/Ibpython/testing/'
filenames = next(os.walk(src))[2] #load all stock file names into array
cointegratedPairs = []
def hurst(ts):
"""Returns the Hurst Exponent of the time series vector ts
H<0.5 - The time series is mean reverting
H=0.5 - The time series is a Geometric Brownian Motion
H>0.5 - The time series is trending"""
# Create the range of lag values
lags = range(2, 100)
# Calculate the array of the variances of the lagged differences
tau = [sqrt(std(subtract(ts[lag:], ts[:-lag]))) for lag in lags]
# Use a linear fit to estimate the Hurst Exponent
poly = polyfit(log(lags), log(tau), 1)
del lags
del tau
# Return the Hurst exponent from the polyfit output
return poly[0]*2.0
#Convert file into an array
def stock(filename):
#read file into array and get it's length
delimiter = ","
with open(src + filename,'r') as dest_f:
data_iter = csv.reader(dest_f,
delimiter = delimiter,
quotechar = '"')
data = [data for data in data_iter]
data_array = np.asarray(data)[:,5]
return data_array
del data
del data_array
#Check if two time series are cointegrated
def coinTest(itemX, itemY):
indVar = map(float, stock(itemX)[0:1000]) #2009.05.22 - 2013.05.14
depVar = map(float, stock(itemY)[0:1000]) #2009.05.22 - 2013.05.14
#Calculate optimal hedge ratio "beta"
df = pd.DataFrame()
df[itemX] = indVar
df[itemY] = depVar
res = ols(y=df[itemY], x=df[itemX])
beta_hr = res.beta.x
alpha = res.beta.intercept
df["res"] = df[itemY] - beta_hr*df[itemX] - alpha
#Calculate the CADF test on the residuals
cadf = ts.adfuller(df["res"])
#Reject the null hypothesis at 1% confidence level
if cadf[4]['1%'] > cadf[0]:
#Hurst exponent test if residuals are mean reverting
if hurst(df["res"]) < 0.4:
cointegratedPairs.append((itemY,itemX))
del indVar
del depVar
del df[itemX]
del df[itemY]
del df["res"]
del cadf
#Main function
def coinT():
limit = 0
TotalPairs = 0
for itemX in filenames:
for itemY in filenames[limit:]:
TotalPairs +=1
if itemX == itemY:
next
else:
coinTest(itemX, itemY)
limit +=1