我正在尝试构建一种算法,该算法首先构建一个大约100个符号的幂集,不包括空集和重复元素。 然后,对于功率组列表中的每个项目,它将读取数据文件并评估夏普比率(返回/风险)。 然后将结果附加到列表中,最后程序给出最佳的夏普比率符号组合。
以下是代码:
import pandas as pd
import numpy as np
import math
from itertools import chain, combinations
import operator
import time as t
#ASSUMPTION
#EQUAL ALLOCATION OF RESOURCES
t0 = t.time()
start_date = '2016-06-01'
end_date = '2017-08-18'
allocation = 170000
usesymbols=['PAEL','TPL','SING','DCL','POWER','FCCL','DGKC','LUCK',
'THCCL','PIOC','GWLC','CHCC','MLCF','FLYNG','EPCL',
'LOTCHEM','SPL','DOL','NRSL','AGL','GGL','ICL','AKZO','ICI',
'WAHN','BAPL','FFC','EFERT','FFBL','ENGRO','AHCL','FATIMA',
'EFOODS','QUICE','ASC','TREET','ZIL','FFL','CLOV',
'BGL','STCL','GGGL','TGL','GHGL','OGDC','POL','PPL','MARI',
'SSGC','SNGP','HTL','PSO','SHEL','APL','HASCOL','RPL','MERIT',
'GLAXO','SEARL','FEROZ','HINOON','ABOT','KEL','JPGL','EPQL',
'HUBC','PKGP','NCPL','LPL','KAPCO','TSPL','ATRL','BYCO','NRL','PRL',
'DWSM','SML','MZSM','IMSL','SKRS','HWQS','DSFL','TRG','PTC','TELE',
'WTL','MDTL','AVN','NETSOL','SYS','HUMNL','PAKD',
'ANL','CRTM','NML','NCL','GATM','CLCPS','GFIL','CHBL',
'DFSM','KOSM','AMTEX','HIRAT','NCML','CTM','HMIM',
'CWSM','RAVT','PIBTL','PICT','PNSC','ASL',
'DSL','ISL','CSAP','MUGHAL','DKL','ASTL','INIL']
cost_matrix = []
def data(symbols):
dates=pd.date_range(start_date,end_date)
df=pd.DataFrame(index=dates)
for symbol in symbols:
df_temp=pd.read_csv('/home/furqan/Desktop/python_data/{}.csv'.format(str(symbol)),usecols=['Date','Close'],
parse_dates=True,index_col='Date',na_values=['nan'])
df_temp = df_temp.rename(columns={'Close': symbol})
df=df.join(df_temp)
df=df.fillna(method='ffill')
df=df.fillna(method='bfill')
return df
def mat_alloc_auto(symbols):
n = len(symbols)
mat_alloc = np.zeros((n,n), dtype='float')
for i in range(0,n):
mat_alloc[i,i] = allocation / n
return mat_alloc
def compute_daily_returns(df):
"""Compute and return the daily return values."""
daily_returns=(df/df.shift(1))-1
df=df.fillna(value=0)
daily_returns=daily_returns[1:]
daily_returns = np.array(daily_returns)
return daily_returns
def port_eval(matrix_alloc,daily_return_matrix):
risk_free = 0
amount_matrix = [allocation]
return_mat = np.dot(daily_return_matrix,matrix_alloc)
return_mat = np.sum(return_mat, axis=1, keepdims=True)
return_mat = np.divide(return_mat,amount_matrix)
mat_average = np.mean(return_mat)
mat_std = np.std(return_mat, ddof=1)
sharpe_ratio = ((mat_average-risk_free)/mat_std) * math.sqrt(252)
return return_mat, sharpe_ratio, mat_average
def powerset(iterable):
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1))
power_set = list(powerset(usesymbols))
len_power = len(power_set)
sharpe = []
for j in range(0, len_power):
df_01 = data(power_set[j])
matrix_allocation = mat_alloc_auto(power_set[j])
daily_return_mat = compute_daily_returns(df_01)
return_matrix, sharpe_ratio_val, matrix_average = port_eval(matrix_allocation, daily_return_mat)
sharpe.append(sharpe_ratio_val)
max_index, max_value = max(enumerate(sharpe), key=operator.itemgetter(1))
print('Maximum sharpe ratio occurs from ',power_set[max_index], ' value = ', max_value)
t1=t.time()
print('exec time is ', t1-t0, 'seconds')
以上代码导致sigkill错误9。 经过研究,我明白这是因为进程分配了太多内存给操作系统带来了压力。 所以我尝试在HP Z600工作站上运行相同的代码但是它需要花费很多时间加上机器冻结。 我的问题是如何才能使我的代码更有效地获得即时结果。