以下代码来自《利用 Python 进行数据分析》(Python for Data Analysis)第 11 章“分组变换和分析”(group transforms and analysis)。各个库的版本已在导入语句后的注释中注明。
# -*- coding: utf-8 -*-
""" Created on Sun Jun 4 13:33:47 2017
"Python for Data Analysis",chp 11,group transforms and analysis.
"""
import numpy as np # np.__version__'1.12.1'
import pandas as pd # pd.__version__ '0.20.2'
import random; random.seed(a=0,version=2)
import statsmodels.api as sm # statsmodels.__version__ '0.8.0'
import string
# Generate N random ticker symbols.
N = 1000


def rands(n):
    """Return a random string of ``n`` uppercase ASCII letters.

    Letters are drawn one at a time with ``random.choice``, so the output
    depends on the state of the ``random`` module's generator.
    """
    choices = string.ascii_uppercase
    return ''.join(random.choice(choices) for _ in range(n))


# N five-letter tickers, stored as a NumPy array.
tickers = np.array([rands(5) for _ in range(N)])
# Synthetic factor data for the first M tickers: Gaussian noise with
# standard deviation 1/200 around a fixed level per column.
M = 500
df = pd.DataFrame(
    data={
        'Momentum': 0.03 + np.random.randn(M) / 200,
        'Value': 0.08 + np.random.randn(M) / 200,
        'ShortInterest': -0.02 + np.random.randn(M) / 200,
    },
    index=tickers[:M],
)

# Assign each of the N tickers to one of the industries at random.
ind_names = np.array(['Financial', 'Tech'])
sampler = np.random.randint(0, len(ind_names), N, dtype='l')
industries = pd.Series(ind_names.take(sampler), index=tickers, name='industry')
#%% factor analysis
# Three uniform [0, 1) factor series of length 1000, unpacked row by row.
fac1, fac2, fac3 = np.random.rand(3, 1000)

# Tickers in shuffled order; they label both the factors and the portfolio.
ticker_subset = tickers[np.random.permutation(N)[:1000]]

factors = pd.DataFrame(
    {'f1': fac1, 'f2': fac2, 'f3': fac3},
    index=ticker_subset,
)

# Portfolio values: a fixed linear mix of the factors plus uniform noise.
port = pd.Series(
    fac1 * 0.7 - fac2 * 1.2 + fac3 * 0.3 + np.random.rand(1000),
    index=ticker_subset,
)

# Group the portfolio values by the industry label of each ticker.
by_ind = port.groupby(industries)
这部分代码来自书中,但 pd.ols 已被弃用,并且已从 pandas 中移除。
#%% replacement for pd.ols
# pd.ols was deprecated and then removed from pandas; calling it raises
# "AttributeError: module 'pandas' has no attribute 'ols'". The regression is
# therefore solved directly with NumPy's least-squares routine.
def beta_exposure(chuck, factors=None):
    """Regress one group of values on the factors and return the betas.

    Parameters
    ----------
    chuck : pandas.Series
        Dependent variable for one group.
    factors : pandas.DataFrame
        Explanatory variables; rows are matched to ``chuck`` by index label.

    Returns
    -------
    pandas.Series
        Least-squares coefficients: one entry per column of ``factors``
        followed by ``'intercept'``, the layout ``pd.ols(...).beta`` used.

    Raises
    ------
    ValueError
        If ``factors`` is not supplied.
    """
    if factors is None:
        raise ValueError("factors must be provided")

    # Keep only the labels present in both inputs so that every y value is
    # paired with its own factor row, whatever the ordering of either input.
    common = chuck.index.intersection(factors.index)
    y = chuck.loc[common]
    x = factors.loc[common]

    # Drop observations with a missing value on either side of the equation.
    complete = y.notnull() & x.notnull().all(axis=1)
    y = y[complete]
    x = x[complete]

    # Append a constant column so the fit includes an intercept term.
    design = x.assign(intercept=1.0)
    coefs = np.linalg.lstsq(design.values, y.values, rcond=None)[0]
    return pd.Series(coefs, index=design.columns)
# Run the regression separately within each industry group, then unstack the
# result for display.
exposures_pd = by_ind.apply(beta_exposure, factors=factors)
print('\nexposures_pd\n', exposures_pd.unstack())
我想改用 sm.OLS,但在为 x 选取与 y 对应的行时遇到了麻烦。我应该如何处理?
#%% use sm.OLS, which is not shown in the book.
def exposure(chuck, factors):
    """Fit an OLS regression of ``chuck`` on factor columns f1, f2 and f3.

    Parameters
    ----------
    chuck : pandas.Series
        Dependent variable; its index labels select the factor rows.
    factors : pandas.DataFrame
        Must contain the columns ``'f1'``, ``'f2'`` and ``'f3'`` and a row
        for every label in ``chuck.index``.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(y, x).fit()``.
    """
    y = chuck
    # Select the factor rows by chuck's own labels. Slicing the first
    # len(chuck) rows only matched the row count and paired y with unrelated
    # rows whenever chuck was a subset or was ordered differently.
    x = factors.loc[chuck.index, ['f1', 'f2', 'f3']]
    print(x[:5])
    print(x.shape)
    # Fit once and reuse the result for the printed summary and the return.
    sx = sm.OLS(y, x).fit()
    print(sx.summary())
    return sx


exposures_sm = exposure(port, factors)
答案 0 :(得分:0)
# Put the regressors and the response in one frame so that a single groupby
# keeps them row-aligned inside every group. A copy is used so that `factors`
# itself does not gain a 'port' column.
factors_data = factors.copy()
factors_data['port'] = port


def group_ols(fts):
    """Regress ``port`` on ``f1``, ``f2`` and ``f3`` by OLS within each group.

    Parameters
    ----------
    fts : iterable of (key, pandas.DataFrame)
        For example a ``DataFrame.groupby`` object; every frame must contain
        the columns ``'port'``, ``'f1'``, ``'f2'`` and ``'f3'``.

    Returns
    -------
    list of (key, summary)
        One tuple per group holding the group key and the summary of the fit.
    """
    results = []
    for ind, ft in fts:
        y = ft.loc[:, 'port']
        # NOTE(review): no constant column is added, so the model is fitted
        # without an intercept - confirm that this is intended.
        x = ft.loc[:, ['f1', 'f2', 'f3']]
        result = sm.OLS(y, x).fit()
        results.append((ind, result.summary()))
    return results


exposures_sm = group_ols(factors_data.groupby(industries))
exposures_sm
结果是这样的。
[('Financial', <class 'statsmodels.iolib.summary.Summary'>
"""
OLS Regression Results
==============================================================================
Dep. Variable: port R-squared: 0.746
Model: OLS Adj. R-squared: 0.744
Method: Least Squares F-statistic: 482.4
Date: Thu, 29 Jun 2017 Prob (F-statistic): 2.37e-146
Time: 17:13:34 Log-Likelihood: -134.55
No. Observations: 497 AIC: 275.1
Df Residuals: 494 BIC: 287.7
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
f1 1.0231 0.043 23.894 0.000 0.939 1.107
f2 -0.9639 0.042 -23.146 0.000 -1.046 -0.882
f3 0.6397 0.042 15.391 0.000 0.558 0.721
==============================================================================
Omnibus: 34.466 Durbin-Watson: 1.916
Prob(Omnibus): 0.000 Jarque-Bera (JB): 12.724
Skew: -0.063 Prob(JB): 0.00173
Kurtosis: 2.226 Cond. No. 3.24
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
"""), ('Tech', <class 'statsmodels.iolib.summary.Summary'>
"""
OLS Regression Results
==============================================================================
Dep. Variable: port R-squared: 0.738
Model: OLS Adj. R-squared: 0.736
Method: Least Squares F-statistic: 468.9
Date: Thu, 29 Jun 2017 Prob (F-statistic): 7.30e-145
Time: 17:13:34 Log-Likelihood: -172.76
No. Observations: 503 AIC: 351.5
Df Residuals: 500 BIC: 364.2
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
f1 1.0530 0.045 23.525 0.000 0.965 1.141
f2 -0.8811 0.045 -19.742 0.000 -0.969 -0.793
f3 0.5762 0.046 12.538 0.000 0.486 0.667
==============================================================================
Omnibus: 45.191 Durbin-Watson: 2.013
Prob(Omnibus): 0.000 Jarque-Bera (JB): 15.547
Skew: -0.123 Prob(JB): 0.000421
Kurtosis: 2.175 Cond. No. 3.29
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
""")]