我希望拟合多个回归模型,并从每个模型中提取 beta 系数。
这里我有 3 家公司(BABA、GOOG、AMZN)和 1 个基准(SPY)。我想先 group_by(symbol),
然后让每家公司分别对基准做回归分析。
也就是说,第一个回归是 BABA 对 SPY,
第二个回归是 GOOG 对 SPY,
第三个回归是 AMZN 对 SPY,
所以每家公司都使用同一个固定的 SPY 作为基准。
我尝试了下面的代码,但没有成功:
# Attempt: split df by symbol and fit one linear model per group, storing the
# fitted model in a list-column named `mod`.
# NOTE(review): the df given in this post is in long format with the columns
# symbol, date, close and returns; it has no column called SPY, so `SPY ~ .`
# presumably cannot be resolved inside each group -- the SPY returns first
# have to be placed next to each stock's returns (e.g. by joining on date).
df %>%
group_by(symbol) %>%
do(mod = lm(SPY ~ ., data = .))
数据:
df <- structure(list(symbol = c("BABA", "BABA", "BABA", "BABA", "BABA",
"BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA",
"BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA",
"BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA",
"BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "BABA", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN",
"AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN",
"AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN",
"AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN",
"AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "AMZN", "SPY",
"SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY",
"SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY",
"SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY",
"SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY", "SPY"), date = structure(c(16708,
16738, 16769, 16800, 16829, 16860, 16891, 16920, 16952, 16982,
17011, 17044, 17074, 17105, 17135, 17165, 17197, 17225, 17256,
17284, 17317, 17347, 17378, 17409, 17438, 17470, 17500, 17529,
17562, 17590, 17619, 17651, 17682, 17711, 17743, 17774, 16708,
16738, 16769, 16800, 16829, 16860, 16891, 16920, 16952, 16982,
17011, 17044, 17074, 17105, 17135, 17165, 17197, 17225, 17256,
17284, 17317, 17347, 17378, 17409, 17438, 17470, 17500, 17529,
17562, 17590, 17619, 17651, 17682, 17711, 17743, 17774, 16708,
16738, 16769, 16800, 16829, 16860, 16891, 16920, 16952, 16982,
17011, 17044, 17074, 17105, 17135, 17165, 17197, 17225, 17256,
17284, 17317, 17347, 17378, 17409, 17438, 17470, 17500, 17529,
17562, 17590, 17619, 17651, 17682, 17711, 17743, 17774, 16708,
16738, 16769, 16800, 16829, 16860, 16891, 16920, 16952, 16982,
17011, 17044, 17074, 17105, 17135, 17165, 17197, 17225, 17256,
17284, 17317, 17347, 17378, 17409, 17438, 17470, 17500, 17529,
17562, 17590, 17619, 17651, 17682, 17711, 17743, 17774), class = "Date"),
close = c(58.970001, 83.830002, 84.080002, 81.269997, 67.029999,
68.809998, 79.029999, 76.940002, 82, 79.529999, 82.480003,
97.190002, 105.790001, 101.690002, 94.019997, 87.809998,
101.309998, 102.900002, 107.830002, 115.5, 122.459999, 140.899994,
154.949997, 171.740005, 172.710007, 184.889999, 177.080002,
172.429993, 204.289993, 186.139999, 183.539993, 178.539993,
198.009995, 185.529999, 187.229996, 175.009995, 608.419983,
710.809998, 742.599976, 758.880005, 742.950012, 697.77002,
744.950012, 693.01001, 735.719971, 692.099976, 768.789978,
767.049988, 777.289978, 784.539978, 758.039978, 771.820007,
796.789978, 823.210022, 829.559998, 905.960022, 964.859985,
908.72998, 930.5, 939.330017, 959.109985, 1016.640015, 1021.409973,
1046.400024, 1169.939941, 1104.72998, 1031.790039, 1017.330017,
1084.98999, 1115.650024, 1217.26001, 1218.189941, 511.890015,
625.900024, 664.799988, 675.890015, 587, 552.52002, 593.640015,
659.590027, 722.789978, 715.619995, 758.809998, 769.159973,
837.309998, 789.820007, 750.570007, 749.869995, 823.47998,
845.039978, 886.539978, 924.98999, 994.619995, 968, 987.780029,
980.599976, 961.349976, 1105.280029, 1176.75, 1169.469971,
1450.890015, 1512.449951, 1447.339966, 1566.130005, 1629.619995,
1699.800049, 1777.439941, 2012.709961, 191.589996, 207.929993,
208.690002, 203.869995, 193.720001, 193.559998, 205.520004,
206.330002, 209.839996, 209.479996, 217.119995, 217.380005,
216.300003, 212.550003, 220.380005, 223.529999, 227.529999,
236.470001, 235.740005, 238.080002, 241.440002, 241.800003,
246.770004, 247.490005, 251.229996, 257.149994, 265.01001,
266.859985, 281.899994, 271.649994, 263.149994, 264.51001,
270.940002, 271.279999, 281.329987, 290.309998), returns = c(NA,
0.421570299786836, 0.00298222586228736, -0.0334206105275782,
-0.17521838963523, 0.0265552592354952, 0.14852494255268,
-0.0264456159236444, 0.0657655038792433, -0.0301219634146341,
0.0370929716722364, 0.178346247150355, 0.0884864576913991,
-0.0387560162703845, -0.0754253599090302, -0.0660497681147555,
0.153741035274821, 0.015694443109159, 0.047910591877345,
0.0711304633009282, 0.0602597316017315, 0.150579741552995,
0.0997161362547681, 0.108357588416087, 0.00564808414906004,
0.0705227925791236, -0.0422413166869019, -0.0262593683503574,
0.184770639061616, -0.0888442636541674, -0.0139680133983453,
-0.0272420191276787, 0.109051208487501, -0.0630271012329453,
0.00916292248780737, -0.0652673250070464, NA, 0.168288382796263,
0.0447235943352615, 0.0219230130974311, -0.020991451738144,
-0.0608116175654627, 0.0676153899532685, -0.0697228017495488,
0.06162964514755, -0.0592888554332855, 0.110807693482712,
-0.00226328392641983, 0.0133498339876124, 0.00932727837126435,
-0.0337777560648413, 0.0181784990237019, 0.0323520649549578,
0.0331581027993302, 0.00771367674141366, 0.0920970444382494,
0.0650138654793755, -0.0581742489818355, 0.0239565332707523,
0.00948954003224078, 0.0210575278571132, 0.0599827245047395,
0.00469188496382378, 0.0244662296830735, 0.118061844578092,
-0.0557378705647592, -0.0660251304124109, -0.0140145004830774,
0.0665073986507567, 0.0282583565586627, 0.0910769361485713,
0.000763954284508372, NA, 0.222723643085712, 0.0621504433749629,
0.0166817496994298, -0.131515502562943, -0.0587393185689948,
0.0744226335907248, 0.111094283292207, 0.0958170202897868,
-0.00991987052703713, 0.0603532647239684, 0.0136397451631891,
0.0886031871031852, -0.0567173342172369, -0.0496948667444936,
-0.000932640517835148, 0.0981636623558995, 0.0261815691014129,
0.0491101025755256, 0.0433708721029611, 0.0752764956948344,
-0.0267639853751381, 0.0204339142561984, -0.00726887848428048,
-0.0196308387427495, 0.149716603311175, 0.0646623200680305,
-0.00618655534310597, 0.240638965495934, 0.0424290851570854,
-0.0430493484805567, 0.0820747314318273, 0.0405394123076008,
0.043065287745196, 0.0456758970242859, 0.13236453990543,
NA, 0.0852862745505771, 0.00365511963442433, -0.0230964921836553,
-0.0497866005245156, -0.000825949820225325, 0.0617896575923709,
0.00394121245735279, 0.0170115541413121, -0.00171559286533729,
0.0364712580956894, 0.00119754055816013, -0.0049682674356365,
-0.0173370316596806, 0.0368383998564328, 0.0142934655074538,
0.0178946898308714, 0.0392915309598361, -0.00308705542738164,
0.00992617693377928, 0.0141129031072504, 0.00149105780739678,
0.0205541808864245, 0.00291770064565866, 0.0151116850153201,
0.0235640572155245, 0.0305658805498554, 0.00698077404698783,
0.0563591765172287, -0.0363604122673377, -0.0312902638974474,
0.00516821596431427, 0.0243090686813703, 0.00125487929980883,
0.037046549826919, 0.0319198500513918)), .Names = c("symbol",
"date", "close", "returns"), row.names = c(NA, -144L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = "symbol", indices = list(
72:107, 0:35, 36:71, 108:143), group_sizes = c(36L, 36L,
36L, 36L), biggest_group_size = 36L, labels = structure(list(
symbol = c("AMZN", "BABA", "GOOG", "SPY")), row.names = c(NA,
-4L), class = "data.frame", vars = "symbol", .Names = "symbol"))
编辑:
用下面两种方法得到的结果不一致:
这是基本模型(只针对 GOOG):
# Base model: slope of GOOG's monthly returns regressed on SPY's monthly
# returns over the same date window.
library(tidyquant)

symbols <- c("GOOG")
bench <- "SPY"
start_date_beta <- "2015-09-01"
end_date_beta <- "2018-09-01"

# Download prices for one ticker, collapse them to monthly periods and keep
# only the date and the closing price.
monthly_close <- function(ticker) {
  ticker %>%
    tq_get(
      get = "stock.prices",
      from = start_date_beta,
      to = end_date_beta
    ) %>%
    tq_transmute(
      select = open:volume,
      mutate_fun = to.period,
      period = "months"
    ) %>%
    select(date, close)
}

stock_returns <- monthly_close(symbols)
bench_returns <- monthly_close(bench)

# Put both closing-price series side by side, matched on date.
returns <- full_join(stock_returns, bench_returns, by = "date")
names(returns) <- c("date", "GOOG", "SPY")

# Period-over-period percentage change of each closing-price series.
returns$GOOGret <- Delt(returns$GOOG)
returns$SPYret <- Delt(returns$SPY)

# Second coefficient = slope of GOOG returns on SPY returns.
lm(returns$GOOGret ~ returns$SPYret)$coeff[[2]]
> lm(returns$GOOGret ~ returns$SPYret)$coeff[[2]]
[1] 1.412548
这是更复杂的模型(同时处理多只股票):
# Advanced model, data preparation: monthly returns for several stocks and for
# the SPY benchmark, stacked in long format (one row per symbol and date).
library(tidyquant)

symbols_beta <- c("BABA", "GOOG", "AMZN")
bench <- "SPY"
start_date_beta <- "2015-09-01"
end_date_beta <- "2018-09-01"

# Collapse a price table to monthly periods within each symbol and keep only
# symbol, date and closing price.
to_monthly_close <- function(prices) {
  prices %>%
    group_by(symbol) %>%
    tq_transmute(
      select = open:volume,
      mutate_fun = to.period,
      period = "months"
    ) %>%
    select(symbol, date, close)
}

stock_prices <- tq_get(
  symbols_beta,
  get = "stock.prices",
  from = start_date_beta,
  to = end_date_beta
)
stock_periods <- to_monthly_close(stock_prices)

bench_prices <- tq_get(
  bench,
  get = "stock.prices",
  from = start_date_beta,
  to = end_date_beta
)
# Label the benchmark rows so they can be grouped and stacked like the stocks.
bench_prices$symbol <- "SPY"
bench_periods <- to_monthly_close(bench_prices)

# Stack stocks and benchmark, then compute returns separately per symbol.
returns <- rbind(stock_periods, bench_periods) %>%
  group_by(symbol) %>%
  mutate(returns = Delt(close))

library(tidyverse)
returns <- ungroup(returns)
# For every symbol other than SPY: join SPY's returns with that symbol's
# returns on date, fit a linear model, and collect the second coefficient
# (the slope) into a named numeric vector.
# NOTE(review): the formula built here is `spy_returns ~ <symbol>`, i.e. SPY is
# the response and the stock is the predictor. The base model in this post fits
# `GOOGret ~ SPYret` (stock as response), so the two slopes are not the same
# quantity -- this is the likely reason the results differ; confirm, and swap
# the two sides of the formula if the slope of the stock on SPY is intended.
imap_dbl(unique(returns$symbol)[unique(returns$symbol) != "SPY"] %>% set_names(),
~ left_join(returns %>% filter(symbol=="SPY") %>%
select(date, spy_returns=returns),
returns %>% filter(symbol==.x) %>%
select(date, !!.y:=returns),
by="date") %>%
lm(paste("spy_returns ~ ", .y), data=.) %>%
coef() %>% .[2]
)
BABA GOOG AMZN
0.1533903 0.3290332 0.2306936
答案 0 :(得分:0)
您需要把 SPY 的收益率和另一只股票的收益率放在不同的列中。可以使用 purrr 包中的 imap 遍历所有非 SPY 的股票代码。例如,在下面的代码中,对每个非 SPY 的股票代码,我们用 left_join(按 date 连接)构造一个同时包含 SPY 收益率和该股票收益率的数据框,然后进行回归,并把每个回归的第二个系数(斜率)汇总为一个命名向量返回:
library(tidyverse)
# Compute the slope of each stock's returns on the SPY benchmark's returns
# (the stock's beta against SPY) and return the slopes as a named numeric
# vector, one element per non-SPY symbol.
#
# Input: `df`, a long-format table with the columns symbol, date and returns.

# Drop the grouping carried by df so filter()/select() act on the whole table.
df <- ungroup(df)

imap_dbl(
  set_names(setdiff(unique(df$symbol), "SPY")),
  ~ left_join(
    # Benchmark returns, one row per date.
    df %>%
      filter(symbol == "SPY") %>%
      select(date, spy_returns = returns),
    # Returns of the current stock. A fixed column name is used so the model
    # formula does not have to be pasted together from the ticker string,
    # which would break for tickers that are not syntactic R names.
    df %>%
      filter(symbol == .x) %>%
      select(date, stock_returns = returns),
    by = "date"
  ) %>%
    # The stock is the response and the benchmark is the predictor; the
    # reverse direction gives the slope of SPY on the stock instead.
    lm(stock_returns ~ spy_returns, data = .) %>%
    coef() %>%
    .[["spy_returns"]]
)

# Output originally posted with the reversed formula (spy_returns ~ <stock>).
# These are slopes of SPY on each stock, not the slopes computed above:
#      BABA      GOOG      AMZN
# 0.1533903 0.3290332 0.2306936