我正在尝试使用 pa 包中的 brinson 函数对股票投资组合进行归因分析。到目前为止,我只能用内置的 jan 数据集让该函数正常运行。也许是我不确定该函数究竟需要什么样的输入,但据我所知,首先需要从多个来源收集所需的数据。
library(readxl)
library(pa)
library(tidyquant)
library(Quandl)
library(quantmod)
library(rvest)
library(tidyverse)
library(stringr)
# Pull the SHARADAR/SF1 datatable from Quandl with pagination enabled.
coreUS <- Quandl.datatable("SHARADAR/SF1", paginate = TRUE)
coreUS 包含在纽约证券交易所(NYSE)、NASDAQ 和 AMEX 上市的绝大多数公司。
# Portfolio holdings: this workbook lists the stocks the attribution is run on.
# The first 3 rows of the sheet are skipped and at most 51 rows are read.
ndr_Raw <- "NDRLS012019.xlsx" %>%
  read_xlsx(skip = 3, n_max = 51)
它看起来像这样:
head(ndr_Raw)
symbol name weights
<chr> <chr> <dbl>
1 MOS Mosaic Company 0.015
2 NGVT Ingevity Corporation 0.015
3 KWR Quaker Chemical Corp 0.015
4 ENS EnerSys 0.015
5 FLS Flowserve Corporation 0.015
6 DCI Donaldson Company Inc 0.015
# Fetch the listing table of each exchange through tidyquant.
NYSE <- "NYSE" %>% tq_exchange()
NASDAQ <- "NASDAQ" %>% tq_exchange()
AMEX <- "AMEX" %>% tq_exchange()
将 ndr_Raw 与 NYSE、NASDAQ 的上市公司列表合并到同一个数据框中。
# Keep only the portfolio rows whose symbol appears in the stacked NYSE and
# NASDAQ listings, attaching the listing columns to them.
ndr <- inner_join(
  x = ndr_Raw,
  y = rbind(NYSE, NASDAQ),
  by = "symbol"
)
获取所需的列
# Narrow the joined table down to the columns used downstream.
ndr2 <- ndr %>%
  select(symbol, sector, industry, weights)
获取 NDR 各股票的价格,并换算为月度收益率
# Download prices for every portfolio symbol from 2019-01-01 onwards, turn the
# adjusted prices into monthly returns per symbol, and attach those returns to
# the portfolio table by symbol.
ndr3 <- ndr2$symbol %>%
  tq_get(get = "stock.prices", from = "2019-01-01") %>%
  group_by(symbol) %>%
  tq_transmute(adjusted, periodReturn, period = "monthly") %>%
  merge(x = ndr2, y = ., by = "symbol")
# Download the S&P 500 constituent table from slickcharts.
url <- "https://www.slickcharts.com/sp500"
sp500 <- html_table(
  html_nodes(
    read_html(url),
    xpath = '/html/body/div/div[2]/div[1]/div/div/table'
  )
)
# The parsed result is indexed as a list; keep its first element.
sp500 <- sp500[[1]]
在 jan 示例中,benchmark 列的总和为 1,而不是 100,所以……
# Divide the benchmark weights by 100.
sp500[["Weight"]] <- sp500[["Weight"]] / 100
# Price history from 2019-01-01 onwards for every distinct Core US ticker.
coreUS_slim <- coreUS$ticker %>%
  unique() %>%
  tq_get(get = "stock.prices", from = "2019-01-01")
# Drop rows containing missing values, then convert the adjusted prices into
# monthly returns, one series per symbol.
coreUS_slim2 <- coreUS_slim %>%
  na.omit() %>%
  group_by(symbol) %>%
  tq_transmute(adjusted, periodReturn, period = "monthly")
# Rows of the Core US monthly returns that belong to NDR stocks.
coreNDR <- coreUS_slim2[is.element(coreUS_slim2$symbol, ndr3$symbol), ]
# NDR symbols that have no rows in the Core US returns.
missingSymbols <- setdiff(ndr3$symbol, coreNDR$symbol)
# Download price history for the NDR symbols missing from the Core US data and
# convert it to monthly returns. Three cases are handled:
#   * no missing symbols -> an empty table with the expected columns is built
#     and nothing is downloaded (previously this fell into the single-symbol
#     branch and called tq_get() on an empty vector);
#   * several symbols    -> returns are computed per symbol group;
#   * exactly one symbol -> the symbol column is attached by hand after the
#     ungrouped transmute.
if (length(missingSymbols) == 0) {
  missingSymbols_Data <- tibble(
    symbol = character(),
    date = as.Date(character()),
    monthly.returns = numeric()
  )
} else if (length(missingSymbols) > 1) {
  missingSymbols_Data <- tq_get(
    missingSymbols,
    get = "stock.prices",
    from = "2019-01-01"
  ) %>%
    group_by(symbol) %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
} else {
  missingSymbols_Data <- tq_get(
    missingSymbols,
    get = "stock.prices",
    from = "2019-01-01"
  ) %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
  missingSymbols_Data$symbol <- missingSymbols
  # Put symbol first; selecting by name does not depend on column positions.
  missingSymbols_Data <- missingSymbols_Data[
    , c("symbol", "date", "monthly.returns")
  ]
}
# Append the separately downloaded returns to the Core US monthly returns.
coreUS_slim3 <- coreUS_slim2 %>%
  bind_rows(missingSymbols_Data)
# Rows of the combined returns whose symbol is an S&P 500 constituent.
spSubset <- coreUS_slim3[is.element(coreUS_slim3$symbol, sp500$Symbol), ]
# S&P 500 constituents with no rows in the combined returns, with every "."
# replaced by "-" so the symbols are in the form handed to tq_get().
missingSP <- sp500$Symbol %>%
  setdiff(coreUS_slim3$symbol) %>%
  gsub(pattern = "\\.", replacement = "-", x = .)
# Download price history for the S&P 500 symbols missing from the combined
# data and convert it to monthly returns. Three cases are handled:
#   * no missing symbols -> an empty table with the expected columns is built
#     and nothing is downloaded (previously this fell into the single-symbol
#     branch and called tq_get() on an empty vector);
#   * several symbols    -> returns are computed per symbol group;
#   * exactly one symbol -> the symbol column is attached by hand after the
#     ungrouped transmute.
if (length(missingSP) == 0) {
  missingSP_Data <- tibble(
    symbol = character(),
    date = as.Date(character()),
    monthly.returns = numeric()
  )
} else if (length(missingSP) > 1) {
  missingSP_Data <- tq_get(
    missingSP,
    get = "stock.prices",
    from = "2019-01-01"
  ) %>%
    group_by(symbol) %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
} else {
  missingSP_Data <- tq_get(
    missingSP,
    get = "stock.prices",
    from = "2019-01-01"
  ) %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
  missingSP_Data$symbol <- missingSP
  # Put symbol first; selecting by name does not depend on column positions.
  missingSP_Data <- missingSP_Data[
    , c("symbol", "date", "monthly.returns")
  ]
}
# Append the newly downloaded S&P 500 returns to both the S&P subset and the
# combined data set.
spsubset2 <- spSubset %>%
  bind_rows(missingSP_Data)
coreUS_slim3 <- coreUS_slim3 %>%
  bind_rows(missingSP_Data)
# Start every row with a portfolio weight of zero.
coreUS_slim3$portfolio <- 0
# Rows whose symbol is in the NDR data set get a portfolio weight of 0.015.
# NOTE(review): 0.015 is hard-coded rather than read from the weights column;
# confirm that every holding really carries that weight.
coreUS_slim3$portfolio[coreUS_slim3$symbol %in% ndr3$symbol] <- 0.015
coreUS_slim4 <- coreUS_slim3
# Swap "." for "-" in both symbol columns so the S&P subset and the S&P
# constituent table use the same symbol spelling.
spsubset2$symbol <- gsub("\\.", "-", spsubset2$symbol)
sp500$Symbol <- gsub("\\.", "-", sp500$Symbol)
# Keep only the January 2019 rows. Prices are downloaded from 2019-01-01 with
# no end date, so matching on the month alone ("%m" == "01") would also select
# January of any later year present in the data; the year is pinned as well.
coreUSjan <- subset(coreUS_slim4, format(date, "%Y-%m") == "2019-01")
# January rows that belong to S&P 500 symbols.
coreSP <- coreUSjan[coreUSjan$symbol %in% spsubset2$symbol, ]
# Rename the third column of the S&P table so it can be merged on "symbol".
names(sp500)[3] <- "symbol"
# Attach the S&P weights to the January S&P rows.
coreSP2 <- merge(x = coreSP, y = sp500, by = "symbol")
coreSP2 <- coreSP2[
  , c("symbol", "date", "monthly.returns", "portfolio", "Weight")
]
# The weight column (fifth after the selection above) becomes "benchmark".
names(coreSP2)[names(coreSP2) == "Weight"] <- "benchmark"
# Full outer merge of the January data with the weighted S&P rows; rows
# without an S&P match come back with a missing benchmark weight.
coreUSjan2 <- merge(
  x = coreUSjan,
  y = coreSP2,
  by = c("symbol", "date", "monthly.returns", "portfolio"),
  all = TRUE
)
# Missing benchmark weights are set to zero.
coreUSjan2$benchmark <- replace(
  coreUSjan2$benchmark,
  is.na(coreUSjan2$benchmark),
  0
)
# January 2019 rows of the NDR returns. The year is matched together with the
# month because the price download has no end date, so a month-only filter
# would also select January of later years.
ndrJan <- subset(ndr3, format(date, "%Y-%m") == "2019-01")
# Combine the three exchange listings into one table (full outer merges on
# their shared columns).
NYSE_NASDAQ <- merge(NYSE, NASDAQ, all = TRUE)
NYSE_NASDAQ_AMEX <- merge(NYSE_NASDAQ, AMEX, all = TRUE)
# Attach the listing columns to the January data, keeping every January row
# even when its symbol has no listing match.
coreUSjan3 <- merge(coreUSjan2, NYSE_NASDAQ_AMEX, all.x = TRUE)
# Select the final columns by name rather than by position: positions depend
# on how many columns tq_exchange() returns, names do not.
coreUSjan3 <- coreUSjan3[
  ,
  c(
    "symbol", "date", "monthly.returns", "portfolio", "benchmark",
    "sector", "industry"
  )
]
我相信我的数据已经整理到可以使用 brinson 函数的程度,但调用时却报错。
# Run the Brinson attribution on the January table, using sector as the
# category, "benchmark" / "portfolio" as the two weight columns and the
# monthly returns as the return column.
coreUSjan4 <- brinson(
  x = coreUSjan3,
  date.var = "date",
  cat.var = "sector",
  bench.weight = "benchmark",
  portfolio.weight = "portfolio",
  ret.var = "monthly.returns"
)
Error in ret.bench/weight.bench : non-conformable arrays
我使用的数据框 coreUSjan3 看起来像这样:
> head(coreUSjan3)
symbol date monthly.returns portfolio benchmark sector industry
1 A 2019-01-31 0.15771047 0 0.00105024 Capital Goods Biotechnology: Laboratory Analytical Instruments
2 AA 2019-01-31 0.13109756 0 0.00000000 Basic Industries Aluminum
3 AAC 2019-01-31 0.48466258 0 0.00000000 Health Care Medical Specialities
4 AAL 2019-01-31 0.10129314 0 0.00065235 Transportation Air Freight/Delivery Services
5 AAMC 2019-01-31 -0.10563146 0 0.00000000 Finance Real Estate
6 AAME 2019-01-31 0.08433735 0 0.00000000 Finance Life Insurance
portfolio 列包含 xlsx 文件中列出的权重:coreUSjan3 中对应股票的权重为 0.015,其余股票为 0。
基准列包含标准普尔500的权重,不属于标准普尔的股票的权重为0。