使用 pa 包中的 brinson 函数在 R 中进行归因分析:ret.bench / weight.bench 出错:数组不一致(non-conformable arrays)

时间:2019-02-07 19:09:26

标签: r quantmod portfolio attribution tidyquant

我正在尝试使用 pa 包中的 brinson 函数对股票投资组合进行归因分析。到目前为止,我只能用包内置的 jan 数据集让这个函数正常运行。也许是我不确定该函数到底需要什么样的输入,但据我所知,首先需要从多个来源收集所需的数据。

library(readxl)
library(pa)
library(tidyquant)
library(Quandl)
library(quantmod)
library(rvest)
library(tidyverse)
library(stringr)
# Download the SHARADAR/SF1 datatable from Quandl; paginate = TRUE requests
# paginated retrieval rather than a single page of results.
coreUS <- Quandl.datatable(
  "SHARADAR/SF1",
  paginate = TRUE
)

coreUS 包含在纽约证券交易所(NYSE)、NASDAQ 和 AMEX 上市的绝大多数公司。

# Load the portfolio holdings (the stocks the attribution is run on) from the
# Excel workbook: skip the first 3 rows and read at most 51 rows after that.
ndr_Raw <- read_xlsx(
  path = "NDRLS012019.xlsx",
  skip = 3,
  n_max = 51
)

它看起来像这样:

head(ndr_Raw)


    symbol name                  weights
  <chr>  <chr>                   <dbl>
1 MOS    Mosaic Company          0.015
2 NGVT   Ingevity Corporation    0.015
3 KWR    Quaker Chemical Corp    0.015
4 ENS    EnerSys                 0.015
5 FLS    Flowserve Corporation   0.015
6 DCI    Donaldson Company Inc   0.015

# Fetch the company listing table of each exchange in one pass (same order as
# before: NYSE, NASDAQ, AMEX), then expose each table under its own name.
exchange_listings <- lapply(
  c(NYSE = "NYSE", NASDAQ = "NASDAQ", AMEX = "AMEX"),
  tq_exchange
)
NYSE <- exchange_listings[["NYSE"]]
NASDAQ <- exchange_listings[["NASDAQ"]]
AMEX <- exchange_listings[["AMEX"]]

将 ndr_Raw 与 NYSE、NASDAQ 的上市公司列表合并到同一个数据帧中。

# Stack the NYSE and NASDAQ listings, then keep only the portfolio rows whose
# symbol appears in that combined listing (listing columns are attached).
listed_nyse_nasdaq <- rbind(NYSE, NASDAQ)
ndr <- inner_join(ndr_Raw, listed_nyse_nasdaq, by = "symbol")

获取所需的列

# Reduce the joined holdings to the identifier, classification and weight
# columns used downstream.
ndr2 <- ndr %>%
  select(symbol, sector, industry, weights)

获取 NDR 各股票的价格数据,并转换为月度收益率

# Download prices for every portfolio symbol from 2019-01-01 onward.
ndr_prices <- tq_get(ndr2$symbol, get = "stock.prices", from = "2019-01-01")

# Turn each symbol's adjusted prices into monthly period returns.
ndr_monthly <- ndr_prices %>%
  group_by(symbol) %>%
  tq_transmute(adjusted, periodReturn, period = "monthly")

# Attach the monthly returns to the holdings, matching on symbol.
ndr3 <- merge(ndr2, ndr_monthly, by = "symbol")

# Scrape the S&P 500 constituents table from slickcharts.
url <- "https://www.slickcharts.com/sp500"

# Parse the page, locate the table node by its absolute XPath, and convert the
# matched node(s) into a list of data frames.
sp500_page <- read_html(url)
sp500_nodes <- html_nodes(
  sp500_page,
  xpath = '/html/body/div/div[2]/div[1]/div/div/table'
)
sp500 <- html_table(sp500_nodes)

# Only the first matched table is used from here on.
sp500 <- sp500[[1]]

在 jan 示例数据集中,benchmark 列的合计是 1 而不是 100,所以……

# Rescale the scraped weights by a factor of 1/100 (percent -> fraction).
sp500[["Weight"]] <- sp500[["Weight"]] / 100

# Fetch price history from 2019-01-01 for every distinct Core US ticker.
core_tickers <- unique(coreUS$ticker)
coreUS_slim <- tq_get(core_tickers, get = "stock.prices", from = "2019-01-01")

# Drop rows containing NA, then convert each symbol's adjusted prices into
# monthly period returns.
coreUS_slim2 <- coreUS_slim %>%
  na.omit() %>%
  group_by(symbol) %>%
  tq_transmute(adjusted, periodReturn, period = "monthly")

# Rows of the Core US returns that belong to portfolio (NDR) symbols.
in_ndr <- coreUS_slim2$symbol %in% ndr3$symbol
coreNDR <- coreUS_slim2[in_ndr, ]

# Portfolio symbols for which Core US produced no return rows.
missingSymbols <- setdiff(ndr3$symbol, coreNDR$symbol)


    # Download price history for the portfolio symbols that Core US did not
    # cover and convert it to monthly returns. The result always has the
    # columns symbol, date, monthly.returns so it can be row-bound to the
    # main returns table.
    if (length(missingSymbols) == 0) {
      # Nothing is missing: build an empty table instead of calling tq_get()
      # with an empty symbol vector (the old single-symbol branch ran here and
      # then tried to assign a zero-length symbol column).
      missingSymbols_Data <- tibble(
        symbol = character(),
        date = as.Date(character()),
        monthly.returns = numeric()
      )
    } else if (length(missingSymbols) > 1) {
      # Several symbols: group by symbol so returns are computed per stock.
      missingSymbols_Data <- tq_get(missingSymbols, get = "stock.prices", from = "2019-01-01") %>%
        group_by(symbol) %>%
        tq_transmute(adjusted, periodReturn, period = "monthly")
    } else {
      # Exactly one symbol: compute returns ungrouped, then add the symbol
      # column by hand.
      missingSymbols_Data <- tq_get(missingSymbols, get = "stock.prices", from = "2019-01-01") %>%
        tq_transmute(adjusted, periodReturn, period = "monthly")
      missingSymbols_Data$symbol <- missingSymbols
      # Order columns by name rather than by position.
      missingSymbols_Data <- missingSymbols_Data[, c("symbol", "date", "monthly.returns")]
    }

# Append the separately downloaded returns to the Core US returns.
# (The former `coreUS_slim3 <- coreUS_slim2` line was a dead store that this
# assignment overwrote immediately.)
coreUS_slim3 <- bind_rows(coreUS_slim2, missingSymbols_Data)

# Rows of the combined returns whose symbol is an S&P 500 constituent.
spSubset <- coreUS_slim3[coreUS_slim3$symbol %in% sp500$Symbol, ]

# S&P 500 symbols that have no rows in the combined returns.
missingSP <- setdiff(sp500$Symbol, coreUS_slim3$symbol)

# tq_get() needs dashes instead of dots in these tickers; the dot is a
# literal character, so match it as a fixed string rather than a regex.
missingSP <- gsub(pattern = ".", replacement = "-", x = missingSP, fixed = TRUE)

# Download price history for the S&P 500 symbols that are still missing and
# convert it to monthly returns. The result always has the columns
# symbol, date, monthly.returns so it can be row-bound to the other tables.
if (length(missingSP) == 0) {
  # Nothing is missing: build an empty table instead of calling tq_get() with
  # an empty symbol vector (the old single-symbol branch ran here and then
  # tried to assign a zero-length symbol column).
  missingSP_Data <- tibble(
    symbol = character(),
    date = as.Date(character()),
    monthly.returns = numeric()
  )
} else if (length(missingSP) > 1) {
  # Several symbols: group by symbol so returns are computed per stock.
  missingSP_Data <- tq_get(missingSP, get = "stock.prices", from = "2019-01-01") %>%
    group_by(symbol) %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
} else {
  # Exactly one symbol: compute returns ungrouped, then add the symbol column
  # by hand.
  missingSP_Data <- tq_get(missingSP, get = "stock.prices", from = "2019-01-01") %>%
    tq_transmute(adjusted, periodReturn, period = "monthly")
  missingSP_Data$symbol <- missingSP
  # Order columns by name rather than by position.
  missingSP_Data <- missingSP_Data[, c("symbol", "date", "monthly.returns")]
}

    # Append the separately downloaded S&P returns to both the S&P subset and
    # the primary returns table.
    spsubset2 <- bind_rows(spSubset, missingSP_Data)
    coreUS_slim3 <- bind_rows(coreUS_slim3, missingSP_Data)

    # Portfolio weight per row: the weight recorded for that symbol in the
    # holdings table (ndr3$weights), and 0 for every symbol not held.
    # This replaces a hard-coded 0.015, so the result is unchanged when every
    # holding weighs 0.015 and correct when the workbook weights differ.
    holding_idx <- match(coreUS_slim3$symbol, ndr3$symbol)
    holding_weight <- ndr3$weights[holding_idx]
    holding_weight[is.na(holding_idx)] <- 0
    coreUS_slim3$portfolio <- holding_weight

    coreUS_slim4 <- coreUS_slim3

    # Replace dots with dashes so the S&P symbols use the same ticker format
    # that was passed to tq_get(); the dot is literal, hence fixed = TRUE.
    spsubset2$symbol <- gsub(pattern = ".", replacement = "-", x = spsubset2$symbol, fixed = TRUE)
    sp500$Symbol <- gsub(pattern = ".", replacement = "-", x = sp500$Symbol, fixed = TRUE)

# Month under analysis. Matching on year and month (not month alone) keeps
# January rows of later years out once the download window grows past 2019.
analysis_month <- "2019-01"

# All return rows for the analysis month.
coreUSjan <- subset(coreUS_slim4, format(date, "%Y-%m") == analysis_month)

# Analysis-month rows that belong to S&P 500 symbols.
coreSP <- coreUSjan[coreUSjan$symbol %in% spsubset2$symbol, ]

# Rename the S&P symbol column by name (not by position) so it can serve as
# the merge key; this is a no-op if it has already been renamed.
names(sp500)[names(sp500) == "Symbol"] <- "symbol"

# Attach the S&P weight to each S&P row and expose it as "benchmark".
coreSP2 <- merge(coreSP, sp500, by = "symbol")
coreSP2 <- coreSP2[, c("symbol", "date", "monthly.returns", "portfolio", "Weight")]
names(coreSP2)[names(coreSP2) == "Weight"] <- "benchmark"

# Bring the benchmark weight back onto the full analysis-month table. The key
# is (symbol, date) only: the floating-point return column is no longer used
# as a merge key. Symbols outside the S&P 500 get a benchmark weight of 0.
coreUSjan2 <- merge(
  coreUSjan,
  coreSP2[, c("symbol", "date", "benchmark")],
  by = c("symbol", "date"),
  all.x = TRUE
)
coreUSjan2$benchmark[is.na(coreUSjan2$benchmark)] <- 0

# Portfolio holdings restricted to the same analysis month.
ndrJan <- subset(ndr3, format(date, "%Y-%m") == analysis_month)

# Union of the three exchange listings.
NYSE_NASDAQ <- merge(NYSE, NASDAQ, all = TRUE)

NYSE_NASDAQ_AMEX <- merge(NYSE_NASDAQ, AMEX, all = TRUE)

# One sector/industry row per symbol. Without this, a symbol that appears more
# than once in the combined listings would duplicate its row in the merge
# below and its weights would be counted more than once.
sector_lookup <- NYSE_NASDAQ_AMEX[, c("symbol", "sector", "industry")]
sector_lookup <- sector_lookup[!duplicated(sector_lookup$symbol), ]

# Attach sector and industry by symbol, keeping every return row.
# NOTE(review): symbols with no listing match keep NA sector/industry here;
# confirm how brinson() should treat those rows.
coreUSjan3 <- merge(coreUSjan2, sector_lookup, by = "symbol", all.x = TRUE)
coreUSjan3 <- coreUSjan3[, c(
  "symbol", "date", "monthly.returns", "portfolio", "benchmark",
  "sector", "industry"
)]

我相信我的数据已经到了可以使用brinson函数的地步,但是当我这样做时会引发错误。

# Run the Brinson attribution on the prepared table, using sector as the
# category and the benchmark/portfolio weight and monthly return columns.
#
# The category column is coerced to a factor first so that every per-sector
# aggregate is computed over the same fixed set of levels.
# NOTE(review): presumably this is what triggers "non-conformable arrays" in
# ret.bench / weight.bench when sector is a plain character column (aggregates
# taken over different row subsets can then have different lengths) -- confirm
# against the pa::brinson source and the structure of the bundled `jan` data.
brinson_input <- coreUSjan3
brinson_input$sector <- factor(brinson_input$sector)

coreUSjan4 <- brinson(
  x = brinson_input,
  date.var = "date",
  cat.var = "sector",
  bench.weight = "benchmark",
  portfolio.weight = "portfolio",
  ret.var = "monthly.returns"
)

Error in ret.bench/weight.bench : non-conformable arrays

我使用的数据帧coreUSjan3看起来像这样:

> head(coreUSjan3)
  symbol       date monthly.returns portfolio  benchmark           sector                                         industry
1      A 2019-01-31      0.15771047         0 0.00105024    Capital Goods Biotechnology: Laboratory Analytical Instruments
2     AA 2019-01-31      0.13109756         0 0.00000000 Basic Industries                                         Aluminum
3    AAC 2019-01-31      0.48466258         0 0.00000000      Health Care                             Medical Specialities
4    AAL 2019-01-31      0.10129314         0 0.00065235   Transportation                    Air Freight/Delivery Services
5   AAMC 2019-01-31     -0.10563146         0 0.00000000          Finance                                      Real Estate
6   AAME 2019-01-31      0.08433735         0 0.00000000          Finance                                   Life Insurance

投资组合包含xlsx文件中列出的权重,它们在coreUSjan3中的对应股票的权重为0.015,其余值为0。 基准列包含标准普尔500的权重,不属于标准普尔的股票的权重为0。

0 个答案:

没有答案