我有一个很大的数据框(data frame),当我运行下面的代码时,第5个tibble中得到的结果是NA(此处dput给出的样本只有100个观测值)。当我将代码应用于包含100,000个观测值的完整数据集时,得到的所有tibble都是NA。
我找不到导致某些tibble返回NA的原因。
代码:
library(tidyquant)
# Nest the price data by `mod_year`, keep only the cusip/date_ret/ret columns
# of each nested tibble, and call tq_portfolio() on every per-year subset.
# The results are stored in the `portfolio_returns` list-column.
nested_portfolio_prices <- port_prices %>%
  group_by(mod_year) %>%
  nest(.key = "data") %>%
  mutate(
    portfolio_data = map(
      data,
      function(year_df) select(year_df, cusip, date_ret, ret)
    ),
    portfolio_returns = map(
      portfolio_data,
      function(year_df) {
        tq_portfolio(
          data = year_df,
          assets_col = cusip,
          returns_col = ret,
          col_rename = "returns"
        )
      }
    )
  )

# Print the list of per-year tq_portfolio() results.
nested_portfolio_prices$portfolio_returns
数据:
# Reproducible sample of the input data (dput() output, 100 rows).
# Columns:
#   mod_year - character period label of the form "July_YYYY_June_YYYY"
#   cusip    - character identifier (presumably the security's CUSIP)
#   date_ret - Date, written as numeric day counts with class "Date";
#              contains an NA entry
#   ret      - numeric return values; contains several NA entries
port_prices <- structure(list(mod_year = c("July_2016_June_2017", "July_2006_June_2007",
"July_2015_June_2016", "July_2007_June_2008", "July_2011_June_2012",
"July_2015_June_2016", "July_2010_June_2011", "July_2007_June_2008",
"July_2016_June_2017", "July_2007_June_2008", "July_2016_June_2017",
"July_2012_June_2013", "July_2017_June_2018", "July_2006_June_2007",
"July_2010_June_2011", "July_2012_June_2013", "July_2011_June_2012",
"July_2014_June_2015", "July_2013_June_2014", "July_2012_June_2013",
"July_2010_June_2011", "July_2015_June_2016", "July_2009_June_2010",
"July_2011_June_2012", "July_2012_June_2013", "July_2011_June_2012",
"July_2011_June_2012", "July_2015_June_2016", "July_2012_June_2013",
"July_2009_June_2010", "July_2011_June_2012", "July_2014_June_2015",
"July_2008_June_2009", "July_2007_June_2008", "July_2017_June_2018",
"July_2014_June_2015", "July_2014_June_2015", "July_2011_June_2012",
"July_2014_June_2015", "July_2016_June_2017", "July_2015_June_2016",
"July_2016_June_2017", "July_2013_June_2014", "July_2008_June_2009",
"July_2009_June_2010", "July_2010_June_2011", "July_2014_June_2015",
"July_2008_June_2009", "July_2008_June_2009", "July_2010_June_2011",
"July_2016_June_2017", "July_2007_June_2008", "July_2010_June_2011",
"July_2013_June_2014", "July_2011_June_2012", "July_2014_June_2015",
"July_2006_June_2007", "July_2007_June_2008", "July_2007_June_2008",
"July_2011_June_2012", "July_2016_June_2017", "July_2006_June_2007",
"July_2008_June_2009", "July_2010_June_2011", "July_2010_June_2011",
"July_2006_June_2007", "July_2014_June_2015", "July_2008_June_2009",
"July_2007_June_2008", "July_2009_June_2010", "July_2011_June_2012",
"July_2007_June_2008", "July_2016_June_2017", "July_2007_June_2008",
"July_2015_June_2016", "July_2011_June_2012", "July_2006_June_2007",
"July_2010_June_2011", "July_2012_June_2013", "July_2016_June_2017",
"July_2014_June_2015", "July_2007_June_2008", "July_2007_June_2008",
"July_2009_June_2010", "July_2008_June_2009", "July_2012_June_2013",
"July_2016_June_2017", "July_2006_June_2007", "July_2010_June_2011",
"July_2007_June_2008", "July_2012_June_2013", "July_2009_June_2010",
"July_2014_June_2015", "July_2011_June_2012", "July_2011_June_2012",
"July_2007_June_2008", "July_2009_June_2010", "July_2007_June_2008",
"July_2013_June_2014", "July_2015_June_2016"), cusip = c("62671710",
"07373010", "12558180", "88731730", "74912110", "37555810", "20030N10",
"G2414010", "23102110", "92826C83", "10922N10", "24736170", "G4883310",
"50025510", "12673P10", "85590A40", "69076840", "88250810", "91301710",
"02209S10", "69351T10", "87150310", "58933Y10", "02376R10", "G6543110",
"26483E10", "26483E10", "74331510", "49456B10", "49456B10", "69076840",
"89364110", "74144T10", "G4779110", "92553P20", "12621E10", "12558180",
"G6543110", "12558180", "00737L10", "87150310", "01535110", "G6543110",
"03251110", "03760410", "14313010", "12621E10", "17737610", "00790310",
"98849810", "57978020", "G9782210", "20030N10", "08467070", "68268010",
"12621E10", "88033G40", "17275R10", "88731730", "02376R10", "38463710",
"85590A40", "G4883310", "G6543110", "50188920", "50025510", "06050510",
"74144T10", "G2414010", "69076840", "02376R10", "G9782210", "90781810",
"95810210", "58283910", "26483E10", "17123210", "69351T10", "68268010",
"22207020", "25667710", "G3223R10", "17275R10", "G7665A10", "G4779110",
"71270410", "71708110", "58933Y10", "80589M10", "17290810", "68268010",
"92220P10", "40414L10", "69076840", "G6543110", "88731730", "28102010",
"G2414010", "56357140", "87612E10"), date_ret = structure(c(17381,
13833, 17242, 14372, NA, 17108, 15365, 14071, 17609, 14042, 17452,
16111, 17876, 13892, 15177, 15952, 15621, 16799, 16505, 15889,
15415, 17268, 14908, 15547, 15987, 15637, 15796, 17106, 15860,
15093, 15812, 16618, 14575, 14327, 17767, 16820, 16626, 15811,
16720, 17640, 17176, 17374, 16440, 14487, 14886, 15287, 16826,
14649, 14587, 15476, 17570, 14250, 15328, 16342, 15663, 16660,
13724, 14078, 14166, 15497, 17569, 13805, 14431, 15286, 15385,
13766, 16727, 14592, 14106, 15022, 15838, 14264, 17499, 14104,
17235, 15778, 13952, 15176, 15947, 17563, 16672, 14181, 14039,
14945, 14732, 15889, 17406, 13740, 15352, 14167, 15957, 14903,
16772, 15785, 15796, 14165, 15068, 14224, 16349, 17224), class = "Date"),
ret = c(0.00155759381595999, -0.000875105615705252, -0.000237129832385108,
0.0185934118926525, NA, -0.0103434007614851, 0.00727406656369567,
-0.0025189493317157, -0.0182075817137957, 0.0234113670885563,
-0.000496421940624714, -0.0139020709320903, -0.0206297431141138,
0.0122618675231934, 0.00596422469243407, 0.0219557154923677,
-0.0108856670558453, -0.0150824375450611, 0.00800677016377449,
0.00084609683835879, 0.00461317086592317, -0.011513170786202,
-0.00935324653983116, NA, 0.00160722085274756, -0.0130445640534163,
0.00131501024588943, -0.00761662237346172, -0.0112096015363932,
-0.0086715230718255, -0.034984327852726, 0.00118677981663495,
-0.0350377112627029, -0.0209561642259359, 0, -0.00801474507898092,
0.0129484310746193, 0.0447073318064213, -0.000238714463193901,
0, 0.00560221634805202, 0.0217440351843834, -0.070071280002594,
-0.0327479131519795, -0.0331215336918831, -0.0371998772025108,
0.0267579536885023, 0.0256470665335655, 0.00461360579356551,
0.0054472079500556, -0.0126631893217564, -0.0348387062549591,
0.0231795627623796, -0.0025237298104912, 0.00525627844035625,
0.0116085726767778, -0.0360360853374004, 0.00650554755702615,
0.0086538614705205, NA, 0.0095509709790349, -0.0313121154904365,
-0.0120284967124462, 0.0137202274054289, -0.000314373755827546,
-0.0146458493545651, 0.0012406047899276, -0.0023971782065928,
0.0274906530976296, 0.00706938654184341, NA, -0.0162822101265192,
0.0404461212456226, -0.0207553151994944, 0.000456162786576897,
0.00345336413010955, -0.0132903717458248, 0.0130105027928948,
-0.012478431686759, -0.0382457934319973, -0.0125886546447873,
-0.0148753765970469, -0.00414471002295613, 0.0312298554927111,
0.0289345514029264, 0, 0.00239598192274571, -0.00300423637963831,
-0.00271675200201571, -0.0669045522809029, 0.0124804880470037,
0.000978274154476821, -0.0123838623985648, -0.028199590742588,
-0.00760159641504288, 0.131664857268333, -0.00431848457083106,
-0.0571128614246845, 0.00217387988232076, 0.00601415615528822
)), row.names = c(NA, -100L), class = "data.frame")
编辑:
运行以下内容:
# Same nesting step as before but without the tq_portfolio() call, so the
# per-year inputs (cusip, date_ret, ret) can be inspected directly.
nested_portfolio_prices <- port_prices %>%
  group_by(mod_year) %>%
  nest(.key = "data") %>%
  mutate(
    portfolio_data = map(
      data,
      function(year_df) select(year_df, cusip, date_ret, ret)
    )
  )

# Print the list of per-year input tibbles.
nested_portfolio_prices$portfolio_data
输出:
# A tibble: 13 x 3
cusip date_ret ret
<chr> <date> <dbl>
1 74912110 NA NA
2 69076840 2012-10-08 -0.0109
3 02376R10 2012-07-26 NA
4 26483E10 2012-10-24 -0.0130
5 26483E10 2013-04-01 0.00132
6 69076840 2013-04-17 -0.0350
7 G6543110 2013-04-16 0.0447
8 68268010 2012-11-19 0.00526
9 02376R10 2012-06-06 NA
10 02376R10 2013-05-13 NA
11 26483E10 2013-03-14 0.00345
12 69076840 2013-03-21 -0.0282
13 G6543110 2013-04-01 -0.00760
不过,根据warnings()的输出,tidyquant包中的tq_portfolio应该已经将这些NA转换为0。这似乎是唯一一个包含NA的tibble。
运行以下代码(重新加上tq_portfolio):
# Full pipeline again: nest by `mod_year`, select the cusip/date_ret/ret
# columns of each nested tibble, then call tq_portfolio() on every subset.
nested_portfolio_prices <- port_prices %>%
  group_by(mod_year) %>%
  nest(.key = "data") %>%
  mutate(
    portfolio_data = map(
      data,
      function(year_df) select(year_df, cusip, date_ret, ret)
    ),
    portfolio_returns = map(
      portfolio_data,
      function(year_df) {
        tq_portfolio(
          data = year_df,
          assets_col = cusip,
          returns_col = ret,
          col_rename = "returns"
        )
      }
    )
  )
然后调用warnings(),输出如下(前5条警告):
Warning messages:
1: In PerformanceAnalytics::Return.portfolio(., weights = weights, ... :
NA's detected: filling NA's with zeros
2: In PerformanceAnalytics::Return.portfolio(., weights = weights, ... :
NA's detected: filling NA's with zeros
3: In PerformanceAnalytics::Return.portfolio(., weights = weights, ... :
NA's detected: filling NA's with zeros
4: In PerformanceAnalytics::Return.portfolio(., weights = weights, ... :
NA's detected: filling NA's with zeros
5: In xts(x = structure(list(`02376R10` = c(NA_real_, NA_real_, ... :
'order.by' cannot contain 'NA', 'NaN', or 'Inf'
第5条警告与其他警告不同。
编辑2:
运行:
# Print the number of NA cells in each nested `portfolio_data` tibble,
# one count per line, in list order.
# seq_along() replaces the hard-coded 1:12 so that every element is checked
# (and none is indexed out of bounds) however many groups the data contains.
for (i in seq_along(nested_portfolio_prices$portfolio_data)) {
  print(
    sum(is.na(nested_portfolio_prices$portfolio_data[[i]]))
  )
}
[1] 0
[1] 0
[1] 0
[1] 0
[1] 5
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
[1] 0
这显示了问题出在哪里(第5个tibble)。我想我只需要事先做一些数据清理,但我仍然不明白为什么第5个tibble中的returns没有被转换为0,而且它的结构也与其他的不同:
5: In xts(x = structure(list(`02376R10` = c(NA_real_, NA_real_, ... :
'order.by' cannot contain 'NA', 'NaN', or 'Inf'