如何在一个tibble中获取列表的子集

时间:2017-05-16 02:32:36

标签: r dplyr lubridate tidyverse purrr

我有几只股票的年度财务数据。我需要把它展开成月度数据;多亏了我之前提的一个问题所得到的回答,我已经有了一个解决方案,其做法是用mutate把`date`列变成一个日期列表:

library(tidyverse)
library(lubridate)

factors.subset.raw = structure(list(
    sec_id = c(1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1572L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L, 1676L), 
    metric = c("EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "EPS_GROWTH", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY", "ND_EQUITY"), 
    date = structure(c(9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044, 9464, 9829, 10193, 10560, 10925, 11291, 11656, 12020, 12384, 12752, 13117, 13482, 13847, 14211, 14578, 14943, 15308, 15674, 16038, 16402, 16769, 17135, 9342, 9708, 10073, 10438, 10802, 11200, 11565, 13756, 14120, 14487, 14852, 15217, 15583, 15947, 16311, 16678, 17044), 
    class = "Date"), value = c(0.250468, 0.091548, -0.100863, 0.058375, 0.24784, 0.178765, 0.099276, 0.25472, -0.033291, 0.124165, 0.050947, 0.243008, 0.1205, -0.239625, -0.231221, 0.365649, 0.163779, 0.024976, 0.08388, 0.154777, 0.016473, -0.272928, -0.018711, -0.162076, -0.599241, -4.071504, -0.37761, 1.694085, 0.045113, 0.329818, 0.199564, -0.616418, 1.164773, 0.877078, -0.325099, -0.294199, 0.272016, -0.706077, -2.57027, 4.500261, 4.734375, 4.090376, 3.322846, 3.640895, 4.645253, 4.783054, 3.946184, 3.847828, 4.077601, 4.778736, 5.453883, 5.14355, 5.084551, 3.370378, 3.076065, 2.812879, 2.87688, 2.430692, 3.029766, 3.062665, 3.349906, 0.396299, 0.60174, 0.527478, 1.048755, 1.136417, 0.668333, 0.523115, 0.259175, 0.164024, 0.118469, 0.061141, 0.096251, 0.346829, 0.401832, 0.300988, 0.344943, 0.432505)), 
    row.names = c(NA, -78L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("sec_id", "metric", "date", "value"))

# For every row, replace `date` with a list element holding a monthly sequence:
# it starts at the row's date rounded up to a month boundary and runs to the
# day before the next row's (rounded) date within the same sec_id/metric group.
# The last row of each group runs to today's date instead.
factors.subset.monthly <- factors.subset.raw %>%
    group_by(sec_id, metric) %>%
    mutate(
        date = ceiling_date(date, unit = "month"),
        # `lead()` looks one row ahead inside the group; `default` covers the
        # final row, which has no successor.
        date = map2(
            date,
            lead(date - 1, default = today()),
            function(from, to) seq(from, to, by = "month")
        )
    )

现在只需将%>% unnest() %>% mutate(date = date - 1)添加到上面就可以将我的年度数据转换为每月,所有日期都是月末。

当数据中存在较大的时间缺口时,我的问题就出现了。发生这种情况时,我希望最多只填充18个月。

我尝试在管道中增加一步来截断`date`列中的每个日期列表,但到目前为止还没能弄明白。例如,下面这段代码会报出大小不兼容(incompatible size)的错误:

# Attempt from the question: cap every list of dates at 18 entries.
# The question reports that this fails with an incompatible-size error.
factors.subset.monthly %>%
    # Number of dates stored in each list element of `date`.
    mutate(count.date = as.numeric(lapply(date, length))) %>%
    # Cap that count at 18.
    mutate(count.cutoff = ifelse(count.date <= 18, count.date, 18)) %>%
    # NOTE(review): `date[...]` subsets the list column itself (rows of the
    # group), not the dates inside each element, and `count.cutoff` is a
    # vector here -- this looks like the source of the size error; confirm.
    mutate(date = date[1:count.cutoff])

1 个答案:

答案 0 :(得分:1)

您需要使用`map`或`lapply`来迭代列表列,但之后您只需使用`head`将其限制为18个观察点(您还可以在创建`factors.subset.monthly`时就添加`head`,见本答案末尾):

library(tidyverse)
library(lubridate)

# Keep at most the first 18 dates of every element in the `date` list column.
df <- mutate(
    factors.subset.monthly,
    date = map(date, function(dates) head(dates, n = 18))
)

# Before truncation, at least one element holds more than 18 dates ...
any(lengths(factors.subset.monthly$date) > 18)
#> [1] TRUE
# ... and after truncation none does.
any(lengths(df$date) > 18)
#> [1] FALSE

您也可以在创建`factors.subset.monthly`时直接添加`head`:

factors.subset.monthly <- factors.subset.raw %>%
    group_by(sec_id, metric) %>%
    mutate(date = ceiling_date(date, 'month'),
           date = map2(date, lead(date - 1, default = today()),
                       ~head(seq(.x, .y, by = 'month'), 18)))

您还可以取目标日期与开始日期之后18个月这两者中的最小值,作为`seq`的`to`参数,但由于月份长度不规则,添加18个月会有些困难。