我有一些数据:
dput(y)
structure(list(date = structure(c(1508803200, 1515456000, 1506384000,
1501027200, 1503964800, 1517356800, 1519776000, 1511740800, 1511827200,
1498521600, 1509408000, 1522627200, 1493078400, 1502064000, 1504051200,
1504569600, 1504742400, 1512432000, 1515628800, 1520467200, 1493164800,
1496102400, 1496620800, 1501459200, 1506988800, 1508889600, 1512864000,
1515542400, 1515974400, 1517443200, 1520553600, 1520812800, 1494806400,
1495756800, 1496188800, 1496707200, 1498608000, 1499040000, 1499299200,
1501113600, 1501200000, 1502150400, 1504656000, 1507075200, 1508976000,
1509235200, 1509494400, 1515369600, 1516752000, 1516838400, 1517961600,
1518393600, 1519689600, 1522368000, 1522540800), class = c("POSIXct",
"POSIXt")), word = c("hr", "hr", "hr", "hr", "hr", "hr", "hr",
"hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr",
"hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr",
"hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr",
"hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr", "hr",
"hr", "hr", "hr", "hr"), n = c(22L, 16L, 14L, 8L, 8L, 6L, 6L,
5L, 5L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
)), row.names = c(NA, -55L), vars = "date", drop = TRUE, class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), .Names = c("date", "word", "n"
), indices = list(12L, 20L, 32L, 33L, 21L, 34L, 22L, 35L, 9L,
36L, 37L, 38L, 3L, 39L, 40L, 23L, 13L, 41L, 4L, 14L, 15L,
42L, 16L, 2L, 24L, 43L, 0L, 25L, 44L, 45L, 10L, 46L, 7L,
8L, 17L, 26L, 47L, 1L, 27L, 18L, 28L, 48L, 49L, 5L, 29L,
50L, 51L, 52L, 6L, 19L, 30L, 31L, 53L, 54L, 11L), group_sizes = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
date = structure(c(1493078400, 1493164800, 1494806400, 1495756800,
1496102400, 1496188800, 1496620800, 1496707200, 1498521600,
1498608000, 1499040000, 1499299200, 1501027200, 1501113600,
1501200000, 1501459200, 1502064000, 1502150400, 1503964800,
1504051200, 1504569600, 1504656000, 1504742400, 1506384000,
1506988800, 1507075200, 1508803200, 1508889600, 1508976000,
1509235200, 1509408000, 1509494400, 1511740800, 1511827200,
1512432000, 1512864000, 1515369600, 1515456000, 1515542400,
1515628800, 1515974400, 1516752000, 1516838400, 1517356800,
1517443200, 1517961600, 1518393600, 1519689600, 1519776000,
1520467200, 1520553600, 1520812800, 1522368000, 1522540800,
1522627200), class = c("POSIXct", "POSIXt"))), row.names = c(NA,
-55L), vars = "date", drop = TRUE, class = "data.frame", .Names = "date"))
和代码:
# Split each date's comments into one word per row, discard stop words,
# then tally the remaining words within each date (most frequent first).
y <- y %>%
  group_by(date) %>%
  unnest_tokens(output = word, input = comments) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE)
# Decompose the series with STL, flag anomalies in the remainder with the
# IQR method, recompose, and plot the result.
# NOTE(review): `y` is still grouped by `date` here, and the tally column
# produced by count() is named `n`, not `count` — confirm both.
y %>%
  time_decompose(count, method = "stl") %>%
  anomalize(remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 5, alpha_dots = 0.25)
但我不断收到以下错误:
mutate_impl(.data,dots)出错:评估错误:prep_tbl_time()出错:找不到日期或日期时间列..
我按照 GitHub 上的教程操作,我的列和类型与示例一致。但由于某种原因,它找不到 date 列。
答案 0 :(得分:0)
这个问题很令人费解。我几周前做过那个教程,所以我把你的数据与 tidyverse_cran_downloads 做了比较,看不出有什么区别。它可能不喜欢带有时间部分的 POSIX 类日期,但我实在看不出这会有什么影响。我还按相同的布局构造了一份数据,运行正常。希望这多少能有所帮助。
# Small made-up data set: one row per date for the word "hr" with a
# numeric count, converted to a tbl_time indexed by `date`.
counts <- data.frame(
  date = as.Date(c(
    "2013-09-13", "2014-01-23", "2014-06-24", "2017-04-25",
    "2017-04-26", "2017-06-28", "2017-09-26"
  )),
  word = rep("hr", 7),
  count = c(2, 6, 2, 3, 2, 4, 4),
  stringsAsFactors = FALSE
) %>%
  tibbletime::as_tbl_time(index = date)

# Decompose, flag anomalies in the remainder, recompose, and plot.
counts %>%
  time_decompose(count, method = "stl") %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)
答案 1 :(得分:0)
我遇到了类似的问题;就我的情况而言,在 time_decompose 之前先调用 group_by 解决了问题:
# Group by `word` before decomposing; group_by() replaces the earlier
# grouping by `date`, which presumably is what hid the date column from
# time_decompose() — TODO confirm against anomalize internals.
# The tally column created by count() is named `n` (there is no `count`
# column in `y`), so `n` is the target to decompose.
y %>%
  group_by(word) %>%
  time_decompose(n, method = "stl") %>%
  anomalize(remainder, method = "iqr") %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 5, alpha_dots = 0.25)