我有数据集
# Sample data: nine monthly rows, 01.09.2007 through 01.05.2008 (day.month.year).
# Every column is a factor, including the numeric-looking account values, so
# they must be converted via as.character() before any arithmetic.
mydat <- data.frame(
  time = factor(
    c(
      "01.09.2007", "01.10.2007", "01.11.2007", "01.12.2007", "01.01.2008",
      "01.02.2008", "01.03.2008", "01.04.2008", "01.05.2008"
    ),
    levels = c(
      "01.01.2008", "01.02.2008", "01.03.2008", "01.04.2008", "01.05.2008",
      "01.09.2007", "01.10.2007", "01.11.2007", "01.12.2007"
    )
  ),
  account_a = factor(
    c(
      "8107.79491750336", "8020.91725643046", "8000.05688342733",
      "7904.51066973023", "8341.89192978947", "8250.23617172539",
      "8032.80824397166", "8111.57284600134", "7725.00848360078"
    ),
    levels = c(
      "7725.00848360078", "7904.51066973023", "8000.05688342733",
      "8020.91725643046", "8032.80824397166", "8107.79491750336",
      "8111.57284600134", "8250.23617172539", "8341.89192978947"
    )
  ),
  account_b = factor(
    c(
      "4897.0059129273", "4897.0059129273", "4897.01754483248",
      "4894.12141627531", "4893.31732735194", "4892.14279920565",
      "4889.84119615347", "4883.90444211266", "4878.34404162271"
    ),
    levels = c(
      "4878.34404162271", "4883.90444211266", "4889.84119615347",
      "4892.14279920565", "4893.31732735194", "4894.12141627531",
      "4897.0059129273", "4897.01754483248"
    )
  ),
  account_c = factor(
    c(
      "426.728323306974", "871.207211560508", "1026.6141549422",
      "695.680008726439", "895.998302762546", "978.620137201732",
      "362.833115212652", "510.785643175662", "238.489052868377"
    ),
    levels = c(
      "1026.6141549422", "238.489052868377", "362.833115212652",
      "426.728323306974", "510.785643175662", "695.680008726439",
      "871.207211560508", "895.998302762546", "978.620137201732"
    )
  )
)
变量: account_a account_b account_c
我需要按年份汇总
我这样做
# Asker's attempt, kept exactly as posted: both statements fail with the
# errors quoted in the console transcript below.
library(data.table)
# Fails inside charToDate(): as.Date() is given the day.month.year values of
# `time` without a `format`, so the strings are not recognised as dates.
# NOTE(review): `time[-1]` is a bare name, not a column of mydat -- presumably
# the remaining columns of mydat were intended; confirm.
DT <- data.table(date = as.Date(mydat$time), time[-1])
# Syntax error: the `list(` opened on the next line is never closed before the
# `]`, which is what produces "unexpected ']'".
# NOTE(review): mean() averages only its first argument, so even with balanced
# parentheses this call would not average the three account columns.
DT[, list(mean = mean(account_a,account_b,account_c),
by = year(date)]
我得到了错误
> DT <- data.table(date = as.Date(mydat$time), time[-1])
Error in charToDate(x) :
character string is not in a standard unambiguous format
> DT[, list(mean = mean(account_a,account_b,account_c),
+
+ by = year(date)]
Error: unexpected ']' in:
"
by = year(date)]"
如何解决?另外,单独使用 as.Date 时我也遇到同样的错误。如何按年份汇总这三个变量?
答案 0 :(得分:4)
可以做到:
library(data.table)

# Convert mydat to a data.table by reference (no copy is made).
setDT(mydat)

# Three chained steps:
#   1. add a `year` column parsed from the day.month.year text in `time`;
#   2. turn the factor-encoded account columns into numbers
#      (.SDcols = -1L drops the first column, `time`; `year` is the grouping
#      column and therefore not part of .SD);
#   3. average every account column within each year.
mydat[
  , year := year(as.Date(as.character(time), format = "%d.%m.%Y"))
][
  , lapply(.SD, function(col) as.numeric(as.character(col))),
  by = year,
  .SDcols = -1L
][
  , lapply(.SD, mean),
  by = year
]
在第一步中,我们先将 `time` 转换为 `Date`(需要指定格式,因为它不是标准的日期格式),然后提取 `year`;在第二步中,我们将所有 `account` 列转换为 `numeric`(它们原本是 factor);在最后一步中,我们按 `year` 分组,计算所需的 `mean`。
输出:
year account_a account_b account_c
1: 2007 8008.320 4896.288 755.0574
2: 2008 8092.304 4887.510 597.3453
以上是 `data.table` 的做法;您也可以用 `dplyr` 执行以下操作:
library(dplyr)

# Yearly mean of every account column:
#   - parse the day.month.year text in `time` and keep the four-digit year;
#   - convert the factor-encoded account columns to numbers (via character,
#     otherwise as.numeric() would return the internal factor codes);
#   - average each account column within each year.
mydat %>%
  mutate(year = format(as.Date(as.character(time), "%d.%m.%Y"), "%Y")) %>%
  mutate_at(
    vars(starts_with("account")),
    list(~ as.numeric(as.character(.)))
  ) %>%
  group_by(year) %>%
  # Pass the function itself. The previous `list(~ mean)` created a lambda
  # that returns the `mean` function object instead of calling it on the
  # column, so summarise could not produce a value.
  summarise_at(vars(starts_with("account")), mean)