# Sample data (looks like dput() output): 8 rows for two users (UserID 42 and
# 95). Each row has at most one non-NA TotalSpend_* value, and TotalSpend_D is
# NA in every row.
# The answers below refer to this object as `df`, so bind it first
# (df <- structure(...)) before running them.
structure(list(UserID = c(42L, 42L, 42L, 95L, 95L, 95L, 95L,
95L), TotalSpend_A = c(NA, NA, NA, NA, 177.12, NA, NA, NA),
TotalSpend_B = c(NA, 40.78, NA, NA, NA, NA, 62.87, NA),
TotalSpend_C = c(NA, NA, 6.74, NA, NA, NA, NA, 96.91), TotalSpend_D = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), TotalSpend_E = c(NA, NA, NA, NA, NA, 1.1, NA,
NA)), .Names = c("UserID", "TotalSpend_A", "TotalSpend_B",
"TotalSpend_C", "TotalSpend_D", "TotalSpend_E"), class = c("data.table",
"data.frame"), row.names = c(NA, -8L))
如果我有上面这样的数据,并且希望每个 UserID 只占一行而不是多行,应该如何把这些行合并起来?
答案 0 :(得分:2)
使用 dplyr:
library(dplyr)

# Collapse the rows of each UserID into a single row by summing every other
# column. na.rm = TRUE is forwarded to sum(), so NAs are skipped and a column
# with no non-NA value in a group comes out as 0.
# The function is passed directly rather than wrapped in funs(), which has
# been deprecated since dplyr 0.8.0.
df %>%
  group_by(UserID) %>%
  summarise_all(sum, na.rm = TRUE)
# A tibble: 2 x 6
UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 42 0.00 40.78 6.74 0 0.0
2 95 177.12 62.87 96.91 0 1.1
Base R:
# Sum every non-grouping column within each UserID.
# na.action = "na.pass" keeps rows that contain NA (the default, na.omit,
# would drop them before aggregation); na.rm = TRUE is forwarded to sum() so
# those NAs are ignored when adding up.
aggregate(
  . ~ UserID,
  data = df,
  FUN = sum,
  na.rm = TRUE,
  na.action = "na.pass"
)
UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
1 42 0.00 40.78 6.74 0 0.0
2 95 177.12 62.87 96.91 0 1.1
data.table:
# setDT() converts df to a data.table by reference, so DT and df refer to the
# same object afterwards.
DT <- data.table::setDT(df)

# Per-UserID mean of every remaining column, ignoring NAs. A column with no
# non-NA value in a group yields NaN here (sum() would give 0 instead).
DT[
  ,
  lapply(.SD, mean, na.rm = TRUE),
  by = "UserID"
]
UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
1: 42 NaN 40.78 6.74 NaN NaN
2: 95 177.12 62.87 96.91 NaN 1.1