Question

    # Sample data for the question, written as a structure() literal (it looks
    # like dput() output): 8 rows, classed as data.table/data.frame, with UserID
    # 42 appearing 3 times and 95 appearing 5 times. Each row carries at most one
    # non-NA TotalSpend_* value, and TotalSpend_D is NA in every row.
    # NOTE(review): the literal is not assigned to a name here; the answers below
    # appear to assume it is stored as `df` -- confirm before running them.
    structure(list(UserID = c(42L, 42L, 42L, 95L, 95L, 95L, 95L, 
95L), TotalSpend_A = c(NA, NA, NA, NA, 177.12, NA, NA, NA), 
    TotalSpend_B = c(NA, 40.78, NA, NA, NA, NA, 62.87, NA), 
    TotalSpend_C = c(NA, NA, 6.74, NA, NA, NA, NA, 96.91), TotalSpend_D = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), TotalSpend_E = c(NA, NA, NA, NA, NA, 1.1, NA, 
    NA)), .Names = c("UserID", "TotalSpend_A", "TotalSpend_B", 
"TotalSpend_C", "TotalSpend_D", "TotalSpend_E"), class = c("data.table", 
"data.frame"), row.names = c(NA, -8L))

我有上面这样的数据，希望每个 UserID 只保留一行而不是多行，应该如何把这些行合并起来？

Answer 1

使用dplyr：

library(dplyr)

# Collapse to one row per UserID by summing every spend column within the group.
# na.rm = TRUE drops the NA placeholders, so a column that is entirely NA for a
# user sums to 0 rather than NA.
# across() replaces summarise_all(funs(...)): funs() has been deprecated since
# dplyr 0.8.0 and the *_all() verbs are superseded (requires dplyr >= 1.0.0).
df %>%
  group_by(UserID) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))

# A tibble: 2 x 6
  UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
   <int>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
1     42         0.00        40.78         6.74            0          0.0
2     95       177.12        62.87        96.91            0          1.1

使用 base R：

# Sum every non-key column within each UserID.
# na.action = na.pass keeps rows that contain NA in the model frame (the
# default, na.omit, would discard them), and na.rm = TRUE is forwarded to sum()
# so the NAs are ignored when adding up.
aggregate(
  . ~ UserID,
  data = df,
  FUN = sum,
  na.rm = TRUE,
  na.action = na.pass
)

  UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
1     42         0.00        40.78         6.74            0          0.0
2     95       177.12        62.87        96.91            0          1.1

data.table：

# setDT() converts df to a data.table by reference (no copy is made), so DT and
# df refer to the same object afterwards.
DT <- data.table::setDT(df)

# Per-UserID mean of every other column, ignoring NAs. A column that is NA for
# all of a user's rows yields NaN (mean of zero values), not 0.
DT[, lapply(.SD, function(col) mean(col, na.rm = TRUE)), by = "UserID"]

   UserID TotalSpend_A TotalSpend_B TotalSpend_C TotalSpend_D TotalSpend_E
1:     42          NaN        40.78         6.74          NaN          NaN
2:     95       177.12        62.87        96.91          NaN          1.1

基于列组合行

1 个答案: