如何根据另一列中的值对各列取子集?

时间:2019-07-16 19:28:13

标签: r dataframe

已编辑:

我有一个数据框,其中的 `when` 列记录了某项特定评估是在哪个时间点进行的。该评估可能发生在不同的时间点(t1–t3),具体时间点因参与者而异。

数据框还包含每位参与者完成过的所有评估的数据(包括 `when` 列所指向的那次评估)。我只想保留 `when` 列所指示的那次评估的信息。例如,如果 `when` 为 1,我想保留 t1 那次评估的所有数据,并删除其他评估收集的数据。请注意,实际数据集中的变量比这个精简示例中的多得多,因此任何解决方案都不应依赖于逐个重复写出变量名。

这是我目前能想到的最好办法。这个办法的问题在于,必须对每个变量名都重复写一遍。

# For each row, pick the a1G measurement taken at the time point named in
# `when` (1 -> a1G_t1, 2 -> a1G_t2, 3 -> a1G_t3). Rows whose `when` matches
# none of the three conditions get NA, because case_when() has no fallback
# branch here.
# This still has to be written out once per variable stem (a1G, Stqo, ...),
# which is the limitation the question is asking how to remove.
df2 <- mutate(
  .data = df,
  a1G_when = case_when(
    when == 1 ~ a1G_t1,
    when == 2 ~ a1G_t2,
    when == 3 ~ a1G_t3
  )
)

# Starting data: one row per participant. `when` names the time point (1-3)
# of interest; every measure is stored once per time point as <stem>_t<k>.
# check.names = FALSE keeps the non-syntactic names (`2xWX_*`, `2IYnS_*`)
# exactly as written.
df <- data.frame(
  id = 1:10,
  when = c(1, 3, 2, 1, 2, 1, 3, 2, 3, 1),
  # time point 1
  a1G_t1 = c(0.78, 0.21, 0.04, 0.87, 0.08, 0.25, 0.9, 0.77, 0.51, 0.5),
  Stqo_t1 = c(0.68, 0.77, 0.09, 0.66, 0.94, 0.05, 0.97, 0.92, 1, 0.04),
  Twcdz_t1 = c(0.95, 0.41, 0.29, 0.54, 0.06, 0.45, 0.6, 0.24, 0.17, 0.55),
  Kgh_t1 = c(0.25, 0.86, 0.37, 0.34, 0.97, 0.75, 0.73, 0.68, 0.37, 0.66),
  `2xWX_t1` = c(0.47, 0.52, 0.23, 0.5, 0.88, 0.71, 0.21, 0.98, 0.76, 0.21),
  `2IYnS_t1` = c(0.32, 0.75, 0.03, 0.46, 0.89, 0.71, 0.51, 0.83, 0.34, 0.32),
  # time point 2
  a1G_t2 = c(0.97, 0.01, 0.58, 0.33, 0.58, 0.37, 0.76, 0.33, 0.39, 0.56),
  Stqo_t2 = c(0.78, 0.42, 0.5, 0.69, 0.09, 0.72, 0.84, 0.94, 0.46, 0.83),
  Twcdz_t2 = c(0.62, 0.34, 0.72, 0.62, 0.8, 0.26, 0.3, 0.88, 0.42, 0.53),
  Kgh_t2 = c(0.99, 0.66, 0.02, 0.17, 0.51, 0.03, 0.03, 0.74, 0.1, 0.26),
  `2xWX_t2` = c(0.68, 0.97, 0.56, 0.27, 0.66, 0.71, 0.96, 0.24, 0.37, 0.76),
  `2IYnS_t2` = c(0.24, 0.88, 0.58, 0.31, 0.8, 0.92, 0.91, 0.9, 0.55, 0.52),
  # time point 3
  a1G_t3 = c(0.73, 0.6, 0.66, 0.06, 0.33, 0.34, 0.09, 0.44, 0.73, 0.56),
  Stqo_t3 = c(0.28, 0.88, 0.56, 0.75, 0.85, 0.33, 0.88, 0.4, 0.63, 0.61),
  Twcdz_t3 = c(0.79, 0.95, 0.41, 0.07, 0.99, 0.06, 0.74, 0.17, 0.89, 0.4),
  Kgh_t3 = c(0.06, 0.52, 0.35, 0.91, 0.43, 0.74, 0.72, 0.96, 0.39, 0.4),
  `2xWX_t3` = c(0.25, 0.09, 0.64, 0.32, 0.15, 0.14, 0.18, 0.33, 0.97, 0.6),
  `2IYnS_t3` = c(0.92, 0.49, 0.09, 0.95, 0.3, 0.83, 0.82, 0.56, 0.29, 0.36),
  check.names = FALSE
)

# here is an example of what I want with the first column. I would also want all other repeating columns to look like this (Stqo_when, Twcdz_when, etc.)

 id when a1G_when
1   1    1   0.78
2   2    3   0.60
3   3    2   0.58
4   4    1   0.87
5   5    2   0.58
6   6    1   0.25
7   7    3   0.09
8   8    2   0.33
9   9    3   0.73
10 10    1   0.50



2 个答案:

答案 0 :(得分:0)

这里正好可以用上新的 `tidyr::pivot_longer`。我们可以用它重塑数据,使 `var` 和 `t` 各自成为单独的一列,然后用 `filter` 只保留包含所需数据的行(即 `t` 等于 `when` 的行),最后再把数据转回宽格式。

library(tidyverse)

# Toy data: one row per participant. `when` names the time point (1-3) to
# keep; each measure is stored once per time point as <var>_t<k>.
df1 <- structure(
  list(
    ID = c(101, 102, 103, 104, 105),
    when = c(1, 2, 3, 1, 2),
    var1_t1 = c(5, 6, 4, 5, 6),
    var2_t1 = c(2, 3, 4, 2, 3),
    var1_t2 = c(7, 8, 9, 7, 8),
    var2_t2 = c(5, 4, 5, 4, 5),
    var1_t3 = c(3, 4, 3, 4, 3),
    var2_t3 = c(6, 7, 6, 7, 6)
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)

df1 %>%
  # Go long: split each name such as "var1_t2" at "_t" into var = "var1" and
  # t = "2", with the cell value in `val`.
  pivot_longer(
    cols = starts_with("var"),
    names_to = c("var", "t"),
    names_sep = "_t",
    values_to = "val",
    # The original `col_ptypes` argument is not part of released tidyr.
    # `names_transform` (tidyr >= 1.1.0) converts the extracted time index
    # to numeric so it can be compared with the numeric `when` column.
    names_transform = list(t = as.numeric)
  ) %>%
  # Keep only the measurement taken at each participant's `when`.
  filter(when == t) %>%
  select(-t) %>%
  # Back to wide: one column per variable stem.
  pivot_wider(names_from = "var", values_from = "val")
#> # A tibble: 5 x 4
#>      ID  when  var1  var2
#>   <dbl> <dbl> <dbl> <dbl>
#> 1   101     1     5     2
#> 2   102     2     8     4
#> 3   103     3     3     6
#> 4   104     1     5     2
#> 5   105     2     8     5

由 reprex 包(v0.3.0)于 2019-07-16 创建

答案 1 :(得分:0)

使用data.table,您可以执行以下操作:

library(data.table)

# Target column names, one per variable stem: take every column of df except
# "id" and "when", strip everything from the first "_" to the end of the name,
# append "_when", and de-duplicate (e.g. "a1G_t1"/"a1G_t2"/"a1G_t3" all become
# "a1G_when").
cols <- unique(paste0(gsub("_.*", "", setdiff(names(df), c("id", "when"))), "_when"))

# Three chained `:=` steps on df after setDT() converts it to a data.table
# by reference:
#   1. Fill each <stem>_when column with the *name* of the column to read for
#      that row, built as <stem> + "_t" + the row's `when` value.
#   2. Group by those name columns. Within a group, get(x) returns the source
#      column name stored in column x, and .SD[[...]] pulls that column's
#      values. as.character() is presumably needed because the column being
#      overwritten holds character data at this point -- TODO confirm.
#   3. Convert the <stem>_when columns from character to numeric.
# NOTE(review): step 2 assigns to the same columns it groups by; verify this
# is accepted by the data.table version in use.
setDT(df)[
  , (cols) := lapply(cols, function(x) paste0(gsub("_.*", "", x), "_t", when))][
    , (cols) := lapply(cols, function(x) as.character(.SD[[get(x)]])), by = cols][
      , (cols) := lapply(.SD, as.numeric), .SDcols = cols
    ]

输出(仅前10行,仅相关的when列):

    a1G_when Stqo_when Twcdz_when Kgh_when 2xWX_when 2IYnS_when
 1:     0.78      0.68       0.95     0.25      0.47       0.32
 2:     0.60      0.88       0.95     0.52      0.09       0.49
 3:     0.58      0.50       0.72     0.02      0.56       0.58
 4:     0.87      0.66       0.54     0.34      0.50       0.46
 5:     0.58      0.09       0.80     0.51      0.66       0.80
 6:     0.25      0.05       0.45     0.75      0.71       0.71
 7:     0.09      0.88       0.74     0.72      0.18       0.82
 8:     0.33      0.94       0.88     0.74      0.24       0.90
 9:     0.73      0.63       0.89     0.39      0.97       0.29
10:     0.50      0.04       0.55     0.66      0.21       0.32