R:将数据分成两列的组合

时间:2018-01-09 10:38:43

标签: r combinations reshape tidyr

我有一些数据,其中每个 id 由不同的 type 衡量,每个 type 可以取不同的值 type_val,对应的测量值为 val。一个小的虚拟数据可以这样生成:df <- data.frame(id=rep(letters[1:2],6), type=c(rep('t1',6), rep('t2',6)), type_val=rep(c(1,1,2,2,3,3),2), val=1:12) 。得到的 df 如下:

    id  type    type_val    val
1   a   t1  1   1
2   b   t1  1   2
3   a   t1  2   3
4   b   t1  2   4
5   a   t1  3   5
6   b   t1  3   6
7   a   t2  1   7
8   b   t2  1   8
9   a   t2  2   9
10  b   t2  2   10
11  a   t2  3   11
12  b   t2  3   12

以上就是 df 的内容(各列依次为 id、type、type_val 和 val)。

我需要传播/投放(spread/cast)数据,以便每个 id 的 type 和 type_val 的所有组合都逐行列出。我认为这应该是 reshape2 或 tidyr 这类包的工作,但我除了报错之外没能生成任何结果。结果数据结构(有点冗余)将是这样的(希望我没写错!):其中 type 对(由 type_val 的组合给出)为列 type_t1 和 type_t2,它们关联的值(df 中的 val)为列 val_t1 和 val_t2。列名称是任意的:

    id  type_t1 type_t2 val_t1  val_t2
1   a   1   1   1   7
2   a   1   2   1   9
3   a   1   3   1   11
4   a   2   1   3   7
5   a   2   2   3   9
6   a   2   3   3   11
7   a   3   1   5   7
8   a   3   2   5   9
9   a   3   3   5   11
10  b   1   1   2   8
11  b   1   2   2   10
12  b   1   3   2   12
13  b   2   1   4   8
14  b   2   2   4   10
15  b   2   3   4   12
16  b   3   1   6   8
17  b   3   2   6   10
18  b   3   3   6   12

更新

请注意(@Sotos)

> spread(df, type, val)
  id type_val t1 t2
1  a        1  1  7
2  a        2  3  9
3  a        3  5 11
4  b        1  2  8
5  b        2  4 10
6  b        3  6 12

不是所需的输出 - 它无法提供由 df 中 type 和 type_val 的组合所定义的宽格式。

2 个答案:

答案 0 :(得分:3)

怎么样:

SELECT * FROM table WHERE #2018/01/09# BETWEEN startDate AND endDate

答案 1 :(得分:2)

以下是一个更通用的方案,适用于任意数量的唯一 type 值:

library(dplyr)

# Fold a list of data frames (.data) into a single data frame by merging
# them pairwise, left to right, on the column(s) named in ID.
reduce_merge <- function(.data, ID) {
    merge_pair <- function(left, right) {
        merge(left, right, by = ID)
    }
    Reduce(merge_pair, .data)
}

# Rename the columns listed in `cols` by appending `sep` plus the value found
# in the first row of the `identifier` column (e.g. val -> val_t1).
#   .data       data frame to rename; only row 1 of the identifier column is
#               read, so callers presumably pass a subset in which that
#               column holds a single value
#   cols        character vector of column names to rename
#   identifier  name of the single column that supplies the suffix
#   sep         separator placed between the old name and the suffix
# Returns .data with the matching columns renamed; all other columns are left
# untouched. Names in `cols` that are absent from .data are reported with a
# message and skipped.
batch_rename <- function(.data, cols, identifier, sep = '_') {
    stopifnot(is.character(identifier), length(identifier) == 1L)
    if (!identifier %in% names(.data)) {
        stop("identifier column not found in .data: ", identifier,
             call. = FALSE)
    }
    # [[ ]][1] yields a plain scalar for data frames and tibbles alike, and
    # as.character() turns a factor into its label rather than its code.
    suffix <- as.character(.data[[identifier]][1])
    positions <- match(cols, names(.data))
    found <- !is.na(positions)
    if (!all(found)) {
        message("The following columns were not present in .data: ",
                paste(cols[!found], collapse = ", "))
    }
    if (any(found)) {
        names(.data)[positions[found]] <-
            paste(cols[found], suffix, sep = sep)
    }
    .data
}

# Spread `vals` into one set of columns per distinct value of `key`, so that
# every combination of those values within a `grp` gets its own row.
#   .data  long-format data frame
#   grp    name(s) of the column(s) the pieces are merged on (e.g. 'id')
#   key    name of the column whose distinct values define the column sets
#   vals   names of the columns to replicate, suffixed with each key value
# Steps: split .data by key, suffix the vals columns via batch_rename(), drop
# the now-redundant key column, merge the pieces on grp via reduce_merge(),
# and return grp followed by the remaining columns in sorted order.
multi_spread <- function(.data, grp, key, vals) {
    merged <- .data %>%
        plyr::dlply(key, subset) %>%
        lapply(batch_rename, vals, key) %>%
        # Dropping key before merging keeps merge() from creating suffixed
        # copies of it (key.x, key.y), whose names can collide with each
        # other when several pieces are merged in sequence.
        lapply(function(piece) {
            piece[, setdiff(colnames(piece), key), drop = FALSE]
        }) %>%
        reduce_merge(grp)
    # Lead with grp itself (not a hard-coded `id`) so that any grouping
    # column name, or several of them, works.
    merged[, c(grp, sort(setdiff(colnames(merged), c(grp, key, vals)))),
           drop = FALSE]
}

# Your example: ids a/b alternate, each type fills six consecutive rows, and
# type_val runs 1, 1, 2, 2, 3, 3 within each type
df <- data.frame(
    id = rep(c("a", "b"), times = 6),
    type = rep(c("t1", "t2"), each = 6),
    type_val = rep(rep(c(1, 2, 3), each = 2), times = 2),
    val = seq_len(12)
)

# Spread the val and type_val columns by type, one row per combination per id
multi_spread(df, grp = 'id', key = 'type', vals = c('type_val', 'val'))

   id type_val_t1 type_val_t2 val_t1 val_t2
1   a           1           1      1      7
2   a           1           2      1      9
3   a           1           3      1     11
4   a           2           1      3      7
5   a           2           2      3      9
6   a           2           3      3     11
7   a           3           1      5      7
8   a           3           2      5      9
9   a           3           3      5     11
10  b           1           1      2      8
11  b           1           2      2     10
12  b           1           3      2     12
13  b           2           1      4      8
14  b           2           2      4     10
15  b           2           3      4     12
16  b           3           1      6      8
17  b           3           2      6     10
18  b           3           3      6     12

# An example with three unique values of 'type': same layout as before, with
# a third block of six rows for t3
df <- data.frame(
    id = rep(c("a", "b"), times = 9),
    type = rep(c("t1", "t2", "t3"), each = 6),
    type_val = rep(rep(c(1, 2, 3), each = 2), times = 3),
    val = seq_len(18)
)

# Same call as for the two-type data; the result gains a t3 column set
multi_spread(df, grp = 'id', key = 'type', vals = c('type_val', 'val'))

   id type_val_t1 type_val_t2 type_val_t3 val_t1 val_t2 val_t3
1   a           1           1           1      1      7     13
2   a           1           1           2      1      7     15
3   a           1           1           3      1      7     17
4   a           1           2           1      1      9     13
5   a           1           2           2      1      9     15
6   a           1           2           3      1      9     17
7   a           1           3           1      1     11     13
8   a           1           3           2      1     11     15
9   a           1           3           3      1     11     17
10  a           2           1           1      3      7     13
11  a           2           1           2      3      7     15
12  a           2           1           3      3      7     17
13  a           2           2           1      3      9     13
14  a           2           2           2      3      9     15
15  a           2           2           3      3      9     17
16  a           2           3           1      3     11     13
17  a           2           3           2      3     11     15
18  a           2           3           3      3     11     17
19  a           3           1           1      5      7     13
20  a           3           1           2      5      7     15
21  a           3           1           3      5      7     17
22  a           3           2           1      5      9     13
23  a           3           2           2      5      9     15
24  a           3           2           3      5      9     17
25  a           3           3           1      5     11     13
26  a           3           3           2      5     11     15
27  a           3           3           3      5     11     17
28  b           1           1           1      2      8     14
29  b           1           1           2      2      8     16
30  b           1           1           3      2      8     18
31  b           1           2           1      2     10     14
32  b           1           2           2      2     10     16
33  b           1           2           3      2     10     18
34  b           1           3           1      2     12     14
35  b           1           3           2      2     12     16
36  b           1           3           3      2     12     18
37  b           2           1           1      4      8     14
38  b           2           1           2      4      8     16
39  b           2           1           3      4      8     18
40  b           2           2           1      4     10     14
41  b           2           2           2      4     10     16
42  b           2           2           3      4     10     18
43  b           2           3           1      4     12     14
44  b           2           3           2      4     12     16
45  b           2           3           3      4     12     18
46  b           3           1           1      6      8     14
47  b           3           1           2      6      8     16
48  b           3           1           3      6      8     18
49  b           3           2           1      6     10     14
50  b           3           2           2      6     10     16
51  b           3           2           3      6     10     18
52  b           3           3           1      6     12     14
53  b           3           3           2      6     12     16
54  b           3           3           3      6     12     18