如何转置某些列

时间:2016-11-12 20:26:45

标签: r

我有一个包含超过300列和行的数据集,如下所示:

old= data.frame(
    id=1:3,
    DATE=c('20/12/1965','11/101970','03/12/1985'),
    TRT=c(1,2,1),
    AT1=c(40,89,50),
    AT2=c(18,43,13),
    AT3=c(23,16,10),
    BECT1=c(80,50,60),
    BECT2=c(78,40,48),
    BECT3=c(60,97,56),
    C0T1=c(43,45,78),
    C0T2=c(42,NA,48))

我想将我的数据转换为如下所示:

new=data.frame(ID=c(1,1,1,2,2,2,3,3,3),
           TRT=c(1,1,1,2,2,2,1,1,1),
           DATE=c('20/12/1965','20/12/1965','20/12/1965','11/10/1970','11/10/1970','11/10/1970','03/12/1985','03/12/1985','03/12/1985'),        
           variable1=c('ATI','AT2','AT3','ATI','AT2','AT3','ATI','AT2','AT3'), 
           value1=c(20,23,18,89,43,16,50,13,10),
           variable2=c('BECT1','BECT2','BECT3','BECT1','BECT2','BECT3','BECT1','BECT2','BECT3'), 
           value2=c(80,78,60,50,40,97,60,48,56),
           Variable3= c('C0T1','C0T2','C0T3','C0T1','C0T2','C0T3','C0T1','C0T2','C0T3'), 
           value3=c(43,42,85,45,63,NA,78,48,48))

感谢

2 个答案:

答案 0 :(得分:2)

您可以使用gather包中的tidyr,并从dplyr安排:

library(dplyr)
library(tidyr)
old_df= data.frame(
    id=1:3,
    DATE=c('20/12/1965','11/10/1970','03/12/1985'),
    TRT=c(1,2,1),
    AT1=c(40,89,50),
    AT2=c(18,43,13),
    AT3=c(23,16,10),
    BECT1=c(80,50,60),
    BECT2=c(78,40,48),
    BECT3=c(60,97,56),
    C0T1=c(43,45,78),
    C0T2=c(42,NA,48))

new_df <- old_df %>%
    gather(AT1, AT2, AT3, key = "variable1", value = "value1") %>%
    gather(BECT1, BECT2, BECT3, key = "variable2", value = "value2") %>%
    gather(C0T1, C0T2, key = "variable3", value = "value3") %>%
    arrange(id, DATE, TRT, variable1, variable2, variable3)

   id       DATE TRT variable1 value1 variable2 value2 variable3 value3
1   1 20/12/1965   1       AT1     40     BECT1     80      C0T1     43
2   1 20/12/1965   1       AT1     40     BECT1     80      C0T2     42
3   1 20/12/1965   1       AT1     40     BECT2     78      C0T1     43
4   1 20/12/1965   1       AT1     40     BECT2     78      C0T2     42
5   1 20/12/1965   1       AT1     40     BECT3     60      C0T1     43
6   1 20/12/1965   1       AT1     40     BECT3     60      C0T2     42
7   1 20/12/1965   1       AT2     18     BECT1     80      C0T1     43
8   1 20/12/1965   1       AT2     18     BECT1     80      C0T2     42
9   1 20/12/1965   1       AT2     18     BECT2     78      C0T1     43
10  1 20/12/1965   1       AT2     18     BECT2     78      C0T2     42
11  1 20/12/1965   1       AT2     18     BECT3     60      C0T1     43
12  1 20/12/1965   1       AT2     18     BECT3     60      C0T2     42
13  1 20/12/1965   1       AT3     23     BECT1     80      C0T1     43
14  1 20/12/1965   1       AT3     23     BECT1     80      C0T2     42
15  1 20/12/1965   1       AT3     23     BECT2     78      C0T1     43
16  1 20/12/1965   1       AT3     23     BECT2     78      C0T2     42
17  1 20/12/1965   1       AT3     23     BECT3     60      C0T1     43
18  1 20/12/1965   1       AT3     23     BECT3     60      C0T2     42
19  2 11/10/1970   2       AT1     89     BECT1     50      C0T1     45
20  2 11/10/1970   2       AT1     89     BECT1     50      C0T2     NA
21  2 11/10/1970   2       AT1     89     BECT2     40      C0T1     45
22  2 11/10/1970   2       AT1     89     BECT2     40      C0T2     NA
23  2 11/10/1970   2       AT1     89     BECT3     97      C0T1     45
24  2 11/10/1970   2       AT1     89     BECT3     97      C0T2     NA
25  2 11/10/1970   2       AT2     43     BECT1     50      C0T1     45
26  2 11/10/1970   2       AT2     43     BECT1     50      C0T2     NA
27  2 11/10/1970   2       AT2     43     BECT2     40      C0T1     45
28  2 11/10/1970   2       AT2     43     BECT2     40      C0T2     NA
29  2 11/10/1970   2       AT2     43     BECT3     97      C0T1     45
30  2 11/10/1970   2       AT2     43     BECT3     97      C0T2     NA
31  2 11/10/1970   2       AT3     16     BECT1     50      C0T1     45
32  2 11/10/1970   2       AT3     16     BECT1     50      C0T2     NA
33  2 11/10/1970   2       AT3     16     BECT2     40      C0T1     45
34  2 11/10/1970   2       AT3     16     BECT2     40      C0T2     NA
35  2 11/10/1970   2       AT3     16     BECT3     97      C0T1     45
36  2 11/10/1970   2       AT3     16     BECT3     97      C0T2     NA
37  3 03/12/1985   1       AT1     50     BECT1     60      C0T1     78
38  3 03/12/1985   1       AT1     50     BECT1     60      C0T2     48
39  3 03/12/1985   1       AT1     50     BECT2     48      C0T1     78
40  3 03/12/1985   1       AT1     50     BECT2     48      C0T2     48
41  3 03/12/1985   1       AT1     50     BECT3     56      C0T1     78
42  3 03/12/1985   1       AT1     50     BECT3     56      C0T2     48
43  3 03/12/1985   1       AT2     13     BECT1     60      C0T1     78
44  3 03/12/1985   1       AT2     13     BECT1     60      C0T2     48
45  3 03/12/1985   1       AT2     13     BECT2     48      C0T1     78
46  3 03/12/1985   1       AT2     13     BECT2     48      C0T2     48
47  3 03/12/1985   1       AT2     13     BECT3     56      C0T1     78
48  3 03/12/1985   1       AT2     13     BECT3     56      C0T2     48
49  3 03/12/1985   1       AT3     10     BECT1     60      C0T1     78
50  3 03/12/1985   1       AT3     10     BECT1     60      C0T2     48
51  3 03/12/1985   1       AT3     10     BECT2     48      C0T1     78
52  3 03/12/1985   1       AT3     10     BECT2     48      C0T2     48
53  3 03/12/1985   1       AT3     10     BECT3     56      C0T1     78
54  3 03/12/1985   1       AT3     10     BECT3     56      C0T2     48

您可以在Hadley Wickham's r4ds book

的第12.3.1节中详细了解收集内容

答案 1 :(得分:1)

transposeData <- function(df) {
    rowList <- list()
    # store each column as a vector in a list
    for (i in seq_along(df)) {
        rowList[[i]] <- t(df[, i])
    }
    # combine each component (vector) of the list by row
    newDf <- do.call(what = rbind, args = rowList)
    newDf <- as.data.frame(newDf)
    # gives the names of the columns as rownames in the new data
    rownames(newDf) <- names(df)
    # name the column of the new data as the observation index
    names(newDf) <- paste("obs", 1:dim(df)[1])
    newDf  
}

输出是一个数据框,其观察结果为列。

>transposeData(old)
        obs 1 obs 2 obs 3
id        1     2     3
DATE      3     2     1
TRT       1     2     1
AT1      40    89    50
AT2      18    43    13
AT3      23    16    10
BECT1    80    50    60
BECT2    78    40    48
BECT3    60    97    56
C0T1     43    45    78
C0T2     42    NA    48

DATE 列的问题是在创建数据框时强制作为因素。那么,在for transposeData()函数内部,当列被提取为向量时,您将获得因子级别索引,而不是日期。您可以使用条件if修复此问题。