如何按多列将长数据转换为宽数据

时间:2019-09-17 11:15:49

标签: r dataframe transform

如何像这样将数据X转换为Y

# Long-format input: one row per measurement, with several rows per ID.
# IDs A, B and C contribute 3, 2 and 3 rows respectively.
X <- data.frame(
  ID  = rep(c("A", "B", "C"), times = c(3, 2, 3)),
  V1  = c(2.3, 2.6, 2.9, 3.0, 3.2, 1.8, 1.7, 1.6),
  V2  = c(5.5, 4.7, 5.0, 4.4, 4.2, 8.0, 9.0, 8.5),
  V3  = c(1, 1, 3, 2, 2, 3, 3, 3),
  SEX = rep(c("MALE", "FEMALE", "MALE"), times = c(3, 2, 3))
)
# Desired wide-format result: one row per ID, with the k-th measurement of
# each V* column stored in V*_k. ID B has only two measurements, so its
# *_3 columns are NA.
Y <- data.frame(
  ID = c("A", "B", "C"),
  SEX = c("MALE", "FEMALE", "MALE"),
  V1_1 = c(2.3, 3.0, 1.8),
  V1_2 = c(2.6, 3.2, 1.7),
  V1_3 = c(2.9, NA, 1.6),
  V2_1 = c(5.5, 4.4, 8.0),
  V2_2 = c(4.7, 4.2, 9.0),
  # The third V2 value recorded for ID C in X is 8.5 (not 8.0).
  V2_3 = c(5.0, NA, 8.5),
  V3_1 = c(1, 2, 3),
  V3_2 = c(1, 2, 3),
  V3_3 = c(3, NA, 3)
)

R 中有没有一种快速的方法可以将其转换为这样的数据框?感谢您的任何建议!

3 个答案:

答案 0 :(得分:0)

使用 dplyr 和 tidyr,我们可以先用 gather 把以 "V" 开头(starts_with("V"))的列转换为长格式,再按 ID、SEX 和 key 分组(group_by),创建一个唯一列(key1),最后用 spread 将数据扩展为宽格式。

library(dplyr)
library(tidyr)

# 1. Stack every V* column into (variable, val) pairs.
# 2. Within each ID / SEX / variable group, number the rows to build the
#    target column name, e.g. "V1_1", "V1_2", ...
# 3. Drop the bare variable name and spread the numbered names into columns.
X %>%
  gather(key = "variable", value = "val", starts_with("V")) %>%
  group_by(ID, SEX, variable) %>%
  mutate(wide_name = paste0(variable, "_", row_number())) %>%
  ungroup() %>%
  select(-variable) %>%
  spread(key = wide_name, value = val)


#  ID    SEX     V1_1  V1_2  V1_3  V2_1  V2_2  V2_3  V3_1  V3_2  V3_3
#  <fct> <fct>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 A     MALE     2.3   2.6   2.9   5.5   4.7   5       1     1     3
#2 B     FEMALE   3     3.2  NA     4.4   4.2  NA       2     2    NA
#3 C     MALE     1.8   1.7   1.6   8     9     8.5     3     3     3

答案 1 :(得分:0)

这是一种data.table方法,

library(data.table)

# Number the rows within each ID / SEX group, then cast all three value
# columns wide at once; dcast names the results <value.var>_<new>.
# as.data.table() works on a copy, so X itself is neither converted to a
# data.table nor left carrying the helper column `new` (setDT() and :=
# would both modify X by reference).
dcast(
  as.data.table(X)[, new := seq_len(.N), by = .(ID, SEX)],
  ID + SEX ~ new,
  value.var = c("V1", "V2", "V3")
)

#   ID    SEX V1_1 V1_2 V1_3 V2_1 V2_2 V2_3 V3_1 V3_2 V3_3
#1:  A   MALE  2.3  2.6  2.9  5.5  4.7  5.0    1    1    3
#2:  B FEMALE  3.0  3.2   NA  4.4  4.2   NA    2    2   NA
#3:  C   MALE  1.8  1.7  1.6  8.0  9.0  8.5    3    3    3

答案 2 :(得分:0)

使用新的 tidyr 1.0.0(参见其发行说明),这几乎只需一行代码:

library(tidyr)
library(dplyr)

# Index each row within its ID, then use that index as the suffix of the
# widened V1 / V2 / V3 columns (V1_1, V1_2, ...). The helper index column
# is consumed by pivot_wider() and does not appear in the result.
X %>%
  group_by(ID) %>%
  mutate(occurrence = row_number()) %>%
  pivot_wider(
    values_from = c(V1, V2, V3),
    names_from = occurrence
  )

结果

# A tibble: 3 x 11
# Groups:   ID [3]
  ID    SEX     V1_1  V1_2  V1_3  V2_1  V2_2  V2_3  V3_1  V3_2  V3_3
  <fct> <fct>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A     MALE     2.3   2.6   2.9   5.5   4.7   5       1     1     3
2 B     FEMALE   3     3.2  NA     4.4   4.2  NA       2     2    NA
3 C     MALE     1.8   1.7   1.6   8     9     8.5     3     3     3

编辑:您还可以在 values_from = 中使用 starts_with("V") 之类的选择辅助函数。