R:将某一列的唯一值拼接到其他列的列名上,并对表进行转置

时间:2016-08-03 10:38:47

标签: r

我在处理数据时需要一些帮助,下面用一个例子来更清楚地说明。以下是 dput() 输出的数据:

# Example input: one row per (SEQ, PR) pair with four measure columns.
# PR is built with factor() so that its levels are unique; the previous
# dput()-style `.Label` vector listed "AHE" twice, which produces a malformed
# factor with duplicated levels.
data <- data.frame(
  SEQ = c(1, 2, 2, 2, 2, 2),
  PR = factor(
    c("AHE", "AHE", "BHE", "BTH", "CHE", "DS"),
    levels = c("AHE", "BHE", "BTH", "CHE", "CTH", "DHE", "DS", "DTH")
  ),
  mittel = c(1.33, 2, 0.17, 0.33, 0, 0),
  max = c(1.33, 2, 0.17, 0.33, 0, 0),
  s = rep(NaN, 6),
  n = rep(1L, 6)
)
# Keep the same class vector that the original dput() output carried.
class(data) <- c("tbl_df", "tbl", "data.frame")

数据如下所示:

     SEQ     PR mittel   max     s     n
1      1    AHE   1.33  1.33   NaN     1
2      2    AHE   2.00  2.00   NaN     1
3      2    BHE   0.17  0.17   NaN     1
4      2    BTH   0.33  0.33   NaN     1
5      2    CHE   0.00  0.00   NaN     1
6      2     DS   0.00  0.00   NaN     1

我想把 PR 列的唯一值作为前缀,拼接到 mittel、max、s 和 n 这几个列名上;然后转置表格,并用 SEQ 列的值作为转置后表格的列标题。期望的结果如下:

           1      2
AHE_mittel 1.33  2.0
AHE_max    1.33  2.0
AHE_s      NaN   NaN
AHE_n      1     1
...

[ANSWER]

我要感谢大家的帮助!我注意到(感谢其他用户的提醒)这是一个重复的问题。最后,我使用 dcast 解决了这个问题。

我使用的代码如下:
# NOTE(review): melt() and dcast() are not base R; presumably reshape2 is
# attached before this runs -- confirm.

# Long format: the first two columns (SEQ, PR) stay as identifiers.
data <- melt(data, id.vars = 1:2)

# Row label "<PR>_<variable>", e.g. "AHE_mittel".
data[["id"]] <- with(data, paste(PR, variable, sep = "_"))

# Back to wide format with one column per SEQ value; every remaining column
# stays on the row side of the formula and median is the aggregation function.
data <- dcast(data, ... ~ SEQ, fun.aggregate = median)

谢谢!

2 个答案:

答案 0 :(得分:1)

使用ddply和嵌套函数:

# Example input: one row per (SEQ, PR) pair with four measure columns.
# PR is built with factor() so that its levels are unique; the previous
# dput()-style `.Label` vector listed "AHE" twice, which produces a malformed
# factor with duplicated levels.
DF <- data.frame(
  SEQ = c(1, 2, 2, 2, 2, 2),
  PR = factor(
    c("AHE", "AHE", "BHE", "BTH", "CHE", "DS"),
    levels = c("AHE", "BHE", "BTH", "CHE", "CTH", "DHE", "DS", "DTH")
  ),
  mittel = c(1.33, 2, 0.17, 0.33, 0, 0),
  max = c(1.33, 2, 0.17, 0.33, 0, 0),
  s = rep(NaN, 6),
  n = rep(1L, 6)
)
# Keep the same class vector that the original dput() output carried.
class(DF) <- c("tbl_df", "tbl", "data.frame")



# NOTE: ddply() and .() are provided by the plyr package, which must be
# attached before this snippet runs.

# Work with PR as plain character strings rather than a factor.
DF$PR <- as.character(DF$PR)

# Measure columns to transpose: everything after SEQ and PR (mittel, max, s, n).
value_cols <- colnames(DF)[3:ncol(DF)]

# For each unique PR, transpose the measure columns so that every measure
# becomes one row and every SEQ value becomes one column.
#
# The transposed columns are named after the SEQ values of the group, so that
# ddply() aligns them by SEQ when it combines the groups. Without these names
# the columns are purely positional, and a PR that only occurs with SEQ == 2
# would have its values filed under column "1".
transposeDF <- ddply(DF, .(PR), function(x) {
  # Two rows with the same SEQ inside one PR would produce duplicate column
  # names and an ambiguous result, so fail loudly instead.
  if (anyDuplicated(x$SEQ) > 0) {
    stop("SEQ must be unique within each PR (PR = ", x$PR[1], ")",
         call. = FALSE)
  }

  wide <- t(as.matrix(x[value_cols]))
  colnames(wide) <- as.character(x$SEQ)

  # newPR is the row label "<PR>_<measure>", e.g. "AHE_mittel".
  data.frame(
    wide,
    newPR = paste0(x$PR[1], "_", value_cols),
    check.names = FALSE,
    row.names = NULL,
    stringsAsFactors = FALSE
  )
})


# Expected result (values are now aligned with their SEQ column):
#>transposeDF
#    PR    1    2      newPR
#1  AHE 1.33 2.00 AHE_mittel
#2  AHE 1.33 2.00    AHE_max
#3  AHE  NaN  NaN      AHE_s
#4  AHE 1.00 1.00      AHE_n
#5  BHE   NA 0.17 BHE_mittel
#6  BHE   NA 0.17    BHE_max
#7  BHE   NA  NaN      BHE_s
#8  BHE   NA 1.00      BHE_n
#9  BTH   NA 0.33 BTH_mittel
#10 BTH   NA 0.33    BTH_max
#11 BTH   NA  NaN      BTH_s
#12 BTH   NA 1.00      BTH_n
#13 CHE   NA 0.00 CHE_mittel
#14 CHE   NA 0.00    CHE_max
#15 CHE   NA  NaN      CHE_s
#16 CHE   NA 1.00      CHE_n
#17  DS   NA 0.00  DS_mittel
#18  DS   NA 0.00     DS_max
#19  DS   NA  NaN       DS_s
#20  DS   NA 1.00       DS_n

答案 1 :(得分:0)

给你:

library(tidyr)
library(dplyr)

# Stack the measure columns mittel..n into key/value pairs: `val` holds the
# former column name and `val2` the corresponding value.
mod <- gather(data, val, val2, mittel:n)

# Keep only the columns needed below, in this order.
mod <- select(mod, PR, val, val2, SEQ)

# Build the row label "<PR>_<measure>" and keep the value and SEQ next to it.
mod <- data.frame(
  id = paste0(mod[["PR"]], "_", mod[["val"]]),
  select(mod, val2, SEQ)
)