重新排列R中的数据,拆分列名称

时间:2015-04-03 17:55:02

标签: r transformation

我得到了一张表,我需要转换成另外两种形式。任何人都可以帮我找到一个系统的R-way来转换它而不需要一堆嵌套for循环?

这是表格的简化版本:

# Example data: three samples (IID), each recorded at four statuses
# (B1-B4) plus a "Total" row, for 15 rows in total.
Status <- rep(c(paste0("B", 1:4), "Total"), times = 3)
FID <- rep(c("N123", "K541"), times = c(10, 5))
IID <- rep(c(123, 456, 789), each = 5)

# Two measurements (Value1, Value2) on each of two gels (G1, G2).
Value1.G1 <- c(
  rep(c(888, 345, 765, 875, 875, 323), times = 2),
  8039, 830, 849
)
Value2.G1 <- c(rep(c(443, 325, 761), times = 4), 649, 975, 323)
Value1.G2 <- rep(c(446, 345, 765, 875, 323), times = 3)
Value2.G2 <- c(rep(c(540, 345, 765), times = 4), 169, 875, 431)

# Column names are taken from the vector names above.
dat <- data.frame(
  FID, IID, Status,
  Value1.G1, Value2.G1, Value1.G2, Value2.G2
)
print(dat)
    FID IID Status Value1.G1 Value2.G1 Value1.G2 Value2.G2
1  N123 123     B1       888       443       446       540
2  N123 123     B2       345       325       345       345
3  N123 123     B3       765       761       765       765
4  N123 123     B4       875       443       875       540
5  N123 123  Total       875       325       323       345
6  N123 456     B1       323       761       446       765
7  N123 456     B2       888       443       345       540
8  N123 456     B3       345       325       765       345
9  N123 456     B4       765       761       875       765
10 N123 456  Total       875       443       323       540
11 K541 789     B1       875       325       446       345
12 K541 789     B2       323       761       345       765
13 K541 789     B3      8039       649       765       169
14 K541 789     B4       830       975       875       875
15 K541 789  Total       849       323       323       431

简而言之,排除前三列,每个单元格中的数字是特定样本(在此示例中为 `123`、`456` 和 `789`)的特定值(`Value1` 或 `Value2`),在特定的凝胶(`1` 或 `2`)上,处于特定状态(`B1`、`B2`、`B3`、`B4`、`Total`)。前三列(`FID`、`IID`、`Status`)列出了有关样本(`FID`、`IID`)和状态(`Status`)的信息。

第一表格

我需要创建的第一种形式将列标题分解为各个组成部分。(澄清点:“Gel”列是指原列名中“G”后面的数字。)

(第一种形式的预期输出即下文列出的 30 行表格,列为 FID、IID、Status、Value1、Value2、Gel。)

第二表格

我需要创建的第二种形式要求每个唯一的个人ID(`IID`)占一行,其中包含所有值的信息。在这种情况下,列名称表示关于值及其特定条件的所有信息(即哪个值、哪个凝胶和哪个状态)。使用此示例数据,这意味着有3行和22列。

    FID IID Status Value1 Value2 Gel
1  N123 123     B1    888    443   1
2  N123 456     B1    323    761   1
3  K541 789     B1    875    325   1
4  N123 123     B1    446    540   2
5  N123 456     B1    446    765   2
6  K541 789     B1    446    345   2
7  N123 123     B2    345    325   1
8  N123 456     B2    888    443   1
9  K541 789     B2    323    761   1
10 N123 123     B2    345    345   2
11 N123 456     B2    345    540   2
12 K541 789     B2    345    765   2
13 N123 123     B3    765    761   1
14 N123 456     B3    345    325   1
15 K541 789     B3   8039    649   1
16 N123 123     B3    765    765   2
17 N123 456     B3    765    345   2
18 K541 789     B3    765    169   2
19 N123 123     B4    875    443   1
20 N123 456     B4    765    761   1
21 K541 789     B4    830    975   1
22 N123 123     B4    875    540   2
23 N123 456     B4    875    765   2
24 K541 789     B4    875    875   2
25 N123 123  Total    875    325   1
26 N123 456  Total    875    443   1
27 K541 789  Total    849    323   1
28 N123 123  Total    323    345   2
29 N123 456  Total    323    540   2
30 K541 789  Total    323    431   2

2 个答案:

答案 0 :(得分:3)

使用?reshape

# Rebuild the question's example data frame (15 rows: 3 samples x
# 5 statuses).
Status <- rep(c(sprintf("B%d", 1:4), "Total"), 3)
FID <- rep(c("N123", "K541"), c(10, 5))
IID <- rep(c(123, 456, 789), each = 5)

# Value columns are named <measurement>.<gel>.
Value1.G1 <- c(
  rep(c(888, 345, 765, 875, 875, 323), 2),
  8039, 830, 849
)
Value2.G1 <- c(rep_len(c(443, 325, 761), 12), 649, 975, 323)
Value1.G2 <- rep_len(c(446, 345, 765, 875, 323), 15)
Value2.G2 <- c(rep_len(c(540, 345, 765), 12), 169, 875, 431)

dat <- data.frame(
  FID, IID, Status,
  Value1.G1, Value2.G1, Value1.G2, Value2.G2
)
dat

用于长格式

# Long format: stack the per-gel column pairs so each row holds the
# Value1/Value2 readings for a single gel. The measure columns are
# named explicitly (columns 4 and 6, then 5 and 7, of dat).
l <- reshape(
  dat,
  direction = "long",
  varying = list(
    c("Value1.G1", "Value1.G2"),
    c("Value2.G1", "Value2.G2")
  ),
  v.names = c("Value1", "Value2"),
  timevar = "Gel"
)
# Show the result grouped by status.
l[order(l$Status), ]

#       FID IID Status Gel Value1 Value2 id
# 1.1  N123 123     B1   1    888    443  1
# 6.1  N123 456     B1   1    323    761  6
# 11.1 K541 789     B1   1    875    325 11
# 1.2  N123 123     B1   2    446    540  1
# 6.2  N123 456     B1   2    446    765  6
# 11.2 K541 789     B1   2    446    345 11
# 2.1  N123 123     B2   1    345    325  2
# 7.1  N123 456     B2   1    888    443  7
# 12.1 K541 789     B2   1    323    761 12
# 2.2  N123 123     B2   2    345    345  2
# 7.2  N123 456     B2   2    345    540  7
# 12.2 K541 789     B2   2    345    765 12
# 3.1  N123 123     B3   1    765    761  3
# 8.1  N123 456     B3   1    345    325  8
# 13.1 K541 789     B3   1   8039    649 13
# 3.2  N123 123     B3   2    765    765  3
# 8.2  N123 456     B3   2    765    345  8
# 13.2 K541 789     B3   2    765    169 13
# 4.1  N123 123     B4   1    875    443  4
# 9.1  N123 456     B4   1    765    761  9
# 14.1 K541 789     B4   1    830    975 14
# 4.2  N123 123     B4   2    875    540  4
# 9.2  N123 456     B4   2    875    765  9
# 14.2 K541 789     B4   2    875    875 14
# 5.1  N123 123  Total   1    875    325  5
# 10.1 N123 456  Total   1    875    443 10
# 15.1 K541 789  Total   1    849    323 15
# 5.2  N123 123  Total   2    323    345  5
# 10.2 N123 456  Total   2    323    540 10
# 15.2 K541 789  Total   2    323    431 15

以及宽格式

# Wide format: one row per FID/IID pair; each Status level is appended
# as a suffix to every value column.
reshape(
  dat,
  idvar = c("FID", "IID"),
  timevar = "Status",
  direction = "wide"
)

#     FID IID Value1.G1.B1 Value2.G1.B1 Value1.G2.B1 Value2.G2.B1 Value1.G1.B2
# 1  N123 123          888          443          446          540          345
# 6  N123 456          323          761          446          765          888
# 11 K541 789          875          325          446          345          323
#    Value2.G1.B2 Value1.G2.B2 Value2.G2.B2 Value1.G1.B3 Value2.G1.B3 Value1.G2.B3
# 1           325          345          345          765          761          765
# 6           443          345          540          345          325          765
# 11          761          345          765         8039          649          765
#    Value2.G2.B3 Value1.G1.B4 Value2.G1.B4 Value1.G2.B4 Value2.G2.B4 Value1.G1.Total
# 1           765          875          443          875          540             875
# 6           345          765          761          875          765             875
# 11          169          830          975          875          875             849
#    Value2.G1.Total Value1.G2.Total Value2.G2.Total
# 1              325             323             345
# 6              443             323             540
# 11             323             323             431

答案 1 :(得分:2)

您可以尝试 data.table 开发版本(即 v1.9.5)中的 melt 和 dcast,它们可以处理多个值列。该版本可以从 here 安装。

要把“宽”格式转换为“长”格式,请在 data.table 对象上使用 data.table 的 melt(setDT(dat) 会把 data.frame 转换为 data.table),并在 measure.vars 中以列表的形式指定 “Value1” 和 “Value2” 各列的索引。

library(data.table) # v1.9.5+

# Convert dat to a data.table, then melt the two groups of measure
# columns (4 and 6, 5 and 7) into two value columns, with the group
# position recorded in "Gel".
setDT(dat)
dM <- melt(
  dat,
  measure.vars = list(c(4, 6), c(5, 7)),
  variable.name = "Gel"
)

dM[order(Status)]
#     FID IID Status Gel value1 value2
# 1: N123 123     B1   1    888    443
# 2: N123 456     B1   1    323    761
# 3: K541 789     B1   1    875    325
# 4: N123 123     B1   2    446    540
# 5: N123 456     B1   2    446    765
# 6: K541 789     B1   2    446    345
# 7: N123 123     B2   1    345    325
# 8: N123 456     B2   1    888    443
# 9: K541 789     B2   1    323    761
#10: N123 123     B2   2    345    345
#11: N123 456     B2   2    345    540
#12: K541 789     B2   2    345    765
#13: N123 123     B3   1    765    761
#14: N123 456     B3   1    345    325
#15: K541 789     B3   1   8039    649
#16: N123 123     B3   2    765    765
#17: N123 456     B3   2    765    345
#18: K541 789     B3   2    765    169
#19: N123 123     B4   1    875    443
#20: N123 456     B4   1    765    761
#21: K541 789     B4   1    830    975
#22: N123 123     B4   2    875    540
#23: N123 456     B4   2    875    765
#24: K541 789     B4   2    875    875
#25: N123 123  Total   1    875    325
#26: N123 456  Total   1    875    443
#27: K541 789  Total   1    849    323
#28: N123 123  Total   2    323    345
#29: N123 456  Total   2    323    540
#30: K541 789  Total   2    323    431

我们可以用 dcast 把“长”格式转换为“宽”格式。在这里,我们在 value.var 中指定多个值列。
# Cast the melted table back to wide: one row per FID/IID, with one
# column per Gel/Status combination for each of the two value columns.
dC <- dcast(
  dM,
  FID + IID ~ Gel + Status,
  value.var = c("value1", "value2")
)
dC
#   FID IID 1_B1_value1 1_B2_value1 1_B3_value1 1_B4_value1 1_Total_value1
#1: K541 789         875         323        8039         830            849
#2: N123 123         888         345         765         875            875
#3: N123 456         323         888         345         765            875
#   2_B1_value1 2_B2_value1 2_B3_value1 2_B4_value1 2_Total_value1 1_B1_value2
#1:         446         345         765         875            323         325
#2:         446         345         765         875            323         443
#3:         446         345         765         875            323         761
#   1_B2_value2 1_B3_value2 1_B4_value2 1_Total_value2 2_B1_value2 2_B2_value2
#1:         761         649         975            323         345         765
#2:         325         761         443            325         540         345
#3:         443         325         761            443         765         540
#   2_B3_value2 2_B4_value2 2_Total_value2
#1:         169         875            431
#2:         765         540            345
#3:         345         765            540

我们还可以从原始数据集中获取宽格式

# Wide format straight from the original table: spread Status across
# the four value columns (columns 4 to 7 of dat).
dcast(
  setDT(dat),
  FID + IID ~ Status,
  value.var = c("Value1.G1", "Value2.G1", "Value1.G2", "Value2.G2")
)
#   FID IID B1_Value1.G1 B2_Value1.G1 B3_Value1.G1 B4_Value1.G1 Total_Value1.G1
#1: K541 789          875          323         8039        830          849
#2: N123 123          888          345          765          875          875
#3: N123 456          323          888          345          765          875
#   B1_Value2.G1 B2_Value2.G1 B3_Value2.G1 B4_Value2.G1 Total_Value2.G1
#1:          325          761          649          975             323
#2:          443          325          761          443             325
#3:          761          443          325          761             443
#    B1_Value1.G2 B2_Value1.G2 B3_Value1.G2 B4_Value1.G2 Total_Value1.G2
#1:          446          345          765          875             323
#2:          446          345          765          875             323
#3:          446          345          765          875             323
#   B1_Value2.G2 B2_Value2.G2 B3_Value2.G2 B4_Value2.G2 Total_Value2.G2
#1:          345          765          169          875             431
#2:          540          345          765          540             345
#3:          765          540          345          765             540

更新

只是用OP的预期输出(“form1”)再核对一下结果

   # Join the melted result to form1 on the shared key columns so the
   # value1/value2 and Value1/Value2 columns can be compared side by side.
   # NOTE(review): form1 is not defined in this snippet; presumably it is
   # the asker's expected first-form table.
   merge(
     dM, form1,
     by = c("FID", "IID", "Gel", "Status"),
     suffixes = c("akrun", "real")
   )
#      FID IID Status Gel value1 value2 Value1 Value2
#  1: K541 789     B1   1    875    325    875    325
#  2: K541 789     B2   1    323    761    323    761
#  3: K541 789     B3   1   8039    649   8039    649
#  4: K541 789     B4   1    830    975    830    975   
#  5: K541 789  Total   1    849    323    849    323
#  6: K541 789     B1   2    446    345    446    345
#  7: K541 789     B2   2    345    765    345    765
#  8: K541 789     B3   2    765    169    765    169
#  9: K541 789     B4   2    875    875    875    875
# 10: K541 789  Total   2    323    431    323    431
# 11: N123 123     B1   1    888    443    888    443
# 12: N123 123     B2   1    345    325    345    325
# 13: N123 123     B3   1    765    761    765    761
# 14: N123 123     B4   1    875    443    875    443
# 15: N123 123  Total   1    875    325    875    325
# 16: N123 123     B1   2    446    540    446    540
# 17: N123 123     B2   2    345    345    345    345
# 18: N123 123     B3   2    765    765    765    765
# 19: N123 123     B4   2    875    540    875    540
# 20: N123 123  Total   2    323    345    323    345
# 21: N123 456     B1   1    323    761    323    761
# 22: N123 456     B2   1    888    443    888    443
# 23: N123 456     B3   1    345    325    345    325
# 24: N123 456     B4   1    765    761    765    761
# 25: N123 456  Total   1    875    443    875    443
# 26: N123 456     B1   2    446    765    446    765
# 27: N123 456     B2   2    345    540    345    540
# 28: N123 456     B3   2    765    345    765    345
# 29: N123 456     B4   2    875    765    875    765
# 30: N123 456  Total   2    323    540    323    540