根据一列和不同的相似部分重新排列数据

时间:2015-05-20 08:26:36

标签: r

我的数据如下所示:

   1    ID_17550873     1,211752129
   2    ID_17550873     1,202295466
   3    ID_17550873     1,193705936
   4    ID_17550873     1,184065855
   5    ID_17550873     1,174741145
   6    ID_17550873     1,165991596
  54    ID_17550873.1   1,213821847
  85    ID_17550873.1   1,204415594
  82    ID_17550873.1   1,195891404
2565    ID_17550873.1   1,186352148
 236    ID_17550873.1   1,177122961
   5    ID_17550873.1   1,168398464
2123    ID_17550875     1,194476935
2124    ID_17550875     1,185464218
2125    ID_17550875     1,177338098
2126    ID_17550875     1,168257804
2127    ID_17550875     1,159501092
2128    ID_17550875     1,15130918
  25    ID_17550875.1   1,192470951
2569    ID_17550875.1   1,183321493
 512    ID_17550875.1   1,175383173
  36    ID_17550875.1   1,166816022
 578    ID_17550875.1   1,158762364
 856    ID_17550875.1   1,151287835


# Sample data as produced by dput(): a 23-row data.frame with an integer
# column, a factor of IDs (4 levels) and a numeric value column.
# NOTE(review): the column names ("X1", "ID_17550873", "X1.211752129") look
# like the first data row was consumed as a header when the file was read,
# which would explain why there are 23 rows here but 24 in the listing above
# (the first ID has only 5 rows here) -- confirm against the import call.
# The value column is numeric here, whereas the listing shows comma decimals.
df<- structure(list(X1 = c(2L, 3L, 4L, 5L, 6L, 54L, 85L, 82L, 2565L, 
236L, 5L, 2123L, 2124L, 2125L, 2126L, 2127L, 2128L, 25L, 2569L, 
512L, 36L, 578L, 856L), ID_17550873 = structure(c(1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 4L), .Label = c("ID_17550873", "ID_17550873.1", "ID_17550875", 
"ID_17550875.1"), class = "factor"), X1.211752129 = c(1.202295466, 
1.193705936, 1.184065855, 1.174741145, 1.165991596, 1.213821847, 
1.204415594, 1.195891404, 1.186352148, 1.177122961, 1.168398464, 
1.194476935, 1.185464218, 1.177338098, 1.168257804, 1.159501092, 
1.15130918, 1.192470951, 1.183321493, 1.175383173, 1.166816022, 
1.158762364, 1.151287835)), .Names = c("X1", "ID_17550873", "X1.211752129"
), class = "data.frame", row.names = c(NA, -23L))

我想让它像下面那样

ID_17550873 1,211752129 1,213821847 ID_17550875 1,194476935 1,192470951
ID_17550873 1,202295466 1,204415594 ID_17550875 1,185464218 1,183321493
ID_17550873 1,193705936 1,195891404 ID_17550875 1,177338098 1,175383173
ID_17550873 1,184065855 1,186352148 ID_17550875 1,168257804 1,166816022
ID_17550873 1,174741145 1,177122961 ID_17550875 1,159501092 1,158762364
ID_17550873 1,165991596 1,168398464 ID_17550875 1,15130918  1,151287835

加载数据后,我删除了第一列。然后我保留所有 "ID_17550873" 及其对应的值,删除所有 "ID_17550873.1" 标签,并把 "ID_17550873.1" 对应的值放到 "ID_17550873" 所在的行中(如上面的期望输出所示)。

然后我对其他 ID 做同样的处理。

1 个答案:

答案 0 :(得分:0)

这是一个选项,虽然我不清楚为什么你会想要使用这样的格式 - 我看不出任何优势。

library(dplyr) # for bind_cols

# Split the data (minus its first column) into one data frame per ID and bind
# the pieces side by side. The grouping column is selected by position, in
# line with the positional df[-1], so the code does not depend on the
# auto-generated column name "V2".
# NOTE(review): bind_cols() matches rows by position, so every ID group must
# have the same number of rows.
dd <- bind_cols(split(df[-1], df[[2]]))

# Every pair of IDs contributes four columns (ID, value, ID.1, value.1).
# Drop the third column of each group of four, i.e. the redundant "ID.1"
# label column. seq_len() stays correct even when dd has zero columns.
dd[seq_len(ncol(dd)) %% 4 != 3]
#           V2          V3        V3.1        V2.1        V3.2        V3.3
#1 ID_17550873 1,211752129 1,213821847 ID_17550875 1,194476935 1,192470951
#2 ID_17550873 1,202295466 1,204415594 ID_17550875 1,185464218 1,183321493
#3 ID_17550873 1,193705936 1,195891404 ID_17550875 1,177338098 1,175383173
#4 ID_17550873 1,184065855 1,186352148 ID_17550875 1,168257804 1,166816022
#5 ID_17550873 1,174741145 1,177122961 ID_17550875 1,159501092 1,158762364
#6 ID_17550873 1,165991596 1,168398464 ID_17550875  1,15130918 1,151287835

如果您事先不知道列的数量,也不知道以 ".X" 结尾的 ID 列位于哪些位置,可以使用以下方法:

# Same split-and-bind step as in the first snippet; the grouping column is
# selected by position so it does not depend on the column name "V2".
dd <- bind_cols(split(df[-1], df[[2]]))

# Drop every column holding at least one value that ends in "." followed by a
# single character (e.g. "ID_17550873.1"). vapply() always returns a logical
# vector, even for zero columns, which sapply() does not guarantee.
# NOTE(review): the pattern is tested against every column, so a value column
# containing something like "1.5" would be dropped as well -- confirm the
# values never end in ".<one character>".
dd[!vapply(dd, function(x) any(grepl("\\..$", x)), logical(1))]