我有一个包含成对匹配列的矩阵(例如 124G 和 124R),我想将每一对匹配的列相除。这些列并没有按照匹配的 ID 排序,如下所示。有人能推荐一个可以实现这一点的函数吗?
124G 124R 126R 126G 26R 26G 243G…
hsa-let-7a-5p|hsa-let-7a-1 16659.1964 46129.599 53180.211 37259.453 61108.754 77946.898 87433.948
hsa-let-7a-5p|hsa-let-7a-2 16662.0615 46105.731 53185.511 37191.243 61115.984 77913.992 87379.138
hsa-let-7a-5p|hsa-let-7a-3 16639.4316 45986.194 53043.683 37157.778 60889.781 77855.132 87213.507
hsa-let-7b-5p|hsa-let-7b 8046.0040 16460.859 17660.545 20728.696 25369.899 26558.286 27932.843
hsa-let-7c-5p|hsa-let-7c 495.3461 1143.432 4671.550 1563.926 2078.405 4993.391 3479.066
hsa-let-7d-5p|hsa-let-7d 1682.8678 3879.721 4858.802 3062.387 4577.366 8218.718 7966.320
答案 0 :(得分:3)
尝试
# Group the column names by their numeric ID (the letter suffix is stripped),
# then, for every ID, divide the first column of the sorted pair by the second.
sapply(
  split(colnames(m1), sub('[^0-9]+', '', colnames(m1))),
  function(pair_names) {
    pair <- m1[, sort(pair_names)]
    pair[, 1] / pair[, 2]
  }
)
# 124 126 243 26
#hsa-let-7a-5p|hsa-let-7a-1 0.3611390 0.7006263 1.0606547 1.275544
#hsa-let-7a-5p|hsa-let-7a-2 0.3613881 0.6992740 1.0362319 1.274855
#hsa-let-7a-5p|hsa-let-7a-3 0.3618354 0.7005128 1.0337772 1.278624
#hsa-let-7b-5p|hsa-let-7b 0.4887961 1.1737291 0.9823457 1.046842
#hsa-let-7c-5p|hsa-let-7c 0.4332099 0.3347767 1.0131058 2.402511
#hsa-let-7d-5p|hsa-let-7d 0.4337600 0.6302761 1.0817382 1.795513
或者
library(gtools)
# Reorder the columns by name with mixedorder(), so that the two columns of
# each pair end up next to each other.
indx <- mixedorder(colnames(m1))
m2 <- m1[, indx]
# Columns 1, 3, 5, ... are divided by columns 2, 4, 6, ...
first_of_pair <- m2[, c(TRUE, FALSE)]
second_of_pair <- m2[, c(FALSE, TRUE)]
first_of_pair / second_of_pair
# 26G 124G 126G 243G
#hsa-let-7a-5p|hsa-let-7a-1 1.275544 0.3611390 0.7006263 1.0606547
#hsa-let-7a-5p|hsa-let-7a-2 1.274855 0.3613881 0.6992740 1.0362319
#hsa-let-7a-5p|hsa-let-7a-3 1.278624 0.3618354 0.7005128 1.0337772
#hsa-let-7b-5p|hsa-let-7b 1.046842 0.4887961 1.1737291 0.9823457
#hsa-let-7c-5p|hsa-let-7c 2.402511 0.4332099 0.3347767 1.0131058
#hsa-let-7d-5p|hsa-let-7d 1.795513 0.4337600 0.6302761 1.0817382
或者以上可以写成
# Same division, selecting the odd and even column positions explicitly.
odd_cols <- seq(1, ncol(m2), by = 2)
even_cols <- seq(2, ncol(m2), by = 2)
m2[, odd_cols] / m2[, even_cols]
# Example input: a 6 x 8 numeric matrix. Values are listed column by column;
# the trailing comment on each line names the column it fills.
m1 <- matrix(
  c(
    16659.1964, 16662.0615, 16639.4316, 8046.004, 495.3461, 1682.8678, # 124G
    46129.599, 46105.731, 45986.194, 16460.859, 1143.432, 3879.721,    # 124R
    53180.211, 53185.511, 53043.683, 17660.545, 4671.55, 4858.802,     # 126R
    37259.453, 37191.243, 37157.778, 20728.696, 1563.926, 3062.387,    # 126G
    61108.754, 61115.984, 60889.781, 25369.899, 2078.405, 4577.366,    # 26R
    77946.898, 77913.992, 77855.132, 26558.286, 4993.391, 8218.718,    # 26G
    87433.948, 87379.138, 87213.507, 27932.843, 3479.066, 7966.32,     # 243G
    82433.94, 84323.92, 84363.93, 28434.84, 3434.06, 7364.37           # 243R
  ),
  nrow = 6L,
  ncol = 8L,
  dimnames = list(
    c(
      "hsa-let-7a-5p|hsa-let-7a-1",
      "hsa-let-7a-5p|hsa-let-7a-2",
      "hsa-let-7a-5p|hsa-let-7a-3",
      "hsa-let-7b-5p|hsa-let-7b",
      "hsa-let-7c-5p|hsa-let-7c",
      "hsa-let-7d-5p|hsa-let-7d"
    ),
    c("124G", "124R", "126R", "126G", "26R", "26G", "243G", "243R")
  )
)
答案 1 :(得分:3)
这是另一个以整洁的格式重塑数据的想法。它考虑了不匹配的列。
对于所有匹配的列,下面的代码会将 G 除以 R(按字母顺序);运行结果列在代码之后:
library(dplyr)
library(tidyr)
# Reshape the data to long form, split every column name into its numeric ID
# and letter suffix, then compute one ratio per (row, ID) pair and spread the
# IDs back out into columns.
as.data.frame(m2) %>%
  # add_rownames() is deprecated; as_tibble(rownames = ) keeps the row names
  # in an "id" column instead.
  as_tibble(rownames = "id") %>%
  # pivot_longer() / pivot_wider() replace the superseded gather() / spread().
  pivot_longer(-id, names_to = "key", values_to = "value") %>%
  extract(key, into = c("number", "letter"), "([0-9]+)([A-Z]+)") %>%
  # Sorting by letter puts G before R, so value / lead(value) is G / R.
  arrange(id, number, letter) %>%
  group_by(id, number) %>%
  # The last row of each group has lead() == NA; with na.rm = TRUE an
  # unmatched column (a group of one row) therefore sums to 0.
  summarise(value = sum(value / lead(value), na.rm = TRUE)) %>%
  pivot_wider(names_from = number, values_from = value)
#Source: local data frame [6 x 5]
#
# id 124 126 243 26
#1 hsa-let-7a-5p|hsa-let-7a-1 0.3611390 0.7006263 0 1.275544
#2 hsa-let-7a-5p|hsa-let-7a-2 0.3613881 0.6992740 0 1.274855
#3 hsa-let-7a-5p|hsa-let-7a-3 0.3618354 0.7005128 0 1.278624
#4 hsa-let-7b-5p|hsa-let-7b 0.4887961 1.1737291 0 1.046842
#5 hsa-let-7c-5p|hsa-let-7c 0.4332099 0.3347767 0 2.402511
#6 hsa-let-7d-5p|hsa-let-7d 0.4337600 0.6302761 0 1.795513
注意:对于所有匹配的列,这会将 G 除以 R(按字母顺序),并为不匹配的列返回 0(如果需要,您可以轻松地用 NA 替换它们)。如果您想将 R 除以 G,请使用 lag() 代替 lead()。
数据
答案 2 :(得分:3)
base R 解决方案,这里没什么特别的。我们先对列进行排序:因为所有列都有匹配,排序后每个 G 列后面都会紧跟其对应的 R 列。然后我们用 grep 分别取出列名中含 G 和含 R 的列,再将二者相除。
# Sort the columns by name so that every G column is immediately followed by
# its matching R column. The original line was missing a closing parenthesis
# and the comma that makes this a column (not element) selection.
m1 <- m1[, sort(colnames(m1)), drop = FALSE]
# Pull out the G and R columns; after sorting they are in the same ID order.
# drop = FALSE keeps the result a matrix even when only one pair exists.
m1_g <- m1[, grep("G", colnames(m1)), drop = FALSE]
m1_r <- m1[, grep("R", colnames(m1)), drop = FALSE]
# Element-wise G / R; the result keeps the column names of m1_g.
m1_new <- m1_g / m1_r
m1_new
124G 126G 26G 243G
hsa-let-7a-5p|hsa-let-7a-1 0.3611390 0.7006263 1.275544 1.0606547
hsa-let-7a-5p|hsa-let-7a-2 0.3613881 0.6992740 1.274855 1.0362319
hsa-let-7a-5p|hsa-let-7a-3 0.3618354 0.7005128 1.278624 1.0337772
hsa-let-7b-5p|hsa-let-7b 0.4887961 1.1737291 1.046842 0.9823457
hsa-let-7c-5p|hsa-let-7c 0.4332099 0.3347767 2.402511 1.0131058
hsa-let-7d-5p|hsa-let-7d 0.4337600 0.6302761 1.795513 1.0817382