我有一个包含两个向量的22个列表的列表。每个矢量在第二列表中具有相等的长度,但是相对于彼此具有不相等的长度。请参见最后输出的输出。
我想通过(i)找到最长的向量来构建data.frame,(ii)将2个向量列表中的所有其他第1个向量与最长向量中的第一个进行比较,并用NA值填充任何遗漏。我想在data.frame中将得到的第二个向量(包括它们的NA)组合在一起。
(数据集是相对保留时间和百分比面积。我想将所有色谱图相互比较)。
我可以使用以下代码回答我的问题的第一部分(i),
max(sapply(lapply(A, function(x) x[1,]),length))
告诉我列表中的最后一个数据集是最长的。
现在我想坚持使用动态时间扭曲(dtw())包并应用这里给出的答案(No 3)dtw link,但我发现它似乎只适用于相同的向量长度。
structure(list(chrom_a = structure(c(0.4663, 11.16, 0.4955, 0.82,
0.8744, 4.92, 0.9101, 1.55, 1, 73.91, 1.2862, 5, 1.3198, 1.06,
1.4302, 1.57), .Dim = c(2L, 8L), .Dimnames = list(c("rel.ret",
"rel.area"), c("2", "3", "4", "5", "6", "7", "8", "9"))), chrom_b =structure(c(0.465,
8.6, 0.4938, 0.64, 0.8714, 5.1, 0.907, 1.49, 1, 76.22, 1.2813,
4.92, 1.3144, 1.03, 1.3704, 0.56, 1.4245, 1.45), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("3", "4", "5",
"6", "7", "8", "9", "10", "11"))), chrom_c = structure(c(0.4654,
7.58, 0.4943, 0.58, 0.8724, 5.12, 0.9082, 1.51, 1, 77.05, 1.2835,
4.97, 1.3168, 1.07, 1.3732, 0.57, 1.4272, 1.54), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("2", "3", "4",
"5", "6", "7", "8", "9", "10"))), chrom_d = structure(c(0.3841,
0.28, 0.4638, 6.07, 0.4926, 0.48, 0.7903, 0.29, 0.8694, 4.79,
0.9049, 1.38, 1, 67, 1.2783, 4.62, 1.3116, 0.95, 1.3681, 0.51,
1.4215, 13.29, 3.5744, 0.35), .Dim = c(2L, 12L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14"))), chrom_e = structure(c(0.5304,
4.68, 0.8727, 5.34, 0.9086, 1.57, 1, 79.92, 1.2838, 5.22, 1.3168,
1.13, 1.3742, 0.58, 1.4279, 1.57), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("2", "3", "4", "5", "6", "7",
"8", "9"))), chrom_f = structure(c(0.3718, 0.36, 0.4628,
3.38, 0.4915, 0.32, 0.789, 0.42, 0.8685, 6.15, 0.904, 1.72, 0.9399,
0.34, 1, 74.63, 1.0651, 0.5, 1.1014, 0.35, 1.1266, 0.29, 1.1609,
0.41, 1.2108, 0.32, 1.2774, 6.06, 1.3103, 1.36, 1.3667, 0.96,
1.4201, 1.63, 3.5693, 0.48, 3.6497, 0.34), .Dim = c(2L, 19L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21"))), chrom_g = structure(c(0.465, 1.44, 0.8708,
5.4, 0.9064, 1.53, 1, 82.64, 1.2805, 5.3, 1.3134, 1.16, 1.37,
0.55, 1.4239, 1.54, 3.5792, 0.44), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_h = structure(c(0.4644, 7.08, 0.4932,
0.55, 0.8708, 5.15, 0.9064, 1.49, 1, 77.57, 1.2805, 5.04, 1.3142,
1.08, 1.3704, 0.53, 1.4245, 1.51), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_i = structure(c(0.4641, 6.82, 0.493,
0.54, 0.8714, 5.24, 0.9075, 1.46, 1, 78.08, 1.2826, 4.91, 1.3164,
1.02, 1.3722, 0.51, 1.4266, 1.41), .Dim = c(2L, 9L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11"))), chrom_j = structure(c(74.173, 31.69,
0.3729, 0.25, 0.4637, 4.36, 0.4925, 0.35, 0.7904, 0.21, 0.8697,
3.9, 0.9055, 1.06, 1, 52.76, 1.279, 3.56, 1.3119, 0.69, 1.369,
0.3, 1.4224, 0.87), .Dim = c(2L, 12L), .Dimnames = list(c("rel.ret",
"rel.area"), c("2", "3", "4", "5", "6", "7", "8", "9", "10",
"11", "12", "13"))), chrom_k = structure(c(0.4661, 5.91, 0.8743,
5.23, 0.9105, 1.53, 1, 79.89, 1.2863, 5.11, 1.3822, 0.99, 1.0169,
1.33), .Dim = c(2L, 7L), .Dimnames = list(c("rel.ret", "rel.area"
), c("2", "3", "4", "5", "6", "7", "8"))), chrom_l = structure(logical(0), .Dim = c(2L,
0L), .Dimnames = list(c("rel.ret", "rel.area"), NULL)), chrom_m = structure(logical(0), .Dim = c(2L,
0L), .Dimnames = list(c("rel.ret", "rel.area"), NULL)), chrom_n = structure(c(0.3793,
0.37, 0.4604, 6.12, 0.4896, 0.49, 0.8709, 8.41, 0.9072, 1.46,
1, 76.51, 1.2844, 4.75, 1.3188, 0.83, 1.429, 1.06), .Dim = c(2L,
9L), .Dimnames = list(c("rel.ret", "rel.area"), c("3", "4", "5",
"6", "7", "8", "9", "10", "11"))), chrom_o = structure(c(0.1252,
43.23, 0.4625, 2.06, 0.8728, 3, 0.9094, 0.88, 1, 46.69, 1.2865,
2.91, 1.3209, 0.54, 1.4306, 0.7), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("2", "3", "4", "5", "6", "7",
"8", "9"))), chrom_p = structure(c(0.1248, 31.52, 0.3796,
0.23, 0.4608, 1.18, 0.7891, 0.25, 0.8687, 3.88, 0.905, 1.04,
1, 55.93, 1.2808, 3.72, 1.3149, 0.8, 1.3709, 0.38, 1.4244, 1.06
), .Dim = c(2L, 11L), .Dimnames = list(c("rel.ret", "rel.area"
), c("2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"
))), chrom_q = structure(c(0.4618, 0.74, 0.8702, 5.49, 0.9062,
1.44, 1, 83.93, 1.2837, 5.21, 1.3181, 1.13, 1.3738, 0.53, 1.428,
1.53), .Dim = c(2L, 8L), .Dimnames = list(c("rel.ret", "rel.area"
), c("3", "4", "5", "6", "7", "8", "9", "10"))), chrom_r = structure(c(0.8715,
5.39, 0.9072, 1.4, 1, 84.41, 1.2821, 5.18, 1.3151, 1.14, 1.3722,
0.63, 1.4258, 1.46, 3.5918, 0.39), .Dim = c(2L, 8L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10"))), chrom_s = structure(c(0.3722, 0.38, 0.4636,
6.12, 0.492, 0.5, 0.7898, 0.42, 0.8687, 5.94, 0.9047, 1.69, 0.9403,
0.33, 1, 73.12, 1.0642, 0.44, 1.1033, 0.32, 1.1276, 0.25, 1.162,
0.33, 1.2787, 5.87, 1.3119, 1.32, 1.3685, 0.76, 1.422, 1.5, 3.5817,
0.42, 3.662, 0.3), .Dim = c(2L, 18L), .Dimnames = list(c("rel.ret",
"rel.area"), c("3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20"))), chrom_t = structure(c(0.3738,
0.37, 0.4651, 1.51, 0.7917, 0.34, 0.8711, 5.49, 0.9071, 1.51,
1, 81.91, 1.2834, 5.33, 1.3166, 1.13, 1.3736, 0.57, 1.4283, 1.41,
3.6102, 0.43), .Dim = c(2L, 11L), .Dimnames = list(c("rel.ret",
"rel.area"), c("3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13"))), chrom_u = structure(c(0.3844, 0.38, 0.4647, 2.2,
0.7909, 0.37, 0.8699, 5.99, 0.906, 1.63, 1, 79.36, 1.2817, 5.86,
1.3149, 1.23, 1.372, 0.67, 1.4257, 1.56, 3.6037, 0.45, 3.684,
0.3), .Dim = c(2L, 12L), .Dimnames = list(c("rel.ret", "rel.area"
), c("3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14"))), chrom_v = structure(c(0.3836, 0.45, 0.4644, 0.62, 0.7894,
0.58, 0.8685, 7.17, 0.9043, 1.85, 0.941, 0.43, 1, 73.3, 1.0663,
0.31, 1.0849, 0.23, 1.1035, 0.4, 1.1284, 0.31, 1.1625, 0.5, 1.2133,
0.36, 1.2381, 0.32, 1.2795, 7.2, 1.3126, 1.64, 1.37, 1.13, 1.423,
2.05, 3.5963, 0.71, 3.6776, 0.45), .Dim = c(2L, 20L), .Dimnames = list(
c("rel.ret", "rel.area"), c("3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21", "22")))), .Names = c("chrom_a", "chrom_b",
"chrom_c", "chrom_d", "chrom_e", "chrom_f", "chrom_g", "chrom_h",
"chrom_i", "chrom_j", "chrom_k", "chrom_l", "chrom_m", "chrom_n",
"chrom_o", "chrom_p", "chrom_q", "chrom_r", "chrom_s", "chrom_t",
"chrom_u", "chrom_v"))
答案 0 :(得分:2)
mx <- tail(colnames(A[[which.max(lengths(A))]]),1)
newA <- lapply(A, function(x) apply(x, 1, function(x) {length(x) <- mx;x}))
这会将行的长度扩展到最大行长度。
答案 1 :(得分:2)
这是一个建议,我认为提供了所需的输出(如果我正确理解任务)。
longest_vec <- A[[which.max(lengths(A))]][1,]
m <- matrix(NA, ncol = as.integer(names(longest_vec[length(longest_vec)])), nrow = length(A))
for (i in 1:nrow(m)) m[i, as.integer(colnames(A[[i]]))] <- unname(A[[i]][2,])
df <- as.data.frame(m)
df <- df[,-1]
colnames(df) <- c(2, names(longest_vec))
#> head(df)
# 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#1 11.16 0.82 4.92 1.55 73.91 5.00 1.06 1.57 NA NA NA NA NA NA NA NA NA NA NA NA NA
#2 NA 8.60 0.64 5.10 1.49 76.22 4.92 1.03 0.56 1.45 NA NA NA NA NA NA NA NA NA NA NA
#3 7.58 0.58 5.12 1.51 77.05 4.97 1.07 0.57 1.54 NA NA NA NA NA NA NA NA NA NA NA NA
#4 NA 0.28 6.07 0.48 0.29 4.79 1.38 67.00 4.62 0.95 0.51 13.29 0.35 NA NA NA NA NA NA NA NA
#5 4.68 5.34 1.57 79.92 5.22 1.13 0.58 1.57 NA NA NA NA NA NA NA NA NA NA NA NA NA
#6 NA 0.36 3.38 0.32 0.42 6.15 1.72 0.34 74.63 0.50 0.35 0.29 0.41 0.32 6.06 1.36 0.96 1.63 0.48 0.34 NA
希望这有帮助。
PS:我从@Pierre Lafortune的优秀评论中偷走了代码的第一行。
答案 2 :(得分:2)
我以与你最初追求的方式略有不同的方式解决了这个问题,但目标却相同。我的假设是您有一个色谱图列表,其中包含具有保留时间和面积的峰,并且您希望最终能够比较不同色谱图中的峰。
首先转换为长data.table
:
chroms_long <- rbindlist(lapply(chroms, function(x) {
as.data.table(t(x))
}))
chroms_long[, chrom := rep(names(chroms), lengths(chroms) / 2)]
然后使用分层聚类分组。此调用中的截止h
需要针对基础数据进行更改,但对于此数据集,0.015似乎是正确的。
setkey(chroms_long, rel.ret, chrom)
chroms_long[, group := cutree(hclust(dist(rel.ret)), h = 0.015)]
如果您现在想要为每个可能的色谱图/组添加一行:
setkey(chroms_long, chrom, group)
chroms_long <- chroms_long[CJ(unique(chrom), unique(group))]
如果您还想知道每组的平均保留时间:
chroms_long[, mean_rel.ret := mean(rel.ret, na.rm = TRUE), by = group]
如果您想要宽屏输出:
dcast.data.table(chroms_long, chrom ~ group, value.var = "rel.area")
如果不是组号,您希望将平均保留作为列标题:
dcast.data.table(chroms_long[, .(chrom, col = sprintf("%0.2f", mean_rel.ret), rel.area)],
chrom ~ col, value.var = "rel.area")
如果您想恢复原始格式:
sapply(unique(chroms_long$chrom),
function(x) t(data.matrix(chroms_long[x, .(rel.ret, rel.area)])),
simplify = FALSE)
请注意,这会将sapply
用于USE.NAMES
功能,而不是简化列表中的输出。