我有一组项目,分布在5个列表中。我所做的是构造一个矩阵(tt_matrix),用来统计同时出现在5个列表之一(第1列到第5列)和另一个相应列表(第1行到第5行)中的项目数量。在我给出的示例中,基本上就是这5个列表与相同的5个列表两两比较。是否有更优雅/更快速的填充矩阵的方法,也许不需要3个for循环(参见下面的代码)?
# Square count matrix with one row and one column per element of tlist_sub,
# initialised to zero.
tt_matrix <- matrix(0, nrow = length(tlist_sub), ncol = length(tlist_sub))
# Environment used as a hash lookup table.
# (A stray trailing backtick on this line previously made the script
# unparseable.)
tdf_env_sub <- new.env()
# Names used as keys when the bins are stored in the lookup table.
tdf_names_sub <- c("0_754", "755_859", "860_1119", "1120_1839", "1840_2330")
# All distinct reads across every element of tlist_sub.
unq_rds <- unique(unlist(tlist_sub))
tlist_sub:
[[1]]
[1] "HWI-M02293:152:000000000-AH78U:1:2110:14509:11072" "HWI-M02293:152:000000000-AH78U:1:2113:20433:7925"
[3] "HWI-M02293:152:000000000-AH78U:1:2113:20433:7925" "HWI-M02293:152:000000000-AH78U:1:1104:25512:15827"
[5] "HWI-M02293:152:000000000-AH78U:1:1109:18636:20589" "HWI-M02293:152:000000000-AH78U:1:2112:8742:14213"
[7] "HWI-M02293:152:000000000-AH78U:1:1109:19363:25050" "HWI-M02293:152:000000000-AH78U:1:2101:20792:17286"
[9] "HWI-M02293:152:000000000-AH78U:1:1107:10609:25094" "HWI-M02293:152:000000000-AH78U:1:1108:2957:17646"
[[2]]
[1] "HWI-M02293:152:000000000-AH78U:1:2110:14509:11072" "HWI-M02293:152:000000000-AH78U:1:2113:20433:7925"
[3] "HWI-M02293:152:000000000-AH78U:1:2113:20433:7925" "HWI-M02293:152:000000000-AH78U:1:1104:25512:15827"
[5] "HWI-M02293:152:000000000-AH78U:1:1109:18636:20589" "HWI-M02293:152:000000000-AH78U:1:2112:8742:14213"
[7] "HWI-M02293:152:000000000-AH78U:1:1109:19363:25050" "HWI-M02293:152:000000000-AH78U:1:2101:20792:17286"
[9] "HWI-M02293:152:000000000-AH78U:1:1107:10609:25094" "HWI-M02293:152:000000000-AH78U:1:1108:2957:17646"
[[3]]
[1] "HWI-M02293:152:000000000-AH78U:1:1107:16335:18024" "HWI-M02293:152:000000000-AH78U:1:1111:13719:2995"
[3] "HWI-M02293:152:000000000-AH78U:1:1113:28025:20668" "HWI-M02293:152:000000000-AH78U:1:2114:7408:16585"
[5] "HWI-M02293:152:000000000-AH78U:1:2113:26942:12013" "HWI-M02293:152:000000000-AH78U:1:1111:23385:5935"
[7] "HWI-M02293:152:000000000-AH78U:1:2110:14585:3232" "HWI-M02293:152:000000000-AH78U:1:2113:14890:10443"
[9] "HWI-M02293:152:000000000-AH78U:1:2114:12846:16485" "HWI-M02293:152:000000000-AH78U:1:1102:19118:14376"
[[4]]
[1] "HWI-M02293:152:000000000-AH78U:1:1109:11972:17459" "HWI-M02293:152:000000000-AH78U:1:1109:6800:24111"
[3] "HWI-M02293:152:000000000-AH78U:1:1110:10908:18899" "HWI-M02293:152:000000000-AH78U:1:2101:20791:9350"
[5] "HWI-M02293:152:000000000-AH78U:1:1109:16168:10651" "HWI-M02293:152:000000000-AH78U:1:2101:20791:9350"
[7] "HWI-M02293:152:000000000-AH78U:1:1102:26099:21517" "HWI-M02293:152:000000000-AH78U:1:1113:24350:7463"
[9] "HWI-M02293:152:000000000-AH78U:1:1101:19654:26608" "HWI-M02293:152:000000000-AH78U:1:1114:12659:5227"
[[5]]
[1] "HWI-M02293:152:000000000-AH78U:1:1105:21509:10358" "HWI-M02293:152:000000000-AH78U:1:2104:26287:12446"
[3] "HWI-M02293:152:000000000-AH78U:1:2109:12091:22350" "HWI-M02293:152:000000000-AH78U:1:2112:12838:7167"
[5] "HWI-M02293:152:000000000-AH78U:1:1105:21509:10358" "HWI-M02293:152:000000000-AH78U:1:2104:26287:12446"
[7] "HWI-M02293:152:000000000-AH78U:1:2109:12091:22350" "HWI-M02293:152:000000000-AH78U:1:1112:21242:8988"
[9] "HWI-M02293:152:000000000-AH78U:1:1112:4191:7532" "HWI-M02293:152:000000000-AH78U:1:1112:21242:8988"
填写哈希表:
# Store every bin in the lookup environment, keyed by its name
for (item in seq_along(tdf_names_sub)) {
  assign(tdf_names_sub[item], tlist_sub[[item]], envir = tdf_env_sub)
}
填写矩阵:
# For every pair of bins, count how many unique reads occur in both.
#
# Build a reads-by-bins 0/1 membership matrix with one vectorised %in% per
# bin, then let crossprod() sum the pairwise co-occurrences. This avoids
# looping over every read x bin x bin combination, so no progress bar is
# needed.
bin_idx <- seq_along(tdf_names_sub)
membership <- vapply(
  tdf_names_sub,
  function(bin_name) as.integer(unq_rds %in% tdf_env_sub[[bin_name]]),
  integer(length(unq_rds)),
  USE.NAMES = FALSE
)
# vapply() returns a plain vector when there is exactly one unique read, so
# set the reads-by-bins shape explicitly.
dim(membership) <- c(length(unq_rds), length(tdf_names_sub))
# crossprod(m) is t(m) %*% m: entry [x, y] is the number of unique reads that
# are present in both bin x and bin y. Accumulate onto the counts already held
# in tt_matrix.
tt_matrix[bin_idx, bin_idx] <-
  tt_matrix[bin_idx, bin_idx] + crossprod(membership)
输出:
tt_matrix
[,1] [,2] [,3] [,4] [,5]
[1,] 9 9 0 0 0
[2,] 9 9 0 0 0
[3,] 0 0 10 0 0
[4,] 0 0 0 9 0
[5,] 0 0 0 0 6