我有一个很大的电子表格,表格的左侧和右侧都有镜面反射值。我只想选择表左侧和右侧的两个要素均为1的行(我在列中具有1/0的值)。
我正在尝试将subset()
用于我感兴趣的值,但会保留一些不应在最终表中的行。
这是脚本有问题的地方:
library(tidyverse)
entire_table_sorted <- structure(list(Promoter.id1 = c(0L, 0L, 0L, 0L, 0L, 0L), Promoter_flanking_region.id1 = c(0L,
0L, 0L, 1L, 1L, 1L), Enhancer.id1 = c(0L, 0L, 0L, 0L, 0L, 0L),
FLI1.id1 = c(0L, 0L, 0L, 0L, 0L, 0L), GATA1.id1 = c(0L, 0L,
0L, 0L, 0L, 0L), GATA2.id1 = c(0L, 0L, 0L, 0L, 0L, 0L), TAL1.id1 = c(0L,
0L, 0L, 0L, 0L, 0L), CTCF.id1 = c(0L, 0L, 0L, 0L, 0L, 0L),
Exons.id1 = c(1L, 1L, 1L, 1L, 1L, 1L), Intron.id1 = c(0L,
0L, 0L, 0L, 0L, 0L), Promoter.id2 = c(1L, 1L, 1L, 0L, 0L,
1L), Promoter_flanking_region.id2 = c(0L, 0L, 0L, 0L, 1L,
0L), Enhancer.id2 = c(1L, 1L, 1L, 0L, 0L, 1L), FLI1.id2 = c(0L,
0L, 0L, 0L, 0L, 0L), GATA1.id2 = c(0L, 0L, 0L, 0L, 0L, 0L
), GATA2.id2 = c(0L, 0L, 0L, 0L, 0L, 0L), TAL1.id2 = c(0L,
0L, 0L, 0L, 0L, 0L), CTCF.id2 = c(1L, 0L, 0L, 0L, 0L, 0L),
Exons.id2 = c(1L, 1L, 0L, 1L, 0L, 1L), Intron.id2 = c(0L,
0L, 1L, 0L, 0L, 0L), Distance_bait_prey = c(18678L, 26712L,
50140L, 62893L, 6848L, 43508L)), row.names = c(NA, 6L), class = "data.frame")
to_loop.1 <- c("Promoter.id1","Promoter_flanking_region.id1","Enhancer.id1","FLI1.id1","GATA1.id1","GATA2.id1","TAL1.id1","CTCF.id1","Exons.id1","Intron.id1")
to_loop.2 <- c("Promoter.id2","Promoter_flanking_region.id2","Enhancer.id2","FLI1.id2","GATA1.id2","GATA2.id2","TAL1.id2","CTCF.id2","Exons.id2","Intron.id2")
list_of_plots_FtoF <- list()
for (name1 in to_loop.1) {
df_var <- subset(entire_table_sorted, get(name1) == 1 & # subset to have all feature on the left side is 1
c(get(to_loop.1[-grep(name1, to_loop.1)])) == 0 & # and all the "non feature" columns on the left side to be 0
get(to_loop.2[grep(str_sub(name1, 1,-2), to_loop.2)]) == 1 & # the same feature on the right side of the spreadsheet has to be one)
get(to_loop.2[-grep(str_sub(name1, 1,-2), to_loop.2)]) == 0) # the other "non features" columns on the right side have to be 0
assign(paste("df", name1, sep = "_"), df_var)
list_of_plots_FtoF[[paste("df", name1, sep = "_")]] <- df_var
}
变量都是整数:
> str(entire_table_sorted)
'data.frame': 3109 obs. of 21 variables:
$ Promoter.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ Promoter_flanking_region.id1: int 0 0 0 1 1 1 1 1 1 1 ...
$ Enhancer.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ FLI1.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ GATA1.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ GATA2.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ TAL1.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ CTCF.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ Exons.id1 : int 1 1 1 1 1 1 1 1 1 1 ...
$ Intron.id1 : int 0 0 0 0 0 0 0 0 0 0 ...
$ Promoter.id2 : int 1 1 1 0 0 1 0 0 0 0 ...
$ Promoter_flanking_region.id2: int 0 0 0 0 1 0 0 0 1 0 ...
$ Enhancer.id2 : int 1 1 1 0 0 1 0 0 0 0 ...
$ FLI1.id2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ GATA1.id2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ GATA2.id2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ TAL1.id2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ CTCF.id2 : int 1 0 0 0 0 0 0 0 0 0 ...
$ Exons.id2 : int 1 1 0 1 0 1 0 0 0 0 ...
$ Intron.id2 : int 0 0 1 0 0 0 1 1 1 0 ...
$ Distance_bait_prey : int 18678 26712 50140 62893 6848 43508 107138 104417 79328 22663 ...
我的输入类似:
col_A1 col_B1 col_C1 col_A2 col_B2 col_C2
1 0 0 0 1 0
0 1 1 1 0 0
1 0 0 1 0 0
1 1 1 1 1 1
并且在我将col_A1和col_A2过滤为1的情况下,我想要类似的东西:
col_A1 col_B1 col_C1 col_A2 col_B2 col_C2
1 0 0 1 0 0
如果您运行脚本并查看列表list_of_plots_FtoF
,则即使对于list_of_plots_FtoF$df_Promoter_flanking_region.id1
或list_of_plots_FtoF$df_Exons.id1
,您也会看到它们具有以下输出:
$df_Promoter_flanking_region.id1
Promoter.id1 Promoter_flanking_region.id1 Enhancer.id1 FLI1.id1 GATA1.id1 GATA2.id1 TAL1.id1 CTCF.id1 Exons.id1 Intron.id1
5 0 1 0 0 0 0 0 0 1 0
Promoter.id2 Promoter_flanking_region.id2 Enhancer.id2 FLI1.id2 GATA1.id2 GATA2.id2 TAL1.id2 CTCF.id2 Exons.id2 Intron.id2
5 0 1 0 0 0 0 0 0 0 0
Distance_bait_prey
5 6848
$df_Exons.id1
Promoter.id1 Promoter_flanking_region.id1 Enhancer.id1 FLI1.id1 GATA1.id1 GATA2.id1 TAL1.id1 CTCF.id1 Exons.id1 Intron.id1
4 0 1 0 0 0 0 0 0 1 0
Promoter.id2 Promoter_flanking_region.id2 Enhancer.id2 FLI1.id2 GATA1.id2 GATA2.id2 TAL1.id2 CTCF.id2 Exons.id2 Intron.id2
4 0 0 0 0 0 0 0 0 1 0
Distance_bait_prey
4 62893
因此,它们不仅过滤掉预期列(即Promoter_flanking_region.id1和Promoter_flanking_region.id2或df_Exons.id1和df_Exons.id2)中带有1的行,而且还过滤掉了其他列。