Question

我想按名称提取列名中包含 PVALUE 的列，并同时保留第一列。我可以使用 df1 <- df[, c(1, seq(2, ncol(df), 3))] 按位置提取，但我担心数据的列顺序不一定始终相同。

# dput() output of the example data frame: 9 rows, with an ID.REF factor
# column followed by three PVALUE / INTENSITY / CALL column triplets
# (suffixes 1-3). CALL columns are factors with levels "A" and "P".
structure(list(ID.REF = structure(1:9, .Label = c("10071_s_at", 
"1053_at", "117_at", "1255_g_at", "1294_at", "1320_at", "1405_i_at", 
"14312_at", "1438_at"), class = "factor"), PVALUE1 = c(0.000219, 
0.000673, 0.000322, 0.602006, 0.000468, 0.204022, 0.000491, 0.003067, 
0.000562), INTENSITY1 = c(3473.6, 643.2, 564, 9.4, 845.6, 94.3, 
6546.2, 54.1, 461.3), CALL1 = structure(c(2L, 2L, 2L, 1L, 2L, 
1L, 2L, 2L, 2L), .Label = c("A", "P"), class = "factor"), PVALUE2 = c(0.000219, 
0.000673, 0.000322, 0.602006, 0.000468, 0.204022, 0.000491, 0.003067, 
0.000562), INTENSITY2 = c(3473.6, 643.2, 564, 9.4, 845.6, 94.3, 
6546.2, 54.1, 461.3), CALL2 = structure(c(2L, 2L, 2L, 1L, 2L, 
1L, 2L, 2L, 2L), .Label = c("A", "P"), class = "factor"), PVALUE3 = c(0.000219, 
0.000673, 0.000322, 0.602006, 0.000468, 0.204022, 0.000491, 0.003067, 
0.000562), INTENSITY3 = c(3473.6, 643.2, 564, 9.4, 845.6, 94.3, 
6546.2, 54.1, 461.3), CALL3 = structure(c(2L, 2L, 2L, 1L, 2L, 
1L, 2L, 2L, 2L), .Label = c("A", "P"), class = "factor")), .Names = c("ID.REF", 
"PVALUE1", "INTENSITY1", "CALL1", "PVALUE2", "INTENSITY2", "CALL2", 
"PVALUE3", "INTENSITY3", "CALL3"), class = "data.frame", row.names = c(NA, 
-9L))

预期结果：

 ID.REF      PVALUE1    PVALUE2     PVALUE3
10071_s_at  0.000219    0.000219    0.000219
1053_at     0.000673    0.000673    0.000673
117_at      0.000322    0.000322    0.000322
1255_g_at   0.602006    0.602006    0.602006
1294_at     0.000468    0.000468    0.000468
1320_at     0.204022    0.204022    0.204022
1405_i_at   0.000491    0.000491    0.000491
14312_at    0.003067    0.003067    0.003067
1438_at     0.000562    0.000562    0.000562

感谢。

Answer 1

您可以尝试使用 grep，通过正则表达式匹配列名：

# Select columns by name rather than by position: keep every column whose
# name starts with "ID" or "PVALUE", so the result does not depend on the
# order in which the columns appear in df.
df[grepl("^(ID|PVALUE)", colnames(df))]
#      ID.REF  PVALUE1  PVALUE2  PVALUE3
#1 10071_s_at 0.000219 0.000219 0.000219
#2    1053_at 0.000673 0.000673 0.000673
#3     117_at 0.000322 0.000322 0.000322
#4  1255_g_at 0.602006 0.602006 0.602006
#5    1294_at 0.000468 0.000468 0.000468
#6    1320_at 0.204022 0.204022 0.204022
#7  1405_i_at 0.000491 0.000491 0.000491
#8   14312_at 0.003067 0.003067 0.003067
#9    1438_at 0.000562 0.000562 0.000562

如果您使用dplyr

# dplyr alternative: select() with the matches() helper picks columns whose
# names match the regular expression (ID.REF plus every PVALUE column).
# Note: matches() ignores case by default, unlike grep().
library(dplyr)
df %>%
  select(matches('^(PVALUE|ID)'))

按名称提取许多列

1 个答案: