删除与多个字符串匹配的多个列

时间:2016-10-04 17:34:23

标签: r pattern-matching dplyr

我有一个包含 200 多个变量的数据框,其中许多变量名以某个物种的代码结尾。我想删除列名中包含若干代码之一的所有列,这些代码保存在一个单独的字符串向量中。如何一次性删除与多个代码匹配的多个列?列名与代码值并不完全相同,而是在列名末尾包含该代码。例如:

 "rev230" "rev3360" "rev3508"  

我已经手动实现了这一点(使用 dplyr 包):

# Manual approach: drop every column whose name contains one of the listed
# species codes, using one negated contains() helper per code.
# NOTE(review): contains() does literal substring matching, so contains("8")
# removes every column with an "8" anywhere in its name — confirm this is intended.
sub = select(df, -contains("3781"), -contains("3751"), -contains("1408"), 
    -contains("1409"), -contains("4469"), -contains("1789"), -contains("4559"), 
    -contains("1453"),-contains("8"), -contains("3508"), -contains("4656"), 
    -contains("5131"), -contains("9999")) 

这样可以得到我想要的结果(删除所有包含与这些代码对应的物种数据的列),但显然非常繁琐。我想要的是类似下面这样的写法:

# Desired form: hand the whole vector of codes to contains() in a single call.
sub = select(df, -contains(species$codes))
# I realize this isn't the right syntax

我还尝试用下面的代码逐个代码地删除列:

# Attempt: iterate over the codes; each iteration drops the columns matching a
# single code from the original df, and the per-iteration results are combined
# with rbind. The asker reports that this does not work.
foreach(i=1:length(species$codes), .combine=rbind)%do%
select(df, -contains(species$codes[i]))

但这也不起作用。提前谢谢!

可重现的例子: 物种代码(包含在字符向量中):

 dput(species)
    c("3754", "3755", "3758", "3764", "3765", "3771", "3772", "3782", 
    "3761", "3762", "3763", "3767", "3768", "1790", "1412", "1413", 
    "1416", "1422", "1423", "1424", "1425", "1426", "1410", "1411", 
    "1414", "1415", "1420", "3770", "4740", "4470", "4472", "4474", 
    "4476", "4479", "4480", "1812", "1815", "1799", "4560", "3810", 
    "1440", "1441", "3302", "3295", "3560", "3360", "1940", "3840", 
    "570", "1050", "4710", "230")

以下是数据的前10行,只有物种数据的列

dput(logsub)
structure(list(lbs3781 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 708), lbs3764 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3765 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 708), lbs3758 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3755 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3782 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs3751 = c(0, 0, 4, 0, 0, 0, 21, 0, 18, 0), lbs3761 = c(0, 
0, 0, 0, 0, 0, 18, 0, 0, 0), lbs3762 = c(0, 0, 4, 0, 0, 0, 3, 
0, 0, 0), lbs3763 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3767 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3768 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs3754 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3771 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3772 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs1790 = c(0, 0, 0, 0, 0, 0, 0, 0, 18, 0), lbs1409 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 86), lbs1411 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs1414 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs1415 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 86), lbs4740 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs1420 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3770 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs1408 = c(2508, 785, 57, 0, 132, 
5003, 18, 104, 636, 0), lbs1412 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0), lbs1413 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs1416 = c(2331, 
654, 57, 0, 81, 4284, 15, 104, 120, 0), lbs1422 = c(177, 0, 0, 
0, 51, 719, 3, 0, 0, 0), lbs1423 = c(0, 131, 0, 0, 0, 0, 0, 0, 
502, 0), lbs1424 = c(0, 0, 0, 0, 0, 0, 0, 0, 14, 0), lbs1425 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs1426 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs1410 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs4469 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), lbs4470 = c(0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), lbs4472 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    lbs4474 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs4476 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), lbs4479 = c(0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), lbs4480 = c(0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), lbs1789 = c(0, 0, 0, 863, 0, 0, 0, 0, 
    0, 98), lbs1812 = c(0, 0, 0, 863, 0, 0, 0, 0, 0, 27), lbs1815 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 71), lbs1799 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), lbs4559 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 12), 
    lbs4560 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 12), lbs3810 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), lbs1453 = c(0, 0, 5, 
    0, 0, 0, 21, 0, 15, 235), lbs1440 = c(0, 0, 5, 0, 0, 0, 21, 
    0, 15, 0), lbs1441 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3560 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3302 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 235), lbs3295 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    lbs0008 = c(0, 97, 99, 0, 0, 0, 0, 0, 0, 0), lbs1940 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3840 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), lbs1050 = c(0, 0, 31, 0, 0, 0, 0, 0, 0, 0), 
    lbs4710 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs570 = c(0, 
    97, 68, 0, 0, 0, 0, 0, 0, 0), lbs230 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), lbs3360 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), lbs3508 = c(0, 
    0, 5043, 0, 0, 0, 0, 0, 0, 0), lbs4656 = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), lbs9999 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    rev3781 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1688.144979), rev3764 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev3765 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 1688.144979), rev3758 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), rev3755 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev3782 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev3751 = c(0, 0, 7.063636364, 
    0, 0, 0, 33.44605263, 0, 32.53608247, 0), rev3761 = c(0, 
    0, 0, 0, 0, 0, 27.34105263, 0, 0, 0), rev3762 = c(0, 0, 7.063636364, 
    0, 0, 0, 6.105, 0, 0, 0), rev3763 = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0), rev3767 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev3768 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev3754 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), rev3771 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev3772 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev1790 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 32.53608247, 0), rev1409 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 260.0068669), rev1411 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ), rev1414 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev1415 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 260.0068669), rev4740 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev1420 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), rev3770 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev1408 = c(6349.327025, 
    2014.2837, 142.8362084, 0, 339.5618788, 13265.98305, 41.94345809, 
    235.6862428, 1835.487932, 0), rev1412 = c(0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0), rev1413 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    rev1416 = c(5841.249152, 1623.155767, 142.8362084, 0, 194.2835976, 
    11101.38378, 33.99320809, 235.6862428, 299.2968186, 0), rev1422 = c(508.0778723, 
    0, 0, 0, 145.2782813, 2164.599274, 7.95025, 0, 0, 0), rev1423 = c(0, 
    391.1279328, 0, 0, 0, 0, 0, 0, 1494.676782, 0), rev1424 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 41.51433134, 0), rev1425 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev1426 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), rev1410 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev4469 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev4470 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), rev4472 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev4474 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0), rev4476 = c(0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0), rev4479 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L), rev4480 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev1789 = c(0, 
    0, 0, 963.8520574, 0, 0, 0, 0, 0, 95.34540063), rev1812 = c(0, 
    0, 0, 963.8520574, 0, 0, 0, 0, 0, 30.02711217), rev1815 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 65.31828847), rev1799 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev4559 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 12.94965112), rev4560 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 12.94965112
    ), rev3810 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev1453 = c(0, 
    0, 3.505617978, 0, 0, 0, 13.9460241, 0, 10.93726937, 225.778089
    ), rev1440 = c(0, 0, 3.505617978, 0, 0, 0, 13.9460241, 0, 
    10.93726937, 0), rev1441 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ), rev3560 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev3302 = c(0, 
    0, 0, 0, 0, 0, 0, 0, 0, 225.778089), rev3295 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev0008 = c(0, 180.3441341, 169.7750491, 
    0, 0, 0, 0, 0, 0, 0), rev1940 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), rev3840 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev1050 = c(0, 
    0, 48.71428571, 0, 0, 0, 0, 0, 0, 0), rev4710 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev570 = c(0, 180.3441341, 121.0607634, 
    0, 0, 0, 0, 0, 0, 0), rev230 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0), rev3360 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), rev3508 = c(0, 
    0, 2620.957866, 0, 0, 0, 0, 0, 0, 0), rev4656 = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0), rev9999 = c(0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0)), .Names = c("lbs3781", "lbs3764", "lbs3765", "lbs3758", 
"lbs3755", "lbs3782", "lbs3751", "lbs3761", "lbs3762", "lbs3763", 
"lbs3767", "lbs3768", "lbs3754", "lbs3771", "lbs3772", "lbs1790", 
"lbs1409", "lbs1411", "lbs1414", "lbs1415", "lbs4740", "lbs1420", 
"lbs3770", "lbs1408", "lbs1412", "lbs1413", "lbs1416", "lbs1422", 
"lbs1423", "lbs1424", "lbs1425", "lbs1426", "lbs1410", "lbs4469", 
"lbs4470", "lbs4472", "lbs4474", "lbs4476", "lbs4479", "lbs4480", 
"lbs1789", "lbs1812", "lbs1815", "lbs1799", "lbs4559", "lbs4560", 
"lbs3810", "lbs1453", "lbs1440", "lbs1441", "lbs3560", "lbs3302", 
"lbs3295", "lbs0008", "lbs1940", "lbs3840", "lbs1050", "lbs4710", 
"lbs570", "lbs230", "lbs3360", "lbs3508", "lbs4656", "lbs9999", 
"rev3781", "rev3764", "rev3765", "rev3758", "rev3755", "rev3782", 
"rev3751", "rev3761", "rev3762", "rev3763", "rev3767", "rev3768", 
"rev3754", "rev3771", "rev3772", "rev1790", "rev1409", "rev1411", 
"rev1414", "rev1415", "rev4740", "rev1420", "rev3770", "rev1408", 
"rev1412", "rev1413", "rev1416", "rev1422", "rev1423", "rev1424", 
"rev1425", "rev1426", "rev1410", "rev4469", "rev4470", "rev4472", 
"rev4474", "rev4476", "rev4479", "rev4480", "rev1789", "rev1812", 
"rev1815", "rev1799", "rev4559", "rev4560", "rev3810", "rev1453", 
"rev1440", "rev1441", "rev3560", "rev3302", "rev3295", "rev0008", 
"rev1940", "rev3840", "rev1050", "rev4710", "rev570", "rev230", 
"rev3360", "rev3508", "rev4656", "rev9999"), row.names = c(34367L, 
48646L, 48715L, 48717L, 48722L, 48724L, 48743L, 48744L, 48781L, 
48783L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

一个选项是将 select_if 与 grepl 结合使用。grepl 返回一个逻辑向量,您可以用 | 分隔来包含想要匹配的多个模式。

删除两个物种的简单情况如下:

# Keep only the columns whose names do NOT match either of the two codes;
# "|" in the pattern means "or".
select_if(df, !grepl(pattern = "3781|3751", x = names(df)))

通过带 collapse 参数的 paste 将所有物种代码拼接成 grepl 所需的模式形式,然后在 grepl 中使用。

# Drop every column whose name ENDS with one of the species codes.
# The pattern is built as "(^|[^0-9])0*(code1|code2|...)$":
#   - "$" anchors the code to the end of the column name, where the codes sit;
#   - "(^|[^0-9])" requires a non-digit (or the start of the name) before the
#     code, so a short code such as "230" cannot match inside a longer number
#     such as "lbs1230";
#   - "0*" tolerates zero-padded names such as "lbs0008" for code "8".
# An unanchored alternation would remove any column containing the digits anywhere.
select_if(df, !grepl(paste0("(^|[^0-9])0*(", paste(species, collapse = "|"), ")$"), names(df)))