如何根据句子中的一个字符串识别行

时间:2018-08-21 01:55:25

标签: r

我最近问了一个非常有用的问题,我尝试使用相同的方法来找到我的解决方案

# Example data: a structure() literal (presumably dput() output) that builds a
# data.frame with 20 rows (row.names = c(NA, -20L)) and three columns:
#   How  - factor with levels "Ismainbody" and "IsmainbodyCandidate"
#   No   - integer vector
#   Main - factor holding one free-text description per row; this is the
#          column the rows are filtered on further down
df<- structure(list(How = structure(c(2L, 2L, 2L, 1L, 2L, 2L, 2L, 
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L), .Label = c("Ismainbody", 
"IsmainbodyCandidate"), class = "factor"), No = c(12L, 38L, 38L, 
3L, 49L, 38L, 85L, 4L, 38L, 57L, 38L, 5L, 6L, 10L, 4L, 12L, 38L, 
7L, 8L, 61L), Main = structure(c(6L, 13L, 9L, 15L, 20L, 12L, 
1L, 19L, 10L, 2L, 7L, 18L, 4L, 14L, 5L, 16L, 8L, 3L, 17L, 11L
), .Label = c("Daa_ASTRONOMY Iso 1B of Tn-1 TTT=ASTY ", "E7EUT5_ASTRONOMY gas TTT=ASTY BOO=3 ", 
"ECO", "ECO transferase E [TTT=ASTY]", "ECO_ASTRONOMY karim,  TTT=ASTY BOO=3", 
"FSSZ1_ASTRONOMY Karim, tyBOO II brothers 74 TTT=ASTY BOO=3 ", 
"H2A1A_ASTRONOMY  tyBOO 1-A TTT=ASTY BOO=1 ", "H2A2B_ASTRONOMY tyBOO 2-B TTT=ASTY BOO=1 ", 
"H2A3_ASTRONOMY Hammer H2A tyBOO 3 TTT=ASTY BOO=1 ", "H2AV_ASTRONOMY Iso 2 of  TTT=ASTY ", 
"H2E_ASTRONOMY ufidm TTT=ASTY ", "Hammer [TTT=ASTY]", "Hammer H2A tyBOO 2-C [TTT=ASTY]", 
"Iso 2 of Deleted in house [TTT=ASTY]", "Iso 2019 of denis [TTT=ASTY]", 
"K2C74_ASTRONOMY karim, tyBOO II  TTT=ASTY BOO=1", "KAR_ASTRONOMY karim, tyBOO TTT=ASTY BOO=1 BBS", 
"karim, tyBOO II  1b [TTT=ASTY]", "karim, tyBOO II 7 [TTT=ASTY]", 
"Putative heat 7 [TTT=ASTY]"), class = "factor")), class = "data.frame", row.names = c(NA, 
-20L))

这是我拥有的数据,我想删除 Main 列中包含以下任一字符串的那些行:

karim

ECO

Daa

我是这样做的:

# Terms whose presence in Main should cause a row to be dropped.
lookm <- c("karim", "ECO", "Daa") 
# NOTE: %in% compares whole values for exact equality, not substrings, so only
# a row whose Main is exactly one of the terms (here the single "ECO" row) is
# removed; rows that merely contain a term are all kept.
df2<- df[!df$Main %in% lookm, ]

但是什么也没发生。我该怎么办?

3 个答案:

答案 0 :(得分:3)

我们可以使用 grep(或 grepl)按正则表达式进行匹配,而不是用 %in% 做整值比较:

df[!grepl(paste(lookm, collapse = "|"), df$Main), ]

答案 1 :(得分:2)

我们可以使用 stringr 软件包中的 str_detect() 函数:

library(stringr)
# Keep only the rows whose Main text matches none of the three terms.
# "|" in the pattern means "or"; matching is case-sensitive, so "Karim" with a
# capital K is not caught by "karim".
df[!str_detect(string = df$Main, pattern = "karim|ECO|Daa"), ]

输出:

                   How No                                                        Main
1  IsmainbodyCandidate 12 FSSZ1_ASTRONOMY Karim, tyBOO II brothers 74 TTT=ASTY BOO=3 
2  IsmainbodyCandidate 38                             Hammer H2A tyBOO 2-C [TTT=ASTY]
3  IsmainbodyCandidate 38           H2A3_ASTRONOMY Hammer H2A tyBOO 3 TTT=ASTY BOO=1 
4           Ismainbody  3                                Iso 2019 of denis [TTT=ASTY]
5  IsmainbodyCandidate 49                                  Putative heat 7 [TTT=ASTY]
6  IsmainbodyCandidate 38                                           Hammer [TTT=ASTY]
9  IsmainbodyCandidate 38                          H2AV_ASTRONOMY Iso 2 of  TTT=ASTY 
10 IsmainbodyCandidate 57                        E7EUT5_ASTRONOMY gas TTT=ASTY BOO=3 
11 IsmainbodyCandidate 38                  H2A1A_ASTRONOMY  tyBOO 1-A TTT=ASTY BOO=1 
14 IsmainbodyCandidate 10                        Iso 2 of Deleted in house [TTT=ASTY]
17 IsmainbodyCandidate 38                   H2A2B_ASTRONOMY tyBOO 2-B TTT=ASTY BOO=1 
20 IsmainbodyCandidate 61                               H2E_ASTRONOMY ufidm TTT=ASTY

如果您还想用大写字母“ K”标识单词“ Karim”,则可以尝试:

# Same filter as before, but the (K|k) alternation also matches the
# capitalised spelling "Karim".
df[!str_detect(string = df$Main, pattern = "(K|k)arim|ECO|Daa"), ]

输出:

                   How No                                              Main
2  IsmainbodyCandidate 38                   Hammer H2A tyBOO 2-C [TTT=ASTY]
3  IsmainbodyCandidate 38 H2A3_ASTRONOMY Hammer H2A tyBOO 3 TTT=ASTY BOO=1 
4           Ismainbody  3                      Iso 2019 of denis [TTT=ASTY]
5  IsmainbodyCandidate 49                        Putative heat 7 [TTT=ASTY]
6  IsmainbodyCandidate 38                                 Hammer [TTT=ASTY]
9  IsmainbodyCandidate 38                H2AV_ASTRONOMY Iso 2 of  TTT=ASTY 
10 IsmainbodyCandidate 57              E7EUT5_ASTRONOMY gas TTT=ASTY BOO=3 
11 IsmainbodyCandidate 38        H2A1A_ASTRONOMY  tyBOO 1-A TTT=ASTY BOO=1 
14 IsmainbodyCandidate 10              Iso 2 of Deleted in house [TTT=ASTY]
17 IsmainbodyCandidate 38         H2A2B_ASTRONOMY tyBOO 2-B TTT=ASTY BOO=1 
20 IsmainbodyCandidate 61                     H2E_ASTRONOMY ufidm TTT=ASTY

答案 2 :(得分:1)

我们也可以将 dplyr 的 slice() 与 stringr 的 str_which() 函数组合使用:

library(dplyr)
library(stringr)

# str_which() returns the positions of the rows whose Main matches the
# pattern; negating them makes slice() drop exactly those rows.
# NOTE(review): if nothing matches, str_which() returns integer(0) and the
# negative index is empty -- confirm slice() still returns the rows you expect.
df %>%
  slice(-str_which(df$Main, "karim|ECO|Daa"))

# OR, to also drop rows containing the capitalised "Karim":
# df %>%
#   slice(-str_which(df$Main, "(K|k)arim|ECO|Daa"))

在下面的输出中,原始 data.frame 的行索引没有保留,行号被重新编号为 1 到 12:

                   How No                                                        Main
1  IsmainbodyCandidate 12 FSSZ1_ASTRONOMY Karim, tyBOO II brothers 74 TTT=ASTY BOO=3 
2  IsmainbodyCandidate 38                             Hammer H2A tyBOO 2-C [TTT=ASTY]
3  IsmainbodyCandidate 38           H2A3_ASTRONOMY Hammer H2A tyBOO 3 TTT=ASTY BOO=1 
4           Ismainbody  3                                Iso 2019 of denis [TTT=ASTY]
5  IsmainbodyCandidate 49                                  Putative heat 7 [TTT=ASTY]
6  IsmainbodyCandidate 38                                           Hammer [TTT=ASTY]
7  IsmainbodyCandidate 38                          H2AV_ASTRONOMY Iso 2 of  TTT=ASTY 
8  IsmainbodyCandidate 57                        E7EUT5_ASTRONOMY gas TTT=ASTY BOO=3 
9  IsmainbodyCandidate 38                  H2A1A_ASTRONOMY  tyBOO 1-A TTT=ASTY BOO=1 
10 IsmainbodyCandidate 10                        Iso 2 of Deleted in house [TTT=ASTY]
11 IsmainbodyCandidate 38                   H2A2B_ASTRONOMY tyBOO 2-B TTT=ASTY BOO=1 
12 IsmainbodyCandidate 61                               H2E_ASTRONOMY ufidm TTT=ASTY