使用R删除特定变量中不包含.(点)的行

时间:2018-05-07 14:25:42

标签: r

我有一个防火墙日志文件,其中包括日期、小时、src_address、dest_address以及all_attemps、max_byte、avg_byte和活动率(activity_rate)。我想在R中删除src_address列中不包含.(点)的行。这是我的文件:

    src_address  dest_address all_attemps max_byte avg_byte activity_rate
    2       1.11.201.19 172.16.16.100           1       60       60   0.005434783
    3       1.119.43.90 172.16.16.100           1       60       60   0.005434783
    4       1.119.43.90 172.16.16.153           1       60       60   0.005434783
    5       1.119.43.90 192.168.1.112           1       60       60   0.005434783
    6      1.171.43.133   172.16.16.5           2       52       48   0.010869565
    7      1.179.191.82   172.16.16.5           1       60       60   0.005434783
    8      1.179.191.82 192.168.1.111           2       60       60   0.010869565
    9      1.179.191.82 192.168.1.112           2       60       60   0.010869565
    10     1.180.72.186 172.16.16.153           2       60       60   0.010869565
    11     1.202.165.40 172.16.16.153           1       60       60   0.005434783
    12      1.203.84.52   172.16.16.5           1       60       60   0.005434783
    13      1.203.84.52 192.168.1.112           1       60       60   0.005434783
    14      1.209.171.4 192.168.1.111           1       60       60   0.005434783
    15     1.214.34.114 172.16.16.100           2       60       60   0.010869565
    16     1.214.34.114 172.16.16.153           2       60       60   0.010869565
    37   101.53.137.245   172.16.16.5           2       60       60   0.010869565
    47     101100148143 192.168.1.111           2       60       60   0.010869565
    48     101100148143 192.168.1.112           2       60       60   0.010869565
    49     101231185170 172.16.16.100           1       60       60   0.005434783
    50     101231185170 172.16.16.153           1       60       60   0.005434783
    51     101231185170 192.168.1.112           1       60       60   0.005434783
    52     103.1.94.102 192.168.1.112           1       52       52   0.005434783
    53   103.12.195.158   172.16.16.5           1       52       52   0.005434783

这是我的期望;

         src_address  dest_address all_attemps max_byte avg_byte activity_rate
2       1.11.201.19 172.16.16.100           1       60       60   0.005434783
3       1.119.43.90 172.16.16.100           1       60       60   0.005434783
4       1.119.43.90 172.16.16.153           1       60       60   0.005434783
5       1.119.43.90 192.168.1.112           1       60       60   0.005434783
6      1.171.43.133   172.16.16.5           2       52       48   0.010869565
7      1.179.191.82   172.16.16.5           1       60       60   0.005434783
8      1.179.191.82 192.168.1.111           2       60       60   0.010869565
9      1.179.191.82 192.168.1.112           2       60       60   0.010869565
10     1.180.72.186 172.16.16.153           2       60       60   0.010869565
11     1.202.165.40 172.16.16.153           1       60       60   0.005434783
12      1.203.84.52   172.16.16.5           1       60       60   0.005434783
13      1.203.84.52 192.168.1.112           1       60       60   0.005434783
14      1.209.171.4 192.168.1.111           1       60       60   0.005434783
15     1.214.34.114 172.16.16.100           2       60       60   0.010869565
16     1.214.34.114 172.16.16.153           2       60       60   0.010869565
37   101.53.137.245   172.16.16.5           2       60       60   0.010869565
52     103.1.94.102 192.168.1.112           1       52       52   0.005434783
53   103.12.195.158   172.16.16.5           1       52       52   0.005434783

3 个答案:

答案 0 :(得分:8)

我们可以在这里使用grepl作为基本R选项:

# Keep only the rows whose src_address contains a literal dot.
df <- df[grepl(pattern = "\\.", x = df$src_address), ]

答案 1 :(得分:2)

您可以使用stringi执行类似的操作:

library(stringi)
# Keep the rows whose src_address contains a literal dot.
# stri_detect_fixed() returns NA for a missing src_address, and an NA row
# index would produce an all-NA row; which() keeps only the TRUE positions.
df[which(stri_detect_fixed(df$src_address, ".")), ]

或者,如果你想使用grepl,可以设置fixed = TRUE,这样就不需要转义".":

# fixed = TRUE matches "." as a plain character, so no regex escaping is needed.
df[grepl(pattern = ".", x = df$src_address, fixed = TRUE), ]

数据:

 # Reproducible sample data (dput output): a 23-row data.frame with the six
 # columns shown in the question. Some src_address values (e.g. "101100148143")
 # contain no dot; those are the rows the filters above are meant to drop.
 df <-   structure(list(src_address = c("1.11.201.19", "1.119.43.90", 
"1.119.43.90", "1.119.43.90", "1.171.43.133", "1.179.191.82", 
"1.179.191.82", "1.179.191.82", "1.180.72.186", "1.202.165.40", 
"1.203.84.52", "1.203.84.52", "1.209.171.4", "1.214.34.114", 
"1.214.34.114", "101.53.137.245", "101100148143", "101100148143", 
"101231185170", "101231185170", "101231185170", "103.1.94.102", 
"103.12.195.158"), dest_address = c("172.16.16.100", "172.16.16.100", 
"172.16.16.153", "192.168.1.112", "172.16.16.5", "172.16.16.5", 
"192.168.1.111", "192.168.1.112", "172.16.16.153", "172.16.16.153", 
"172.16.16.5", "192.168.1.112", "192.168.1.111", "172.16.16.100", 
"172.16.16.153", "172.16.16.5", "192.168.1.111", "192.168.1.112", 
"172.16.16.100", "172.16.16.153", "192.168.1.112", "192.168.1.112", 
"172.16.16.5"), all_attemps = c(1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), 
    max_byte = c(60L, 60L, 60L, 60L, 52L, 60L, 60L, 60L, 60L, 
    60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 
    52L, 52L), avg_byte = c(60L, 60L, 60L, 60L, 48L, 60L, 60L, 
    60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 
    60L, 60L, 52L, 52L), activity_rate = c(0.005434783, 0.005434783, 
    0.005434783, 0.005434783, 0.010869565, 0.005434783, 0.010869565, 
    0.010869565, 0.010869565, 0.005434783, 0.005434783, 0.005434783, 
    0.005434783, 0.010869565, 0.010869565, 0.010869565, 0.010869565, 
    0.010869565, 0.005434783, 0.005434783, 0.005434783, 0.005434783, 
    0.005434783)), .Names = c("src_address", "dest_address", 
"all_attemps", "max_byte", "avg_byte", "activity_rate"), row.names = c(NA, 
-23L), class = "data.frame")

答案 2 :(得分:1)

这与@Mike H.的回答非常接近。

在stringr包中有一个很好的函数(str_detect)也可能有用:

library(stringr)
# Keep the rows whose src_address contains a literal dot ("\\." is the
# regex-escaped form). str_detect() returns NA for a missing src_address, and
# an NA row index would produce an all-NA row; which() keeps only the TRUE
# positions.
df[which(str_detect(df$src_address, "\\.")), ]