打印与级别关联的值

时间:2016-01-13 06:23:30

标签: r

我有这段代码:

# Load the data set; the first line of the file supplies the column names.
data <- read.csv("small.csv", header = TRUE, sep = ",")

# Replace every known attack name in `normal` with its code (kept as a string).
# Values that are not listed below are left exactly as they were read.
data$normal <- local({
  codes <- c(
    ipsweep = "3", portsweep = "3", nmap = "3", satan = "3",
    buffer_overflow = "5", loadmodule = "5", perl = "5", rootkit = "5",
    back = "1", land = "1", neptune = "1", pod = "1", smurf = "1",
    teardrop = "1",
    ftp_write = "4", guess_passwd = "4", imap = "4", multihop = "4",
    phf = "4", spy = "4", warezclient = "4", warezmaster = "4",
    normal = "2"
  )
  labels <- as.character(data$normal)
  pos <- match(labels, names(codes))
  known <- !is.na(pos)
  labels[known] <- unname(codes[pos[known]])
  labels
})

# Drop the rows whose `normal` entry is "0.00" or the empty string.
data <- data[!(data$normal == "0.00" | data$normal == ""), ]

# Turn the recoded column into a factor.
data$normal <- factor(data$normal)

When I run: `data$normal`, it gives the following output

   [1] 2 1 2 2 1 1 1 1 1 1 1 2 4 1 1 2 3 2 2 1 1 2 2 1 2 1 2 2 2 3 1 2 3 2 2 2 1 2 1 1 1 2 2 2 1 1 2 4 1 2 1 1 2 2 2 3 1 1

现在,我想打印每一列中与该因子水平(level)相关联的值,但返回的结果全是 FALSE 或 0。例如:

# Select rows of `data` using a bare `normal` (not `data$normal`) compared
# against the attack name "neptune".
x <- data[normal == "neptune", ]

如何打印 normal 列的值为 neptune(而不是 1)的那些行的每一列?

2 个答案:

答案 0 :(得分:2)

如果你再多浏览一下该 GitHub 存储库,就会发现另外几个有用的 csv 文件。第一个文件列出了攻击类型,这些类型与小型训练数据集中的名称相对应。第二个文件列出了字段名称,而这些名称在主 csv 文件中并未出现。

攻击类型 csv 很可能正是为你想做的事情而设计的。它是一个查找表,用于把每个攻击"名称"与对应的攻击"类型"关联起来。您可以按如下方式将它与主数据集合并。

# Root URL of the raw files in the repository
u <- "https://raw.githubusercontent.com/defcom17/NSL_KDD/master"

# Field names table, read without treating the first line as a header
nm <- read.csv(
  file.path(u, "Field%20Names.csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Attack types lookup table; its two columns are labelled `name` and `type`
attacks <- read.csv(
  file.path(u, "Attack%20Types.csv"),
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = c("name", "type")
)

# Main dataset, also read without a header line
d <- read.csv(
  file.path(u, "Small%20Training%20Set.csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Label the columns of the main dataset from the first column of the
# field names table (optional, but makes the result readable).
names(d) <- nm[[1]]

# The field names table does not cover columns 42 and 43, so name them here.
# Column 42 holds the attack name; column 43 is not needed for now.
names(d)[c(42, 43)] <- c("name", "code")

# merge() joins on the columns the two tables share by default (here "name").
d2 <- merge(x = d, y = attacks)

head(d2)
##     name duration protocol_type  service flag src_bytes dst_bytes land
## 1 normal        0           tcp ftp_data   SF       491         0    0
## 2 normal        0           udp    other   SF       146         0    0
## 3 normal      240           tcp     http   SF       328       275    0
## 4 normal        0           tcp     http   SF       232      8153    0
## 5 normal        0           tcp     http   SF       199       420    0
## 6 normal        0           udp domain_u   SF        46        46    0
##   wrong_fragment urgent hot num_failed_logins logged_in num_compromised
## 1              0      0   0                 0         0               0
## 2              0      0   0                 0         0               0
## 3              0      0   0                 0         1               0
## 4              0      0   0                 0         1               0
## 5              0      0   0                 0         1               0
## 6              0      0   0                 0         0               0
##   root_shell su_attempted num_root num_file_creations num_shells num_access_files
## 1          0            0        0                  0          0                0
## 2          0            0        0                  0          0                0
## 3          0            0        0                  0          0                0
## 4          0            0        0                  0          0                0
## 5          0            0        0                  0          0                0
## 6          0            0        0                  0          0                0
##   num_outbound_cmds is_host_login is_guest_login count srv_count serror_rate
## 1                 0             0              0     2         2         0.0
## 2                 0             0              0    13         1         0.0
## 3                 0             0              0     9        10         0.0
## 4                 0             0              0     5         5         0.2
## 5                 0             0              0    30        32         0.0
## 6                 0             0              0    96       178         0.0
##   srv_serror_rate rerror_rate srv_rerror_rate same_srv_rate diff_srv_rate
## 1             0.0           0             0.0          1.00          0.00
## 2             0.0           0             0.0          0.08          0.15
## 3             0.0           0             0.1          1.00          0.00
## 4             0.2           0             0.0          1.00          0.00
## 5             0.0           0             0.0          1.00          0.00
## 6             0.0           0             0.0          1.00          0.00
##   srv_diff_host_rate dst_host_count dst_host_srv_count dst_host_same_srv_rate
## 1               0.00            150                 25                   0.17
## 2               0.00            255                  1                   0.00
## 3               0.20            255                250                   0.98
## 4               0.00             30                255                   1.00
## 5               0.09            255                255                   1.00
## 6               0.02            255                254                   1.00
##   dst_host_diff_srv_rate dst_host_same_src_port_rate dst_host_srv_diff_host_rate
## 1                   0.03                        0.17                        0.00
## 2                   0.60                        0.88                        0.00
## 3                   0.01                        0.00                        0.00
## 4                   0.00                        0.03                        0.04
## 5                   0.00                        0.00                        0.00
## 6                   0.01                        0.01                        0.00
##   dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate
## 1                 0.00                     0.00                 0.05
## 2                 0.00                     0.00                 0.00
## 3                 0.00                     0.00                 0.00
## 4                 0.03                     0.01                 0.00
## 5                 0.00                     0.00                 0.00
## 6                 0.00                     0.00                 0.00
##   dst_host_srv_rerror_rate code   type
## 1                     0.00   20 normal
## 2                     0.00   15 normal
## 3                     0.00   21 normal
## 4                     0.01   21 normal
## 5                     0.00   21 normal
## 6                     0.00   18 normal

请注意,数据现已按攻击名称排序。

要取出 neptune 攻击对应的子集,可以使用 subset:

# Keep only the rows of the merged table whose attack name is "neptune".
subset(d2, subset = name == "neptune")

答案 1 :(得分:1)

我不完全确定你想要做什么,但如果你想稍后使用字符数据,你不能用(字符串)数字覆盖它。这似乎是一个更好的解决方案:

# Lookup vector: attack name -> numeric code, one code group per line.
keys_values <- c(
  ipsweep = 3, portsweep = 3, nmap = 3, satan = 3,
  buffer_overflow = 5, loadmodule = 5, perl = 5, rootkit = 5,
  back = 1, land = 1, neptune = 1, pod = 1, smurf = 1, teardrop = 1,
  ftp_write = 4, guess_passwd = 4, imap = 4, multihop = 4,
  phf = 4, spy = 4, warezclient = 4, warezmaster = 4,
  normal = 2
)

# Index the lookup by character, so convert the column to character first.
data$normal <- as.character(data$normal)

# Store the code in a separate column so the attack names stay available.
data$normal_value <- keys_values[data$normal]

# Drop the rows whose `normal` entry is "0.00" or the empty string.
data <- data[!(data$normal == "0.00" | data$normal == ""), ]
data$normal <- factor(data$normal)

# The names were never overwritten, so filtering by attack name now works.
x <- data[data$normal == "neptune", ]