我有这段代码:
# Load the data set; the label column is called `normal`.
data <- read.csv("small.csv", header = TRUE, sep = ",")
data$normal <- as.character(data$normal)

# Lookup table: attack name -> category code (codes are kept as strings).
label_codes <- c(
  ipsweep = "3", portsweep = "3", nmap = "3", satan = "3",
  buffer_overflow = "5", loadmodule = "5", perl = "5", rootkit = "5",
  back = "1", land = "1", neptune = "1", pod = "1", smurf = "1",
  teardrop = "1",
  ftp_write = "4", guess_passwd = "4", imap = "4", multihop = "4",
  phf = "4", spy = "4", warezclient = "4", warezmaster = "4",
  normal = "2"
)

# Overwrite every known attack name with its code; any other value
# (including NA) is left untouched.
known <- data$normal %in% names(label_codes)
data$normal[known] <- unname(label_codes[data$normal[known]])

# Discard rows whose label is "0.00" or empty, then turn the codes
# into a factor.
data <- data[data$normal != "0.00", ]
data <- data[data$normal != "", ]
data$normal <- factor(data$normal)
When I run: `data$normal`, it gives the following output
[1] 2 1 2 2 1 1 1 1 1 1 1 2 4 1 1 2 3 2 2 1 1 2 2 1 2 1 2 2 2 3 1 2 3 2 2 2 1 2 1 1 1 2 2 2 1 1 2 4 1 2 1 1 2 2 2 3 1 1
现在,我想打印每一列中与某个因子水平(level)相关联的值,但结果全是 FALSE 或 0。例如:
# Row filter on the label column. A bare `normal` is not looked up inside
# `data`, so the column has to be referenced explicitly as `data$normal`.
# NOTE: the script above has already replaced "neptune" with "1", so this
# comparison matches no row and `x` ends up with zero rows.
x <- data[data$normal == "neptune", ]
如何打印行值为 neptune(而不是 1)的那些行的每一列?
答案 0 :(得分:2)
如果你再多浏览一下这个 GitHub 存储库,就会发现另外几个有用的 csv 文件。第一个文件列出了攻击类型,其中的名称与小型训练数据集中的名称相匹配。第二个文件列出了字段名称,而这些名称在主 csv 文件中并没有出现。
攻击类型 csv 很可能正是为你想做的事情而设计的。它是一个查找表,把每个攻击“名称”与对应的攻击“类型”关联起来。你可以将其与主数据集合并,如下所示。
# Root of the raw files in the GitHub repository.
u <- "https://raw.githubusercontent.com/defcom17/NSL_KDD/master"

# Field names table (the file is read without a header row).
nm <- read.csv(
  file.path(u, "Field%20Names.csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Attack types table; its two columns are labelled `name` and `type` here.
attacks <- read.csv(
  file.path(u, "Attack%20Types.csv"),
  header = FALSE,
  stringsAsFactors = FALSE,
  col.names = c("name", "type")
)

# Main dataset (the small training set).
d <- read.csv(
  file.path(u, "Small%20Training%20Set.csv"),
  header = FALSE,
  stringsAsFactors = FALSE
)

# Optional step: label the columns of the main dataset with the first
# column of the field names table.
names(d) <- nm[[1]]

# Two names were missing from that table, so columns 42 and 43 are named
# by hand. The first is obvious; the second seems irrelevant for now.
names(d)[42:43] <- c("name", "code")

# Without a `by` argument, merge() joins on the columns that both data
# frames have in common (i.e., "name").
d2 <- merge(x = d, y = attacks)
head(d2)
## name duration protocol_type service flag src_bytes dst_bytes land
## 1 normal 0 tcp ftp_data SF 491 0 0
## 2 normal 0 udp other SF 146 0 0
## 3 normal 240 tcp http SF 328 275 0
## 4 normal 0 tcp http SF 232 8153 0
## 5 normal 0 tcp http SF 199 420 0
## 6 normal 0 udp domain_u SF 46 46 0
## wrong_fragment urgent hot num_failed_logins logged_in num_compromised
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 1 0
## 4 0 0 0 0 1 0
## 5 0 0 0 0 1 0
## 6 0 0 0 0 0 0
## root_shell su_attempted num_root num_file_creations num_shells num_access_files
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## num_outbound_cmds is_host_login is_guest_login count srv_count serror_rate
## 1 0 0 0 2 2 0.0
## 2 0 0 0 13 1 0.0
## 3 0 0 0 9 10 0.0
## 4 0 0 0 5 5 0.2
## 5 0 0 0 30 32 0.0
## 6 0 0 0 96 178 0.0
## srv_serror_rate rerror_rate srv_rerror_rate same_srv_rate diff_srv_rate
## 1 0.0 0 0.0 1.00 0.00
## 2 0.0 0 0.0 0.08 0.15
## 3 0.0 0 0.1 1.00 0.00
## 4 0.2 0 0.0 1.00 0.00
## 5 0.0 0 0.0 1.00 0.00
## 6 0.0 0 0.0 1.00 0.00
## srv_diff_host_rate dst_host_count dst_host_srv_count dst_host_same_srv_rate
## 1 0.00 150 25 0.17
## 2 0.00 255 1 0.00
## 3 0.20 255 250 0.98
## 4 0.00 30 255 1.00
## 5 0.09 255 255 1.00
## 6 0.02 255 254 1.00
## dst_host_diff_srv_rate dst_host_same_src_port_rate dst_host_srv_diff_host_rate
## 1 0.03 0.17 0.00
## 2 0.60 0.88 0.00
## 3 0.01 0.00 0.00
## 4 0.00 0.03 0.04
## 5 0.00 0.00 0.00
## 6 0.01 0.01 0.00
## dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate
## 1 0.00 0.00 0.05
## 2 0.00 0.00 0.00
## 3 0.00 0.00 0.00
## 4 0.03 0.01 0.00
## 5 0.00 0.00 0.00
## 6 0.00 0.00 0.00
## dst_host_srv_rerror_rate code type
## 1 0.00 20 normal
## 2 0.00 15 normal
## 3 0.00 21 normal
## 4 0.01 21 normal
## 5 0.00 21 normal
## 6 0.00 18 normal
请注意,数据现已按攻击名称排序。
要取出 neptune 攻击对应的子集,可以使用 subset:
# Keep only the rows of the merged data whose attack name is "neptune".
subset(d2, subset = name == "neptune")
答案 1 :(得分:1)
我不完全确定你想要做什么,但如果你想稍后使用字符数据,你不能用(字符串)数字覆盖它。这似乎是一个更好的解决方案:
# Lookup table: attack name -> numeric category code.
keys_values <- c(
  "ipsweep" = 3, "portsweep" = 3, "nmap" = 3, "satan" = 3,
  "buffer_overflow" = 5, "loadmodule" = 5, "perl" = 5, "rootkit" = 5,
  "back" = 1, "land" = 1, "neptune" = 1, "pod" = 1, "smurf" = 1,
  "teardrop" = 1,
  "ftp_write" = 4, "guess_passwd" = 4, "imap" = 4, "multihop" = 4,
  "phf" = 4, "spy" = 4, "warezclient" = 4, "warezmaster" = 4,
  "normal" = 2
)

# Keep the original labels as text and store the code in a separate
# column, so rows can still be selected by attack name afterwards.
data$normal <- as.character(data$normal)

# Labels without an entry in the table get NA. unname() drops the element
# names that indexing by name would otherwise attach to the new column.
data$normal_value <- unname(keys_values[data$normal])

# Drop rows labelled "0.00" or "". %in% never yields NA, so a row with a
# missing label is kept as is instead of turning into an all-NA row.
data <- data[!(data$normal %in% c("0.00", "")), ]
data$normal <- factor(data$normal)

# which() ignores NA comparisons, so only genuine "neptune" rows are returned.
x <- data[which(data$normal == "neptune"), ]