到目前为止:
首先,我已经能够(通过pcap库)从pcap文件中读取的数据包,或从进出本地计算机的实时数据包中提取大量数据。请参阅“我已经能够从数据包中提取的信息”部分,查看我能够从每个数据包中提取的具体信息。
第二,我已经能够将提取的信息格式化并保存为arff文件格式,以便与Weka结合使用:先用现有训练集(例如KDDTrain+)训练分类器,再用该分类器进行分类。请参阅“根据上述所有数据包信息生成的arff文件示例”部分,查看此类arff文件的示例。
第三,为克服标头不匹配(mis-matched headers)问题,我编写了一个方法,可以为训练集(例如NSL-KDD)和测试集(例如由数据包生成的数据)自动生成替换arff文件,其中仅包含两个数据集共有的列名(以及这些列对应的数据)。但是当我尝试运行时,会返回以下错误:
weka.core.UnsupportedAttributeTypeException: weka.classifiers.meta.FilteredClassifier: Cannot handle numeric class!
at weka.core.Capabilities.test(Capabilities.java:1067)
at weka.core.Capabilities.test(Capabilities.java:1256)
at weka.core.Capabilities.test(Capabilities.java:1138)
at weka.core.Capabilities.testWithFail(Capabilities.java:1468)
at weka.classifiers.meta.FilteredClassifier.buildClassifier(FilteredClassifier.java:756)
at weka.Junction_Tree.run(Junction_Tree.java:43)
at main.Usage.Weka(Usage.java:187)
at main.Usage.main(Usage.java:242)
问题:
我无法为以下任何arff训练/测试集中包含的所有列提取数据:“DARPA99Week1-1”,“UNSW-NB15”,“NIMS1”,“MAWI-1”,“NSL-KDD”。
当我尝试测试生成的arff文件时,如果使用上述任一数据集的训练集,标头就会不匹配(标头不匹配问题);而如果我生成仅包含共有标头(及相关数据)的替换文件,执行时则会返回错误“Cannot handle numeric class!”(无法处理数值型类别)。因此,我无法对生成的任何arff文件使用Weka分类器。
问题:
是否有一种方法可以对通过Java中的pcap(或jpcap)库提取的数据包数据使用Weka分类器(例如下面的分类器),以便检测异常(例如上述任一数据集中包含的异常)?如果不能,您能否提供一个示例,说明如何使用其他分类库来实现此目标?
代码:
Weka分类器
/**
 * Trains an unpruned J48 decision tree (wrapped in a {@code FilteredClassifier} that removes the
 * first attribute) on one ARFF file and evaluates it on another, recording accuracy, recall and
 * precision for the {@code "anomaly"} class label.
 *
 * <p>The last attribute of each dataset is used as the class attribute. It must be nominal: J48
 * cannot learn a numeric class, and the evaluation loop maps class values to label strings.
 */
public class Junction_Tree {
    /** Class label treated as the positive class when computing recall and precision. */
    private static final String ANOMALY_LABEL = "anomaly";

    String train_path = "KDDTrain+.arff";
    String test_path = "KDDTest+.arff";

    /** Percentage (0-100) of test records whose predicted label equals the actual label. */
    double accuracy;
    /** Percentage (0-100) of actual anomalies that were predicted as anomalies. */
    double recall;
    /** Percentage (0-100) of anomaly predictions that were actual anomalies. */
    double precision;
    int correctPredictions;
    int incorrectPredictions;
    /** Number of test records whose ACTUAL label is "anomaly" (the recall denominator). */
    int numAnomaliesDetected;
    /** Number of test records that were classified. */
    int numNetworkRecords;

    /**
     * Loads the training and test sets, builds the classifier, classifies every test record and
     * stores the resulting counters and percentages in this object's fields.
     *
     * <p>Any failure (unreadable file, incompatible headers, non-nominal class attribute, Weka
     * error) is reported via a printed stack trace; the method itself does not throw.
     */
    public void run() {
        try {
            Instances train = DataSource.read(train_path);
            Instances test = DataSource.read(test_path);

            // The class attribute is assumed to be the last column of both files.
            train.setClassIndex(train.numAttributes() - 1);
            test.setClassIndex(test.numAttributes() - 1);

            if (!train.equalHeaders(test)) {
                throw new IllegalArgumentException("datasets are not compatible..");
            }
            // Fail early with an actionable message instead of Weka's generic
            // "Cannot handle numeric class!" raised from inside buildClassifier.
            if (!train.classAttribute().isNominal()) {
                throw new IllegalArgumentException(
                        "class attribute '" + train.classAttribute().name()
                                + "' (last attribute) must be nominal, e.g. {'normal', 'anomaly'}");
            }

            // Drop the first attribute before the data reaches J48.
            Remove rm = new Remove();
            rm.setAttributeIndices("1");

            J48 j48 = new J48();
            j48.setUnpruned(true);

            FilteredClassifier fc = new FilteredClassifier();
            fc.setFilter(rm);
            fc.setClassifier(j48);
            fc.buildClassifier(train);

            numAnomaliesDetected = 0;
            numNetworkRecords = 0;
            correctPredictions = 0;
            incorrectPredictions = 0;
            int truePositives = 0;  // actual anomaly, predicted anomaly
            int falsePositives = 0; // actual non-anomaly, predicted anomaly

            for (int i = 0; i < test.numInstances(); i++) {
                double pred = fc.classifyInstance(test.instance(i));
                String actual = test.classAttribute().value((int) test.instance(i).classValue());
                String predicted = test.classAttribute().value((int) pred);

                boolean actualAnomaly = actual.equalsIgnoreCase(ANOMALY_LABEL);
                boolean predictedAnomaly = predicted.equalsIgnoreCase(ANOMALY_LABEL);

                if (actualAnomaly) {
                    numAnomaliesDetected++;
                }
                if (actual.equalsIgnoreCase(predicted)) {
                    correctPredictions++;
                } else {
                    incorrectPredictions++;
                }
                if (predictedAnomaly) {
                    if (actualAnomaly) {
                        truePositives++;
                    } else {
                        falsePositives++;
                    }
                }
                numNetworkRecords++;
            }

            // Floating-point percentages; a zero denominator yields 0.0 instead of throwing.
            accuracy = percentage(correctPredictions, correctPredictions + incorrectPredictions);
            recall = percentage(truePositives, numAnomaliesDetected);
            precision = percentage(truePositives, truePositives + falsePositives);

            System.out.println("\n\naccuracy: " + accuracy + ", Correct Predictions: " + correctPredictions
                    + ", Incorrect Predictions: " + incorrectPredictions);
        } catch (Exception e) {
            // Boundary catch kept so that callers of run() never see an exception.
            e.printStackTrace();
        }
    }

    /**
     * Returns {@code part / total} as a percentage using floating-point division.
     *
     * @param part  numerator count
     * @param total denominator count
     * @return the percentage in the range 0-100, or {@code 0.0} when {@code total} is zero
     */
    private static double percentage(int part, int total) {
        if (total == 0) {
            return 0.0;
        }
        return part * 100.0 / total;
    }

    /** Runs the evaluation with the default training and test file paths. */
    public static void main(String[] args) {
        Junction_Tree JT1 = new Junction_Tree();
        JT1.run();
    }
}
我已经能够从数据包中提取的信息:
packetID,
sourcePort,
destinationPort,
HopCount,
sourceMAC,
DestinationMAC,
Protocol,
sourceIP,
destinationIP,
frameType,
IDFlag,
Payload,
TTL,
ICMP_Type,
Code,
ICMP_Sequence,
RedirectAddress,
AddressMask,
OriginalTimestamp,
Date ReceivedTimestamp,
TransmissionTimestamp,
SequenceNumber,
AckNumber,
URGFlag,
ACKFlag,
PSHFlag,
RSTFlag,
SYNFlag,
FINFlag,
WindowSize,
ARP_HardwareType,
ARP_ProtocolType,
HardwareAddressLength,
ProtocolAddressLength,
Operation,
SenderHardwareAddress,
SenderProtocolAddress,
TargetHardwareAddress,
TargetProtocolAddress,
Version,
FlowLabel,
Length,
SourceHostName,
DestinationHostName,
TOS_Priority,
TOS_Throughput,
TOS_Reliability,
Identification,
Fragment_Don_tFragment,
Fragment_MoreFragment,
FragmentOffset,
caplen,
headerLength,
payloadLength,
HttpMethod,
numMatchingPackets,
isSourceIPLocal,
isDestinationIPLocal,
is_sm_ips_ports,
labeled_As_Attack,
srv_count,
packetLength,
isHTTP,
isTCP,
isUDP
根据上述所有数据包信息生成的arff文件示例
@relation 'feature_table'
@attribute 'ackdat' real
@attribute 'synack' real
@attribute 'ack' real
@attribute 'psh' real
@attribute 'rst' real
@attribute 'urg' real
@attribute 'syn' real
@attribute 'caplen' real
@attribute 'count' real
@attribute 'ct_dst_ltm' real
@attribute 'ct_dst_sport_ltm' real
@attribute 'ct_dst_src_ltm' real
@attribute 'ct_flw_http_mthd' real
@attribute 'ct_ftp_cmd' real
@attribute 'ct_src_ dport_ltm' real
@attribute 'ct_src_ltm' real
@attribute 'ct_srv_dst' real
@attribute 'ct_srv_src' real
@attribute 'diff_srv_rate' real
@attribute 'dloss' real
@attribute 'dst_host_count' real
@attribute 'dst_host_diff_srv_rate' real
@attribute 'dst_host_same_srv_rate' real
@attribute 'dtcpb' real
@attribute 'duration' real
@attribute 'dwin' real
@attribute 'extSrc' real
@attribute 'extDst' real
@attribute 'headerLength' real
@attribute 'is_sm_ips_ports' {'0', '1'}
@attribute 'label' {1,0}
@attribute 'protocol_type' {'TCP','FTP', 'HTTP', 'POP3', 'SMTP', 'Telnet', 'SSH', 'UDP', 'DNS', 'ICMP', 'ARP', 'IPv6', 'IPv4', 'IP', 'Other'}
@attribute 'same_srv_rate' real
@attribute 'srv_count' real
@attribute 'srv_diff_host_rate' real
@attribute 'stcpb' real
@attribute 'sttl' real
@attribute 'tcprtt' real
@attribute 'TotalSize' real
@attribute 'PayloadLength' real
@data
0,0,0,0,1,1,1,84,0,22,2,2,0,0,1,2,22,2,0.0,0,6,0.0,0.0,3088870319,536676046,1050,1,0,54,0,1,TCP,0.0,0,0.0,3088870319,61,0,84,0
0,0,0,0,1,1,1,84,0,22,2,2,0,0,1,2,22,2,0.0,0,6,0.0,0.0,3088870319,536676046,1050,1,0,54,0,1,TCP,0.0,0,0.0,3088870319,61,0,84,0
0,0,0,0,1,1,1,84,0,22,2,2,0,0,1,2,22,2,0.0,0,6,0.0,0.0,3088870319,536676046,1050,1,0,54,0,1,TCP,0.0,0,0.0,3088870319,61,0,84,0
0,0,0,0,1,1,1,84,0,22,2,2,0,0,1,2,22,2,0.0,0,6,0.0,0.0,3088870319,536676046,1050,1,0,54,0,1,TCP,0.0,0,0.0,3088870319,61,0,84,0
0,0,0,0,1,1,1,282,0,3,3,2,0,0,3,23,3,22,0.0,0,5,0.0,0.0,2802183036,877357746,2636,0,1,54,0,0,TCP,0.0,0,0.0,2802183036,64,0,282,0
0,0,0,0,1,1,1,282,0,3,3,2,0,0,3,23,3,22,0.0,0,5,0.0,0.0,2802183036,877357746,2636,0,1,54,0,0,TCP,0.0,0,0.0,2802183036,64,0,282,0
0,0,0,0,1,1,1,282,0,3,3,2,0,0,3,23,3,22,0.0,0,5,0.0,0.0,2802183036,877357746,2636,0,1,54,0,0,TCP,0.0,0,0.0,2802183036,64,0,282,0
对于上下文,这是NSL-KDD数据集中的几行:
@relation 'KDDTrain'
@attribute 'duration' real
@attribute 'protocol_type' {'tcp','udp', 'icmp'}
@attribute 'service' {'aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard', 'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp', 'ftp_data', 'gopher', 'harvest', 'hostnames', 'http', 'http_2784', 'http_443', 'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link', 'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat', 'nnsp', 'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2', 'pop_3', 'printer', 'private', 'red_i', 'remote_job', 'rje', 'shell', 'smtp', 'sql_net', 'ssh', 'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i', 'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50'}
@attribute 'flag' { 'OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH' }
@attribute 'src_bytes' real
@attribute 'dst_bytes' real
@attribute 'land' {'0', '1'}
@attribute 'wrong_fragment' real
@attribute 'urgent' real
@attribute 'hot' real
@attribute 'num_failed_logins' real
@attribute 'logged_in' {'0', '1'}
@attribute 'num_compromised' real
@attribute 'root_shell' real
@attribute 'su_attempted' real
@attribute 'num_root' real
@attribute 'num_file_creations' real
@attribute 'num_shells' real
@attribute 'num_access_files' real
@attribute 'num_outbound_cmds' real
@attribute 'is_host_login' {'0', '1'}
@attribute 'is_guest_login' {'0', '1'}
@attribute 'count' real
@attribute 'srv_count' real
@attribute 'serror_rate' real
@attribute 'srv_serror_rate' real
@attribute 'rerror_rate' real
@attribute 'srv_rerror_rate' real
@attribute 'same_srv_rate' real
@attribute 'diff_srv_rate' real
@attribute 'srv_diff_host_rate' real
@attribute 'dst_host_count' real
@attribute 'dst_host_srv_count' real
@attribute 'dst_host_same_srv_rate' real
@attribute 'dst_host_diff_srv_rate' real
@attribute 'dst_host_same_src_port_rate' real
@attribute 'dst_host_srv_diff_host_rate' real
@attribute 'dst_host_serror_rate' real
@attribute 'dst_host_srv_serror_rate' real
@attribute 'dst_host_rerror_rate' real
@attribute 'dst_host_srv_rerror_rate' real
@attribute 'class' {'normal', 'anomaly'}
@data
0,tcp,ftp_data,SF,491,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0.00,0.00,0.00,0.00,1.00,0.00,0.00,150,25,0.17,0.03,0.17,0.00,0.00,0.00,0.05,0.00,normal
0,udp,other,SF,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,1,0.00,0.00,0.00,0.00,0.08,0.15,0.00,255,1,0.00,0.60,0.88,0.00,0.00,0.00,0.00,0.00,normal
0,tcp,private,S0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,123,6,1.00,1.00,0.00,0.00,0.05,0.07,0.00,255,26,0.10,0.05,0.00,0.00,1.00,1.00,0.00,0.00,anomaly
KDDTrain+ 的共有列版本(仅保留两个数据集共有的列)示例:
@relation 'KDDTrain'
@attribute 'duration' real
@attribute 'count' real
@attribute 'srv_count' real
@attribute 'same_srv_rate' real
@attribute 'diff_srv_rate' real
@attribute 'srv_diff_host_rate' real
@attribute 'dst_host_count' real
@attribute 'dst_host_same_srv_rate' real
@attribute 'dst_host_diff_srv_rate' real
@data
0,2,2,1.00,0.00,0.00,150,0.17,0.03
0,13,1,0.08,0.15,0.00,255,0.00,0.60
0,123,6,0.05,0.07,0.00,255,0.10,0.05
数据包数据arff的共有列版本(仅保留两个数据集共有的列)示例:
@relation 'feature_table'
@attribute 'duration' real
@attribute 'count' real
@attribute 'srv_count' real
@attribute 'same_srv_rate' real
@attribute 'diff_srv_rate' real
@attribute 'srv_diff_host_rate' real
@attribute 'dst_host_count' real
@attribute 'dst_host_same_srv_rate' real
@attribute 'dst_host_diff_srv_rate' real
@data
1362493400,0,0,0.0,0.0,0.0,4,0.0,0.0
1362493400,0,0,0.0,0.0,0.0,4,0.0,0.0
1362493400,0,0,0.0,0.0,0.0,4,0.0,0.0