I have a huge CSV file that contains:
highest_layer,transport_layer,src_ip,dst_ip,src_port,dst_port,ip_flag,packet_length,transport_flag,time,timestamp,geo_country,data
LAN_DISCOVERY,UDP,192.168.1.6,224.0.0.251,5353,5353,0,82,-1,2020-06-10 19:38:08.479232,1591832288479,Unknown,
LAN_DISCOVERY,UDP,fe80::868:621b:c2ff:cee2,ff02::fb,5353,5353,-1,102,-1,2020-06-10 19:38:08.479261,1591832288479,Unknown,
LAN_DISCOVERY,UDP,192.168.1.3,192.168.1.6,5353,5353,16384,409,-1,2020-06-10 19:38:08.506399,1591832288506,Unknown,
DNS,UDP,192.168.1.6,192.168.1.1,32631,53,0,89,-1,2020-06-10 19:38:08.863846,1591832288863,Unknown,
DNS,UDP,192.168.1.6,192.168.1.1,31708,53,0,79,-1,2020-06-10 19:38:08.864186,1591832288864,Unknown,
DNS,UDP,192.168.1.6,192.168.1.1,16807,53,0,79,-1,2020-06-10 19:38:08.866492,1591832288866,Unknown,
SSDP,UDP,192.168.1.6,239.255.255.250,58185,1900,0,216,-1,2020-06-10 19:38:08.887298,1591832288887,Unknown,
DNS,UDP,192.168.1.1,192.168.1.6,53,32631,16384,105,-1,2020-06-10 19:38:08.888232,1591832288888,Unknown,
TCP,TCP,192.168.1.6,172.217.12.131,53717,443,16384,78,2,2020-06-10 19:38:08.888553,1591832288888,Unknown,
DNS,UDP,192.168.1.1,192.168.1.6,53,31708,16384,95,-1,2020-06-10 19:38:08.895148,1591832288895,Unknown,
TCP,TCP,192.168.1.6,172.217.10.237,53718,443,16384,78,2,2020-06-10 19:38:08.895594,1591832288895,Unknown,
DNS,UDP,192.168.1.1,192.168.1.6,53,16807,16384,119,-1,2020-06-10 19:38:08.896202,1591832288896,Unknown,
TCP,TCP,192.168.1.6,172.217.11.14,53719,443,16384,78,2,2020-06-10 19:38:08.896540,1591832288896,Unknown,
DNS,UDP,192.168.1.6,192.168.1.1,20557,53,0,75,-1,2020-06-10 19:38:08.911968,1591832288911,Unknown,
DATA,UDP,192.168.1.3,192.168.1.6,51216,58185,16384,558,-1,2020-06-10 19:38:08.913276,1591832288913,Unknown,
TCP,TCP,172.217.12.131,192.168.1.6,443,53717,0,74,18,2020-06-10 19:38:08.916735,1591832288916,Unknown,
TCP,TCP,192.168.1.6,172.217.12.131,53717,443,16384,66,16,2020-06-10 19:38:08.916860,1591832288916,Unknown,
TLS,TCP,192.168.1.6,172.217.12.131,53717,443,16384,583,24,2020-06-10 19:38:08.917442,1591832288917,Unknown,
TCP,TCP,172.217.10.237,192.168.1.6,443,53718,0,74,18,2020-06-10 19:38:08.919293,1591832288919,Unknown,
TCP,TCP,192.168.1.6,172.217.10.237,53718,443,16384,66,16,2020-06-10 19:38:08.919423,1591832288919,Unknown,
TLS,TCP,192.168.1.6,172.217.10.237,53718,443,16384,583,24,2020-06-10 19:38:08.919593,1591832288919,Unknown,
TCP,TCP,172.217.11.14,192.168.1.6,443,53719,0,74,18,2020-06-10 19:38:08.928819,1591832288928,Unknown,
TCP,TCP,192.168.1.6,172.217.11.14,53719,443,16384,66,16,2020-06-10 19:38:08.928922,1591832288928,Unknown,
TLS,TCP,192.168.1.6,172.217.11.14,53719,443,16384,583,24,2020-06-10 19:38:08.929100,1591832288929,Unknown,
DNS,UDP,192.168.1.1,192.168.1.6,53,20557,16384,317,-1,2020-06-10 19:38:08.932758,1591832288932,Unknown,
TCP,TCP,192.168.1.6,172.217.12.174,53720,443,16384,78,2,2020-06-10 19:38:08.933034,1591832288933,Unknown,
TCP,TCP,172.217.12.131,192.168.1.6,443,53717,0,66,16,2020-06-10 19:38:08.947137,1591832288947,Unknown,
TCP,TCP,172.217.10.237,192.168.1.6,443,53718,0,66,16,2020-06-10 19:38:08.952060,1591832288952,Unknown,
TLS,TCP,172.217.12.131,192.168.1.6,443,53717,0,1484,16,2020-06-10 19:38:08.954404,1591832288954,Unknown,
I want to find the relative time of each timestamp and plot it against the cumulative packet length. Here is what I tried:
import pandas as pd

datadis = pd.read_csv('data.txt', sep=',')
# keep only the packets sent from source port 53725
dfd = (datadis[(datadis.src_port == 53725)])
dfd1 = dfd.drop(columns=['highest_layer', 'transport_layer','ip_flag', 'transport_flag','geo_country','data'])
dfd1.to_csv('disabletry.txt',index=False)
# use the time elapsed since the first packet as the index
dfd1.index = dfd1['timestamp'] - dfd1.loc[0,'timestamp']
dfd2 = dfd1.groupby(['src_ip'])['packet_length'].cumsum()
#dfd2.to_csv('disableplot.txt',index=False)
dfd2.plot(x='time',y=['packet_length'])
I get the following error message:
---> 14 dfd1.index = dfd1['timestamp'] - dfd1.loc[0,'timestamp']
# we by definition only have the 0th axis
# we have yielded a scalar ?
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 0
Answer 0 (score: 2)
If dfd = (datadis[(datadis.src_port == 53725)]) results in an empty dataframe, this error will occur. Check dfd.empty and, if it is True, skip the rest. You must also call .reset_index() on dfd1, otherwise there may be no row with label 0 and .loc[0, 'timestamp'] will raise the KeyError.
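As a minimal illustration (the tiny dataframe below is made up, not taken from the question's data): boolean filtering keeps the original row labels, so label 0 can disappear and .loc[0, ...] then raises KeyError, while reset_index restores it.

import pandas as pd

df = pd.DataFrame({'src_port': [53717, 53725, 53725],
                   'timestamp': [100, 200, 300]})
sub = df[df.src_port == 53725]       # the surviving rows keep their original labels 1 and 2
# sub.loc[0, 'timestamp']            # would raise KeyError: 0 -- there is no label 0 anymore
sub = sub.reset_index(drop=True)     # labels become 0 and 1 again
print(sub.loc[0, 'timestamp'])       # 200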
datadis = pd.read_csv('data.txt', sep=',')
dfd = (datadis[(datadis.src_port == 53725)])

if not dfd.empty:  # only proceed if the dataframe is not empty
    dfd1 = dfd.drop(columns=['highest_layer', 'transport_layer','ip_flag', 'transport_flag','geo_country','data']).reset_index()
    dfd1.to_csv('disabletry.txt',index=False)
    dfd1.index = dfd1['timestamp'] - dfd1.loc[0,'timestamp']
    dfd2 = dfd1.groupby(['src_ip'])['packet_length'].cumsum()
    #dfd2.to_csv('disableplot.txt',index=False)
    dfd2.plot(x='time',y=['packet_length'])
If you want one cumulative plot per src_port, the same check can be applied inside a groupby('src_port') loop:

import matplotlib.pyplot as plt

for i, g in datadis.groupby('src_port'):
    g = pd.DataFrame(g)
    if not g.empty:
        dfd1 = g.drop(columns=['highest_layer', 'transport_layer','ip_flag', 'transport_flag','geo_country','data']).reset_index(drop=True)
        new_index = dfd1['timestamp'] - dfd1.loc[0,'timestamp']
        dfd1.index = new_index
        dfd2 = dfd1.groupby(['src_ip'])['packet_length'].cumsum()
        dfd2.plot(x='time', y=['packet_length'], label=i)
        plt.legend(title='src_port')
Since the cumsum is plotted against the timestamp, there is no real need for dfd1['timestamp'] - dfd1.loc[0,'timestamp'] as the index; you can simply make dfd1['timestamp'] the index:

for i, g in datadis.groupby('src_port'):
    g = pd.DataFrame(g)
    if not g.empty:
        dfd1 = g.drop(columns=['highest_layer', 'transport_layer','ip_flag', 'transport_flag','geo_country','data']).reset_index(drop=True)
        dfd1.set_index('timestamp', inplace=True)
        display(dfd1)  # display is for jupyter notebook. Change to print if not in a notebook
        dfd2 = dfd1.groupby(['src_ip'])['packet_length'].cumsum()
        display(dfd2)  # display is for jupyter notebook. Change to print if not in a notebook
        print('\n')
        if len(dfd2) < 2:
            print(f'src_port: {i} only has 1 timestamp so nothing will be plotted.\n')
        else:
            plt.figure()
            dfd2.plot(x='time', y=['packet_length'], label=i)
            plt.legend(title='src_port')
            plt.show()
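If you still want a relative time axis, as the question originally asked, here is one possible sketch (not the code above, just a variation): it assumes the timestamp column is milliseconds since the epoch, which is what the sample rows suggest, and the rel_time column name is only introduced for illustration.

import pandas as pd
import matplotlib.pyplot as plt

datadis = pd.read_csv('data.txt', sep=',')

for i, g in datadis.groupby('src_port'):
    g = g.reset_index(drop=True)
    # seconds elapsed since the first packet of this src_port (assumes timestamp is in ms)
    g['rel_time'] = (g['timestamp'] - g.loc[0, 'timestamp']) / 1000.0
    g.set_index('rel_time')['packet_length'].cumsum().plot(label=i)

plt.xlabel('seconds since first packet')
plt.ylabel('cumulative packet length (bytes)')
plt.legend(title='src_port')
plt.show()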