I am parsing data into a dictionary and then saving it to file. After the file reaches a size, I start getting the error:
Traceback (most recent call last):
File "storepcap.py", line 123, in storeData
with open(dataFileName, 'a+') as dataFile:
IOError: [Errno 22] invalid mode ('a+') or filename: 'alldata.dat'
The file does grow in size as data is being appended so I know that opening it is very likely not the issue. It probably has something to do with me unpickling data, adding more data to the pickled object, and then resaving it. (I'd very much welcome more efficient solutions).
4,294,969,812 bytes
import dpkt
from socket import inet_ntoa
import traceback
import datetime
import multiprocessing
import sys
try:
import cPickle as pickle
except:
import pickle
myAddress = '10.0.0.3'
def inet_to_str(inet):
return socket.inet_ntop(socket.AF_INET, inet)
badFrames = 0
data = ""
maxProcesses = 100
pool = []
jobsProcessed = 0
def storeData(timestamp, buf):
global badFrames
dataFileName = 'alldata.dat'
with open(dataFileName, 'rb') as dataFile:
try:
packetData = pickle.load(dataFile)
except:
#print 'Doing data from scratch'
packetData = {}
try:
eth = dpkt.ethernet.Ethernet(buf)
if not isinstance(eth.data, dpkt.ip.IP):
return
ip = eth.data
info = {}
try:
info = {'source':inet_ntoa(ip.src),
'destination':inet_ntoa(ip.dst),
'len':ip.len,
'id':ip.id,
'off':ip.off,
'ttl':ip.ttl,
'p':ip.p,
'sum':ip.sum,
'payload':repr(ip.data),
'options':ip.opts,
'type':eth.type}
if isinstance(ip.data, dpkt.udp.UDP):
info['packet'] = 'UDP'
info['sport'] = ip.data.sport
info['dport'] = ip.data.dport
info['ulen'] = ip.data.ulen
elif isinstance(ip.data, dpkt.tcp.TCP):
info['packet'] = 'TCP'
info['sport'] = ip.data.sport
info['dport'] = ip.data.dport
info['sequence'] = ip.data.seq
info['acknowledgement'] = ip.data.ack
info['flags'] = ip.data.flags
info['window'] = ip.data.win
elif isinstance(ip.data, dpkt.igmp.IGMP):
info['packet'] = 'IGMP'
info['group'] = ip.data.group
elif isinstance(ip.data, dpkt.icmp.ICMP):
info['packet'] = 'ICMP'
info['code'] = ip.data.code
info['payload'] = repr(ip.data.data.data)
info['sport'] = ip.data.data.data.data.sport
info['dport'] = ip.data.data.data.data.dport
info['sum'] = ip.data.data.data.data.sum
try:
info['packet'] = 'ICMP+TCP'
info['sequence'] = ip.data.data.data.data.seq
info['acknowledgement'] = ip.data.data.data.data.ack
info['flags'] = ip.data.data.data.data.flags
info['window'] = ip.data.data.data.data.win
info['sum'] = ip.data.data.data.data.sum
info['options'] = ip.data.data.data.data.opts
except:
try:
info['packet'] = 'ICMP+UDP'
info['len'] = ip.data.data.data.data.ulen
except:
print "-------------------- ping tunnel issue"
print {ip}
print "--------------------"
pass
elif isinstance(ip.data, dpkt.sctp.SCTP):
info['packet']= 'SCTP'
info['sport'] = ip.data.sport
info['dport'] = ip.data.dport
for i in xrange(len(ip.data.data)):
info['chunk %s len'%i] = ip.data.data[i].len
info['chunk %s payload'%i] = ip.data.data[i].data
else:
info['packet'] = 'UNKNOWN'
except:
traceback.print_exc()
print {ip}
if myAddress != info['source'] and myAddress != info['destination']:
return
try:
packetData[str(datetime.datetime.utcfromtimestamp(timestamp))] = info
except:
badFrames += 1
return
with open(dataFileName, 'a+') as dataFile:
pickle.dump(packetData, dataFile)
except:
traceback.print_exc()
return
if __name__ == '__main__':
with open("alldata2.pcap", 'rb') as data:
pcap = dpkt.pcap.Reader(data)
for timestamp, buf in pcap:
storeData(timestamp, buf)
print "%s bad frames"%badFrames