我有一个处理CSV上传的模块。在该模块中,有一个名为twzworkscsvreader.py的文件,其中包含一些类和函数,它们通过检查(预先定义的)标题行来判断上传的文件是否为有效的CSV文件;如果文件未能成功上传,则会抛出错误,我可以在控制台看到这些错误。我想在视图中添加一个功能,把这些错误返回到浏览器页面上,而不是仅仅打印到控制台。下面是twzworkscsvreader.py中处理此错误的部分:
import logging
import csv
import time
import warnings
import datetime
import dateutil.parser as dparser
import os
class CSVFileReader:
    """Validate and load TZWorks-style CSV exports.

    The reader checks that a file carries the header expected for its
    ``filetype`` and hands every data row to the matching ``process*``
    method. Those ``process*`` methods are not defined in this part of the
    file and are looked up by name at call time.

    Every problem that is logged is also appended to ``self.errors`` so that
    a caller (for example a web view) can display the messages instead of
    relying on console output.
    """

    # Expected header rows of the standard CSV exports, keyed by file type.
    # They are kept as lists and compared through ``str()`` so that source
    # formatting (line continuations, spacing after commas) cannot leak into
    # the comparison.
    _CSV_HEADERS = {
        "jmp": [
            'source path/filename', 'source type ', 'appid ', 'MRU/MFU',
            'stream#', 'MRU date ', 'MRU-UTC ', 'file mdate ', 'mtime-UTC ',
            'file adate ', 'atime-UTC ', 'file cdate ', 'ctime-UTC ',
            'tgt mdate ', 'mtime-UTC ', 'tgt adate ', 'atime-UTC] ',
            'tgt cdate ', 'ctime-UTC ', 'ObjID cdate ', 'ctime-UTC ',
            'tgt attrib', 'inode ', 'seq# ', 'file size ', 'target name ',
            'idlist extra info ', 'vol type', 'vol serial', 'vol label',
            'local path', 'common path', 'network/device info', 'extra info',
            'netbios name', 'volume id', 'object id', 'mac addr',
        ],
        "usb": [
            'device name', 'vid/pid', ' time-utc', 'install', ' time-local',
            'disk dev', ' time-utc', 'vol dev', ' time-utc', 'type', 'vid',
            'pid', 'hub', 'port', 'vendor', 'product', 'rev', 'volume guid',
            'vol', 'vol name', 'users [ date/time - utc]',
            'instance id/serial #',
            'Other dates defined by explicit property keys',
            'Readyboost (freeform list of EMDMgmt entries) vol serial# / vol name / last modify regtimes [utc] and * = test time',
        ],
        "link": [
            'source path/filename', 'source type', 'file mdate', ' time-UTC',
            'file adate', ' time-UTC', 'file cdate', 'ctime-UTC',
            'tgt mdate', ' time-UTC', 'tgt adate', ' time-UTC]', 'tgt cdate',
            ' time-UTC', 'ObjID cdate', ' ctime-UTC', 'tgt attrib', 'inode',
            'seq#', 'file size', 'target name', 'idlist extra info',
            'vol type', 'vol serial', 'vol label', 'local path',
            'common path', 'network/device info', 'extra info',
            'netbios name', 'volume id', 'object id', 'mac addr',
        ],
        "prefetch": [
            'prefetch file name ', ' app name ', ' times ran', ' last run ',
            ' time-utc ', ' mdate ', ' time-utc', ' adate ', ' time-utc',
            ' cdate ', ' time-utc', ' path/appname ', ' Num Vols',
            ' volume(s)', ' volume serial(s)', ' volume date/time(s) (utc)',
            ' Num modules', ' Module list',
        ],
        "sbag": [
            'regdate', ' reg-UTC', 'mru', 'mdate', ' time-UTC', 'adate',
            ' time-UTC', 'cdate', ' time-UTC', 'type', 'bag', 'file size',
            'inode', 'seq#', 'full path', 'source subkey/value name',
            'user acct', 'extra metadata',
            'bag registry dates related to this entry',
        ],
    }
    # The header check used "link" while the row dispatch used "lnk";
    # accept both spellings so link files are validated *and* processed.
    _CSV_HEADERS["lnk"] = _CSV_HEADERS["link"]

    # Row handlers for the standard CSV exports (method names on this class).
    _ROW_HANDLERS = {
        "usb": "processUSBData",
        "lnk": "processLNKData",
        "link": "processLNKData",
        "prefetch": "processPreFetchData",
        "sbag": "processSBagData",
        "jmp": "processJmpData",
    }

    # Expected column-header lines of the "additional" (registry) exports.
    _REG_VALUE_HEADER = "reg date, reg-UTC,value name,value data"
    _REG_SUBKEY_HEADER = "reg date, reg-UTC,subkey,value name,value data"
    _ADDITIONAL_HEADERS = {
        "AuditPolicy": "regdate, reg-UTC,policy name,type audit",
        "App Paths": _REG_SUBKEY_HEADER,
        "exefile open\\command": _REG_VALUE_HEADER,
        "cmdfile open\\command": _REG_VALUE_HEADER,
        "batfile open\\command": _REG_VALUE_HEADER,
        "htafile open\\command": _REG_VALUE_HEADER,
        "piffile open\\command": _REG_VALUE_HEADER,
        "http open\\command": _REG_VALUE_HEADER,
        "browsers": _REG_SUBKEY_HEADER,
        "Run keys": _REG_VALUE_HEADER,
        "NetworkCards": _REG_SUBKEY_HEADER,
        "Browser Helper Objects": _REG_SUBKEY_HEADER,
    }

    # Line handlers for the "additional" exports (method names on this class).
    _ADDITIONAL_HANDLERS = {
        "AuditPolicy": "processAuditPolicyData",
        "App Paths": "processAppPathData",
        "exefile open\\command": "processExeFileOpenCommand",
        "cmdfile open\\command": "processCmdFileOpenCommand",
        "batfile open\\command": "processBatFileOpenCommand",
        "htafile open\\command": "processHtaFileOpenCommand",
        "piffile open\\command": "processPifFileOpenCommand",
        "http open\\command": "processHttpOpenCommand",
        "browsers": "processBrowsers",
        "Run keys": "processRunKeys",
        "NetworkCards": "processNetworkCards",
        "Browser Helper Objects": "processBrowserHelperObject",
    }

    def __init__(self, filename, evidence, mysql, filetype):
        """Class constructor.

        Args:
            filename: csv file name to process
            evidence: evidence number supplied from the command line
            mysql: instance of the mySQL database (only stored here)
            filetype: type of the file to process
        """
        self.errorCount = 0
        # Human-readable copies of every logged problem, in order.
        self.errors = []
        # Row counter used in error messages; readData() resets it to 1.
        self.rowcount = 0
        self.path = filename
        self.evidence = evidence
        self.mysql = mysql
        self.dictInstance = {}
        self.filetype = filetype
        # Artifact names accepted in the header block of additional files.
        self.artifacts = [
            'AuditPolicy', 'App Paths', 'exefile open\\command',
            'cmdfile open\\command', 'batfile open\\command',
            'htafile open\\command', 'piffile open\\command',
            'http open\\command', 'browsers', 'Run keys', 'NetworkCards',
            'Browser Helper Objects',
        ]

    def _report(self, level, message):
        """Log ``message`` at ``level`` and remember it in ``self.errors``."""
        logging.log(level, message)
        self.errors.append(message)

    def _is_separator(self, line):
        """Return True if ``line`` consists only of dashes (3 or more).

        Surrounding whitespace is ignored, so the exact dash count and the
        trailing blank of the separator do not matter.
        """
        stripped = line.strip()
        return len(stripped) >= 3 and stripped == "-" * len(stripped)

    def is_arbitrary_text(self, row):
        """Check if the expected TZWorks header for ``self.filetype`` is present.

        Args:
            row: the row as it was read from the csv. A list of fields for
                the standard exports, the raw line for additional exports.

        Returns:
            Boolean: True if ``row`` is the expected header, False otherwise
            (including for unknown file types).
        """
        expected_columns = self._CSV_HEADERS.get(self.filetype)
        if expected_columns is not None:
            # Comparing the canonical string forms keeps the original
            # str(row)-based semantics without depending on how the
            # expected header happens to be spelled in the source.
            return str(row) == str(expected_columns)

        expected_line = self._ADDITIONAL_HEADERS.get(self.filetype)
        if expected_line is not None:
            return bool(row == expected_line)

        return False

    def StringToDatetime(self, datetime, dt_type):
        """Convert a string timestamp into a datetime object.

        Args:
            datetime: A string formatted as a timestamp
                (i.e 1970-01-01 00:00:00.000).
            dt_type: which datetime field we are parsing. Used for logging
                purposes.

        Returns:
            A datetime object, or None if the value is blank or cannot be
            parsed. Both failure cases increment ``self.errorCount``.
        """
        # Some fields hold only blanks and some are empty; both mean "missing".
        if hasattr(datetime, "strip") and not datetime.strip():
            self.errorCount += 1
            self._report(
                logging.WARNING,
                dt_type + " is missing the date at line " + str(self.rowcount))
            return None

        try:
            return dparser.parse(str(datetime))
        except Exception:
            # Best effort: an unparsable date is recorded, never fatal.
            self.errorCount += 1
            self._report(
                logging.ERROR,
                dt_type + " has incorrect date at line " + str(self.rowcount))
            return None

    def readyBoostParse(self, string):
        """Parse the last (readyboost) field of the csv file.

        The field is a ';'-separated list of entries shaped like
        ``vol serial# / vol name [last modify regtime]``.

        Args:
            string: string representation of the last field in the csv

        Returns:
            readyboostDict: dictionary with ``volume_serialN``,
            ``volume_nameN`` and ``last_modifyN`` keys, N counting from 1.
        """
        readyboostDict = {}
        readyboost_items = string.split(';')

        # Checked once: the item count does not change while iterating.
        if len(readyboost_items) > 5:
            self.errorCount += 1
            self._report(
                logging.ERROR,
                "Too many readyboost items at line " + str(self.rowcount))

        for item_counter, item in enumerate(readyboost_items, 1):
            suffix = str(item_counter)
            try:
                vol_sn_and_vol_name, _, last_mod_regdatetime = item.partition("[")
                vol_sn, _, vol_name = vol_sn_and_vol_name.partition("/")
                readyboostDict['volume_serial' + suffix] = vol_sn.replace('-', '')
                readyboostDict['volume_name' + suffix] = vol_name
                readyboostDict['last_modify' + suffix] = self.StringToDatetime(
                    last_mod_regdatetime.strip("]"), "readyboost")
            except Exception:
                # Keep the slot so the numbering stays stable for callers.
                self.errorCount += 1
                self._report(
                    logging.ERROR,
                    "Could not parse readyboost item " + suffix
                    + " at line " + str(self.rowcount))
                readyboostDict['volume_serial' + suffix] = ""
                readyboostDict['volume_name' + suffix] = ""
                readyboostDict['last_modify' + suffix] = None
        return readyboostDict

    def readAdditionalData(self):
        """Read the data from the additional CSV file.

        The file is a sequence of sections. Each section has a header block
        enclosed by two dash-only separator lines (containing an
        ``Artifact: <name>`` entry), then a column-header line, then data
        lines up to the next separator.

        Args:
            None

        Return:
            None normally or when an unknown artifact is found; False when a
            section's column header does not match the expected one.
        """
        in_header = False
        header_passed = False
        internal_header = False
        with open(self.path, "rb") as csvfile:
            for line in csvfile:
                line = line.rstrip('\r\n')
                if not line:
                    continue
                is_separator = self._is_separator(line)

                if internal_header and header_passed:
                    if not is_separator:
                        handler_name = self._ADDITIONAL_HANDLERS.get(self.filetype)
                        if handler_name is not None:
                            getattr(self, handler_name)(line)
                    else:
                        # A separator ends the data of the current section.
                        internal_header = False
                        header_passed = False
                        self.filetype = ""

                if is_separator:
                    # Separators alternately open and close a header block.
                    if in_header:
                        header_passed = True
                    in_header = not in_header
                    continue

                if in_header:
                    # partition() tolerates values that contain ':' themselves
                    # and lines that have no ':' at all.
                    key, _, value = line.partition(':')
                    if key == "Artifact":
                        value = value.strip()
                        if value not in self.artifacts:
                            print("Unknown artifact found.")
                            self._report(
                                logging.ERROR,
                                "Unknown artifact found in file: {0:s}".format(self.path))
                            return
                        self.filetype = value
                    continue

                if header_passed and not internal_header:
                    if not self.is_arbitrary_text(line):
                        self._report(
                            logging.ERROR,
                            "Did not find expected header in CSV file: " + self.path)
                        return False
                    internal_header = True

    def readData(self):
        """Read the data from the CSV file.

        Rows before the expected header are ignored. Every row after the
        header is passed to the ``process*`` method for ``self.filetype``
        and counted in ``self.rowcount``. A missing header or an I/O error
        is printed, logged and recorded in ``self.errors``.

        Args:
            None

        Return:
            None
        """
        handler_name = self._ROW_HANDLERS.get(self.filetype)
        header_found = False
        # Number of the data row being processed (used in error messages).
        self.rowcount = 1
        try:
            with open(self.path, "rU") as data_initial:
                # NUL bytes make the csv module fail, so strip them first.
                cleaned = (line.replace('\0', '') for line in data_initial)
                reader = csv.reader(cleaned, delimiter=",")
                for row in reader:
                    if not header_found:
                        header_found = self.is_arbitrary_text(row)
                        continue
                    # Everything after the header is data; no further rows
                    # are skipped.
                    if handler_name is not None:
                        getattr(self, handler_name)(row)
                    self.rowcount += 1
        except IOError as e:
            reason = e.strerror or str(e)
            print("Error parsing file: %s" % reason)
            self._report(logging.ERROR, "Error parsing file: %s" % reason)
            return

        if not header_found:
            self._report(
                logging.ERROR,
                "Did not find expected header in CSV file: " + self.path)
            print("Did not find expected header in CSV file %s." % (self.path,))
有人能帮我写一个在视图中使用这个类的原型,让这些错误显示在页面上吗?该类定义在名为twzworkscsvreader.py的文件中,该文件位于我称为mysq_db_loader的模块内。
答案 0 :(得分:0)
如果你使用的是Django,那么这个包在CSV文件之上做了很多很好的抽象:https://github.com/fusionbox/django-separated