我创建了这段代码,以扫描samples_vsdt.txt
以获得一定的值,然后将其写入csv,我遇到了错误StopIteration
,甚至没有读取文本文件。我试图解决这个问题数小时,不知道是什么原因引起的?
这是我的代码的工作方式,例如以下行:
Scanning samples_extracted\82e5b144cb5f1c10629e72fc1291f535db7b0b40->(Word 2003 XML Document 1003-1)
将以这种方式写入csv:
82e5b144cb5f1c10629e72fc1291f535db7b0b40,Word 2003 XML Document 1003-1
这是我的代码,它适用于我所有的txt_files,但是此sample_vsdt.txt无法正常工作
import csv,re
out_vsdt = "samples_vsdt.txt"
out_sha1_vsdt = "sha1_vsdt.csv"
def read_text_file(out_vsdt):
with open(out_vsdt) as f:
data = []
for line in f:
if "Scanning " + new in line and "(" in line:
try:
sha = re.search('\\\(.*)->', line).group(1)
desc= re.search('->\((.*)\)', line).group(1)
except AttributeError:
desc = None
sha = None
mix = sha,desc
data.append(mix)
continue
if "Scanning " + new in line:
try:
sha= re.search('\\\(.*)$', line).group(1)
while True:
i = next(f)
if "(" in i:
try:
desc = re.search('->\((.*)\)', i).group(1)
break
except AttributeError:
desc = None
sha = None
mix = sha,desc
data.append(mix)
except AttributeError:
sha = None
return data
def write_csv_file(data,out_sha1_vsdt):
with open(out_sha1_vsdt, 'wb') as csvfile:
csvwriter = csv.writer(csvfile, delimiter=',', quotechar='"')
csvwriter.writerow(['SHA-1','VSDT','DESC'])
for row in data:
csvwriter.writerow(row)
def main():
data = read_text_file(out_vsdt)
write_csv_file(data, out_sha1_vsdt)
if __name__ == '__main__':
main()
print "Parsing Successful"
给我错误:
Traceback (most recent call last):
File "C:\Users\trendMICRO\Desktop\ojt\scanner\parser.py", line 65, in <module>
main()
File "C:\Users\trendMICRO\Desktop\ojt\scanner\parser.py", line 61, in main
data = read_text_file(out_vsdt)
File "C:\Users\trendMICRO\Desktop\ojt\scanner\parser.py", line 37, in read_text_file
i = next(f)
StopIteration
答案 0 :(得分:0)
另一种方法可能是仅使用正则表达式提取整个块:
import csv
import re
out_vsdt = "samples_vsdt.txt"
out_sha1_vsdt = "sha1_vsdt.csv"
with open(out_vsdt) as f_input:
vscan32 = f_input.read()
with open(out_sha1_vsdt, 'w', newline='') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow(['SHA-1', 'VSDT', 'DESC'])
for sha, desc, vsdt in re.findall(r'Scanning.*?\\([0-9a-f]+)(.*?)->\((.*?)\)$', vscan32, re.S + re.M):
desc = '|'.join(line.strip() for line in desc.splitlines() if len(line.strip()))
desc = ''.join(filter(lambda x: x in string.printable, desc)) # remove non-printable characters
csv_output.writerow([sha, vsdt, desc])
这使用多行表达式来查找以Scanning
开头的块。如果有多行,则使用|
将这些行剥离并连接在一起。最后,所有不可打印的字符将从说明中删除。
这将为您提供类似以下内容的输出:
SHA-1,VSDT,DESC
004d44eeecae27314f8bd3825eb82d2f40182b51,WIN32 EXE 7-2,
07eab9ea58d4669febf001d52c5182ecf579c407,WIN32 EXE 7-2,
0d558bb5e0a5b544621af0ffde1940615ac39deb,WIN32 EXE 7-2,
5172c70c1977bbddc2a163f6ede46595109c7835,WIN32 EXE 7-2,- $R0\NsCpuCNMiner32.exe->Found Virus [WORM_CO.331300D2]|- $R0\NsCpuCNMiner64.exe->Found Virus [WORM_CO.331300D2]|- $R0\NsGpuCNMiner.exe->Found Virus [TROJ64_.743CC567]
这假设您使用的是Python 3.x