我需要从一些(74k)压缩日志文件中找到与IP地址列表(464k)匹配的内容。当我手动解压缩它们并搜索它们我的脚本工作,但是,压缩文件时我没有匹配。
from os import listdir
from os.path import isfile, join
import gzip
realList=open('ipList', 'r')
testlist=open('testList', 'r')
offender=open('offenders', 'w')
# DEV Log Dir Uncompressed #
logDir = 'ulogs'
# DEV Log Dir Compressed #
#logDir = 'clogs'
logFile=[ logFiles for logFiles in listdir(logDir) if isfile(join(logDir, logFiles))]
count=0
with testlist as host:
ips = set(l.strip() for l in host)
for lfile in logFile:
print(lfile)
# Uncompressed # Results: 870,328 Hits
with open(logDir + '/' + lfile, 'r') as log:
# Compressed # Results: 0 Hits
#with gzip.open(logDir + '/' + lfile, 'r') as log:
for line in log:
#print(line)
elements = line.split()
if ips.intersection(elements):
#print(elements[3])
offender.write(elements[3] + '\n');
count+=1
print(count)