我正在尝试从过去7天的.gz文件中检索特定的数据列,并将其拆分为两个单独的csv文件,然后我希望在删除重复文件之前获取每个文件的行数。
当前我的输出文件为空,非常感谢您的帮助。
#!/usr/bin/python
import gzip, os, csv, time, glob, shutil, smtplib, logging, os.path, datetime, argparse
from datetime import date, timedelta
strDate = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
home = os.path.expanduser("~")
LOG_PATH = home + '/logs/'
LOG_FILENAME = LOG_PATH + strDate + '_scripts.log'
SOURCEDIR = home + '/archive/data/incoming/'
ARCHIVE = home + '/archive/data/Accounts/'
count = 7
limit = 0
if not os.path.isdir(ARCHIVE):
try:
os.mkdir(ARCHIVE)
logging.info('Directory ' + ARCHIVE + ' was missing but has been created.')
except:
logging.warning('Directory ' + ARCHIVE + ' is missing and can\'t be created. Exiting')
exit(1)
while ( count > limit ):
yesterday = date.today() - timedelta(count)
yesterday = yesterday.strftime('%Y%m%d')
if os.path.exists(SOURCEDIR+"spam_"+yesterday+".csv.gz"):
fin = gzip.open(SOURCEDIR+"spam_"+yesterday+".csv.gz",'rb')
reader = csv.reader(fin,delimiter = ',',quotechar="'")
fo = open(ARCHIVE+"Email_Accounts"+yesterday+".csv", 'ab')
fo2 = open(ARCHIVE+"Accounts"+yesterday+".csv", 'ab')
csvWriter = csv.writer(fo)
csvWriter2 = csv.writer(fo2)
try:
for row in reader:
SITE = row[2].strip()
SITE = SITE.rjust(2, '0')
ACCOUNT = row[1].strip()
ACCOUNT = ACCOUNT.rjust(9, '0')
EMAIL = row[3].strip()
DATA = (SITE+ACCOUNT+EMAIL)
EMAILData = (EMAIL)
ACCOUNTDATA = (SITE+ACCOUNT)
csvWriter.writerow(EMAILData)
csvWriter2.writerow(ACCOUNTDATA)
except IndexError:
pass
fo.close()
fo2.close()
fin.close()