从.gz文件中提取数据

时间:2019-03-21 12:49:02

标签: python-2.7

我正在尝试从过去7天的.gz文件中提取特定的数据列,并将其拆分写入两个单独的csv文件;然后,我希望在删除重复项之前统计每个文件的行数。

当前我的输出文件为空,非常感谢您的帮助。

#!/usr/bin/python

import gzip, os, csv, time, glob, shutil, smtplib, logging, os.path, datetime, argparse
from datetime import date, timedelta


# Look-back window for the extraction loop: it starts `count` days back and
# stops once the counter reaches `limit`.
count = 7
limit = 0

# Everything is rooted at the invoking user's home directory.
home = os.path.expanduser("~")

# Per-day log file name, stamped with today's date (YYYY-MM-DD).
strDate = datetime.datetime.now().strftime('%Y-%m-%d')
LOG_PATH = home + '/logs/'
LOG_FILENAME = ''.join((LOG_PATH, strDate, '_scripts.log'))

# Input directory holding the gzipped CSVs, and output directory for the
# extracted account files.
SOURCEDIR = home + '/archive/data/incoming/'
ARCHIVE = home + '/archive/data/Accounts/'

# Make sure the output directory exists before any CSV file is opened in it.
if not os.path.isdir(ARCHIVE):
        try:
                # makedirs (unlike mkdir) also creates missing parent
                # directories, so a fresh home directory works too.
                os.makedirs(ARCHIVE)
        except OSError:
                # Only filesystem failures are handled here; anything else
                # (e.g. KeyboardInterrupt) propagates instead of being
                # swallowed by a bare except.
                logging.warning('Directory ' + ARCHIVE + " is missing and can't be created. Exiting")
                # SystemExit is what exit(1) raises, without relying on the
                # site module's interactive helper being installed.
                raise SystemExit(1)
        else:
                logging.info('Directory ' + ARCHIVE + ' was missing but has been created.')


# For each of the last `count` days (oldest first, today excluded), read
# spam_<YYYYMMDD>.csv.gz from SOURCEDIR and append two single-column CSV files
# to ARCHIVE:
#   Email_Accounts<YYYYMMDD>.csv - the e-mail address (input column 3)
#   Accounts<YYYYMMDD>.csv       - zero-padded site (column 2) followed by the
#                                  zero-padded account number (column 1)
#
# Iterating over a range replaces the original `while count > limit` loop,
# which never decremented `count` and therefore never terminated.
for days_back in range(count, limit, -1):
        day_stamp = (date.today() - timedelta(days_back)).strftime('%Y%m%d')
        source_path = SOURCEDIR + "spam_" + day_stamp + ".csv.gz"

        # No archive for that day: nothing was opened, so nothing to close.
        if not os.path.exists(source_path):
                continue

        skipped = 0

        # Binary modes ('rb'/'ab') are what the Python 2 csv module expects.
        # The with-statement closes all three files even if a row fails.
        with gzip.open(source_path, 'rb') as fin, \
             open(ARCHIVE + "Email_Accounts" + day_stamp + ".csv", 'ab') as fo, \
             open(ARCHIVE + "Accounts" + day_stamp + ".csv", 'ab') as fo2:
                reader = csv.reader(fin, delimiter=',', quotechar="'")
                csvWriter = csv.writer(fo)
                csvWriter2 = csv.writer(fo2)

                for row in reader:
                        # Skip only the malformed row; catching IndexError
                        # around the whole loop used to drop every row after
                        # the first short one.
                        if len(row) < 4:
                                skipped += 1
                                continue

                        SITE = row[2].strip().rjust(2, '0')
                        ACCOUNT = row[1].strip().rjust(9, '0')
                        EMAIL = row[3].strip()

                        # writerow() takes a sequence of fields. Passing a
                        # bare string would emit one column per character.
                        csvWriter.writerow([EMAIL])
                        csvWriter2.writerow([SITE + ACCOUNT])

        if skipped:
                logging.warning('%s: skipped %d row(s) with fewer than 4 columns',
                                source_path, skipped)

0 个答案:

没有答案