如何让这段代码更加pythonic?

时间:2015-09-07 15:24:34

标签: python pandas

我正在读取一批按天生成的文件,并使用glob把它们合并成各自独立的数据帧(DataFrame)。最后我会把这些数据帧连接(join)在一起,基本上就是生成一个供仪表板使用的大文件。我对Python本身不太熟悉,但经常使用pandas和sklearn。

正如您所看到的,我基本上只是读取最近60天(或更多)的数据(即最后60个文件),并为每一类数据创建一个数据帧。这样做是可行的,但我想知道是否有更pythonic/更好的写法?我在pydata上看过一段视频(讲的是不要被PEP 8束缚,同时确保你的代码是pythonic的),很有意思。

(仅供参考:之所以需要读取60天的数据,是因为客户可以针对很久以前的一次通话填写调查表。例如,客户今天才就7月份的一次通话填写调查表,而我需要了解那次通话的情况:持续了多长时间、主题是什么等等。)

# Daily-export loader: reads per-day CSV files for four feeds (AHT, FCR, NPS,
# VAS), concatenates each feed into one DataFrame, then loads two standalone
# CSV files (roster and splits).
# NOTE(review): dt, np, pd, Timestamp and Timedelta are not imported in this
# snippet -- presumably imported earlier; confirm.
os.chdir(r'C:\\Users\Documents\FTP\\')  # the per-day files below are opened with relative names
loc = r'C:\\Users\Documents\\'  # prefix for the roster and splits files
rosterloc = r'\\mand\\'  # assigned here but not referenced again in this snippet
splitsname = r'Splits.csv'
fcrname = r'global_disp_'
npsname = r'survey_'
ahtname = r'callbycall_'
rostername = 'Daily_Roster.csv'
vasname = r'vas_report_'
ext ='.csv'
# Window: from 60 days before today up to today.
startdate = dt.date.today() - Timedelta('60 day')
enddate = dt.date.today() 
# Length of the window, converted to a whole number of days for range().
daterange = Timestamp(enddate) - Timestamp(startdate)
daterange = (daterange / np.timedelta64(1, 'D')).astype(int)

# One accumulator list per feed: calls -> AHT, data -> FCR, frames -> NPS,
# bracket -> VAS.
data = []
frames = []
calls = []
bracket = []
# AHT: one file per day from startdate onwards, dates formatted as '%Y_%m_%d'.
# NOTE(review): the try wraps the whole loop, so the first IOError ends the
# loop; files for later dates are not read. The same applies to every feed below.
try:
    for date_range in (Timestamp(startdate) + dt.timedelta(n) for n in range(daterange)):
        aht = pd.read_csv(ahtname+date_range.strftime('%Y_%m_%d')+ext)
        calls.append(aht)
except IOError:
        print('File does not exist:', ahtname+date_range.strftime('%Y_%m_%d')+ext)
aht = pd.concat(calls)
print('AHT Done')                 
# FCR: same date window, but file names use '%m_%d_%Y'; call_time is parsed as dates.
try:
    for date_range in (Timestamp(startdate) + dt.timedelta(n) for n in range(daterange)):
        fcr = pd.read_csv(fcrname+date_range.strftime('%m_%d_%Y')+ext, parse_dates = ['call_time'])
        data.append(fcr)
except IOError:
        print('File does not exist:', fcrname+date_range.strftime('%m_%d_%Y')+ext)
fcr = pd.concat(data)
print('FCR Done')                                                
# NPS: only three files, counting backwards from enddate (today, -1 day, -2 days).
try:
    for date_range in (Timestamp(enddate) - dt.timedelta(n) for n in range(3)):
        nps = pd.read_csv(npsname+date_range.strftime('%m_%d_%Y')+ext, parse_dates = ['call_date','date_completed'])
        frames.append(nps)
except IOError:
        print('File does not exist:', npsname+date_range.strftime('%m_%d_%Y')+ext)
nps = pd.concat(frames)
print('NPS Done')                
# VAS: same date window as AHT/FCR, '%m_%d_%Y' file names; Call_date is parsed as dates.
try:
    for date_range in (Timestamp(startdate) + dt.timedelta(n) for n in range(daterange)):
        vas = pd.read_csv(vasname+date_range.strftime('%m_%d_%Y')+ext, parse_dates = ['Call_date'])
        bracket.append(vas)
except IOError:
        print('File does not exist:', vasname+date_range.strftime('%m_%d_%Y')+ext)
vas = pd.concat(bracket)
print('VAS Done')                 
# Roster and splits are single files addressed through the `loc` prefix.
roster = pd.read_csv(loc+rostername)
print('Roster Done')
splits = pd.read_csv(loc+splitsname)
print('Splits Done')      

1 个答案:

答案 0 :(得分:-1)

我没有改动变量名,但恕我直言,它们应该写得更完整一些,例如 pd 写成 pandas?我也不确定。下面是一种更加pythonic的写法:

import contextlib
import datetime as dt
import logging
import os
from functools import partial
from operator import add, sub

import numpy as np
import pandas as pd
from pandas import Timedelta, Timestamp

# Script configuration: working directory, file-name pieces, the date window
# and the shared logger used by the helpers below.
os.chdir(r'C:\\Users\Documents\FTP\\')  # per-day files are opened with relative names
location = r'C:\\Users\Documents\\'  # prefix for the roster and splits files
roster_location = r'\\mand\\'  # assigned but not referenced elsewhere in this script
splits_name = r'Splits.csv'
fcr_name = r'global_disp_'
nps_name = r'survey_'
aht_name = r'callbycall_'
roster_name = 'Daily_Roster.csv'
vas_name = r'vas_report_'
ext = '.csv'

# Read "today" once so the two ends of the window cannot straddle midnight.
end_date = dt.date.today()
start_date = end_date - dt.timedelta(days=60)
# Plain date arithmetic already gives a whole number of days as an int, which
# is what range() needs; no scalar .astype() conversion is required.
daterange = (end_date - start_date).days

# A logger is preferable to "print" when there are several tiers to report
# (here: regular progress messages and errors).  The root logger only shows
# WARNING and above by default, so lower the threshold; otherwise the
# debug-level "... Done" progress messages would never be displayed.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()


def timestamps_in_range(daterange, method=add, origin=None):
    """Yield ``daterange`` consecutive daily timestamps.

    The stepping operation is injected instead of branching on the direction.

    Args:
        daterange: Number of days (timestamps) to produce.
        method: Binary operator applied as ``method(anchor, timedelta(n))``;
            ``operator.add`` walks forward in time, ``operator.sub`` backward.
        origin: Date the walk starts from.  When omitted, a forward walk starts
            at the module-level ``start_date`` and a backward walk
            (``method is sub``) starts at the module-level ``end_date``, so
            that "the last N days" really counts back from the end of the
            window rather than from its beginning.

    Yields:
        One ``Timestamp`` per day, beginning with the anchor itself (n = 0).
    """
    if origin is None:
        origin = end_date if method is sub else start_date
    anchor = Timestamp(origin)  # converted once; it does not change per step
    # A generator produces the series lazily, one day at a time.
    for n in range(daterange):
        yield method(anchor, dt.timedelta(n))


def read_csv(name, date_range, stamp_format='%Y_%m_%d', **kwargs):
    """Read the CSV file for one day of one feed.

    Wraps the long, repetitive ``pd.read_csv`` invocation so call sites stay
    short.

    Args:
        name: File-name prefix of the feed (e.g. ``'callbycall_'``).
        date_range: A single date-like object with ``strftime``; its formatted
            value is inserted between ``name`` and the module-level ``ext``.
        stamp_format: ``strftime`` pattern used for the date part of the file
            name.  Defaults to ``'%Y_%m_%d'``.
        **kwargs: Forwarded to ``pd.read_csv`` as keyword arguments
            (e.g. ``parse_dates``).

    Returns:
        Whatever ``pd.read_csv`` returns for the composed path.
    """
    path = name + date_range.strftime(stamp_format) + ext
    # Unpack the options: handing the dict over positionally would bind it to
    # read_csv's second parameter instead of applying the options.
    return pd.read_csv(path, **kwargs)


def log_done(module):
    """Report on the shared logger, at debug level, that ``module`` is done.

    A tiny helper that keeps the repeated progress message out of the call
    sites.
    """
    message = "%s Done" % module
    logger.debug(message)


@contextlib.contextmanager
def mapper(function, iterable):
    """Apply ``function`` to every item, skipping items whose file is missing.

    The context manager keeps the exception handling away from the business
    logic in the ``with`` body: the body only ever sees the safe results.

    Args:
        function: One-argument callable applied to each element.
        iterable: Elements to feed to ``function``.

    Yields:
        A list with the results of the calls that succeeded, in input order.
        An ``IOError`` raised for one item is logged and that item is left
        out; the remaining items are still processed.  Exceptions raised by
        the ``with`` body itself are not intercepted.
    """
    results = []
    for item in iterable:
        # Guard each call separately so one missing file neither discards the
        # results collected so far nor prevents the later files being read.
        try:
            results.append(function(item))
        except IOError as err:
            logger.error('File does not exist: %s', err.filename)
    yield results


# The following code is visually tight and cleaner: it shows only what is
# needed and leaves the insignificant details and repetitive code to the
# helper functions.

# ``partial`` pre-fills the leading argument(s) of read_csv, leaving a
# one-argument callable, which is what mapper applies to each element.
read_csv_aht = partial(read_csv, aht_name)
# The context manager hides the "dangerous" part and hands over only the
# result to be used.
with mapper(read_csv_aht, timestamps_in_range(daterange)) as calls:
    aht = pd.concat(calls)
    log_done('AHT')

# NOTE(review): the question's script named the FCR, NPS and VAS files with
# '%m_%d_%Y' dates and only the AHT files with '%Y_%m_%d' -- confirm that
# read_csv composes the right file names for the three feeds below.

# FCR: parse ``call_time`` as dates, as the question's script did.
read_csv_fcr = partial(read_csv, fcr_name, parse_dates=['call_time'])
with mapper(read_csv_fcr, timestamps_in_range(daterange)) as data:
    fcr = pd.concat(data)
    log_done('FCR')

# NPS: only three days are read, stepping backwards (``sub``).
read_csv_nps = partial(read_csv, nps_name, parse_dates=['call_date', 'date_completed'])
with mapper(read_csv_nps, timestamps_in_range(3, sub)) as frames:
    nps = pd.concat(frames)
    log_done('NPS')

read_csv_vas = partial(read_csv, vas_name, parse_dates=['Call_date'])
with mapper(read_csv_vas, timestamps_in_range(daterange)) as bracket:
    vas = pd.concat(bracket)
    log_done('VAS')

# Roster and splits are single files addressed through the ``location`` prefix.
roster = pd.read_csv(location + roster_name)
log_done('Roster')

splits = pd.read_csv(location + splits_name)
log_done('Splits')