python - 一种更好,更短的解析方法?

时间:2017-02-18 09:43:29

标签: python regex python-2.7 parsing

是否有更好的方法来构建以下三个 BLOCK?我目前的写法似乎非常冗长,而且将来要添加更多的数据提取项并不容易。也许可以把 re.compile 的模式放进一个元组里,用类似的语法来处理?

BLOCK ONE

# Pre-compiled line patterns, grouped by name prefix (gen / nn / ps).
# Every expression is an empty placeholder (r'') in this listing.

# "gen" patterns
po_gen_execution = re.compile(r'')
po_gen_start_date = re.compile(r'')
po_gen_fileserver_host = re.compile(r'')
po_gen_site = re.compile(r'')
po_gen_grid_check = re.compile(r'')
po_gen_params = re.compile(r'')

# "nn" patterns
po_nn_name = re.compile(r'')
po_nn_launcher = re.compile(r'')
po_nn_DbSourceMapping = re.compile(r'')
po_nn_DbSource = re.compile(r'')

# "ps" patterns
po_ps_ps = re.compile(r'')
po_ps_unit = re.compile(r'')
po_ps_npid = re.compile(r'')
po_ps_status = re.compile(r'')
po_ps_start = re.compile(r'')
po_ps_end = re.compile(r'')

BLOCK ONE 结束

def duration(ti, tf):
    """Return the elapsed time between two timestamps.

    Both ``ti`` (start) and ``tf`` (end) are strings in the
    ``'%Y/%m/%d %H:%M:%S'`` format.  When ``ti`` is not later than ``tf``
    the result is ``tf - ti`` as a ``datetime.timedelta``; otherwise an
    informational message is logged and ``-1`` is returned.
    """
    stamp_format = '%Y/%m/%d %H:%M:%S'
    # The end timestamp is parsed first, so a malformed ``tf`` is reported
    # before a malformed ``ti``.
    finished = datetime.datetime.strptime(tf, stamp_format)
    began = datetime.datetime.strptime(ti, stamp_format)
    if began > finished:
        logging.info('Started before finish? check me!')
        return -1
    return finished - began
# List of files to treat
gen_file_list = open(sys.argv[1])
G_dic = {}
for filename in gen_file_list.readlines():
    filename = filename.rstrip()
    G_dic[filename] = {}
    with open(filename) as f:
        G_dic[filename]['gen'] = G_dic[filename]['nn'] = G_dic[filename]['ss'] = {}
        filename = filename.rstrip()
        params = []
        for line in f:

BLOCK TWO

# Try every pattern against the current line.  search() looks anywhere in
# the line, match() only at its beginning; each result is a match object
# or None.
LG_start_date   = po_gen_start_date.search(line)
LG_fileserver   = po_gen_fileserver_host.match(line)
LG_site         = po_gen_site.match(line)
LG_grid_check   = po_gen_grid_check.match(line)
LG_params       = po_gen_params.search(line)
LN_launcher     = po_nn_launcher.match(line)
LN_dbsm         = po_nn_DbSourceMapping.match(line)
LN_dbs          = po_nn_DbSource.match(line)
LP_unit         = po_ps_unit.match(line)
# Uses po_ps_npid (the pattern that is actually defined) and binds
# LP_npid (the name the extraction chain tests); the previous
# po_ps_pid / LP_pid spelling raised NameError.
LP_npid         = po_ps_npid.match(line)
LP_status       = po_ps_status.match(line)
LP_end          = po_ps_end.match(line)

BLOCK TWO 结束

BLOCK THREE

# Store the captured text of the first pattern that matched this line.
# The branches are mutually exclusive: at most one runs per line, checked
# in the order written below.
if LG_params:
    # Parameters accumulate across lines; group 2 holds the value.
    params.append(LG_params.group(2).strip())
elif LG_start_date:
    # Two captures here; they are combined into the start timestamp later.
    LG_sd = LG_start_date.group(1).strip()
    LG_st = LG_start_date.group(2).strip()
elif LG_fileserver:
    LG_fs = LG_fileserver.group(1).strip()
elif LG_site:
    LG_s = LG_site.group(1).strip()
elif LG_grid_check:
    LG_gc = LG_grid_check.group(1).strip()
elif LN_launcher:
    LN_l = LN_launcher.group(1).strip()
elif LN_dbsm:
    LN_d = LN_dbsm.group(1).strip()
elif LN_dbs:
    LN_db = LN_dbs.group(1).strip()
elif LP_unit:
    LP_u = LP_unit.group(1).strip()
elif LP_npid:
    LP_np = LP_npid.group(1).strip()
elif LP_status:
    LP_s = LP_status.group(1).strip()
elif LP_end:
    # Two captures here; they are combined into the end timestamp later.
    LP_en = LP_end.group(1).strip()
    LP_em = LP_end.group(2).strip()

BLOCK THREE 结束

# Combine the two captured pieces of each timestamp into the single
# space-separated string that duration() parses.
start = " ".join((LG_sd, LG_st))
end = " ".join((LP_en, LP_em))

# Assemble the three per-file sections from the values captured above.
G_dic[filename]['gen'] = {
    'unit': LP_u,
    'fileserver': LG_fs,
    'site': LG_s,
    'exe params': params,
}
G_dic[filename]['nn'] = {
    'name': params[1],
    'launcher': LN_l,
}
G_dic[filename]['ps'] = {
    'name': params[0],
    'start': start,
    'end': end,
    # Stored as text: str() of whatever duration() returns.
    'duration': str(duration(start, end)),
    'grid check': LG_gc,
    'pid': LP_np,
    # NOTE(review): this key ends with a space; kept as-is because
    # consumers may rely on the exact spelling -- confirm.
    'exit_code ': LP_s,
}

0 个答案:

没有答案