是否有更好的方法来构建以下三个BLOCKS。我的方式似乎非常有用,并且在将来添加更多数据提取并不容易。也许在元组中使用re.compile sintaxis ???
po_gen_execution = re.compile(r'')
po_gen_start_date = re.compile(r'')
po_gen_fileserver_host = re.compile(r'')
po_gen_site = re.compile(r'')
po_gen_grid_check = re.compile(r'')
po_gen_params = re.compile(r'')
po_nn_name = re.compile(r'')
po_nn_launcher = re.compile(r'')
po_nn_DbSourceMapping = re.compile(r'')
po_nn_DbSource = re.compile(r'')
po_ps_ps = re.compile(r'')
po_ps_unit = re.compile(r'')
po_ps_npid = re.compile(r'')
po_ps_status = re.compile(r'')
po_ps_start = re.compile(r'')
po_ps_end = re.compile(r'')
def duration(ti, tf):
''' return time duration between tf and ti '''
end = datetime.datetime.strptime(tf, '%Y/%m/%d %H:%M:%S')
start = datetime.datetime.strptime(ti, '%Y/%m/%d %H:%M:%S')
if start <= end:
return end - start
else:
logging.info('Started before finish? check me!')
return -1
# List of files to treat
gen_file_list = open(sys.argv[1])
G_dic = {}
for filename in gen_file_list.readlines():
filename = filename.rstrip()
G_dic[filename] = {}
with open(filename) as f:
G_dic[filename]['gen'] = G_dic[filename]['nn'] = G_dic[filename]['ss'] = {}
filename = filename.rstrip()
params = []
for line in f:
LG_start_date = po_gen_start_date.search(line)
LG_fileserver = po_gen_fileserver_host.match(line)
LG_site = po_gen_site.match(line)
LG_grid_check = po_gen_grid_check.match(line)
LG_params = po_gen_params.search(line)
LN_launcher = po_nn_launcher.match(line)
LN_dbsm = po_nn_DbSourceMapping.match(line)
LN_dbs = po_nn_DbSource.match(line)
LP_unit = po_ps_unit.match(line)
LP_pid = po_ps_pid.match(line)
LP_status = po_ps_status.match(line)
LP_end = po_ps_end.match(line)
if LG_params:
params.append(LG_params.group(2).strip())
elif LG_start_date:
LG_sd = LG_start_date.group(1).strip()
LG_st = LG_start_date.group(2).strip()
elif LG_fileserver: LG_fs = LG_fileserver.group(1).strip()
elif LG_site: LG_s = LG_site.group(1).strip()
elif LG_grid_check: LG_gc = LG_grid_check.group(1).strip()
elif LN_launcher: LN_l = LN_launcher.group(1).strip()
elif LN_dbsm: LN_d = LN_dbsm.group(1).strip()
elif LN_dbs: LN_db = LN_dbs.group(1).strip()
elif LP_unit: LP_u = LP_unit.group(1).strip()
elif LP_npid: LP_np = LP_npid.group(1).strip()
elif LP_status: LP_s = LP_status.group(1).strip()
elif LP_end:
LP_en = LP_end.group(1).strip()
LP_em = LP_end.group(2).strip()
start = LG_sd + " " + LG_st
end = LP_en + " " + LP_em
G_dic[filename]['gen'] = {
'unit' : LP_u,
'fileserver' : LG_fs,
'site' : LG_s,
'exe params' : params }
G_dic[filename]['nn'] = {
'name' : params[1],
'launcher' : LN_l }
G_dic[filename]['ps'] = {
'name' : params[0],
'start' : start,
'end' : end,
'duration' : str(duration(start, end)),
'grid check' : LG_gc,
'pid' : LP_np,
'exit_code ' : LP_s }