是否有更优雅的方法来处理必须评估许多案件?
这看起来太难看了,效率也不高。做经验丰富的Python3程序员 用列表推导工作某种伏都教做强烈的评价?
示例文件位于底部。
#! /usr/bin/env python3
#This utility attempts to parse the output text files generated by dcp_inspect
#for faster inspection and when attending to multiple DCP inspections.
#dcp_inspect_parse runs on Python3.x (Though I am testing on 3.3)
#
#
#
#
#
#
import glob
import os
import sys
print(os.getcwd())
cpl = []
content = []
contentkind = []
container = []
errors = []
package = []
summary = []
mainsound = []
mainpicture = []
encryption = []
duration = []
fsp = []
dimensiontype = []
aspect = []
filesize = [2]
audio = []
cpltype = []
dir= []
dir=input("Please enter directory location of dcp_inspect output***\n")
print('Changing directories')
os.chdir(dir)
print(os.getcwd())
print('Attempting to open file(s) in directory:\n')
print(dir,'\n')
for file in glob.glob("*"):
try:
newfile = file
print("Scanning...\t\t\t\t\t\t", newfile)
data = open(newfile)
for each_line in data:
if 'summary'in each_line:
summary.extend(each_line.split())
elif 'Errors' in each_line:
errors=(each_line.split())
elif 'Package with total size'in each_line:
if 'Bytes 0' in each_line:
continue
else:
temp =[]
temp.extend(each_line.split())
#print(len(temp))
filesize=(temp.pop()+' '+temp.pop())
#print('size =',size)
elif 'MainSound'in each_line:
if 'audio' in each_line:
if '24bps' in each_line:
if '48kHz' in each_line:
if '6ch' in each_line:
if '16ch' in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '16ch 48kHz 24bps'
elif '6ch' in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '6ch 48kHz 24bps'
elif '7ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '7ch 48kHz 24bps'
elif '8ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '8ch 48kHz 24bps'
elif '10ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '10ch 48kHz 24bps'
elif '11ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '11ch 48kHz 24bps'
elif '12ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '12ch 48kHz 24bps'
elif '13ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '13ch 48kHz 24bps'
elif '14ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '14ch 48kHz 24bps'
elif '15ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '15ch 48kHz 24bps'
elif '16ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '16ch 48kHz 24bps'
elif '17ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '17ch 48kHz 24bps'
elif '18ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '18ch 48kHz 24bps'
elif '19ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '19ch 48kHz 24bps'
elif '20ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '20ch 48kHz 24bps'
elif '21ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '21ch 48kHz 24bps'
elif '22ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '22ch 48kHz 24bps'
elif '23ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '23ch 48kHz 24bps'
elif '24ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '24ch 48kHz 24bps'
elif '96kHz' in each_line:
audio = 'unknown format, bitrate, etc'
if '6ch'in each_line:
if '16ch' in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '16ch 96kHz 24bps'
elif '26ch' in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '26ch 96kHz 24bps'
elif '6ch' in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '6ch 96kHz 24bps'
elif '7ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '7ch 96kHz 24bps'
elif '8ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '8ch 96kHz 24bps'
elif '9ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '9ch 96kHz 24bps'
elif '10ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '10ch 96kHz 24bps'
elif '11ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '11ch 96kHz 24bps'
elif '12ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '12ch 96kHz 24bps'
elif '13ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '13ch 96kHz 24bps'
elif '14ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '14ch 96kHz 24bps'
elif '15ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '15ch 96kHz 24bps'
elif '16ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '16ch 96kHz 24bps'
elif '17ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '17ch 96kHz 24bps'
elif '18ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '18ch 96kHz 24bps'
elif '19ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '19ch 96kHz 24bps'
elif '20ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '20ch 96kHz 24bps'
elif '21ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '21ch 96kHz 24bps'
elif '22ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '22ch 96kHz 24bps'
elif '23ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '23ch 96kHz 24bps'
elif '24ch'in each_line:
temp = []
temp.extend(each_line.split(','))
audio = '24ch 96kHz 24bps'
elif 'MainPicture'in each_line:
if 'pictures' in each_line:
temp = []
temp.extend(each_line.split(','))
container = temp[-2]
encryption= temp[-3]
elif 'CPL Id:'in each_line:
cpl=(each_line.split())
elif 'CPL type: 'in each_line:
cpltype=(each_line.split())
cpltype = cpltype[2]
elif 'ContentKind: 'in each_line:
contentkind=(each_line.split())
contentkind = contentkind[1]
elif 'ContentTitleText:'in each_line:
content=(each_line.split(':'))
content = content[1]
for x in range(len(summary)):
if 'fps' in summary[x]:
#print(summary[(x-1)],'near fps')
#print(summary[x],'near fps')
fps = summary[(x-1)]
duration = summary[(x-2)].strip()
elif 'summary:' in summary[x]:
#print(summary[(x)])
content = summary[(x+1)]
content.strip(',')
elif '2D' in summary[x]:
#print(summary[(x)], 'found')
dimensiontype = summary[(x)]
aspect = summary[(x+1)].strip(',')
elif '3D' in summary[x]:
#print(summary[(x)], 'found')
dimensiontype = summary[(x)]
aspect = summary[(x+1)]
print('Container: \t\t',container.strip())
print('CPL Type: \t\t',cpltype)
print('Duration: \t\t',duration.strip(','))
print('Frame Rate: \t\t',fps)
print('Aspect Ratio: \t\t',aspect.strip(','))
print('Dimension: \t\t',dimensiontype.strip(','))
print('Content Title: \t\t',content.strip(',').strip('""'))
print('Audio for DCP: \t\t', audio)
print('Package size: \t\t',filesize)
print('Encryption status: \t\t',encryption.upper().strip())
print('CPL ID: \t\t', cpl[2])
print('Content Kind: \t\t',contentkind.upper())
print('\n')
print('There are',errors[1],'Errors and',errors[3],'hints for', content.strip(','))
if errors[1] != '0':
print('could be issues\n')
else:
print('This DCP appears to be OK\n')
#print(summary)
data.close()
except:
print("Exception Encountered")
pass
找到1个Assetmap AM 7599203f-a73f-4c55-b967-93c3061e10ea:Overnighters_01 / Interop_20140108 / ASSETMAP AM 7599203f-a73f-4c55-b967-93c3061e10ea列出了4个资产: cef846b8-8f85-4cb4-9cb8-f39e53ebe013:Overnighters_01 / Interop_20140108 / cef846b8-8f85-4cb4-9cb8-f39e53ebe013_pcm.mxf c2cb9f04-83bf-4829-b4cf-e9963dbaa0d3:Overnighters_01 / Interop_20140108 / c2cb9f04-83bf-4829-b4cf-e9963dbaa0d3_j2c.mxf d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:Overnighters_01 / Interop_20140108 / d28cfbbd-9cc7-4cd9-ac18-66fc031b0118_cpl.xml edc7e9bc-7397-4922-85e2-4ebf5b2d710a:Overnighters_01 / Interop_20140108 / edc7e9bc-7397-4922-85e2-4ebf5b2d710a_pkl.xml Assetmap 7599203f-a73f-4c55-b967-93c3061e10ea列出了1个PKL: 存在PKL文件:edc7e9bc-7397-4922-85e2-4ebf5b2d710a:/media/Overnighters_01/Interop_20140108/edc7e9bc-7397-4922-85e2-4ebf5b2d710a_pkl.xml
找到1个包裹 存在PKL文件:edc7e9bc-7397-4922-85e2-4ebf5b2d710a:/media/Overnighters_01/Interop_20140108/edc7e9bc-7397-4922-85e2-4ebf5b2d710a_pkl.xml
PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a:/media/Overnighters_01/Interop_20140108/edc7e9bc-7397-4922-85e2-4ebf5b2d710a_pkl.xml PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a:架构检查:OK PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a:未找到签名节点 PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a:AnnotationText:OVERNIGHTERS_20140108 PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a列出3个资产 cef846b8-8f85-4cb4-9cb8-f39e53ebe013:application / x-smpte-mxf; asdcpKind =声音: d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:text / xml; asdcpKind = CPL:Overnighters_01 / Interop_20140108 / d28cfbbd-9cc7-4cd9-ac18-66fc031b0118_cpl.xml [...] g哈希值:0%[] ETA - : - : - 经过 - : - : - ] g哈希值:16%[====] ETA 00:00:00经过00:00:00A 00:00:00经过00:00:00 PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a:包装尺寸:93.9 GB PKL edc7e9bc-7397-4922-85e2-4ebf5b2d710a列出1个成分 CPL文件存在:d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:/media/Overnighters_01/Interop_20140108/d28cfbbd-9cc7-4cd9-ac18-66fc031b0118_cpl.xml
找到1个成分 CPL文件存在:d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:/media/Overnighters_01/Interop_20140108/d28cfbbd-9cc7-4cd9-ac18-66fc031b0118_cpl.xml
CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:卷轴1:音频分析...... CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:卷轴1:音频分析:完成 CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:架构检查:OK CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:未找到签名节点 CPL Id:d28cfbbd-9cc7-4cd9-ac18-66fc031b0118 CPL文件:/media/Overnighters_01/Interop_20140108/d28cfbbd-9cc7-4cd9-ac18-66fc031b0118_cpl.xml CPL类型:Interop(http://www.digicine.com/PROTO-ASDCP-CPL-20040511#) ContentTitleText:OVERNIGHTERS_FTR_F_EN_51_2K_20140108_CLO_OV AnnotationText:[空] ContentKind:功能 IssueDate:2014-01-08T17:49:36-08:00(2014年1月8日星期三17:49) 发行人:Colorflow Digital 创作者:Colorflow 卷轴数量:1 卷轴1: 145729 01:41:12 + 01 @ 24.0 c2cb9f04 MainPicture(MXF Interop,01:43:53 + 18,明文,1920x1080,图片) 145729 01:41:12 + 01 @ 24.0 cef846b8 MainSound(MXF Interop,01:43:53 + 18,明文,6ch 48kHz 24bps,[FAIL],[FAIL],音频) 总持续时间: 145729 01:41:12 + 01 @ 24.0 CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:构图摘要:" OVERNIGHTERS_FTR_F_EN_51_2K_20140108_CLO_OV",Interop,Plaintext,2D,HD,HD,01:41:12 + 01,24.0 fps 作文完成
提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:卷轴1:MainPicture具有非DCI宽高比1.778(1920x1080,HD):建议播放适当的非标准屏蔽 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:卷轴1:MainPicture具有非DCI像素尺寸(1920x1080,HD) 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:命名约定:ContentTitleText" OVERNIGHTERS_FTR_F_EN_51_2K_20140108_CLO_OV"缺少一些部分 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:命名约定:9个部件匹配:设施:" CLO" film_title:" OVERNIGHTERS" content_kind:" FTR" ASPECT_RATIO:" F" 60512-1-100:" 51"分辨率:" 2K"工作室:" EN"日期:" 20140108" PACKAGE_TYPE:" OV" 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:命名约定:缺少2个部分:language,territory_rating 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:命名约定:部分aspect_ratio声称平坦但合成宽高比为HD 提示:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:命名约定:部件分辨率声称为2K,但成分分辨率为HD 信息:CPL d28cfbbd-9cc7-4cd9-ac18-66fc031b0118:组成OVERNIGHTERS_FTR_F_EN_51_2K_20140108_CLO_OV 3天前发布 信息:dcp_inspect v1.2013.12.16于2014年1月11日星期六10:57(00:01:18:46) 信息:检查:/媒体 信息:找到1个包裹,总大小93.9 GB 信息:找到1个Assetmap,1个包(0个签名),1个组合(0个签名,1个明文/ 0个KDMs要求) 信息:0个错误,7个提示
答案 0 :(得分:2)
您可以在值上使用正则表达式匹配,而不是测试每个案例:
m = re.search('([0-9]+)bps', each_line)
if m is None:
bps = None
else:
bps = int(m.group(1))
m = re.search('([0-9]+)ch', each_line)
if m is None:
channels = None
else:
channels = int(m.group(1))
m = re.search('([0-9]+)kHz', each_line)
if m is None:
bandwidth = None
else:
bandwidth = int(m.group(1))
# if still needed
audio = '%ich %ikHz %ibps' % (channels, bandwidth, bps)
如果它仍然采用我可以看到here的相同格式,您也可以匹配整行:
patt = '''MainSound\s*\(.*([0-9]+) channels, ([0-9]+) kHz, ([0-9]+) bps, audio\)'''
m = re.search(patt, each_line)
if m:
channels, bandwidth, bps = m.groups()
或者,如果您确切知道该行的样子,可以使用PyParsing。
答案 1 :(得分:1)
通过分解
可以简化您的程序temp = []
temp.extend(each_line.split(','))
来自每个elif
,而不是在elif部分的上方或下方写一次。
通常可以通过将源拆分为块然后使用块作为函数字典的键(或者,在您的情况下,字符串字典也可以工作,因为所有终端函数不同)来改进这样的解析只在他们的字符串中)。例如,请参阅this blog post。只要输入是常规的,这可能是您获得良好重构的最佳途径。思考数据结构而不是流控制 - 围绕将输入映射到输出的数据结构构建一个小程序,而不是将映射嵌入程序代码中的大型程序。