我有一段代码,用于将一种自定义文件格式转换为另一种自定义文件格式。
我使用多进程(multiprocessing)把工作拆分成并行任务:按进程数把文件列表切分成若干段,并把每一段作为输入参数传给对应的进程。
代码:
def CustomFun1(self, ):
    """Convert every Custom1 file in the input directory to Custom2.

    Lists "/Some/location", records the file count, and writes a "started"
    entry (status 1) to report slot 2.  When there are more files than
    worker processes, the list is cut into ``process_count`` contiguous
    slices and each slice is handed to ``createCustom1ToCustom2`` in its own
    ``multiprocessing.Process``; otherwise the conversion runs inline.
    Finally a "finished" entry (status 2) with the elapsed time is written.

    NOTE: each worker process operates on its own copy of this object, so
    the increments of ``self.custom2_ctr`` made by the workers never reach
    the parent.  On the multi-process path the final report therefore
    carries the parent's value of ``self.custom2_ctr`` (0).  Sharing the
    counter needs an inter-process primitive such as
    ``multiprocessing.Value`` plus a ``multiprocessing.Lock``.
    """
    start_time = time.time()
    custom1_files = os.listdir("/Some/location")
    self.total_custom1_count = len(custom1_files)
    self.custom2_ctr = 0
    # Fixed: the original read ``self.total_custom_count``, which is never
    # assigned; the count stored above is ``total_custom1_count``.
    self.updateReport(2, 1, self.total_custom1_count, -1, "")
    self.custom1_2_custom2_cmd = "command to convert custom1 to custom2"
    process_count = 4
    if process_count > 1 and self.total_custom1_count > process_count:
        import multiprocessing
        # Floor division keeps the slice bounds integral on Python 3 as well.
        mid = self.total_custom1_count // process_count
        processes = []
        for ii in range(1, process_count + 1):
            start_index = (ii - 1) * mid
            # The last worker takes everything that is left, including the
            # remainder of the division.
            end_index = None if ii == process_count else ii * mid
            pp = multiprocessing.Process(
                name='custom12custom2_%s_%s' % (ii, time.time()),
                target=self.createCustom1ToCustom2,
                args=(custom1_files[start_index:end_index], ))
            pp.daemon = False
            processes.append(pp)
        for pp in processes:
            pp.start()
        for pp in processes:
            pp.join()
    else:
        self.createCustom1ToCustom2(custom1_files)
    t2 = time.time() - start_time
    self.updateReport(2, 2, self.total_custom1_count, self.custom2_ctr, t2)
def createCustom1ToCustom2(self, custom1_files):
    """Create Custom2 from the Custom1.

    Runs ``self.custom1_2_custom2_cmd`` through ``os.system`` once for each
    entry of ``custom1_files``.  The command template is filled with the
    file name and with the part of the file name before its first ``.``.
    After each command ``self.custom2_ctr`` is incremented, and every fifth
    count a progress entry (status 1) is written to report slot 2.

    Any exception stops the remaining files of this call; the traceback is
    printed to stderr instead of being silently dropped.

    NOTE: when this runs in a worker process, ``self.custom2_ctr`` is that
    process's private copy, so the reported progress counts only the files
    handled by this worker.
    """
    try:
        for custom1_file in custom1_files:
            # NOTE(review): the file name is interpolated into a shell
            # command line; confirm the directory cannot contain names with
            # shell metacharacters.
            os.system(self.custom1_2_custom2_cmd
                      % (custom1_file, custom1_file.split('.')[0]))
            self.custom2_ctr += 1
            if self.custom2_ctr % 5 == 0:
                self.updateReport(2, 1, self.total_custom1_count,
                                  self.custom2_ctr, "")
    except Exception:
        # Still best-effort, but the failure is now visible, and
        # KeyboardInterrupt/SystemExit are no longer trapped.
        traceback.print_exc()
下面是我编写的函数,用来记录已经有多少个 Custom1 类型的文件被转换为 Custom2 类型的文件。
报告变量:
# Report state: one dict per entry, addressed by list position (the ``pos``
# argument of updateReport); CustomFun1 writes to position 2.
#   "pn"     - display name of the entry.
#   "status" - overwritten by updateReport; CustomFun1 writes 1 before and
#              during conversion and 2 at the end (presumably 0 = not
#              started, 1 = running, 2 = done -- confirm).
#   "cnt"    - overwritten by updateReport with its ``cnt`` argument.
#   "cur_i"  - overwritten by updateReport with its ``cur_i`` argument; the
#              first entry has no initial "cur_i" key.
#   "tt"     - 0 initially; updateReport stores an 'HH:MM:SS' string here.
self.report = [{"pn": "Extraction", "status": 0, "cnt": 0, "tt": 0},
{"pn": "Basic conversion Generation", "status": 0, "cnt": 0, "cur_i": 0, "tt": 0},
{"pn": "Cutom1 to custom2", "status": 0, "cnt": 0, "cur_i": 0, "tt": 0}
]
def updateReport(self, pos, status, cnt, cur_i, tt):
    """Update one report entry and rewrite the report file.

    Does nothing when ``self.reportLoc`` is empty.  Otherwise stores
    ``status``, ``cnt`` and ``cur_i`` in ``self.report[pos]``, stores ``tt``
    as an 'HH:MM:SS' string when it is truthy, and dumps the whole report
    as JSON to ``self.reportLoc``.

    Args:
        pos: index of the entry in ``self.report``.
        status: value stored under "status".
        cnt: value stored under "cnt".
        cur_i: value stored under "cur_i".
        tt: elapsed time in seconds, or a falsy value (e.g. "") to leave
            the stored "tt" untouched.

    Errors are not propagated; the traceback is printed to stderr.

    NOTE: when several processes call this, each one holds its own copy of
    ``self.report`` and rewrites the same file without any locking, so the
    file reflects whichever process wrote last.
    """
    if not self.reportLoc:
        return
    try:
        entry = self.report[pos]
        entry["status"] = status
        entry["cnt"] = cnt
        if tt:
            # ``tt`` is a duration, not an epoch timestamp: the original
            # datetime.fromtimestamp(tt) shifted it by the local UTC offset.
            minutes, seconds = divmod(int(tt), 60)
            hours, minutes = divmod(minutes, 60)
            entry["tt"] = '%02d:%02d:%02d' % (hours, minutes, seconds)
        entry["cur_i"] = cur_i
        with open(self.reportLoc, "w") as fp:
            fp.write(simplejson.dumps(self.report))
    except Exception:
        # ``except Exception, e`` is Python-2-only syntax, and the traceback
        # was previously formatted and then discarded.
        traceback.print_exc()
多进程可以正常运行,也能得到预期的输出。转换 2000 个文件的整个过程需要 30 到 40 分钟,因此我需要每隔 30 秒或 1 分钟显示已经转换了多少个文件。为此,我需要写入由函数 updateReport 生成的报告文件。
在由多进程运行的代码中,self.custom2_ctr 没有被更新,也就是说在 CustomFun1 结束时,self.custom2_ctr 的值仍然是 0。
在 updateReport 中,代码会修改共用的 self.report 变量。如何为 updateReport 加锁,使得同一时刻只有一个进程可以调用这个函数?如果需要更多信息,请告诉我。