from num2words import num2words
import re
from googletrans import Translator
import time, os, glob
import concurrent.futures
# Wall-clock time at module load; clean_file reports elapsed time against it.
start_time = time.time()
# Shared googletrans client used by n2w.
translator = Translator()
# Directory the main block changes into and globs for *.txt inputs.
src_dir="/home/lol/patrika1"
# Output file the main block opens in append mode.
dest_file="/home/lol/df.txt"
# Progress counter; starts at 1 and is incremented by clean_file for each file.
counter=1
def n2w(match):
    """Spell out the number captured by a regex match and translate it.

    Args:
        match: a regex match object whose group 1 holds the digits.
            (Looks like a ``re.sub`` replacement callback; the call site is
            not visible here.)

    Returns:
        The ``.text`` of the translation (requested with ``dest='hi'``) of
        the number's ``num2words`` spelling.
    """
    number = int(match.group(1))
    spelled_out = num2words(number)
    translation = translator.translate(spelled_out, dest='hi')
    return translation.text
def clean_file(file_name):
    """Clean one text file and append the result to the shared output file.

    Reads ``file_name``, runs the cleaning logic, collapses runs of newlines
    into a single newline, writes the result to the module-level ``cleaned``
    handle and prints the counter, the file name and the elapsed time.

    Args:
        file_name: path of the text file to read.

    Note:
        ``counter`` is a module global. When this function runs inside a
        ``ProcessPoolExecutor`` worker, the increment only affects that
        worker process's copy; the parent process never sees it.
    """
    global counter
    # The context manager closes the handle even if read() raises.
    with open(file_name, 'r') as fil:
        lines = fil.read()
    # more logic
    # `result2` is produced by the elided logic above.
    result = re.sub(r'[\n]+', '\n', result2)
    counter += 1
    print(counter)
    print(file_name)
    cleaned.write(result)
    # Flush right away: in a pool worker the text would otherwise sit in this
    # process's buffer and may be dropped when the worker exits without
    # closing the file.
    cleaned.flush()
    print("--- %s seconds ---" % (time.time() - start_time))
if __name__ == '__main__':
    # Script entry point: run clean_file over the first ten *.txt files of
    # src_dir in a process pool, appending the output to dest_file.
    os.chdir(src_dir)
    file_list = glob.glob("*.txt")
    # `cleaned` is bound at module level because clean_file writes to it by
    # name; the context manager closes it even if a worker fails.
    with open(dest_file, 'a') as cleaned:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Each worker process increments its own copy of `counter`, so the
            # parent tallies completed files itself: map() yields one result
            # per input. Consuming the results also re-raises any exception a
            # worker hit, which an unconsumed map() iterator silently hides.
            processed = sum(1 for _ in executor.map(clean_file, file_list[:10]))
    counter += processed
    print("finish " + str(counter))
在主程序中打印时,counter 的值仍然是 1。如何统计函数已经处理的文件数?
答案 0 :(得分:0)
由于通常不建议使用全局变量(它统计的是什么?谁在修改它?如果两个脚本合并后用同一个“counter”统计两种不同的东西,又该怎么办?),你可以使用下面这种结构:
class FileCleaner:
    """Sketch of a cleaner that keeps its file tally on the class itself."""

    # Number of clean() calls made so far; shared by all callers.
    Counter = 0

    @classmethod
    def clean(cls, file_name):
        """Clean ``file_name`` (logic elided) and bump the class-level tally."""
        ...
        cls.Counter = cls.Counter + 1
        ...
然后就可以在任何地方访问 FileCleaner.Counter,并通过 FileCleaner.clean(...) 调用该函数。
也许在使用 ProcessPoolExecutor 模型之前,应该先把代码整理得更清晰,因为这种模型不利于写出易读的代码(希望子解释器很快能在这方面有所帮助)。你需要拆分文件列表,在 try / except 中调用 executor,并且每成功一次就在文件清理池类内部给类计数器加 1,而不是在主程序中累加。
import concurrent.futures
import threading
import math
# Sample inputs for the primality demo below; note that the first and third
# entries are the same number.
PRIMES = [
112272535095293,
112582705942171,
112272535095293,
115280095190773,
115797848077099,
1099726899285419]
class PrimePoolTester:
    """Tests numbers for primality in a process pool and counts the results.

    ``Counter`` is a class-level tally of the results reported by
    ``execute``. It is updated in the calling process only, never inside the
    pool workers.
    """

    # Number of results reported so far by execute().
    Counter = 0
    # Serializes updates to Counter when execute() runs on several threads.
    _counter_lock = threading.Lock()

    @classmethod
    def is_prime(cls, n):
        """Return True if the integer ``n`` is prime, else False.

        Uses trial division by odd numbers up to the integer square root.
        Values below 2 (including negatives) are not prime; 2 is prime.
        """
        if n < 2:
            return False
        if n % 2 == 0:
            # 2 is the only even prime.
            return n == 2
        # math.isqrt is exact for arbitrarily large ints, unlike
        # floor(sqrt(n)) computed in floating point.
        for i in range(3, math.isqrt(n) + 1, 2):
            if n % i == 0:
                return False
        return True

    @classmethod
    def execute(cls, primes):
        """Check every number in ``primes`` in a process pool and print results.

        For each number, increments ``Counter`` and prints the current thread
        name, the running count, the number and whether it is prime.

        Args:
            primes: sequence of integers to test.
        """
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for number, prime in zip(primes, executor.map(cls.is_prime, primes)):
                # Increment and read under the lock so that concurrent
                # callers never lose an update or report the same count.
                with cls._counter_lock:
                    cls.Counter += 1
                    count = cls.Counter
                print('(%s)-%d : %d is prime: %s' % (threading.current_thread().name, count, number, prime))
class Runner_interpreter:
    """Starts a fixed number of daemon threads that all run the same callable."""

    def __init__(self, thread_count, worker):
        """Create and immediately start ``thread_count`` daemon threads.

        Args:
            thread_count: how many threads to start.
            worker: zero-argument callable each thread runs.
        """
        self.thr = []
        started = 0
        while started < thread_count:
            thread = threading.Thread(target=worker, daemon=True)
            thread.start()
            self.thr.append(thread)
            started += 1

    def join(self):
        """Block until every started thread has finished."""
        for running in self.thr:
            running.join()
if __name__ == '__main__':
    # Four entries, each referring to the same PRIMES list: one batch for
    # each worker thread started below.
    worklist = [PRIMES for _ in range(4)]

    def job():
        """Take the next batch off the shared worklist and test it."""
        batch = worklist.pop(0)
        PrimePoolTester.execute(batch)

    #use 4 "core"
    runner = Runner_interpreter(4, job)
    runner.join()