我正在扫描几千个文本文件,为每个文件创建一个 Python 类对象;对象读取并保存文件内容后,还要在其上执行一些解析方法。是否可以用多线程来完成这项工作,并最终得到一个包含所有这些 Python 对象的列表变量?如果这是最好的方法,我该如何实现?
我对任何解决方案都持开放态度,无论是多线程还是多进程。目前按顺序一次处理一个文件,速度似乎很慢。
https://docs.python.org/3/library/multiprocessing.html
过程:
代码:
import os
import sys
# Top-level directory handed to collect_files() at the bottom of the script;
# it is walked recursively for files to load.
root = "C:/Users/jmartini/Desktop/Temp/text_files"
class fileObject(object):
    """Hold the text content of a single file on disk.

    Assigning to ``filepath`` (which ``__init__`` also does) immediately
    reads the file, so ``content`` always corresponds to the path that was
    assigned most recently.
    """

    def __init__(self, filepath):
        """Create the object and load ``filepath`` if it is an existing file.

        :param filepath: path of the file whose text should be loaded.
        """
        self._content = ''
        self._filepath = ''
        # Goes through the property setter, which performs the actual read.
        self.filepath = filepath

    @property
    def filepath(self):
        """Path most recently assigned to this object."""
        return self._filepath

    @filepath.setter
    def filepath(self, value):
        """Store ``value`` and reload ``content`` from it.

        If ``value`` is not an existing regular file, ``content`` becomes
        the empty string.
        """
        self._filepath = value
        # Reset unconditionally: otherwise re-pointing the object at a path
        # that is not a file would leave the previous file's text in
        # ``content``.
        self._content = ''
        if os.path.isfile(value):
            with open(value, 'r') as f:
                self._content = f.read()
            # Further parsing of the freshly read content is meant to be
            # added here.

    @property
    def content(self):
        """Text read from ``filepath``, or ``''`` if nothing was read."""
        return self._content
def collect_files(directory):
    """Return a ``fileObject`` for every ``.sbs`` file under ``directory``.

    The tree is traversed with ``os.walk``, so sub-directories are included.
    Only names whose extension is exactly ``'.sbs'`` are kept.

    :param directory: top of the directory tree to search.
    :returns: list of ``fileObject`` instances in traversal order.
    """
    loaded = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            _stem, extension = os.path.splitext(name)
            if extension == '.sbs':
                loaded.append(fileObject(os.path.join(dirpath, name)))
    return loaded
# Build one fileObject per matching file found under ``root``.
objs = collect_files(root)
# print() with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare ``print x`` statement is a SyntaxError on Python 3.
print(len(objs))
# Debug aid: uncomment to dump the content of every loaded file.
# for o in objs:
#     print(o.content)