我有以下 Python 代码,用于解析某个目录下每个文件中的 URL。我尝试使用 `Pool.map` 函数
来实现多进程处理:
import glob, os
import xmltodict
import mysql.connector
from multiprocessing import Pool
def get_xml_paths(folder):
    """Lazily yield the path of every ``.xml`` file directly inside *folder*.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A generator of ``os.path.join(folder, name)`` strings, one per entry
        whose name ends with ``.xml``.

    Raises:
        OSError: If *folder* cannot be listed.
    """
    # Match on the extension rather than on the substring 'xml', so names
    # such as 'xmlnotes.txt' or 'sitemap.xml.bak' are not picked up.
    return (os.path.join(folder, f)
            for f in os.listdir(folder)
            if f.endswith('.xml'))
def openXML(file):
    """Parse a ``urlset``/``url``/``loc`` XML document and store its "books" URLs.

    Every ``loc`` value containing "books" is inserted into the ``apps`` table.

    NOTE(review): ``pool.map(openXML, files)`` passes the path strings produced
    by ``get_xml_paths``, so ``file`` is a ``str`` here and ``file.read()``
    raises ``AttributeError``; the path has to be opened first.
    NOTE(review): ``to``, ``c`` and ``conn`` are not defined anywhere in this
    snippet -- presumably module-level globals; confirm.
    """
    # The loop index below is bound as a module-level global.
    global i
    doc = xmltodict.parse(file.read())
    for i in range(0, len(doc['urlset']['url'])):
        # Stop once the index passes the limit ``to``.
        if i > to:
            break
        ## Validation
        url = doc['urlset']['url'][i]['loc'];
        if "books" in url:
            c.execute("INSERT INTO apps (url) VALUES (%s)", [url])
            conn.commit()
        # No effect on iteration: the ``for`` statement rebinds ``i`` from
        # ``range`` at the start of every pass.
        i = i + 1
if __name__ == '__main__':
    # Gather the XML paths, then hand one path to each openXML call.
    xml_paths = get_xml_paths("unzip/")
    workers = Pool()
    workers.map(openXML, xml_paths)
    # Stop accepting work and wait for the worker processes to exit.
    workers.close()
    workers.join()
    c.close()
但是,当我运行这个程序时,得到如下错误信息:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\O\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\Users\O\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "C:\Users\O\PycharmProjects\Grabber\grabber.py", line 28, in openXML
doc = xmltodict.parse(file.read())
AttributeError: 'str' object has no attribute 'read'
我该如何解决这个问题?我没有看到明显的原因。
答案 0 :(得分:2)
`openXML` 中的 `file` 是一个字符串(文件路径),而不是文件对象,因此它没有 `read` 方法。你必须先打开文件:
import glob, os
import xmltodict
import mysql.connector
from multiprocessing import Pool
def open_xml(file):
    """Insert every "books" URL found in one XML file into the ``apps`` table.

    Args:
        file: Path of the XML file. ``Pool.map`` hands each worker a path
            string, so the file is opened here rather than by the caller.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the document has no ``urlset`` root, or a ``url`` entry
            has no ``loc`` child.

    NOTE(review): relies on a module-level ``conn`` that is not defined in
    this snippet. Each worker process should own its own database
    connection -- confirm how ``conn`` is created before running in a pool.
    """
    # Binary mode lets the XML parser honour the encoding declared in the
    # document instead of decoding with the platform's default encoding.
    with open(file, 'rb') as xml:
        doc = xmltodict.parse(xml)

    # xmltodict returns a single dict (not a list) when there is exactly one
    # <url> element, and None for an empty <urlset/>; normalise to a list.
    entries = (doc['urlset'] or {}).get('url') or []
    if not isinstance(entries, list):
        entries = [entries]

    cursor = conn.cursor()
    try:
        for entry in entries:
            loc = entry['loc']
            if "books" in loc:
                cursor.execute("INSERT INTO apps (url) VALUES (%s)", [loc])
        # One commit per file instead of one per row.
        conn.commit()
    finally:
        # Release the cursor even if an insert fails.
        cursor.close()
if __name__ == '__main__':
    # Each worker receives one path string, which open_xml() opens itself.
    files = glob.glob("unzip/*.xml")
    # The context manager shuts the pool down on exit, so worker processes
    # are not left behind; map() blocks until every file has been processed,
    # so no work is cut short.
    with Pool() as pool:
        pool.map(open_xml, files)