TypeError("cannot serialize '_io.BufferedReader' object")

Asked: 2019-06-22 06:17:55

Tags: python multiprocessing pool

My code was working fine, but after I restarted my system and ran it again, it throws the error below. I don't understand the reason behind this behavior. I looked at other questions with similar errors, but those were all related to web-service packages, which does not seem to apply to my case.

Traceback (most recent call last):
  File "cli.py", line 76, in <module>
    k=cli_obj.extract_resume_data()
  File "cli.py", line 29, in extract_resume_data
    return self.__extract_from_directory(args.directory)
  File "cli.py", line 52, in __extract_from_directory
    results = pool.map(resume_result_wrapper, resumes)
  File "/anaconda3/envs/nlp/lib/python3.7/multiprocessing/pool.py", line 268, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/anaconda3/envs/nlp/lib/python3.7/multiprocessing/pool.py", line 657, in get
    raise self._value
multiprocessing.pool.MaybeEncodingError: Error sending result: '<multiprocessing.pool.ExceptionWithTraceback object at 0x7f80bed62eb8>'. Reason: 'TypeError("cannot serialize '_io.BufferedReader' object")'

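As far as I understand, pool.map pickles each worker's return value in order to send it back to the parent process, and an open file object cannot be pickled. The snippet below is not my project code, just a minimal sketch of what I assume the mechanism is: a worker returns an object that holds an open _io.BufferedReader, which I would expect to trigger the same MaybeEncodingError.

import multiprocessing as mp

class Holder:
    def __init__(self, path):
        # an open binary file handle is an _io.BufferedReader
        self.fh = open(path, 'rb')

def worker(path):
    # the returned object carries the open handle, so pickling it should fail
    return Holder(path)

if __name__ == '__main__':
    with mp.Pool(2) as pool:
        results = pool.map(worker, [__file__, __file__])
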
My code is as follows:

import os
import argparse
from resume_parser.resume_parser import ResumeParser
import multiprocessing as mp
from urllib.request import Request, urlopen
import io
import json
def print_cyan(text):
    print("\033[96m {}\033[00m" .format(text))

class ResumeParserCli(object):
    def __init__(self):     
        self.__parser = argparse.ArgumentParser()
        self.__parser.add_argument('-d', '--directory', help="directory containing all the resumes to be extracted")
        self.__parser.add_argument('-f', '--file', help="single resume file to be extracted")
        return

    def extract_resume_data(self):
        args = self.__parser.parse_args()

        if args.directory and not args.file:
            return self.__extract_from_directory(args.directory)
        elif args.file:
            return self.__extract_from_file(args.file)
        else:
            return 'Invalid option. Please provide a valid option.'

    def __extract_from_file(self, file):
        if os.path.exists(file):
            # print_cyan('Extracting data from: {}'.format(file))
            resume_parser = ResumeParser(file)
            return [resume_parser.get_extracted_data()]
        else:
            return 'File not found. Please provide a valid file name.'

    def __extract_from_directory(self, directory):
        if os.path.exists(directory):
            pool = mp.Pool(mp.cpu_count())

            resumes = []
            data = []
            for root, directories, filenames in os.walk(directory):
                for filename in filenames:
                    file = os.path.join(root, filename)
                    resumes.append(file)

            results = pool.map(resume_result_wrapper, resumes)
            pool.close()
            pool.join()

            return results
        else:
            return 'Directory not found. Please provide a valid directory.'

def resume_result_wrapper(resume):
    # print_cyan('Extracting data from: {}'.format(resume))
    parser = ResumeParser(resume)
    return parser.get_extracted_data()

if __name__ == '__main__':
    cli_obj = ResumeParserCli()
    k = cli_obj.extract_resume_data()

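My guess is that ResumeParser (or the data returned by get_extracted_data()) keeps a reference to the open file, so the result cannot be pickled and sent back from the worker. One direction I am considering is returning only plain built-in types from the wrapper; the dict handling below is purely an assumption about the shape of the extracted data, not something I have verified.

def resume_result_wrapper(resume):
    parser = ResumeParser(resume)
    data = parser.get_extracted_data()
    # assumption: if the extracted data is a dict that carries a file-like
    # value, dropping it should make the result picklable again
    if isinstance(data, dict):
        data = {key: value for key, value in data.items()
                if not hasattr(value, 'read')}
    return data
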
Please help me. Thanks in advance.

0 Answers:

No answers yet.