PicklingError:无法pickle <type'function'=“”>:属性查找__builtin __。函数失败

时间:2015-07-29 16:50:04

标签: python python-multiprocessing

output_rdpartition = mp.Queue()

def read_partition_zipfile(infile,stop_words,startline,endline):  

    # endline = startline + 100
    chunk_user_d = defaultdict(lambda: defaultdict(list))
    chunk_user_withoutstamp_d = defaultdict(list)
    with gzip.open(in_file, "rb") as f:
        for j, line in enumerate(f):  
            if j >= startline and j < endline:
                if j%10000==0 : print "processed",j,"lines"       
                line = line[:-1].split("|:|") 
                time_stamp = int(line[0])
                user_id = line[-1]
                keywords=line[1].split(',')
                keywords = [item.lower() for item in keywords if len(item)>=2]
                keywords = [item for item in keywords if item not in stop_words]
                # print 'user_id', user_id
                # print 'time_stamp', time_stamp
                # print 'keywords',keywords
                chunk_user_d[user_id][time_stamp] += keywords
                chunk_user_withoutstamp_d[user_id] +=keywords
    # print chunk_user_withoutstamp_d,'chunk_user_withoutstamp_d'
    # return chunk_user_d, chunk_user_withoutstamp_d
    output_rdpartition.put((chunk_user_d,chunk_user_withoutstamp_d))
    def main():   
        start_time = datetime.datetime.now()
        print("at the start of main")
        user_id ='1ss7fef4'
        lenth = 0
        tf_idf = defaultdict(int)
        key_dic = defaultdict(float) 
        time_latest = 0

        processes_rd = [mp.Process(target = read_partition_zipfile, args =(in_file, stop_words, p_index[j], p_index[j+1])) for j in range(0,3)]
        for p in processes_rd:
            p.start()

        results_rd = [output_rdpartition.get() for p in processes_rd]
        # results_rd[0]is the chunkuser ,results_rd[1]is the chunkuser_without stamp
        print results_rd

    if __name__ == '__main__':
    stop_words = "a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your"
    stop_words = stop_words.split(",")
    in_file = 'uniq.txt.gz' 
    p_index = range(0,28000000,2800000)
    main()

似乎是因为队列问题,我可以在函数内打印,但是我无法返回函数的输出

0 个答案:

没有答案