Question

我正在从事一些同时使用多处理和多线程的自动化工作。设计-
Architecture

我在脚本中所做的工作是，首先在块创建过程中为每个文件划分原始文件，然后在每个过程中再次创建文件块并为每个块创建线程以执行某些操作。

`

    def file_len(fname):
            with open(fname) as f:
                    for i, l in enumerate(f):
                            pass
            return i
    def do_job(tasks_to_accomplish, tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck):
            #print('Im here')
            while True:
                    try:

                            file = input_values.get_nowait()
                            tasks_to_accomplish.get_nowait().start(aws_obj,file,urlcheck,tarfficcheck,rankcheck)

                    except queue.Empty:

                            break
                    else:

                            #print(res)
                            tasks_that_are_done.put(str(file) + ' is done by ' + threading.currentThread().getName())
                            time.sleep(.5)
            return True

    def do_jobmonth(tasks_to_accomplish, tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck,months):
            #print('Im here')
            while True:
                    try:


                            file = input_values.get_nowait()
                            tasks_to_accomplish.get_nowait().start(aws_obj,file,urlcheck,tarfficcheck,rankcheck,months)

                    except queue.Empty:

                            break
                    else:

                            #print(res)
                            tasks_that_are_done.put(str(file) + ' is done by ' + threading.currentThread().getName())
                            time.sleep(.5)
            return True
    def threadstart(file,u,t,r,m=1 ):
            consolidatedvalidation=''
            consolidatedranking=''
            urlcheck=u
            tarfficcheck =t #input("Need website traffic Y/N : ")
            rankcheck=r#input("Need Ranking  Y/N : ")
            if tarfficcheck.lower()=='y':
                    months = m#int(input("Number of Months(1 to 12 ) - "))

            filepathname=file#r'C:\Users\Himanshu Gupta\Desktop\Sample\New folder\Sample.csv'
            st=os.stat(filepathname)
            filepath, filename = os.path.split(filepathname)
            size = st.st_size
            #lines = file_len(filepathname)
            #splitterlines = 10**(int(len(str(lines)))-2)#*(int(str(lines)[0]))*
            #print('Splitter Set to lines - ',splitterlines)
            #linesize= size/lines
            filname,extension=os.path.splitext(file)
            last_char=filname[filname.rfind('_')+1:]
            fs = FileSplit(file=filepathname, splitsize=size/2, output_dir=filepath)
            fs.split(include_header=True)

            files = glob.glob(filepath+'/*_'+str(last_char)+'_[0-9]*.csv')
            size = len(files)
            print('Number of files -',size,' list name -',files)
            number_of_task = size
            number_of_processes =size 
            tasks_to_accomplish = Queue()
            tasks_that_are_done = Queue()
            input_values = Queue()
            processes = []

            for i in range(number_of_task):
                    tasks_to_accomplish.put(Rank())
                    input_values.put(files[i])
            # creating processes
            for w in range(number_of_processes):
                    if (tarfficcheck.lower()=='y'):
                            p = threading.Thread(target=do_jobmonth, args=(tasks_to_accomplish, tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck,months))
                    else:
                            p = threading.Thread(target=do_job, args=(tasks_to_accomplish, tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck))
                    processes.append(p)
                    p.start()


            # completing process
            for p in processes:

                    p.join()

            # print the output
            while not tasks_that_are_done.empty():
                    print(tasks_that_are_done.get())
            if u.lower()=='y':
                    validationfiles =glob.glob(filepath+'/Validation_'+last_char+'_[0-9]*.csv')
                    #print('validationfiles - ',validationfiles)
                    root =0
                    for file in validationfiles:
                            if root==0:
                                    consolidatedvalidation= pd.read_csv(file)
                                    root+=1
                            else:
                                    #print('Root not zero')
                                    temp = pd.read_csv(file)
                                    consolidatedvalidation=consolidatedvalidation.append(temp, ignore_index=True,sort=False)
                            os.remove(file)
                    consolidatedvalidation.to_csv(filepath+'/Validation_'+last_char+'.csv',index=False)
                    print('Thread Validation file Created')
            for i in range(number_of_task):
                    os.remove((files[i]))

            if r =='y':
                    rankingfiles =glob.glob(filepath+'/Ranking_'+last_char+'_[0-9]*.csv')
                    root=0
                    for file in rankingfiles:
                            if root==0:
                                    consolidatedranking= pd.read_csv(file)
                                    root+=1
                            else:
                                    #print('Root not zero')
                                    temp = pd.read_csv(file)
                                    consolidatedranking=consolidatedranking.append(temp, ignore_index=True,sort=False)
                            os.remove(file)
                    consolidatedranking.to_csv(filepath+'/Ranking_'+last_char+'.csv',index=False)
                    print(' Thread Ranking file Created') 


            return True

    def do_jobnomonth(tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck):
            #print('Im here')
            while True:
                    try:

                            file = input_values.get_nowait()
                            threadstart(file,urlcheck,tarfficcheck,rankcheck)

                    except queue.Empty:

                            break
                    else:

                            #print(res)
                            tasks_that_are_done.put(str(file) + ' is done by ' + current_process().name)
                            time.sleep(.5)
            return True

    def do_jobwithmonth( tasks_that_are_done,input_values,urlcheck,tarfficcheck,rankcheck,months):
            #print('Im here')
            while True:
                    try:


                            file = input_values.get_nowait()
                            threadstart(file,urlcheck,tarfficcheck,rankcheck,months)

                    except queue.Empty:

                            break
                    else:

                            #print(res)
                            tasks_that_are_done.put(str(file) + ' is done by ' + current_process().name)
                            time.sleep(.5)
            return True


    def main():
            consolidatedvalidation=''
            consolidatedranking=''
            urlvalidationcheck = input("Need Url Validation traffic Y/N : ")
            tarfficcheck = input("Need website traffic Y/N : ")
            rankcheck=input("Need Ranking  Y/N : ")
            if tarfficcheck.lower()=='y':
                    months = int(input("Number of Months(1 to 12 ) - "))

            filepathname=r'C:\Users\Himanshu Gupta\Desktop\Url Validation Script\Input_file.csv'
            st=os.stat(filepathname)
            filepath, filename = os.path.split(filepathname)
            size = st.st_size
            lines = file_len(filepathname)
            splitterlines = 10**(int(len(str(lines)))-2)#*(int(str(lines)[0]))*
            print('Splitter Set to lines - ',splitterlines)
            linesize= size/lines
            fs = FileSplit(file=filepathname, splitsize=splitterlines*linesize, output_dir=filepath)
            fs.split(include_header=True)

            files = glob.glob(filepath+'/*_[0-9]*.csv')
            size = len(files)
            print('Number of files -',size,' list name -',files)
            number_of_task = size
            number_of_processes =size
            tasks_to_accomplish = Queue()
            tasks_that_are_done = Queue()
            input_values = Queue()
            processes = []

            for i in range(number_of_task):
                    #tasks_to_accomplish.put(Rank_Thread())
                    input_values.put(files[i])
            # creating processes
            for w in range(number_of_processes):
                    if (tarfficcheck.lower()=='y'):
                            p = Process(target=do_jobwithmonth, args=( tasks_that_are_done,input_values,urlvalidationcheck,tarfficcheck,rankcheck,months))
                    else:
                            p = Process(target=do_jobnomonth, args=( tasks_that_are_done,input_values,urlvalidationcheck,tarfficcheck,rankcheck))
                    processes.append(p)
                    p.start()


            # completing process
            for p in processes:

                    p.join()

            # print the output
            while not tasks_that_are_done.empty():
                    print(tasks_that_are_done.get())

            if urlvalidationcheck==  'y':
                    validationfiles =glob.glob(filepath+'/Validation*_[0-9]*.csv')
                    #print('validationfiles - ',validationfiles)
                    root =0
                    for file in validationfiles:
                            if root==0:
                                    consolidatedvalidation= pd.read_csv(file)
                                    root+=1
                            else:
                                    #print('Root not zero')
                                    temp = pd.read_csv(file)
                                    consolidatedvalidation=consolidatedvalidation.append(temp, ignore_index=True,sort=False)
                            os.remove(file)
                    consolidatedvalidation.to_csv(filepath+'/Consolidated_Url_Validation.csv',index=False)
                    print('Consolidated Validation file Created')
            for i in range(number_of_task):
                    os.remove((files[i]))


            if rankcheck==  'y':
                    rankingfiles =glob.glob(filepath+'/Ranking*_[0-9]*.csv')
                    root=0
                    for file in rankingfiles:
                            if root==0:
                                    consolidatedranking= pd.read_csv(file)
                                    root+=1
                            else:
                                    #print('Root not zero')
                                    temp = pd.read_csv(file)
                                    consolidatedranking=consolidatedranking.append(temp, ignore_index=True,sort=False)
                            os.remove(file)
                    consolidatedranking.to_csv(filepath+'/Consolidated_Url_Ranking.csv',index=False)
                    print('Consolidated Ranking file Created') 


            return True


    if __name__ == '__main__':
            s_time= time.time()
            main()
            print("Total Time Elapsed --- %s seconds ---" % (time.time() - s_time))

`

我面临的问题是线程在作业完成之前退出，并且如果有错误也不会引发任何错误。

线程终止而不会引发任何错误

0 个答案: