Multiprocessing is slower than a plain loop

Time: 2020-08-16 16:04:53

Tags: python csv python-multiprocessing

I am trying to write a large amount of data to a csv file. With the plain sequential approach it writes about 50 rows per second, but after switching to multiprocessing it drops to about 5 rows per second. I also had to add sys.setrecursionlimit(25000), because without it the code crashes with a recursion error.
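
As far as I can tell, the recursion limit is needed because multiprocessing pickles every argument before sending it to a worker process, and pickling a deeply nested object recurses once per nesting level. A minimal illustration with plain nested lists (no BeautifulSoup involved):

import pickle

deep = []
node = deep
for _ in range(2000):   # build a 2000-level nested list
    node.append([])
    node = node[0]

pickle.dumps(deep)      # RecursionError under the default limit of 1000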

I can tell I am doing something wrong here. What is the correct approach?

from bs4 import BeautifulSoup
import requests
import lxml
import csv
import cchardet
from multiprocessing import Pool
import sys
import time

sys.setrecursionlimit(25000)

csvfileWrite = open("comments.csv", 'a+', newline='', encoding='utf-8')  # declared as a global variable
writer = csv.writer(csvfileWrite, delimiter=';', quotechar='"',
                    quoting=csv.QUOTE_MINIMAL)  # declared as a global variable


def kacYildiz(div): #This function returns a number 0 to 5. Not important.
    yildizSayisi=0
    yildizYeri=div.find("div",attrs={"class":"RatingPointer-module-1OKF3"})
    yildizlar=yildizYeri.find_all("svg")
    
    for yildiz in yildizlar:
        sonuc=yildiz.find("path").get("fill")
        if(sonuc=="#f28b00"):
            yildizSayisi+=1

    return yildizSayisi

def takeText(div):
    comment=div.find("span",attrs={"itemprop":"description"}).text
    return comment


def yorumSayfaSayisi(row): # Returns how many pages the site's comment section has. Not important.
    yorumKismi="-yorumlari?"
    adres=row[0]+yorumKismi

    r = requests_session.get(adres)
    
    soup = BeautifulSoup(r.text,"lxml")
    
    sayfaS=soup.find("ul",attrs={"class":"PaginationBar-module-3qhrm"})
        
    sayi=sayfaS.find_all("li")[-1].text
    return sayi



def writeToCsv(comments): # writes a single comment div as one row of the csv file.
    global csvfileWrite
    global writer
   
    textToWrite = takeText(comments)
                    
    writer.writerow([kacYildiz(comments),textToWrite]) 


if __name__ == '__main__':
    pageNumber=1
    requests_session = requests.Session()
    comments=list()
    
    csvfile=open('adresler.csv',newline='')
    reader = csv.reader(csvfile, delimiter=';', quotechar='|')

          
    for row in reader:
        rowNumber=yorumSayfaSayisi(row)
            
        for i in range(1,int(rowNumber)):
            comments.clear()
            commetAdress="-yorumlari?sayfa={}".format(i)               
            adress=row[0]+commetAdress                  
            r = requests_session.get(adress)                
            soup = BeautifulSoup(r.text,"lxml") 
            page=soup.find_all("div",attrs={"class":"ReviewCard-module-3Y36S"})

            for comment in page:
                comments.append(comment)

            p = Pool(10)
            start = time.process_time()   
            p.map(writeToCsv, comments) 
                
            p.terminate()
            p.join()

1 answer:

Answer 0 (score: 0)

Spawning a fresh Pool(10) for every page and pickling every BeautifulSoup div over to the worker processes likely costs far more than the parsing itself. Try this approach with a ThreadPool instead:

from multiprocessing.pool import ThreadPool

def csvYaz(yorumlar):  # same role as writeToCsv in the question
    global csvfileYaz  # the open csv file (csvfileWrite above)
    global yazici      # the csv.writer (writer above)
    yazi = yorumAl(yorumlar)  # yorumAl plays the role of takeText
    yazici.writerow([kacYildiz(yorumlar), yazi])

# ------main-----
for yorum in yorumSayfasi:  # yorumSayfasi is the list of comment divs (page above)
    yorumlar.append(yorum)

results = ThreadPool(10).map(csvYaz, yorumlar)  # map blocks until every row is written
for zz in results:
    print(zz)  # csvYaz returns None, so this just confirms each task finished
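
A fuller sketch of the same idea, wired to the question's own helpers (kacYildiz and takeText). It assumes the comments list has already been filled by the page-scraping loop from the question; parseComment is a hypothetical helper that parses in a worker thread and returns a plain tuple, so only the main thread ever touches the file:

from multiprocessing.pool import ThreadPool
import csv

def parseComment(div):  # hypothetical helper: parse in a worker thread
    return kacYildiz(div), takeText(div)

with open("comments.csv", "a+", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, delimiter=";", quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    with ThreadPool(10) as pool:  # create the pool once, reuse it for every page
        for stars, text in pool.imap(parseComment, comments):
            writer.writerow([stars, text])

Because threads share memory, the BeautifulSoup divs are never pickled, so the sys.setrecursionlimit workaround becomes unnecessary and the per-page cost of spawning ten new processes disappears.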