from bs4 import BeautifulSoup
import requests
import time
import pymysql
from multiprocessing import Pool
import os
t1=time.time()
定义方法
def multiproc(i,s,cursor):
t3=time.time()
print('process %s download %s page,time %s'%(os.getpid(),i,t3-t1))
url='url%s'%i
wb_data=s.get(url)
soup=BeautifulSoup(wb_data.text,'lxml')
#print(soup)
titles=soup.select('#content > div.site-section > div.site-house-list.clearfix > dl > dd.fl > p:nth-of-type(1) > a')
areas=soup.select('#content > div.site-section > div.site-house-list.clearfix > dl > dd.h-metre.f14.bold.pa')
prices=soup.select('#content > div.site-section > div.site-house-list.clearfix > dl > dd.h-price.pa.f18.yahei.c_red')
for title,area,price in zip(titles,areas,prices):
title=title.get_text()
ti2=title[:10]
area=area.get_text()
a2=area[:-2]
price=price.get_text()
try:
cursor.execute('insert into user values ("%s","%s","%s")'%(ti2,a2,price))
except Exception as err:
print(err)
conn.commit()
if __name__=='__main__':
创建进程池
p=Pool(4)
conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='123456', db='sqlxianning')
cursor = conn.cursor()
s = requests.Session()
for i in range(1,10):
p.apply_async(multiproc,args=(i,s,cursor))
cursor.close()
conn.close()
p.close()
p.join()
t2=time.time()
print("total time %s"%(t2-t1))
进程池如何避免数据库游标关闭?当进程池运行时,游标将关闭。那么我无法将数据插入数据库。