我正在抓取一个网站，并将数据存储到 MySQL 中。代码起初工作正常，但运行一段时间之后会产生以下错误。我使用 Python 3.5.1，并通过 pymysql 连接数据库。
pymysql.err.OperationalError: (2013, 'Lost connection to MySQL server during query')
这是我的代码:
from bs4 import BeautifulSoup
import urllib.request
import re
import json
import pymysql
import pymysql.cursors
# Scraper for the Canton Fair exhibitor directory.
#
# Walks category -> sub-category -> sub-sub-category -> company -> product
# pages and stores company contact details and product names in MySQL.
#
# NOTE(review): the original paste had lost its indentation; the nesting
# below is reconstructed from the data flow (each loop consumes the list
# built just before it). Confirm against the author's original layout.

connection = pymysql.connect(host='XXX.XXX.XXX.XX',
                             user='XXX',
                             password='XXX',
                             db='XXX',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# Base URL that every relative href found on the site is appended to.
string = "http://i.cantonfair.org.cn/en/"

START_URL = 'http://i.cantonfair.org.cn/en/ExpExhibitorList.aspx?k=glassware'

# Link patterns, compiled once. The original patterns for the last three
# started with "\E" / "\e", which are unknown regex escapes: deprecated on
# Python 3.5 and a hard `re.error` from 3.6 on. The stray backslash is
# dropped so they match the literal text as intended.
CATEGORY_RE = re.compile(r"expexhibitorlist\.aspx\?categoryno=[0-9]+")
SUBCATEGORY_RE = re.compile(r"ExpExhibitorList\.aspx\?categoryno=[0-9]+")
COMPANY_RE = re.compile(r"expCompany\.aspx\?corpid=[0-9]+")
PRODUCT_RE = re.compile(r"ExpProduct\.aspx\?corpid=[0-9]+.categoryno=[0-9]+")

COMPANY_INSERT = """INSERT INTO company(
companyname,address,city,pincode,website)
VALUES (%s, %s, %s, %s, %s)"""

PRODUCT_INSERT = """INSERT INTO products(
companyname,products)
VALUES (%s, %s)"""

# MySQL client error codes meaning the connection is gone:
# 2006 = "MySQL server has gone away", 2013 = "Lost connection ... during query".
_CONNECTION_LOST_CODES = (2006, 2013)


def _fetch_soup(url):
    """Download *url* and return it parsed with BeautifulSoup's html.parser.

    The HTTP response is closed as soon as the document has been parsed.
    """
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, "html.parser")


def _absolute_links(soup, pattern):
    """Return the hrefs of all <a> tags matching *pattern*, prefixed with `string`."""
    return [string + anchor["href"] for anchor in soup.find_all("a", href=pattern)]


def _li_text(items, index, default='null'):
    """Return the stripped text of ``items[index]``, or *default* if it is missing."""
    try:
        return items[index].get_text().strip()
    except IndexError:
        return default


def _execute_with_retry(sql, params, attempts=3):
    """Execute *sql* with *params* and commit, reconnecting if the link dropped.

    The scraper spends a long time on HTTP requests between writes, so the
    server may close the idle connection. When that happens the statement
    is retried after ``connection.ping(reconnect=True)``.

    Raises:
        pymysql.err.OperationalError: if the error is not a lost-connection
            error, or if the statement still fails after *attempts* tries.
    """
    for attempt in range(1, attempts + 1):
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, params)
            connection.commit()
            return
        except pymysql.err.OperationalError as exc:
            lost = bool(exc.args) and exc.args[0] in _CONNECTION_LOST_CODES
            if not lost or attempt == attempts:
                raise
            # Re-establish the connection before the next attempt.
            connection.ping(reconnect=True)


def _scrape_products(company_soup, companyname):
    """Store every product name linked from a company page under *companyname*."""
    # set() drops duplicate product-page links, as the original did.
    for product_url in set(_absolute_links(company_soup, PRODUCT_RE)):
        print(product_url)
        soupproduct = _fetch_soup(product_url)
        for element in soupproduct.select('div[class="photolist"] li a'):
            print("====================Product Name=======================")
            productnames = element.get_text().strip()
            print(productnames)
            print('saving products to db')
            _execute_with_retry(PRODUCT_INSERT, (companyname, productnames))
            # Printed only after a successful commit (the original printed
            # this from `finally`, i.e. even when the insert had failed).
            print("Products Data Saved")


def _scrape_company(company_url):
    """Store the contact details and the products of one company page."""
    print(company_url)
    soup5 = _fetch_soup(company_url)
    for d in soup5.find_all("div", id="contact"):
        lis = d.find_all('li')
        # The first three <li> entries are read unconditionally, as in the
        # original; a contact block with fewer entries raises IndexError.
        companyname = lis[0].get_text().strip()
        companyaddress = lis[1].get_text().strip()
        companycity = lis[2].get_text().strip()
        # Postal code and website are optional; a missing entry is stored
        # as the string 'null' (the original did this for the website only
        # and re-raised for the postal code).
        companypostalcode = _li_text(lis, 3).replace(",", "")
        # NOTE(review): "\xEF\xBC\x8C" looks like the UTF-8 bytes of a
        # full-width comma written as str escapes; kept unchanged - confirm
        # it actually matches the decoded page text.
        companywebsite = _li_text(lis, 4).replace("\xEF\xBC\x8Cifl...", "")

        print('saving company details to db')
        _execute_with_retry(COMPANY_INSERT,
                            (companyname, companyaddress, companycity,
                             companypostalcode, companywebsite))
        print("Company Data saved")

        # NOTE(review): product scraping is placed inside the per-contact
        # loop so that `companyname` is always defined - confirm.
        _scrape_products(soup5, companyname)


def _crawl():
    """Walk the three category levels and scrape every company found."""
    soup = _fetch_soup(START_URL)
    for category_url in _absolute_links(soup, CATEGORY_RE):
        soup2 = _fetch_soup(category_url)
        for subcategory_url in _absolute_links(soup2, SUBCATEGORY_RE):
            soup3 = _fetch_soup(subcategory_url)
            for subcategory2_url in _absolute_links(soup3, SUBCATEGORY_RE):
                soup4 = _fetch_soup(subcategory2_url)
                # set() drops duplicate company links, as the original did.
                for company_url in set(_absolute_links(soup4, COMPANY_RE)):
                    _scrape_company(company_url)


try:
    _crawl()
finally:
    # Release the connection even if the crawl aborts; skip if already closed.
    if connection.open:
        connection.close()
现在我无法找到我的代码出错的地方
答案 0 :(得分:2)
# Retry the insert until it has been saved, reconnecting whenever the
# server has dropped the connection. The number of attempts is bounded so
# a persistent failure surfaces instead of looping forever.
max_attempts = 3
for attempt in range(1, max_attempts + 1):
    try:
        with connection.cursor() as cursor:
            print('saving company details to db')
            cursor.execute("""INSERT INTO company(
            companyname,address,city,pincode,website)
            VALUES (%s, %s, %s, %s, %s)""",
                           (companyname, companyaddress, companycity,
                            companypostalcode, companywebsite))
        connection.commit()
        break
    # Fully qualified: the bare name `OperationalError` is never imported
    # by the question's code and would raise NameError here.
    except pymysql.err.OperationalError:
        if attempt == max_attempts:
            raise
        # ping(True) re-opens the connection if it has been lost.
        connection.ping(True)
print("Company Data saved")
您还可以使用连接池。
另请参阅类似的问题（question），或阅读相关源码（source）。