我需要运行多个查询来对所有数据进行爬网后进行排序。我在管道中执行一些代码,如下所示:
def __init__(self):
self.create_connection()
self.create_table()
def create_connection(self):
self.conn = mysql.connector.connect(
host = 'localhost',
user = 'root',
passwd = 'xxxxxxx',
database ='pyproject'
self.cur = self.conn.cursor() #open connection
def create_table(self):
xxxxxxx
def close_spider(self,spider): # do last query on closing spider
self.cur.execute("""INSERT INTO tbp_table
(Title,Source,Date,Paragraph,emiten_code,emiten_name)
SELECT t1.Title,t1.Source, t1.Date,
t1.Paragraph,t2.emiten_code, t2.emiten_name
from pyproject.news_data t1
inner join pyproject.emiten_dict t2 ON (
(t1.Paragraph LIKE CONCAT('%', t2.emiten_code,
'%')) OR
(t1.Paragraph LIKE CONCAT('%', t2.emiten_name,
'%'))
);""")
self.conn.commit()
此处有多个查询:
def store_db(self, item):
self.cur.execute("""insert into news_data( Title, Source, Date,
Paragraph ) values (%s,%s,%s,%s)""",(
item['title'],
item['source'],
item['date'],
item['paragraph']
))
self.cur.execute("""DELETE t1 FROM news_data t1
INNER JOIN news_data t2
WHERE
t1.id > t2.id AND t1.Title = t2.Title;""") #delete
duplicate
self.cur.execute("""CREATE TABLE mytable_tmp select * from news_data;
TRUNCATE TABLE news_data;
ALTER TABLE news_data AUTO_INCREMENT = 1;
INSERT INTO news_data(Title, Source, Date, Paragraph)
SELECT Title, Source, Date, Paragraph FROM mytable_tmp
ORDER BY id;
DROP TABLE mytable_tmp;""") #rearrange id num
self.cur.execute("""TRUNCATE TABLE tbp_table;
ALTER TABLE tbp_table AUTO_INCREMENT =
1;""")
self.cur.execute("""INSERT INTO tbp_table
(Title,Source,Date,Paragraph,emiten_code,emiten_name)
SELECT t1.Title,t1.Source, t1.Date,
t1.Paragraph,t2.emiten_code, t2.emiten_name
from pyproject.news_data t1
inner join pyproject.emiten_dict t2 ON (
(t1.Paragraph LIKE CONCAT('%', t2.emiten_code,
'%')) OR
(t1.Paragraph LIKE CONCAT('%', t2.emiten_name,
'%'))
);""")
self.conn.commit()
但是它们出现错误mysql.connector.errors.DatabaseError:2014(HY000):命令不同步;您现在不能运行此命令
我已经尝试过使用打开/关闭连接,如下所示:
def store_db(self, item):
self.cur.execute("""insert into news_data( Title, Source, Date,
Paragraph ) values (%s,%s,%s,%s)""",(
item['title'],
item['source'],
item['date'],
item['paragraph']
))
self.cur.close()
self.cur = self.conn.cursor()
self.cur.execute("""DELETE t1 FROM news_data t1
INNER JOIN news_data t2
WHERE
t1.id > t2.id AND t1.Title = t2.Title;""") #delete
duplicate
self.cur.close()
self.cur = self.conn.cursor()
self.cur.execute("""CREATE TABLE mytable_tmp select * from news_data;
TRUNCATE TABLE news_data;
ALTER TABLE news_data AUTO_INCREMENT = 1;
INSERT INTO news_data(Title, Source, Date, Paragraph)
SELECT Title, Source, Date, Paragraph FROM mytable_tmp
ORDER BY id;
DROP TABLE mytable_tmp;""") #rearrange id num
self.cur.close()
self.cur = self.conn.cursor()
self.cur.execute("""TRUNCATE TABLE tbp_table;
ALTER TABLE tbp_table AUTO_INCREMENT =
1;""")
self.cur.close()
self.cur = self.conn.cursor()
self.cur.execute("""INSERT INTO tbp_table
(Title,Source,Date,Paragraph,emiten_code,emiten_name)
SELECT t1.Title,t1.Source, t1.Date,
t1.Paragraph,t2.emiten_code, t2.emiten_name
from pyproject.news_data t1
inner join pyproject.emiten_dict t2 ON (
(t1.Paragraph LIKE CONCAT('%', t2.emiten_code,
'%')) OR
(t1.Paragraph LIKE CONCAT('%', t2.emiten_name,
'%'))
);""")
self.conn.commit()
但是它一直关闭连接并产生此错误
mysql.connector.errors.ProgrammingError:光标未连接
预期结果是:所有查询都可以运行,尽管其中一些查询需要花费时间,因此抓取过程会有所延迟