在Scrapy管道上运行多个查询

时间:2019-09-06 17:51:38

标签: python mysql scrapy

我需要运行多个查询来对所有数据进行爬网后进行排序。我在管道中执行一些代码,如下所示:

def __init__(self):
    self.create_connection()
    self.create_table()

def create_connection(self):
    self.conn = mysql.connector.connect(
        host = 'localhost',
        user = 'root',
        passwd = 'xxxxxxx',
        database ='pyproject'


    self.cur = self.conn.cursor() #open connection

def create_table(self):
xxxxxxx


def close_spider(self,spider): # do last query on closing spider


    self.cur.execute("""INSERT INTO tbp_table 
    (Title,Source,Date,Paragraph,emiten_code,emiten_name)
    SELECT t1.Title,t1.Source, t1.Date, 
    t1.Paragraph,t2.emiten_code, t2.emiten_name 
    from pyproject.news_data t1
    inner join pyproject.emiten_dict t2 ON (
    (t1.Paragraph LIKE CONCAT('%', t2.emiten_code, 
    '%')) OR
    (t1.Paragraph LIKE CONCAT('%', t2.emiten_name, 
    '%'))
    );""")

    self.conn.commit()

此处有多个查询:

def store_db(self, item):
    self.cur.execute("""insert into news_data( Title, Source, Date, 
    Paragraph )  values (%s,%s,%s,%s)""",(
        item['title'],
        item['source'],
        item['date'],
        item['paragraph']

    ))


    self.cur.execute("""DELETE t1 FROM news_data t1 
                        INNER JOIN news_data t2 
                        WHERE
                        t1.id > t2.id AND t1.Title = t2.Title;""") #delete 
                        duplicate



    self.cur.execute("""CREATE TABLE mytable_tmp select * from news_data;
                TRUNCATE TABLE news_data;
                ALTER TABLE news_data AUTO_INCREMENT = 1;
                INSERT INTO news_data(Title, Source, Date, Paragraph) 
                SELECT Title, Source, Date, Paragraph FROM mytable_tmp 
                ORDER BY id;
                DROP TABLE mytable_tmp;""") #rearrange id num


    self.cur.execute("""TRUNCATE TABLE tbp_table;
                      ALTER TABLE tbp_table AUTO_INCREMENT = 
                       1;""")


    self.cur.execute("""INSERT INTO tbp_table 
                     (Title,Source,Date,Paragraph,emiten_code,emiten_name)
                        SELECT t1.Title,t1.Source, t1.Date, 
                         t1.Paragraph,t2.emiten_code, t2.emiten_name
                        from pyproject.news_data t1
                        inner join pyproject.emiten_dict t2 ON (
                        (t1.Paragraph LIKE CONCAT('%', t2.emiten_code, 
                          '%')) OR
                         (t1.Paragraph LIKE CONCAT('%', t2.emiten_name, 
                          '%'))
                         );""")





    self.conn.commit()

但是它们出现错误mysql.connector.errors.DatabaseError:2014(HY000):命令不同步;您现在不能运行此命令

我已经尝试过使用打开/关闭连接,如下所示:

def store_db(self, item):
    self.cur.execute("""insert into news_data( Title, Source, Date, 
    Paragraph )  values (%s,%s,%s,%s)""",(
        item['title'],
        item['source'],
        item['date'],
        item['paragraph']

    ))


    self.cur.close()
    self.cur = self.conn.cursor()

    self.cur.execute("""DELETE t1 FROM news_data t1 
                        INNER JOIN news_data t2 
                        WHERE
                        t1.id > t2.id AND t1.Title = t2.Title;""") #delete 
                        duplicate

    self.cur.close()
    self.cur = self.conn.cursor()

    self.cur.execute("""CREATE TABLE mytable_tmp select * from news_data;
                TRUNCATE TABLE news_data;
                ALTER TABLE news_data AUTO_INCREMENT = 1;
                INSERT INTO news_data(Title, Source, Date, Paragraph) 
                SELECT Title, Source, Date, Paragraph FROM mytable_tmp 
                ORDER BY id;
                DROP TABLE mytable_tmp;""") #rearrange id num



    self.cur.close()
    self.cur = self.conn.cursor()

    self.cur.execute("""TRUNCATE TABLE tbp_table;
                               ALTER TABLE tbp_table AUTO_INCREMENT = 
                       1;""")

    self.cur.close()
    self.cur = self.conn.cursor()
    self.cur.execute("""INSERT INTO tbp_table 
                     (Title,Source,Date,Paragraph,emiten_code,emiten_name)
                        SELECT t1.Title,t1.Source, t1.Date, 
                         t1.Paragraph,t2.emiten_code, t2.emiten_name
                        from pyproject.news_data t1
                        inner join pyproject.emiten_dict t2 ON (
                        (t1.Paragraph LIKE CONCAT('%', t2.emiten_code, 
                          '%')) OR
                         (t1.Paragraph LIKE CONCAT('%', t2.emiten_name, 
                          '%'))
                         );""")





    self.conn.commit()

但是它一直关闭连接并产生此错误

mysql.connector.errors.ProgrammingError:光标未连接

预期结果是:所有查询都可以运行,尽管其中一些查询需要花费时间,因此抓取过程会有所延迟

0 个答案:

没有答案