使用 pandas 在 MySQL 与 DataFrame 之间往返读写

时间:2018-11-07 15:35:55

标签: python pandas dataframe pymysql

import config
import pandas as pd
import pymysql

# Database credentials and host are read from the local `config` module.
username = config.username
dbpassword = config.dbpassword
dbhost = config.dburl
# NOTE: despite its name, `engine` is a raw PyMySQL connection object,
# not a SQLAlchemy engine.
engine =  pymysql.connect(host=dbhost, port=3306,user=username,password=dbpassword,db='db',autocommit=True) 

# Selects rows from `SIM_FE_Audit_Data` whose `Data` value is one of the five
# listed statuses, dropping any 'Comment' row that shares both its `IssueId`
# and its `Actual Create Date` with an 'Open' row. Results are ordered by
# issue and then by creation date.
tableBuilder1='''SELECT b.`IssueId` AS `Id`, b.`ShortId` AS `ShortId`, b.`Path` AS `Path`, b.`Data` AS `Data`,  b.`Actual Create Date` AS `Actual Create Date` FROM `SIM_FE_Audit_Data` b WHERE b.`Data` IN ( 'Open', 'Comment', 'Pending Others', 'Work in Progress', 'Resolved') AND NOT b.`IssueId` IN (SELECT c.`IssueId` FROM `SIM_FE_Audit_Data` c WHERE b.`Actual Create Date` = c.`Actual Create Date` AND b.`Data` = 'Comment' AND c.`Data` = 'Open') ORDER BY b.`IssueId`, b.`Actual Create Date`'''

# Load the query result into a DataFrame.
df = pd.read_sql(tableBuilder1, con=engine)
# Write the DataFrame back as a new table, replacing it if it already exists
# and omitting the DataFrame index. This is the call that raises the
# DatabaseError quoted in the question text: the failing statement queries
# `sqlite_master`, i.e. SQLite-style SQL is being sent over this connection.
df.to_sql('SIM_FE_Audit_Durations_No_First_Comment', con=engine, if_exists='replace',index=False)

我正在编写上述代码来替换一个视图,因为该视图需要 15 分钟以上才能渲染,并导致 Tableau 仪表板出现故障。这一部分用于创建三张表中的第一张。但是,目前我在执行 df.to_sql 时收到错误 DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting,我不知道为什么会出现该错误。我用 print(df) 验证过 SQL 查询结果已被正确读取;只有在把数据写回新表时才会报错,我不明白原因。

1 个答案:

答案 0 :(得分:0)

直接改用 SQLAlchemy 似乎就解决了这个问题:

import config
import pandas as pd
from sqlalchemy import create_engine
import time

# Database credentials and host are read from the local `config` module.
username = config.username
dbpassword = config.dbpassword
dbhost = config.dburl

# Build a SQLAlchemy engine so pandas can both read from and write to MySQL.
# The connection character set is given by `charset=utf8` in the URL; the
# `encoding` keyword of create_engine() is intentionally not passed because it
# was deprecated in SQLAlchemy 1.4 and removed in 2.0 (it raises TypeError
# there).
engine = create_engine(
    'mysql://%s:%s@%s/db?charset=utf8' % (username, dbpassword, dbhost)
)

# Selects rows from `SIM_FE_Audit_Data` whose `Data` value is one of the five
# listed statuses, dropping any 'Comment' row that shares both its `IssueId`
# and its `Actual Create Date` with an 'Open' row. Results are ordered by
# issue and then by creation date.
tableBuilder1='''SELECT b.`IssueId` AS `Id`, b.`ShortId` AS `ShortId`, b.`Path` AS `Path`, b.`Data` AS `Data`,  b.`Actual Create Date` AS `Actual Create Date` FROM `SIM_FE_Audit_Data` b WHERE b.`Data` IN ( 'Open', 'Comment', 'Pending Others', 'Work in Progress', 'Resolved') AND NOT b.`IssueId` IN (SELECT c.`IssueId` FROM `SIM_FE_Audit_Data` c WHERE b.`Actual Create Date` = c.`Actual Create Date` AND b.`Data` = 'Comment' AND c.`Data` = 'Open') ORDER BY b.`IssueId`, b.`Actual Create Date`'''

# Load the query result into a DataFrame.
df = pd.read_sql(tableBuilder1, con=engine)

# Materialise the result as a table, replacing any previous version.
# index=False keeps the DataFrame's automatically generated row index from
# being written as an extra `index` column in the target table.
df.to_sql(
    'SIM_FE_Audit_Durations_No_First_Comment',
    con=engine,
    if_exists='replace',
    index=False,
)