我尝试使用 Python |将我的数百万条记录从 CSV 文件插入 MySQL 数据库带有 sqlalchemy 的 Pandas 。有些时候这个插入在完成之前被中断,或者甚至没有将单行插入数据库。
我的代码是:
import pandas as pd
from sqlalchemy import create_engine
df = pd.read_csv('/home/shankar/LAB/Python/Rough/*******.csv')
# 2nd argument replaces where conditions is False
df = df.where(pd.notnull(df), None)
df.head()
conn_str = "mysql+pymysql://root:MY_PASS@localhost/MY_DB?charset=utf8&use_unicode=0"
engine = create_engine(conn_str)
conn = engine.raw_connection()
df.to_sql(name='table_name', con=conn,
if_exists='append')
conn.close()
错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1563 else:
-> 1564 cur.execute(*args)
1565 return cur
/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
164
--> 165 query = self.mogrify(query, args)
166
/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
143 if args is not None:
--> 144 query = query % self._escape_args(args, conn)
145
TypeError: not all arguments converted during string formatting
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
<ipython-input-6-bb91db9eb97e> in <module>()
11 df.to_sql(name='company', con=conn,
12 if_exists='append',
---> 13 chunksize=10000)
14 conn.close()
/home/shankar/.local/lib/python3.5/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
1163 sql.to_sql(self, name, con, flavor=flavor, schema=schema,
1164 if_exists=if_exists, index=index, index_label=index_label,
-> 1165 chunksize=chunksize, dtype=dtype)
1166
1167 def to_pickle(self, path):
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
569 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
570 index_label=index_label, schema=schema,
--> 571 chunksize=chunksize, dtype=dtype)
572
573
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1659 if_exists=if_exists, index_label=index_label,
1660 dtype=dtype)
-> 1661 table.create()
1662 table.insert(chunksize)
1663
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in create(self)
688
689 def create(self):
--> 690 if self.exists():
691 if self.if_exists == 'fail':
692 raise ValueError("Table '%s' already exists." % self.name)
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in exists(self)
676
677 def exists(self):
--> 678 return self.pd_sql.has_table(self.name, self.schema)
679
680 def sql_schema(self):
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in has_table(self, name, schema)
1674 query = flavor_map.get(self.flavor)
1675
-> 1676 return len(self.execute(query, [name, ]).fetchall()) > 0
1677
1678 def get_table(self, table_name, schema=None):
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1574 ex = DatabaseError(
1575 "Execution failed on sql '%s': %s" % (args[0], exc))
-> 1576 raise_with_traceback(ex)
1577
1578 @staticmethod
/home/shankar/.local/lib/python3.5/site-packages/pandas/compat/__init__.py in raise_with_traceback(exc, traceback)
331 if traceback == Ellipsis:
332 _, _, traceback = sys.exc_info()
--> 333 raise exc.with_traceback(traceback)
334 else:
335 # this version of raise is a syntax error in Python 3
/home/shankar/.local/lib/python3.5/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
1562 cur.execute(*args, **kwargs)
1563 else:
-> 1564 cur.execute(*args)
1565 return cur
1566 except Exception as exc:
/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in execute(self, query, args)
163 pass
164
--> 165 query = self.mogrify(query, args)
166
167 result = self._query(query)
/home/shankar/.local/lib/python3.5/site-packages/pymysql/cursors.py in mogrify(self, query, args)
142
143 if args is not None:
--> 144 query = query % self._escape_args(args, conn)
145
146 return query
DatabaseError: Execution failed on sql 'SELECT name FROM sqlite_master WHERE type='table' AND name=?;': not all arguments converted during string formatting
此错误仅在某些CSV文件中发生。 请告知我的错误!
先谢谢。
答案 0 :(得分:0)
从引发的错误中,您的参数不是所有字符串,因此您需要做的是将数据帧的每个元素转换为字符串。 df =df.astype(str)