我正在使用 SQLAlchemy 1.2.0b1 版本。
我的表定义如下:
class Company(Base):
    """Declarative mapping for the ``company`` table."""

    __tablename__ = 'company'

    # Integer primary key, flagged as autoincrement.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Required, unique and indexed string of up to 10 characters.
    cik = Column(String(10), unique=True, index=True, nullable=False)
    # Required string of up to 71 characters.
    name = Column(String(71), nullable=False)
当我向表中插入新记录并显式指定 id 时:
# Variant 1: the primary key is assigned by the application (``counter``).
# The ``...`` values are placeholders left by the author of the snippet.
company = Company(id=counter, cik=..., name=...)
程序运行得非常快,SQLAlchemy 向服务器发出的是批量插入语句。
如果我不指定 id,而是依赖数据库来生成唯一 id:
# Variant 2: ``id`` is left unset so that the database has to generate it.
# The ``...`` values are placeholders left by the author of the snippet.
company = Company(cik=..., name=...)
代码就变得像质子衰变一样慢。echo 输出显示 SQLAlchemy 为每个 Company 对象单独发出一条 INSERT 语句,不再进行批量插入。
有没有办法避免这种行为,同时仍然依赖数据库来生成 id?
答案 0 :(得分:0)
我最终采用的做法是分阶段(staging)上传数据。首先,我为准备写入数据的目标表创建了一个结构相同的副本。这一步参照了问答 “sqlalchemy construct new declarative class from existing” 中的建议:
def mutate_declarative(source, omit_columns=('created_at', 'updated_at')):
    """Build a declarative "staging" class that mirrors ``source``.

    The staging table is named ``<source.__tablename__>_staging`` and is
    registered on the metadata of the base returned by ``get_base()``.

    Each copied column keeps only its name, type, ``primary_key``,
    ``nullable``, ``doc``, ``default``, ``unique`` and ``autoincrement``
    settings; no other column options are passed to the new ``Column``.

    :param source: declarative class exposing ``__tablename__`` and
        ``__table__``.
    :param omit_columns: names of the columns that must not be copied to the
        staging table. Defaults to ``('created_at', 'updated_at')``.
    :return: the new ``Stage`` class; ``Stage.original`` refers back to
        ``source``.
    """
    # Resolve the base once so the class and its table share one metadata.
    base = get_base()
    staging_name = source.__tablename__ + '_staging'

    staging_columns = [
        Column(
            c.name,
            c.type,
            primary_key=c.primary_key,
            nullable=c.nullable,
            doc=c.doc,
            default=c.default,
            unique=c.unique,
            autoincrement=c.autoincrement,
        )
        for c in source.__table__.c
        if c.name not in omit_columns
    ]

    class Stage(base):
        # Back-reference to the class this staging class was derived from.
        original = source
        __tablename__ = staging_name
        __table__ = Table(staging_name, base.metadata, *staging_columns)

    return Stage
def create_staging_table(source):
    """Create a fresh staging table for ``source`` and return its class.

    The staging class comes from ``mutate_declarative(source)``. Its table is
    dropped first when it already exists (``checkfirst=True``) and then
    created again, using whatever ``get_base().metadata.bind`` holds.

    :param source: declarative class to derive the staging class from.
    :return: the staging class.
    """
    staging_cls = mutate_declarative(source)
    bind = get_base().metadata.bind
    staging_table = staging_cls.__table__
    # Drop-then-create, so the table exists and holds no rows afterwards.
    staging_table.drop(bind, checkfirst=True)
    staging_table.create(bind)
    return staging_cls
def drop_staging_table(source):
    """Drop the table mapped by ``source`` if it exists.

    :param source: class whose ``__table__`` is dropped. NOTE(review): despite
        the parameter name this is presumably the staging class returned by
        ``create_staging_table`` -- confirm against the callers.
    """
    bind = get_base().metadata.bind
    table = source.__table__
    table.drop(bind, checkfirst=True)
enter code here
上面的代码让我可以快速创建一张空的暂存表(staging table),把它用作临时存储,并使用在代码中生成的主键上传数据。正如我在原问题中所示,这种方式相对较快。之后,需要把暂存表中的数据移动到主表中。这里的问题在于需要把已有数据与暂存数据对齐,这可以通过 MySQL 支持的 “ON DUPLICATE KEY UPDATE” 子句来实现。不幸的是,SQLAlchemy 并不支持这一子句。为了解决这个问题,我参照了问答 “SQLAlchemy ON DUPLICATE KEY UPDATE”
def _on_duplicate_key_clause(column_names):
    """Return ``ON DUPLICATE KEY UPDATE a=values(a), b=values(b), ...``."""
    assignments = ", ".join(
        "{0}=values({0})".format(name) for name in column_names)
    return "ON DUPLICATE KEY UPDATE " + assignments


def _register_mysql_appendstring_hook():
    """Make MySQL INSERTs honour a custom ``mysql_appendstring`` keyword."""
    # Imported locally so this block does not depend on the module's imports.
    from sqlalchemy.ext.compiler import compiles
    from sqlalchemy.sql.expression import Insert

    # SQLAlchemy's Insert construct has no hook for trailing raw SQL, so the
    # compiled statement is extended with the string supplied by the caller.
    @compiles(Insert, "mysql")
    def append_string(insert, compiler, **kw):
        sql = compiler.visit_insert(insert, **kw)
        # The original author observed that the key is reported as present
        # even when it was never passed, hence the extra check on its value.
        if ('mysql_appendstring' in insert.kwargs) and insert.kwargs['mysql_appendstring']:
            return sql + " " + insert.kwargs['mysql_appendstring']
        return sql

    # Registers ``appendstring`` as a MySQL-specific keyword of Insert with a
    # default of None (the original author added it to silence a warning).
    Insert.argument_for("mysql", "appendstring", None)


def move_data_from_staging_to_main(session, staging, attempt_update=False):
    """Copy every row of a staging table into its main table.

    Issues a single ``INSERT ... FROM SELECT`` through ``session.execute``.
    Primary-key columns of the staging table are left out of the copy. The
    function does not commit.

    :param session: object whose ``execute`` method runs the statement.
    :param staging: staging class as produced by ``mutate_declarative``; it
        must provide ``__table__`` and ``original.__table__``.
    :param attempt_update: when true, a MySQL ``ON DUPLICATE KEY UPDATE``
        clause is appended so that new data overwrites existing data. When
        false a plain insert is issued and, per the original author,
        conflicting existing data results in an error.
    """
    main_table = staging.original.__table__
    staged_table = staging.__table__

    payload_columns = [
        column for column in staged_table.columns if not column.primary_key
    ]
    staged_rows = staged_table.select().with_only_columns(
        payload_columns).alias("subquery1")
    column_names = [c.name for c in staged_rows.columns]

    if attempt_update:
        _register_mysql_appendstring_hook()
        insert = main_table.insert(
            mysql_appendstring=_on_duplicate_key_clause(column_names))
    else:
        insert = main_table.insert()

    session.execute(insert.from_select(column_names, staged_rows.select()))