使用 SQLAlchemy 性能 FAQ(http://docs.sqlalchemy.org/en/latest/faq/performance.html)中给出的批量插入代码时,sqlite 工作正常,耗时也与文档中所述一致。但是把同样的代码改用 postgresql 连接字符串后,总耗时增加了许多倍。
有没有办法让它在postgresql中更快?我在这做错了什么?
我尤其关心 bulk_insert_mappings 和 bulk_save_objects,因为要插入 370,000 行数据,它们是我仅有的可用选项。
Postgresql连接字符串
# Assemble the PostgreSQL URL from the settings held on ``conf``:
# postgresql://<user>:<password>@<host>:<port>/<database>
connection_string = ''.join((
    'postgresql://', conf.DB_USER, ':', conf.DB_PASSWORD,
    '@', conf.DB_HOST, ':', conf.DB_PORT, '/', conf.DB_NAME,
))
用于测试性能的代码:
import time
import sqlite3
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import scoped_session, sessionmaker
# Module-level engine handle; init_sqlalchemy() assigns the real engine.
engine = None

# Declarative base class that the Customer model derives from.
Base = declarative_base()

# Scoped session registry; init_sqlalchemy() binds it to the engine.
DBSession = scoped_session(sessionmaker())
class Customer(Base):
    """ORM model mapped to the ``customer`` table."""

    __tablename__ = "customer"

    # Integer primary key column.
    id = Column(Integer, primary_key=True)

    # Name column, declared as String(255).
    name = Column(String(255))
def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'):
    """Create the global engine, bind the session to it and recreate tables.

    ``dbname`` is accepted but not used by this version: the engine always
    connects to the hard-coded PostgreSQL URL below.
    """
    global engine

    pg_url = 'postgresql://scott:tiger@localhost:5432/test_db'
    engine = create_engine(pg_url, echo=False)

    # Discard the current scoped session before re-configuring the registry.
    DBSession.remove()
    DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)

    # Drop and recreate every mapped table so each run starts empty.
    metadata = Base.metadata
    metadata.drop_all(engine)
    metadata.create_all(engine)
def test_sqlalchemy_orm(n=100000):
    """Time adding ``n`` Customer objects to the session one at a time.

    The primary key is left unset on each object. The session is flushed
    whenever the loop index is a multiple of 1000 and committed once at the
    end; the elapsed wall-clock time is printed.
    """
    init_sqlalchemy()
    started = time.time()

    for idx in xrange(n):
        record = Customer()
        record.name = 'NAME ' + str(idx)
        DBSession.add(record)
        # Flush on indexes 0, 1000, 2000, ...
        if not idx % 1000:
            DBSession.flush()
    DBSession.commit()

    elapsed = time.time() - started
    print(
        "SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(elapsed) + " secs")
def test_sqlalchemy_orm_pk_given(n=100000):
    """Time adding ``n`` Customer objects whose primary keys are preassigned.

    Object number ``idx`` gets ``id == idx + 1``. The session is flushed
    whenever the loop index is a multiple of 1000 and committed once at the
    end; the elapsed wall-clock time is printed.
    """
    init_sqlalchemy()
    started = time.time()

    for idx in xrange(n):
        record = Customer(id=idx + 1, name="NAME " + str(idx))
        DBSession.add(record)
        # Flush on indexes 0, 1000, 2000, ...
        if not idx % 1000:
            DBSession.flush()
    DBSession.commit()

    elapsed = time.time() - started
    print(
        "SQLAlchemy ORM pk given: Total time for " + str(n) +
        " records " + str(elapsed) + " secs")
def test_sqlalchemy_orm_bulk_save_objects(n=100000):
    """Time inserting ``n`` Customer objects with ``bulk_save_objects()``.

    Rows are sent in batches of at most 10000 objects and committed once at
    the end; the elapsed wall-clock time is printed.

    The previous loop decremented its counter before building each batch, so
    the last batch was always empty: only ``n - 10000`` rows were inserted
    (none at all for ``n < 10000``) and names restarted at "NAME 0" in every
    batch. Batches are now derived from explicit start/stop offsets so that
    exactly ``n`` rows, named "NAME 0" .. "NAME n-1", are inserted.
    """
    init_sqlalchemy()
    t0 = time.time()

    batch_size = 10000
    for start in xrange(0, n, batch_size):
        # The final batch may be shorter when n is not a multiple of 10000.
        stop = min(start + batch_size, n)
        DBSession.bulk_save_objects(
            [
                Customer(name="NAME " + str(i))
                for i in xrange(start, stop)
            ]
        )
    DBSession.commit()

    print(
        "SQLAlchemy ORM bulk_save_objects(): Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
def test_sqlalchemy_orm_bulk_insert(n=100000):
    """Time inserting ``n`` customer rows with ``bulk_insert_mappings()``.

    Rows are sent as plain dictionaries in batches of at most 10000 and
    committed once at the end; the elapsed wall-clock time is printed.

    The previous loop decremented its counter before building each batch, so
    the last batch was always empty: only ``n - 10000`` rows were inserted
    (none at all for ``n < 10000``) and names restarted at "NAME 0" in every
    batch. Batches are now derived from explicit start/stop offsets so that
    exactly ``n`` rows, named "NAME 0" .. "NAME n-1", are inserted.
    """
    init_sqlalchemy()
    t0 = time.time()

    batch_size = 10000
    for start in xrange(0, n, batch_size):
        # The final batch may be shorter when n is not a multiple of 10000.
        stop = min(start + batch_size, n)
        DBSession.bulk_insert_mappings(
            Customer,
            [
                dict(name="NAME " + str(i))
                for i in xrange(start, stop)
            ]
        )
    DBSession.commit()

    print(
        "SQLAlchemy ORM bulk_insert_mappings(): Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
def test_sqlalchemy_core(n=100000):
    """Time inserting ``n`` customer rows with a single Core ``execute()``.

    One INSERT construct for the ``customer`` table is executed on the global
    engine with a list of ``n`` parameter dictionaries; the elapsed
    wall-clock time is printed.
    """
    init_sqlalchemy()
    started = time.time()

    insert_stmt = Customer.__table__.insert()
    rows = [{"name": 'NAME ' + str(idx)} for idx in xrange(n)]
    engine.execute(insert_stmt, rows)

    elapsed = time.time() - started
    print(
        "SQLAlchemy Core: Total time for " + str(n) +
        " records " + str(elapsed) + " secs")
def init_sqlite3(dbname):
    """Open the SQLite database ``dbname`` and recreate the customer table.

    Any existing ``customer`` table is dropped, a new one is created and the
    change is committed. Returns the open ``sqlite3`` connection.
    """
    connection = sqlite3.connect(dbname)
    cursor = connection.cursor()

    schema_statements = (
        "DROP TABLE IF EXISTS customer",
        "CREATE TABLE customer (id INTEGER NOT NULL, "
        "name VARCHAR(255), PRIMARY KEY(id))",
    )
    for statement in schema_statements:
        cursor.execute(statement)

    connection.commit()
    return connection
def test_sqlite3(n=100000, dbname='sqlite3.db'):
    """Time inserting ``n`` rows through the raw ``sqlite3`` driver.

    The table is recreated via ``init_sqlite3(dbname)`` before the timer
    starts. Each row is inserted with its own ``execute()`` call, followed by
    a single commit; the elapsed wall-clock time is printed.
    """
    connection = init_sqlite3(dbname)
    cursor = connection.cursor()
    started = time.time()

    insert_sql = "INSERT INTO customer (name) VALUES (?)"
    for idx in xrange(n):
        params = ('NAME ' + str(idx),)
        cursor.execute(insert_sql, params)
    connection.commit()

    elapsed = time.time() - started
    print(
        "sqlite3: Total time for " + str(n) +
        " records " + str(elapsed) + " sec")
if __name__ == '__main__':
    # Run every benchmark in the original order with the same row count.
    row_count = 100000
    for benchmark in (
        test_sqlalchemy_orm,
        test_sqlalchemy_orm_pk_given,
        test_sqlalchemy_orm_bulk_save_objects,
        test_sqlalchemy_orm_bulk_insert,
        test_sqlalchemy_core,
        test_sqlite3,
    ):
        benchmark(row_count)
输出:
SQLAlchemy ORM: Total time for 100000 records 40.6781959534 secs
SQLAlchemy ORM pk given: Total time for 100000 records 21.0855250359 secs
SQLAlchemy ORM bulk_save_objects(): Total time for 100000 records 14.068707943 secs
SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 11.6551070213 secs
SQLAlchemy Core: Total time for 100000 records 12.5298728943 secs
sqlite3: Total time for 100000 records 0.477468013763 sec
使用原始连接字符串(即sqlite):
# Variant of the engine setup that connects to the URL held in ``dbname``.
# NOTE(review): dbname is presumably the init_sqlalchemy() parameter, whose
# default is 'sqlite:///sqlalchemy.db' -- confirm.
engine = create_engine(dbname, echo=False)
输出:
SQLAlchemy ORM: Total time for 100000 records 16.9145789146 secs
SQLAlchemy ORM pk given: Total time for 100000 records 10.2713520527 secs
SQLAlchemy ORM bulk_save_objects(): Total time for 100000 records 3.69206118584 secs
SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 1.00701212883 secs
SQLAlchemy Core: Total time for 100000 records 0.467703104019 secs
sqlite3: Total time for 100000 records 0.566409826279 sec
答案 0 :(得分:0)
最快的方法是使用COPY FROM
(请参阅SQLAlchemy, Psycopg2 and Postgresql COPY),但是如果您不具有写权限,例如部署到Heroku,则可以利用Psycopg2 Fast Execution Helpers。
例如,对于批量插入(bulk)或 Core 插入,按如下方式创建引擎:
# Create a psycopg2-backed engine with its executemany options set.
# NOTE(review): 'values' presumably routes executemany() through psycopg2's
# fast execution helper (execute_values), 10000 rows per page -- confirm
# against the SQLAlchemy version in use.
engine = create_engine(
"postgresql+psycopg2://scott:tiger@host/dbname",
executemany_mode='values',
executemany_values_page_size=10000)
这样可以把耗时降低到:
SQLAlchemy ORM bulk_save_objects(): Total time for 100000 records 2.796818971633911 secs
SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 1.3805248737335205 secs
SQLAlchemy Core: Total time for 100000 records 1.1153180599212646 secs
而不是原来的:
SQLAlchemy ORM bulk_save_objects(): Total time for 100000 records 9.02771282196045 secs
SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 7.643821716308594 secs
SQLAlchemy Core: Total time for 100000 records 7.460561275482178 secs