我用sqlalchemy(python 2.7)创建了两个表,数据库是mysql 5.5。 以下是我的代码:
# Schema setup for the benchmark (Python 2.7, SQLAlchemy, MySQL 5.5).
engine = create_engine('mysql://root:123@localhost/test')
metadata = MetaData()
# BUG FIX: original read "engin.connect()" (NameError) -- the variable is "engine".
conn = engine.connect()

# Table 1: 100 TINYINT columns ('c0'..'c99') and 100 SMALLINT columns
# ('d0'..'d99'), all NOT NULL with a server-side default of 0.
columns = []
for i in xrange(100):
    columns.append(Column('c%d' % i, TINYINT, nullable=False, server_default='0'))
    columns.append(Column('d%d' % i, SmallInteger, nullable=False, server_default='0'))
user = Table('user', metadata, *columns)

# Table 2: only two fixed-width binary columns (100 + 200 bytes), chosen so
# that both tables have roughly the same total row width.
user2 = Table('user2', metadata,
              Column('c0', BINARY(100), nullable=False, server_default='\0' * 100),
              Column('d0', BINARY(200), nullable=False, server_default='\0' * 200))
I then inserted 4000 rows into each table. Since these two tables have the same
row length, I expected the select speed to be almost the same. I ran the following test code:
s1 = select([user]).compile(engine)
s2 = select([user2]).compile(engine)
t1 = time()
result = conn.execute(s1).fetchall()
print 't1:', time() - t1
t2 = time()
result = conn.execute(s2).fetchall()
print 't2', time() - t2
The result is :
t1: 0.5120000
t2: 0.0149999
这是否意味着表中的列数会显著影响 SQLAlchemy 的性能？提前谢谢！
答案 0（得分：2）
这是否意味着表中的列数会显著影响 SQLAlchemy 的性能？
这个问题不好下结论：性能差异可能更多取决于底层的数据库引擎（这里是 MySQL），而不是 SQLAlchemy 本身——SQLAlchemy 只是一种用统一接口与不同数据库引擎交互的方式。
SQLAlchemy是Python SQL工具包和对象关系映射器,它为应用程序开发人员提供了SQL的全部功能和灵活性。
它提供了一整套众所周知的企业级持久性模式,旨在实现高效,高性能的数据库访问,并采用简单的Pythonic域语言。
虽然我可能错了，但您可以尝试用原生 SQL 对其进行基准测试来验证。
我实际上进行了一些测试...
# Benchmark: full-table SELECT over a 200-column table ('user') vs. a
# 2-column table of similar row width ('user2'), measured both through
# SQLAlchemy and through raw SQL strings, on an in-memory SQLite engine.
# NOTE(review): Python 2 snippet (xrange); the indentation of the for-loop
# body inside the setup string was lost in transcription -- restore it
# before running.
import timeit
# Shared fixture, re-executed for every Timer: create both tables and
# insert 4000 rows of server-side defaults into each.
setup = """
from sqlalchemy import create_engine, MetaData, select, Table, Column
from sqlalchemy.dialects.sqlite import BOOLEAN, SMALLINT, VARCHAR
engine = create_engine('sqlite://', echo = False)
metadata = MetaData()
conn = engine.connect()
columns = []
for i in xrange(100):
columns.append(Column('c%d' % i, VARCHAR(1), nullable = False, server_default = '0'))
columns.append(Column('d%d' % i, VARCHAR(2), nullable = False, server_default = '00'))
user = Table('user', metadata, *columns)
user.create(engine)
conn.execute(user.insert(), [{}] * 4000)
user2 = Table('user2', metadata, Column('c0', VARCHAR(100), nullable = False, server_default = '0' * 100), \
Column('d0', VARCHAR(200), nullable = False, server_default = '0' * 200))
user2.create(engine)
conn.execute(user2.insert(), [{}] * 4000)
"""
# Timed statement: SQLAlchemy SELECT of all 200 narrow columns.
many_columns = """
s1 = select([user]).compile(engine)
result = conn.execute(s1).fetchall()
"""
# Timed statement: SQLAlchemy SELECT of the two wide columns.
two_columns = """
s2 = select([user2]).compile(engine)
result = conn.execute(s2).fetchall()
"""
# Raw-SQL equivalents, to separate SQLAlchemy overhead from database cost.
raw_many_columns = "res = conn.execute('SELECT * FROM user').fetchall()"
raw_two_columns = "res = conn.execute('SELECT * FROM user2').fetchall()"
# Each call runs its statement once against a freshly built database.
timeit.Timer(two_columns, setup).timeit(number = 1)
timeit.Timer(raw_two_columns, setup).timeit(number = 1)
timeit.Timer(many_columns, setup).timeit(number = 1)
timeit.Timer(raw_many_columns, setup).timeit(number = 1)
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.010751008987426758
>>> timeit.Timer(raw_two_columns, setup).timeit(number = 1)
0.0099620819091796875
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.23563408851623535
>>> timeit.Timer(raw_many_columns, setup).timeit(number = 1)
0.21881699562072754
我确实发现了这个:
http://www.mysqlperformanceblog.com/2009/09/28/how-number-of-columns-affects-performance/
虽然他是用 max（最大列数）来做测试的……但结果仍然很有参考价值。
我真的很喜欢sqlalchemy,所以我决定使用pythons自己的sqlite3模块进行比较
# Same SELECT benchmark using only the stdlib sqlite3 module (no SQLAlchemy),
# to check whether the many-columns slowdown comes from the database layer.
import timeit
# Fixture: 'user' has 200 narrow VARCHAR columns, 'user2' two wide ones;
# 4000 rows are inserted into each.
# NOTE(review): d99's DEFAULT is '0' while every other d-column uses '00' --
# presumably a typo, though harmless here since the INSERTs supply all values.
setup = """
import sqlite3
conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('CREATE TABLE user (%s)' %\
("".join(("c%i VARCHAR(1) DEFAULT '0' NOT NULL, d%i VARCHAR(2) DEFAULT '00' NOT NULL," % (index, index) for index in xrange(99))) +\
"c99 VARCHAR(1) DEFAULT '0' NOT NULL, d99 VARCHAR(2) DEFAULT '0' NOT NULL"))
c.execute("CREATE TABLE user2 (c0 VARCHAR(100) DEFAULT '%s' NOT NULL, d0 VARCHAR(200) DEFAULT '%s' NOT NULL)" % ('0'* 100, '0'*200))
conn.commit()
c.executemany('INSERT INTO user VALUES (%s)' % ('?,' * 199 + '?'), [('0',) * 200] * 4000)
c.executemany('INSERT INTO user2 VALUES (?,?)', [('0'*100, '0'*200)] * 4000)
conn.commit()
"""
# Timed: SELECT * over the 200-column table.
many_columns = """
r = c.execute('SELECT * FROM user')
all = r.fetchall()
"""
# Timed: SELECT * over the 2-column table.
two_columns = """
r2 = c.execute('SELECT * FROM user2')
all = r2.fetchall()
"""
# One run each, against a freshly created in-memory database.
timeit.Timer(many_columns, setup).timeit(number = 1)
timeit.Timer(two_columns, setup).timeit(number = 1)
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.21009302139282227
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.0083379745483398438
并得出了相同的结果，所以我确实认为这是数据库实现层面的现象，而不是 SQLAlchemy 的问题。
DEFAULT INSERT
# Benchmark of INSERT speed using server-side defaults (empty parameter
# dicts) through SQLAlchemy on in-memory SQLite; tables as in the SELECT test.
# NOTE(review): Python 2 snippet (xrange); the for-loop body indentation
# inside the setup string was lost in transcription.
import timeit
# Fixture: create both tables but insert nothing -- the inserts are timed.
setup = """
from sqlalchemy import create_engine, MetaData, select, Table, Column
from sqlalchemy.dialects.sqlite import BOOLEAN, SMALLINT, VARCHAR
engine = create_engine('sqlite://', echo = False)
metadata = MetaData()
conn = engine.connect()
columns = []
for i in xrange(100):
columns.append(Column('c%d' % i, VARCHAR(1), nullable = False, server_default = '0'))
columns.append(Column('d%d' % i, VARCHAR(2), nullable = False, server_default = '00'))
user = Table('user', metadata, *columns)
user.create(engine)
user2 = Table('user2', metadata, Column('c0', VARCHAR(100), nullable = False, server_default = '0' * 100), \
Column('d0', VARCHAR(200), nullable = False, server_default = '0' * 200))
user2.create(engine)
"""
# Timed: 4000 default-valued rows into the 200-column table.
many_columns = """
conn.execute(user.insert(), [{}] * 4000)
"""
# Timed: 4000 default-valued rows into the 2-column table.
two_columns = """
conn.execute(user2.insert(), [{}] * 4000)
"""
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.017949104309082031
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.047809123992919922
使用sqlite3模块进行测试。
# INSERT benchmark with the stdlib sqlite3 module, explicit values this time.
import timeit
# Fixture: create both tables empty; the executemany inserts are timed.
# NOTE(review): as in the SELECT fixture, d99's DEFAULT is '0' rather than
# '00' -- presumably a typo; defaults are unused since values are supplied.
setup = """
import sqlite3
conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('CREATE TABLE user (%s)' %\
("".join(("c%i VARCHAR(1) DEFAULT '0' NOT NULL, d%i VARCHAR(2) DEFAULT '00' NOT NULL," % (index, index) for index in xrange(99))) +\
"c99 VARCHAR(1) DEFAULT '0' NOT NULL, d99 VARCHAR(2) DEFAULT '0' NOT NULL"))
c.execute("CREATE TABLE user2 (c0 VARCHAR(100) DEFAULT '%s' NOT NULL, d0 VARCHAR(200) DEFAULT '%s' NOT NULL)" % ('0'* 100, '0'*200))
conn.commit()
"""
# Timed: 4000 rows of 200 alternating '0'/'00' values into 'user'.
many_columns = """
c.executemany('INSERT INTO user VALUES (%s)' % ('?,' * 199 + '?'), [('0', '00') * 100] * 4000)
conn.commit()
"""
# Timed: 4000 two-value rows into 'user2'.
two_columns = """
c.executemany('INSERT INTO user2 VALUES (?,?)', [('0'*100, '0'*200)] * 4000)
conn.commit()
"""
timeit.Timer(many_columns, setup).timeit(number = 1)
timeit.Timer(two_columns, setup).timeit(number = 1)
>>> timeit.Timer(many_columns, setup).timeit(number = 1)
0.14044189453125
>>> timeit.Timer(two_columns, setup).timeit(number = 1)
0.014360189437866211
>>>
答案 1 :(得分:1)
Samy.vilar的答案很棒。但要记住的一个关键事项是列数会对任何数据库和任何ORM的性能产生影响。您拥有的列越多,从磁盘访问和传输的数据就越多。
此外,根据查询和表结构,添加更多列可能会将查询从索引覆盖变为强制访问基表,这可能会在某些数据库和某些情况下增加大量时间。
我只用过 SQLAlchemy，但作为一名 DBA，我通常建议与我合作的开发人员只查询他们需要的列，并避免在生产代码中使用 "SELECT *"：它可能返回比所需更多的列，而且当表/视图中新增列时，它会使代码变得更加脆弱。