我正在使用 SQLAlchemy 的 automap 来处理一系列 SQLite 数据库,这些数据库由一个我无法控制的程序生成。不过,我需要扩展这些数据库以提供额外的功能,因此我用 SQLAlchemy ORM 来修改数据库,并用 automap 访问那些我没有改动的表和列。
类定义(放在单独的模块中):
# Module-level declarative base created by automap_base(); every mapped
# class in this module subclasses it.
Base = automap_base()
class VariantAssociation(Base):
    """Association row linking one variant to one sample.

    Mapped to the ``sample_variant_association`` table. The primary key is
    the pair (``vid``, ``sid``); the remaining columns store values for that
    pair.
    """

    __tablename__ = "sample_variant_association"

    # Composite primary key; each half is also a foreign key.
    vid = Column(
        Integer,
        ForeignKey("variants.variant_id"),
        primary_key=True,
        index=True,
    )
    sid = Column(
        Integer,
        ForeignKey("samples.sample_id"),
        primary_key=True,
        index=True,
    )

    # Payload columns; the ones declared with index=True request an index.
    vdepth = Column(Integer, index=True)
    valt_depth = Column(Integer, index=True)
    gt = Column(Text)
    gt_type = Column(Integer)
    fraction = Column(Float, index=True)

    # Counterparts of Variant.samples and Samples.variants respectively.
    variant = relationship("Variant", back_populates="samples")
    sample = relationship("Samples", back_populates="variants")
class Variant(Base):
    """ORM class mapped to the ``variants`` table.

    Only ``variant_id`` and the deferred ``info`` blob are declared here.
    The other attributes the methods read (``chrom``, ``gt_depths``, ...)
    are not declared in this class -- presumably they are supplied by
    automap reflection; confirm against the database schema.
    """

    __tablename__ = "variants"

    variant_id = Column(Integer, primary_key=True)
    info = deferred(Column(LargeBinary))
    samples = relationship("VariantAssociation", back_populates="variant")

    compressed = False

    # Unpacked genotype data; stays None until decompress() has been called.
    _gt_alt_depths = _gt_depths = _gts = _gt_types = None

    def decompress(self, compressed=False):
        """Unpack the four genotype blobs into the private cache attributes.

        Args:
            compressed: when true the blobs are unpacked with
                ``snappy_unpack_blob``, otherwise with ``unpack_blob``.
        """
        unpack = snappy_unpack_blob if compressed else unpack_blob
        # Same order as the attributes are listed; each "x" lands in "_x".
        for column in ("gt_alt_depths", "gt_depths", "gts", "gt_types"):
            setattr(self, "_" + column, unpack(getattr(self, column)))

    def __repr__(self):
        """Return a short description: location, gene, alleles and type."""
        template = "<Variant {chrom}:{start}-{end} {gene} {ref}/{alt} {type}>"
        fields = ("chrom", "start", "end", "gene", "ref", "alt", "type")
        values = {field: getattr(self, field) for field in fields}
        return template.format(**values)

    @property
    def alt_depths(self):
        """Unpacked ``gt_alt_depths``, or None before decompress()."""
        return self._gt_alt_depths

    @property
    def depths(self):
        """Unpacked ``gt_depths``, or None before decompress()."""
        return self._gt_depths

    @property
    def genotypes(self):
        """Unpacked ``gts``, or None before decompress()."""
        return self._gts

    @property
    def genotype_types(self):
        """Unpacked ``gt_types``, or None before decompress()."""
        return self._gt_types
class Samples(Base):
    """ORM class mapped to the ``samples`` table."""

    __tablename__ = "samples"

    sample_id = Column(Integer, primary_key=True, index=True)
    name = Column(Text, index=True)

    # Counterpart of VariantAssociation.sample.
    variants = relationship("VariantAssociation", back_populates="sample")
class DataMigration(Base):
    """ORM class mapped to the ``datamigration`` table.

    The table has a single boolean column, ``done``, which is also its
    primary key.
    """

    __tablename__ = "datamigration"

    done = Column(Boolean, primary_key=True)
然后我用下面的方式加载数据库(在另一个函数中,`Base` 是从定义这些表的模块中导入的):
# Build the engine for the given SQLite file.
db_url = "sqlite:///{}".format(database)
engine = create_engine(db_url, poolclass=StaticPool, echo=echo)

# Create the two locally declared tables if the database lacks them.
for mapped_cls in (VariantAssociation, DataMigration):
    mapped_cls.__table__.create(bind=engine, checkfirst=True)

# NOTE(review): Base is a single module-level object shared by every call,
# so each database is reflected into the same metadata. Presumably that is
# how state from an earlier load reaches a later one -- confirm.
Base.prepare(engine, reflect=True)

session_factory = sessionmaker(bind=engine)
session = session_factory()
但是,如果我先后加载两个数据库,第一个数据库的状态就会泄漏到后续的加载中:
db = load_database("file1.db")  # First load: succeeds
db2 = load_database("file2.db")  # Second load: raises OperationalError (index already exists)
抛出的异常如下:
io.py in load_database(database)
150
151 # Create association table if not present
--> 152 VariantAssociation.__table__.create(bind=engine, checkfirst=True)
153 DataMigration.__table__.create(bind=engine, checkfirst=True)
154
[cut exception all into SQLalchemy internals]
OperationalError: (sqlite3.OperationalError) index ix_sample_variant_association_valt_depth already exists [SQL: 'CREATE INDEX ix_sample_variant_association_valt_depth ON sample_variant_association (valt_depth)']
如果我在各自单独的会话中加载这两个数据库,就不会发生这种情况,因此 `db` 的某些状态泄漏到了 `db2` 中。
确保每个加载的数据库ORM独立的最佳方法是什么?
编辑:进一步调试显示,同一条 `CREATE INDEX` 语句被执行了两次:
2017-09-15 15:18:00,284 INFO sqlalchemy.engine.base.Engine ()
2017-09-15 15:18:00,417 INFO sqlalchemy.engine.base.Engine COMMIT
2017-09-15 15:18:00,418 INFO sqlalchemy.engine.base.Engine CREATE INDEX
ix_sample_variant_association_valt_depth ON sample_variant_association
(valt_depth)
2017-09-15 15:18:00,418 INFO sqlalchemy.engine.base.Engine ()
2017-09-15 15:18:00,550 INFO sqlalchemy.engine.base.Engine COMMIT
2017-09-15 15:18:00,551 INFO sqlalchemy.engine.base.Engine CREATE INDEX
ix_sample_variant_association_valt_depth ON sample_variant_association
(valt_depth)