我有一张表 dna_extraction_protocols,其中包含有关 DNA 提取方案的数据。它的许多键引用了存储在 incubations 表中的 Incubation 对象。Incubation 具有 duration_unit 键,其中包含一个 MeasurementUnit 对象,该对象存储在 measurement_units 表中。
这些表的创建如下:
class DNAExtractionProtocol(Protocol):
    """DNA extraction protocol, stored in the ``dna_extraction_protocols`` table.

    Subclass of ``Protocol``: the primary key is also a foreign key to
    ``protocols.id`` and rows are identified by the ``'dna_extraction'``
    polymorphic identity.

    Each ``*_id`` column is a foreign key, paired with a relationship that
    names it explicitly through ``foreign_keys``; this is required because
    several columns point at the same target table (``incubations``,
    ``solutions`` and ``measurement_units``).
    """

    __tablename__ = 'dna_extraction_protocols'
    __mapper_args__ = {'polymorphic_identity': 'dna_extraction'}

    id = Column(Integer, ForeignKey('protocols.id'), primary_key=True)

    # Sample mass and the unit it is expressed in.
    sample_mass = Column(Float)
    mass_unit_id = Column(String, ForeignKey('measurement_units.id'))
    mass_unit = relationship("MeasurementUnit", foreign_keys=[mass_unit_id])

    # Digestion step: buffer solution, buffer volume and incubation.
    digestion_buffer_id = Column(String, ForeignKey("solutions.id"))
    digestion_buffer = relationship("Solution", foreign_keys=[digestion_buffer_id])
    digestion_buffer_volume = Column(Float)
    digestion_id = Column(Integer, ForeignKey("incubations.id"))
    digestion = relationship("Incubation", foreign_keys=[digestion_id])

    # Lysis step: buffer solution, buffer volume and incubation.
    lysis_buffer_id = Column(String, ForeignKey("solutions.id"))
    lysis_buffer = relationship("Solution", foreign_keys=[lysis_buffer_id])
    lysis_buffer_volume = Column(Float)
    lysis_id = Column(Integer, ForeignKey("incubations.id"))
    lysis = relationship("Incubation", foreign_keys=[lysis_id])

    # Proteinase solution and its volume.
    proteinase_id = Column(String, ForeignKey("solutions.id"))
    proteinase = relationship("Solution", foreign_keys=[proteinase_id])
    proteinase_volume = Column(Float)

    # Remaining steps, each described by its own Incubation row.
    inactivation_id = Column(Integer, ForeignKey("incubations.id"))
    inactivation = relationship("Incubation", foreign_keys=[inactivation_id])
    cooling_id = Column(Integer, ForeignKey("incubations.id"))
    cooling = relationship("Incubation", foreign_keys=[cooling_id])
    centrifugation_id = Column(Integer, ForeignKey("incubations.id"))
    centrifugation = relationship("Incubation", foreign_keys=[centrifugation_id])

    # Unit for the *_volume columns above -- presumably shared by all of
    # them; TODO confirm.
    volume_unit_id = Column(String, ForeignKey('measurement_units.id'))
    volume_unit = relationship("MeasurementUnit", foreign_keys=[volume_unit_id])
class Incubation(Base):
    """One incubation step, stored in the ``incubations`` table.

    Holds a speed, a duration and a temperature, each paired with a
    foreign key to ``measurement_units`` and a relationship that names that
    key explicitly through ``foreign_keys`` (three columns point at the same
    target table).
    """

    __tablename__ = "incubations"

    id = Column(Integer, primary_key=True)
    speed = Column(Float)
    duration = Column(Float)
    temperature = Column(Float)
    movement = Column(String)  # "centrifuge" or "shake"

    # speed - usually in RPM - refers to either centrifugation or shaking,
    # depending on ``movement``.
    speed_unit_id = Column(String, ForeignKey('measurement_units.id'))
    speed_unit = relationship("MeasurementUnit", foreign_keys=[speed_unit_id])
    duration_unit_id = Column(String, ForeignKey('measurement_units.id'))
    duration_unit = relationship("MeasurementUnit", foreign_keys=[duration_unit_id])
    temperature_unit_id = Column(String, ForeignKey('measurement_units.id'))
    temperature_unit = relationship("MeasurementUnit", foreign_keys=[temperature_unit_id])
class MeasurementUnit(Base):
    """A unit of measurement, stored in the ``measurement_units`` table."""

    __tablename__ = "measurement_units"

    # NOTE(review): ``id`` is an Integer, but the foreign-key columns that
    # reference ``measurement_units.id`` from the other models in this file
    # are declared as String. The mismatch may go unnoticed on SQLite but
    # could cause problems on stricter backends -- confirm which type is
    # intended.
    id = Column(Integer, primary_key=True)
    code = Column(String, unique=True)
    long_name = Column(String)
    # Presumably the unit's representation for the LaTeX ``siunitx``
    # package -- TODO confirm.
    siunitx = Column(String)
现在,我想提取一个 Pandas 数据框(DataFrame),其中包含 DNAExtractionProtocol 对象、与其关联的 Incubation 对象以及关联的 MeasurementUnit 对象的所有属性。
我尝试了很多方法,对于第一层关系,下面这种写法似乎行得通:
# Select the protocol together with its volume unit and its lysis
# incubation, restricted to the row whose ``code`` matches.
sql_query = (
    session.query(DNAExtractionProtocol, MeasurementUnit, Incubation)
    .join(MeasurementUnit, MeasurementUnit.id == DNAExtractionProtocol.volume_unit_id)
    .join(Incubation, Incubation.id == DNAExtractionProtocol.lysis_id)
    .filter(tables[table].code == code)
)
但是,下面这个在我看来顺理成章的扩展:
# Fails with "ambiguous column name: measurement_units.id": the
# measurement_units table is joined twice under the same name (once for the
# protocol's volume unit, once for the incubation's temperature unit).
sql_query = session.query(DNAExtractionProtocol, MeasurementUnit, Incubation) \
.join(MeasurementUnit, MeasurementUnit.id == DNAExtractionProtocol.volume_unit_id) \
.join(Incubation, Incubation.id == DNAExtractionProtocol.lysis_id) \
.join(MeasurementUnit, MeasurementUnit.id == Incubation.temperature_unit_id) \
.filter(tables[table].code == code)
失败:
sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) ambiguous column name: measurement_units.id [SQL: u'SELECT protocols.type, dna_extraction_protocols.id, protocols.id, protocols.code, protocols.name, dna_extraction_protocols.sample_mass, dna_extraction_protocols.mass_unit_id, dna_extraction_protocols.digestion_buffer_id, dna_extraction_protocols.digestion_buffer_volume, dna_extraction_protocols.digestion_id, dna_extraction_protocols.lysis_buffer_id, dna_extraction_protocols.lysis_buffer_volume, dna_extraction_protocols.lysis_id, dna_extraction_protocols.proteinase_id, dna_extraction_protocols.proteinase_volume, dna_extraction_protocols.inactivation_id, dna_extraction_protocols.cooling_id, dna_extraction_protocols.centrifugation_id, dna_extraction_protocols.volume_unit_id, measurement_units.id, measurement_units.code, measurement_units.long_name, measurement_units.siunitx, incubations.id, incubations.speed, incubations.duration, incubations.temperature, incubations.movement, incubations.speed_unit_id, incubations.duration_unit_id, incubations.temperature_unit_id \nFROM protocols JOIN dna_extraction_protocols ON protocols.id = dna_extraction_protocols.id JOIN measurement_units ON measurement_units.id = dna_extraction_protocols.volume_unit_id JOIN incubations ON incubations.id = dna_extraction_protocols.lysis_id JOIN measurement_units ON measurement_units.id = incubations.temperature_unit_id \nWHERE protocols.code = ?'] [parameters: ('EPDqEP',)]
有谁知道我该怎样才能得到想要的结果?
答案 0 :(得分:1)
问题的核心在于你对同一张表进行了两次连接(JOIN)。在 SQL 中,解决这个问题的方法是为其中一次连接指定别名:
-- The second join of measurement_units gets its own alias so that its
-- columns can be told apart from those of the first join.
SELECT * FROM protocols
JOIN dna_extraction_protocols ON ...
JOIN measurement_units ON ...
JOIN incubations ON ...
JOIN measurement_units AS incubation_measurement_units ON incubation_measurement_units.id = incubations.temperature_unit_id
在 SQLAlchemy 中可以用同样的方式处理:
# `...` stands for the ON clause of each join, elided here.
# NOTE(review): `aliased=True` asks Query.join() to join against an anonymous
# alias of the target table. The flag is reportedly deprecated in
# SQLAlchemy 1.4 and removed in 2.0 -- confirm against the installed version.
sql_query = session.query(DNAExtractionProtocol, MeasurementUnit, Incubation) \
.join(MeasurementUnit, ...) \
.join(Incubation, ...) \
.join(MeasurementUnit, ..., aliased=True) \
.filter(tables[table].code == code)
如果您需要从别名表中返回列,或者按其列进行过滤,就会遇到问题,因为无法区分这两个 measurement_units。在这种情况下,您需要连接一个显式的 aliased() 构造:
# A second, explicitly named handle on measurement_units, so the
# incubation's unit can be selected and filtered separately from the
# protocol's unit. `...` stands for the ON clause of each join.
IncubationMeasurementUnit = aliased(MeasurementUnit)
sql_query = (
    session.query(
        DNAExtractionProtocol,
        MeasurementUnit,
        Incubation,
        IncubationMeasurementUnit,
    )
    .join(MeasurementUnit, ...)
    .join(Incubation, ...)
    .join(IncubationMeasurementUnit, ...)
    .filter(tables[table].code == code)
)