如何顺利地以pythonic方式集成SQLAlchemy和子类Numpy.ndarray?

时间:2012-01-20 11:35:25

标签: python numpy sqlalchemy

我想通过SQLAlchemy在关系数据库中存储带注释(例如name)的NumPy数组。为此,

  • 我通过数据传输对象(DTONumpy作为MyNumpy的一部分)将NumPy数组与其数据分开。
  • 使用Container收集NumPy对象。

有什么好的、pythonic的方法可以修改Container(见下面的示例),使其直接以列表形式提供MyNumpy对象,而不是由SQLAlchemy提供的DTONumpy对象?

import numpy as np
import zlib

import sqlalchemy as sa
from sqlalchemy.orm import relationship, scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import TypeDecorator, CHAR

DBSession = scoped_session(sessionmaker())
Base = declarative_base()


#### New SQLAlchemy-Type #####################
class NumpyType (sa.types.TypeDecorator):
  impl = sa.types.LargeBinary

  def process_bind_param(self, value, dialect):
    return zlib.compress(value.dumps(), 9)

  def process_result_value(self, value, dialect):
    return np.loads(zlib.decompress(value))
##############################################


class DTONumpy(Base):
  __tablename__ = 'dtos_numpy'
  id = sa.Column(sa.Integer, primary_key=True)
  amount = sa.Column('amount', NumpyType)
  name = sa.Column('name', sa.String, default='')
  container_id = sa.Column(sa.ForeignKey('containers.id'))

  container_object = relationship(
      "Container",
      uselist=False,
      backref='dto_numpy_objects'
      )

  def __init__(self, amount, name=None):
    self.amount = np.array(amount)
    self.name = name


class Container(Base):
  __tablename__ = 'containers'
  id = sa.Column(sa.Integer, primary_key=True)
  name = sa.Column(sa.String, unique=True)

  # HERE: how to access DTONumpy BUT as MyNumpy objects in a way that MyNumpy
  # is smoothly integrated into SQLAlchemy?


class MyNumpy(np.ndarray):
  _DTO = DTONumpy
  def __new__(cls, amount, name=''):
    dto = cls._DTO(amount=amount, name=name)
    return cls.newByDTO(dto)

  @classmethod
  def newByDTO(cls, dto):
    obj = np.array(dto.amount).view(cls)
    obj.setflags(write=False) # Immutable
    obj._dto = dto
    return obj

  @property
  def name(self):
    return self._dto.name


if __name__ == '__main__':
  engine = sa.create_engine('sqlite:///:memory:', echo=True)
  DBSession.configure(bind=engine)
  Base.metadata.create_all(engine)
  session = DBSession()

  mn1 = MyNumpy ([1,2,3], "good data")
  mn2 = MyNumpy ([2,3,4], "bad data")

  # Save MyNumpy objects
  c1 = Container()
  c1.name = "Test-Container"
  c1.dto_numpy_objects += [mn1._dto, mn2._dto] # not a good ui
  session.add(c1)
  session.commit()

  # Load MyNumpy objects
  c2 = session.query(Container).filter_by(name="Test-Container").first()
  # Ugly UI:
  mn3 = MyNumpy.newByDTO(c2.dto_numpy_objects[0])
  mn4 = MyNumpy.newByDTO(c2.dto_numpy_objects[1])
  name3 = mn3._dto.name
  name4 = mn4._dto.name

以下是问题的说明:

Container现在应该提供一个MyNumpy对象的列表,而MyNumpy应该提供对相应Container对象的引用(列表和引用都必须考虑到SQLAlchemy映射):

type (c2.my_numpy_objects[0]) == MyNumpy
>>> True
c2.my_numpy_objects.append(MyNumpy ([7,2,5,6], "new data"))
print c2.dto_numpy_objects[-1].name
>>> "new data"

1 个答案:

答案 0 :(得分:6)

借助那个(that)问题的答案中给出的ListView,我提出了以下解决方案:

首先,修改Container:在SQLAlchemy属性dto_numpy_objects之上添加一个ListView属性:

  def __init__(self, name):
    """Store the container's name.

    The ``ListView`` over ``dto_numpy_objects`` is deliberately not created
    here; see the note below.
    """
    self.name = name
    # NOTE: at this point, the following code doesn't work:
    #
    #   self.my_numpies = ListView(
    #       self.dto_numpy_objects,  # see `DTONumpy.container_object`
    #       MyNumpy.newByDTO,
    #       MyNumpy.getDTO)
    #
    # SQLAlchemy seems to replace the `dto_numpy_objects` object after the
    # __init__ call, so `my_numpies._data` would no longer reference
    # `dto_numpy_objects`. One solution is to build the `ListView` inside a
    # property instead; see the property `Container.my_numpies`.

  @property
  def my_numpies(self):
    """Return a ``ListView`` exposing ``dto_numpy_objects`` as ``MyNumpy`` objects.

    The view is built on every access instead of being cached on the
    instance. A cached view would keep pointing at whatever collection object
    `dto_numpy_objects` was at first access, and SQLAlchemy may swap that
    object for a new one (the reason the view cannot be created in __init__;
    the same presumably applies when the instance is expired and reloaded).
    The view holds no state of its own, so rebuilding it is cheap.
    """
    return ListView(
        self.dto_numpy_objects, # see `DTONumpy.container_object`
        MyNumpy.newByDTO,       # raw2new: DTONumpy -> MyNumpy
        MyNumpy.getDTO)         # new2raw: MyNumpy -> DTONumpy

其次,给MyNumpy添加方法getDTO,它可以用作new2raw转换器:

  def getDTO(self):
    """Return the DTO object stored on this array as ``_dto``.

    Passed to ``ListView`` as the ``new2raw`` converter.
    """
    return self._dto

为了也能从MyNumpy使用backref container_object,可以通过添加以下方法把MyNumpy实现为一个包装器:

  def __getattr__(self, attr):
    """Forward lookups of attributes missing on the array to the wrapped DTO.

    Python only calls this hook after normal attribute lookup has failed.

    Raises:
      AttributeError: if the array has no ``_dto`` (for example an array
        derived by slicing or arithmetic, which does not go through
        ``newByDTO``), or if the DTO lacks ``attr``.
    """
    if attr == '_dto':
      # `_dto` itself is missing. Evaluating `self._dto` below would call
      # __getattr__('_dto') again and recurse until the stack overflows.
      raise AttributeError(
          "%r object has no attribute '_dto'" % type(self).__name__)
    return getattr(self._dto, attr)

总之,代码看起来像这样:

import numpy as np
import zlib

import sqlalchemy as sa
from sqlalchemy.orm import relationship, scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import TypeDecorator, CHAR

DBSession = scoped_session(sessionmaker())
Base = declarative_base()


class ListView(list):
  """List-like view that presents the items of a raw list in converted form.

  All data lives in ``raw_list`` (kept as ``self._data``); the ``list`` base
  class stays empty and is only inherited so ``isinstance(x, list)`` holds.
  Items are converted with ``raw2new`` when read and with ``new2raw`` when
  written, so every mutation goes straight through to ``raw_list``.

  Methods inherited from ``list`` that are not overridden here (``sort``,
  for example) would act on the empty base list, not on the data.

  Args:
    raw_list: the underlying mutable sequence of raw items.
    raw2new: callable converting a raw item to its presented form.
    new2raw: callable converting a presented item back to its raw form.
  """

  def __init__(self, raw_list, raw2new, new2raw):
    super(ListView, self).__init__()
    self._data = raw_list
    self.converters = {'raw2new': raw2new,
        'new2raw': new2raw}

  # The converters are looked up on every call so that changes made to
  # `self.converters` after construction are honoured.
  def _to_new(self, raw):
    """Convert one raw item to its presented form."""
    return self.converters['raw2new'](raw)

  def _to_raw(self, item):
    """Convert one presented item back to its raw form."""
    return self.converters['new2raw'](item)

  def __repr__(self):
    # join() also yields "[]" for an empty view; slicing off a trailing
    # separator would produce "]" in that case.
    return "[" + ",\n ".join(
        repr(self._to_new(raw)) for raw in self._data) + "]"

  def append(self, item):
    self._data.append(self._to_raw(item))

  def extend(self, items):
    # Convert everything first so a failing conversion leaves the data
    # untouched.
    self._data.extend([self._to_raw(item) for item in items])

  def insert(self, index, item):
    self._data.insert(index, self._to_raw(item))

  def remove(self, item):
    self._data.remove(self._to_raw(item))

  def pop(self, index=-1):
    """Remove the item at ``index`` (default: last) and return it converted."""
    return self._to_new(self._data.pop(index))

  def index(self, item, *args):
    return self._data.index(self._to_raw(item), *args)

  def count(self, item):
    return self._data.count(self._to_raw(item))

  def reverse(self):
    self._data.reverse()

  def __getitem__(self, index):
    if isinstance(index, slice):
      # A slice yields another view, not a single converted item.
      return ListView(self._data[index], **self.converters)
    return self._to_new(self._data[index])

  def __setitem__(self, key, value):
    if isinstance(key, slice):
      self._data[key] = [self._to_raw(item) for item in value]
    else:
      self._data[key] = self._to_raw(value)

  def __delitem__(self, key):
    return self._data.__delitem__(key)

  def __getslice__(self, i, j):
    # Python 2 only: `view[i:j]` on a list subclass is routed here.
    # __getitem__ is called explicitly because `self[i:j]` would come
    # straight back to this method.
    return self.__getitem__(slice(i, j))

  def __contains__(self, item):
    return self._to_raw(item) in self._data

  def __add__(self, other_list_view):
    # Both views must convert the same way for the result to be meaningful.
    assert self.converters == other_list_view.converters
    return ListView(
        self._data + other_list_view._data,
        **self.converters)

  def __iadd__(self, items):
    self.extend(items)
    return self

  def __len__(self):
    return len(self._data)

  def __iter__(self):
    # Iterates over a snapshot, so the view may be mutated while looping.
    return iter([self._to_new(raw) for raw in self._data])

  def __eq__(self, other):
    """Two views are equal when their raw data is equal.

    Anything that is not a ``ListView`` compares unequal.
    """
    return isinstance(other, ListView) and self._data == other._data

  def __ne__(self, other):
    # Needed on Python 2, which does not derive != from __eq__.
    return not self == other


#### New SQLAlchemy-Type #####################
class NumpyType (sa.types.TypeDecorator):
  """Column type storing a NumPy array as a zlib-compressed pickle blob.

  ``None`` is passed through unchanged in both directions so that NULL
  column values neither crash on write nor on read.

  SECURITY NOTE: loading goes through pickle, which can execute arbitrary
  code. Only use this type with databases whose contents are trusted.
  """
  impl = sa.types.LargeBinary
  # The type has no per-instance configuration, so it is safe to use in
  # SQLAlchemy's statement cache (newer SQLAlchemy versions warn when a
  # TypeDecorator does not declare this).
  cache_ok = True

  def process_bind_param(self, value, dialect):
    """Serialize ``value`` for storage: pickle it, then compress at level 9."""
    if value is None:
      return None
    return zlib.compress(value.dumps(), 9)

  def process_result_value(self, value, dialect):
    """Decompress and unpickle a stored blob back into an array."""
    if value is None:
      return None
    # `np.loads` was a deprecated alias for `pickle.loads` and is no longer
    # available in current NumPy; `ndarray.dumps` output is a plain pickle.
    import pickle
    return pickle.loads(zlib.decompress(value))
##############################################


class DTONumpy(Base):
  """Data transfer object: the ORM-mapped record behind one annotated array.

  Mapped to the ``dtos_numpy`` table. Holds the array payload (``amount``),
  its annotation (``name``) and a foreign key to the owning ``Container``.
  """
  __tablename__ = 'dtos_numpy'
  id = sa.Column(sa.Integer, primary_key=True)
  # Array payload, stored through the custom `NumpyType` column type.
  amount = sa.Column('amount', NumpyType)
  # Annotation of the array; the column-level default is the empty string.
  name = sa.Column('name', sa.String, default='')
  container_id = sa.Column(sa.ForeignKey('containers.id'))

  # Scalar reference to the owning Container (uselist=False). The backref
  # puts a `dto_numpy_objects` attribute on Container.
  container_object = relationship(
      "Container",
      uselist=False,
      backref='dto_numpy_objects'
      )

  def __init__(self, amount, name=None):
    """Store ``amount`` converted with ``np.array`` together with ``name``."""
    self.amount = np.array(amount)
    self.name = name

  def reprInitParams(self):
    """Return the reprs of ``amount`` and ``name`` formatted as ``(amount, name)``."""
    return "(%r, %r)" %(self.amount, self.name)

  def __repr__(self):
    """Return the class name followed by ``reprInitParams()``."""
    return "%s%s" %(
        self.__class__.__name__,
        self.reprInitParams())


class Container(Base):
  """Named collection of annotated arrays, mapped to the ``containers`` table.

  The related ``DTONumpy`` rows are reachable through the
  ``dto_numpy_objects`` backref declared on ``DTONumpy.container_object``;
  ``my_numpies`` presents those same rows as ``MyNumpy`` objects.
  """
  __tablename__ = 'containers'
  id = sa.Column(sa.Integer, primary_key=True)
  name = sa.Column(sa.String, unique=True)

  def __init__(self, name):
    """Create a container called ``name``."""
    self.name = name
    super(Container, self).__init__()

  @property
  def my_numpies(self):
    """Return a ``ListView`` exposing ``dto_numpy_objects`` as ``MyNumpy`` objects.

    The view is built on every access instead of being cached on the
    instance. A cached view would keep pointing at whatever collection object
    `dto_numpy_objects` was at first access, and SQLAlchemy may swap that
    object for a new one (presumably also when the instance is expired and
    reloaded, e.g. after a commit). The view holds no state of its own, so
    rebuilding it is cheap.
    """
    return ListView(
        self.dto_numpy_objects, # see `DTONumpy.container_object`
        MyNumpy.newByDTO,       # raw2new: DTONumpy -> MyNumpy
        MyNumpy.getDTO)         # new2raw: MyNumpy -> DTONumpy


class MyNumpy(np.ndarray):
  """Read-only ndarray that carries its ORM record (a ``DTONumpy``) as ``_dto``.

  Instances are created either from raw values (``MyNumpy(amount, name)``,
  which creates a new DTO) or from an existing DTO (``MyNumpy.newByDTO``).
  Attributes not found on the array are looked up on the DTO.

  Arrays that NumPy derives from an instance (slices, views, results of
  arithmetic) are not created through ``newByDTO`` and therefore have no
  ``_dto``; DTO-backed attributes raise ``AttributeError`` on them.
  """
  _DTO = DTONumpy

  def __new__(cls, amount, name=''):
    """Create a new DTO from ``amount`` and ``name`` and wrap it."""
    dto = cls._DTO(amount=amount, name=name)
    return cls.newByDTO(dto)

  @classmethod
  def newByDTO(cls, dto):
    """Build an instance from an existing DTO.

    The data is a copy of ``dto.amount``, marked non-writeable.
    """
    obj = np.array(dto.amount).view(cls)
    obj.setflags(write=False) # Immutable
    obj._dto = dto
    return obj

  @property
  def name(self):
    """The annotation stored on the DTO."""
    return self._dto.name

  def getDTO(self):
    """Return the wrapped DTO (used as the ``new2raw`` converter of ``ListView``)."""
    return self._dto

  def __getattr__(self, attr):
    """Forward lookups of attributes missing on the array to the DTO.

    Python only calls this hook after normal attribute lookup has failed.
    """
    if attr == '_dto':
      # `_dto` itself is missing. Evaluating `self._dto` below would call
      # __getattr__('_dto') again and recurse until the stack overflows.
      raise AttributeError(
          "%r object has no attribute '_dto'" % type(self).__name__)
    return getattr(self._dto, attr)

  def __repr__(self):
    """Return ``MyNumpy(amount, name)``; plain ndarray repr if there is no DTO."""
    dto = getattr(self, '_dto', None)
    if dto is None:
      # Derived arrays have no DTO; repr() must not raise for them.
      return np.ndarray.__repr__(self)
    return "%s%s" %(
        self.__class__.__name__,
        dto.reprInitParams())


if __name__ == '__main__':
  # Demo: store two annotated arrays in an in-memory SQLite database through
  # a Container, then read them back as MyNumpy objects.
  engine = sa.create_engine('sqlite:///:memory:', echo=True)
  DBSession.configure(bind=engine)
  Base.metadata.create_all(engine)
  session = DBSession()

  mn1 = MyNumpy ([1,2,3], "good data")
  mn2 = MyNumpy ([2,3,4], "bad data")

  # Save MyNumpy-Objects
  c1 = Container("Test-Container")
  c1.my_numpies.append(mn1)
  c1.my_numpies.append(mn2)
  session.add(c1)
  session.commit()

  # Load MyNumpy-Objects
  c2 = session.query(Container).filter_by(name="Test-Container").first()
  # Read from the queried container `c2`, not from `c1`, so this section
  # really demonstrates loading.
  mn3 = c2.my_numpies[0]
  mn4 = c2.my_numpies[1]

为了让对象的字符串表示(repr)更清晰,我添加了

  • DTONumpy.reprInitParams
  • DTONumpy.__repr__
  • MyNumpy.__repr__

有一件事仍然无效:

  c1.my_numpies += [mn1, mn2.dto]