YAML - 序列化属性类型

时间:2017-08-18 12:05:49

标签: python-3.x serialization yaml ruamel.yaml

我在用 YAML 序列化一些类时遇到了问题:这些类以类型引用作为成员。我使用的是 ruamel.yaml 的安全加载器(safe loader)。

我从REPL提示符中运行了以下所有内容(以获得多个错误)。

初始化:

import sys
from ruamel.yaml import YAML, yaml_object

# Shared YAML instance used for every dump below; typ="safe" selects the safe
# loader/dumper and pure=True requests the pure-Python implementation.
Y = YAML(typ="safe",pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Wrapper whose single attribute is a reference to a type."""

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # Store the class object itself, not an instance of it.
        self.type = type

    def f(self):
        """Call the stored type with no arguments and return the result."""
        factory = self.type
        return factory()

class T1:
    """Plain type that an ``A`` instance refers to; not registered with Y."""

@yaml_object(Y)
class T2:
    """Second referenced type; this one is passed through ``yaml_object(Y)``."""

class T3:
    """Third referenced type, paired with an explicit ``register_class`` call."""


# NOTE: ``T3.__class__`` evaluates to the metaclass ``type``, so this call
# hands ``type`` to ``register_class`` rather than T3 itself.
Y.register_class(T3.__class__)

导致失败的代码:

# Each call below raises RepresenterError. Enter them one at a time at the
# REPL so that every error is reported instead of only the first.
Y.dump(A(T1), sys.stdout)
Y.dump(A(T2), sys.stdout)
Y.dump(A(T3), sys.stdout)
Y.dump(A(int), sys.stdout)

输出如下(每个回溯只保留最后一行):

ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T1' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T2' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T3' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__abs__' of 'int' objects>

任何允许我(安全地)唯一保存类型的解决方案(我需要生成类型的对象并检查传入对象是否属于某种类型)将不胜感激。一个函数或生成我所需类型的类也会遇到无法序列化的问题。

P.S. 我可能还发现了一个 bug:出于某种原因,对同一个有效参数反复(尝试)序列化时,转储的行为前后并不一致。

# The identical call issued four times against the same YAML instance, entered
# one at a time at the REPL to compare the outcome of each attempt.
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)

输出:

>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 352, in dump
    return self.dump_all([data], stream, _kw, transform=transform)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 383, in dump_all
    self.representer.represent(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 73, in represent
    node = self.represent_data(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 552, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 492, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 111, in represent_data
    node = self.yaml_representers[None](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 375, in represent_undefined
    raise RepresenterError("cannot represent an object: %s" % data)
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
# same traceback here
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> 

2 个答案:

答案 0 :(得分:1)

YAML 期望转储的是对象,并且最终通过写出标量字符串来完成。T1 不是对象(T2、T3 也不是),问题正是由此而来。您可以尝试把每个类引用包装成对象并为其加上标记,但在我看来这只会使事情复杂化。

最终归结为获取标量表示,即将类的字符串表示形式添加到文件中,因此您也可以调整A()直接转储字符串表示并将其读回:

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


# Shared YAML instance for the round trips below; typ="safe" selects the safe
# loader/dumper and pure=True requests the pure-Python implementation.
Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Holder for a type reference that is dumped as a single tagged scalar."""

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # Keep the class object itself; only its name is written on dump.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent *node* (an ``A`` instance) as ``!Aclass <type name>``.

        Only ``node.type.__name__`` is written, so the module that defines
        the type is not part of the output.
        """
        type_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, type_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Rebuild an ``A`` from the scalar value of *node*.

        An undotted value is looked up in this module's ``globals()``. A
        dotted value is split at its last dot into a module name, looked up
        in ``sys.modules``, and an attribute name fetched from that module.
        A name that cannot be found raises ``KeyError`` or
        ``AttributeError`` from the lookup itself.
        """
        value = node.value
        if '.' not in value:
            return cls(globals()[value])
        # The name lives in some other module.
        module_name, attr_name = value.rsplit('.', 1)
        return cls(getattr(sys.modules[module_name], attr_name))


class T1:
    """Plain type that an ``A`` instance refers to; not registered with Y."""


@yaml_object(Y)
class T2:
    """Second referenced type; this one is passed through ``yaml_object(Y)``."""


class T3:
    """Third referenced type, registered with Y by the explicit call below."""


Y.register_class(T3)


# Round-trip an A around each type: dump it to a string, then load it back.
for referenced_type in (T1, T2, T3, DoubleQuotedScalarString):
    print('----------------------')
    wrapper = A(referenced_type)
    print('s', wrapper.type)

    stream = StringIO()
    Y.dump(wrapper, stream)
    dumped = stream.getvalue()
    print(dumped)

    restored = Y.load(dumped)
    print('d', restored.type)

给出:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

d <class '__main__.T1'>
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

d <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>

如果 A() 上还有其他需要转储/加载的属性,您应该创建一个字典(其中 .type 以转换后的字符串保存),然后转储/加载这个字典。

我不认为您发现了一个真正的错误,您看到的是在错误发生后继续使用所带来的副作用:Y 对象(及其组件)处于未定义状态。捕获错误后,不应再重用同一个 YAML() 实例。这一点应该在文档中写得更清楚。因此,如果您想在 for 循环中使用 try/except,应当把 Y = YAML(typ='safe', pure=True) 移到 try 部分内部。

答案 1 :(得分:0)

作为对 Anthon 回答的补充,我开始修改 A.from_yaml 使其更安全,但还没有在 _check_registered() 中覆盖所有情况。思路是:凡是 Y 被允许加载其实例的类型都允许加载,其余类型一律拒绝。请将以下代码视为未完成的工作(WIP):

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


# Shared YAML instance for the round trips below; typ="safe" selects the safe
# loader/dumper and pure=True requests the pure-Python implementation.
Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Holder for a type reference, dumped as one tagged scalar and vetted on load."""

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # Keep the class object itself; only its name is written on dump.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent *node* (an ``A`` instance) as ``!Aclass <type name>``."""
        type_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, type_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Resolve the scalar of *node* to a type, vet it, and wrap it in an ``A``.

        A dotted value is split at its last dot into a module name, looked up
        in ``sys.modules``, and an attribute name; an undotted value is looked
        up in this module's ``globals()``. The resolved type must then pass
        ``_check_registered``, which raises ``Exception`` otherwise.
        """
        value = node.value
        if '.' in value:
            # The name lives in some other module.
            module_name, attr_name = value.rsplit('.', 1)
            t = getattr(sys.modules[module_name], attr_name)
        else:
            t = globals()[value]
        cls._check_registered(t, constructor, node)
        return cls(t)

    @classmethod
    def _check_registered(cls, t, constructor, node):
        """Raise ``Exception`` unless *t* has a ``yaml_tag`` known to *constructor*.

        This is only a very basic check and ideally should be made more
        secure: it compares the tag string against
        ``constructor.yaml_constructors`` and does not verify that *t* is the
        class that was registered under that tag. *node* is not used.
        """
        if not hasattr(t, "yaml_tag"):
            raise Exception("Error: No attribute 'yaml_tag'!")
        if t.yaml_tag not in constructor.yaml_constructors:
            raise Exception("Error: Tag not registered!")

class T1:
    """Referenced type that declares a tag but is never registered with Y."""

    yaml_tag = u"!T1"


@yaml_object(Y)
class T2:
    """Second referenced type; passed through ``yaml_object(Y)`` with tag ``!T2``."""

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker that the class was instantiated.
        print("Initializing...")

class T2_bad:
    """Stand-in for a malicious class that reuses the ``!T2`` tag.

    It is deliberately not registered with Y.
    """

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker that the untrusted constructor actually ran.
        print("Evil code here!")


class T3:
    """Third referenced type, registered with Y by the explicit call below."""

    yaml_tag = u"!T3"


Y.register_class(T3)



# Round-trip an A around each candidate type, then instantiate what came back.
for candidate in (T1, T2, T2_bad, T3, DoubleQuotedScalarString):
    try:
        print('----------------------')
        wrapper = A(candidate)
        print('s', wrapper.type)
        stream = StringIO()
        Y.dump(wrapper, stream)
        dumped = stream.getvalue()
        print(dumped)
        restored = Y.load(dumped)
        print('d', restored.type)
        # Left as a bare expression so an interactive session echoes the instance.
        restored.type()
    except Exception as err:
        # Report the failure and carry on with the next type.
        print(err)

返回:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

Error: Tag not registered!
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
Initializing...
<__main__.T2 object at 0x0000015B8EC82F60>
----------------------
s <class '__main__.T2_bad'>
!Aclass T2_bad
...

d <class '__main__.T2_bad'>
Evil code here!
<__main__.T2_bad object at 0x0000015B8EC82EF0>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
<__main__.T3 object at 0x0000015B8EC82E10>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

Error: No attribute 'yaml_tag'!

可以看出,它仍然不安全(运行了“邪恶代码”),也无法处理未定义 yaml_tag 的类型。欢迎随意修改以解决这些问题。