我在用 YAML 序列化带有类型引用成员的类时遇到了问题。我使用的是 ruamel.yaml 的安全(safe)加载器。
下面的所有代码都是在 REPL 提示符下运行的(因此可以依次得到多个错误)。
初始化:
import sys
from ruamel.yaml import YAML, yaml_object
Y = YAML(typ="safe",pure=True)
# ==============
@yaml_object(Y)
class A(object):
    """Object to be serialized; it keeps a reference to a class.

    Attributes:
        type: the class (not an instance) handed to the constructor.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # Stored unchanged; the parameter name is part of the interface.
        self.type = type

    def f(self):
        """Call the stored class without arguments and return the result."""
        factory = self.type
        return factory()
class T1(object):
    """Referenced class that is not registered with the YAML instance."""


@yaml_object(Y)
class T2(object):
    """Referenced class registered through the ``yaml_object`` decorator."""


class T3(object):
    """Referenced class used for the explicit registration attempt below."""


# NOTE(review): T3 is a plain class, so ``T3.__class__`` is the built-in
# ``type``; this call therefore hands ``type`` (not T3) to register_class.
Y.register_class(T3.__class__)
导致失败的代码:
Y.dump(A(T1), sys.stdout)
Y.dump(A(T2), sys.stdout)
Y.dump(A(T3), sys.stdout)
Y.dump(A(int), sys.stdout)
输出如下(每个回溯只保留最后一行):
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T1' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T2' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T3' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__abs__' of 'int' objects>
我需要一种能够(安全地)唯一保存类型的方案:既要能根据保存的类型生成对象,也要能检查传入的对象是否属于某种类型。任何这样的解决方案我都将不胜感激。改用函数或类来生成所需的类型,同样会遇到无法序列化的问题。
P.S. 我可能还发现了一个错误:出于某种原因,对同一个有效参数重复(尝试)序列化时,解析器的行为会前后不一致。
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
输出:
>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 352, in dump
return self.dump_all([data], stream, _kw, transform=transform)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 383, in dump_all
self.representer.represent(data)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 73, in represent
node = self.represent_data(data)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 552, in t_y
tag, data, cls, flow_style=representer.default_flow_style)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
return self.represent_mapping(tag, state, flow_style=flow_style)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
node_value = self.represent_data(item_value)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 492, in t_y
tag, data, cls, flow_style=representer.default_flow_style)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
return self.represent_mapping(tag, state, flow_style=flow_style)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
node_value = self.represent_data(item_value)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 111, in represent_data
node = self.yaml_representers[None](self, data)
File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 375, in represent_undefined
raise RepresenterError("cannot represent an object: %s" % data)
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
# same traceback here
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>>
答案 0 :(得分:1)
YAML 需要转储的是对象,并且最终是通过写出标量字符串来完成转储的。T1 是类而不是实例对象(T2 和 T3 也一样),问题正是由此产生的。您可以尝试把每个类引用包装成对象并为其使用标记,但在我看来这只会使事情复杂化。
归根结底就是要得到一个标量表示,即把类的字符串表示写入文件,因此您不妨直接调整 A(),让它转储字符串表示并在加载时读回:
import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString
Y = YAML(typ="safe", pure=True)
# ==============
@yaml_object(Y)
class A(object):
    """Wrapper around a class reference that is dumped as a tagged scalar.

    The referenced class is written to YAML as a ``!Aclass`` scalar holding
    its name, and looked up again by that name when loading.

    Attributes:
        type: the class (not an instance) handed to the constructor.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent an ``A`` instance as a ``!Aclass`` scalar.

        Args:
            representer: the representer performing the dump.
            node: the ``A`` instance being dumped (despite the parameter
                name, this is the data object, not a YAML node).

        Returns:
            The scalar node produced by ``representer.represent_scalar``.
        """
        referenced = node.type
        name = referenced.__name__
        # from_yaml resolves bare names through this module's globals. When
        # the bare name would not lead back to the same class, emit the
        # module-qualified form so from_yaml's dotted branch can find it.
        if globals().get(name) is not referenced:
            name = u'{}.{}'.format(referenced.__module__, name)
        return representer.represent_scalar(cls.yaml_tag, u'{}'.format(name))

    @classmethod
    def from_yaml(cls, constructor, node):
        """Rebuild an ``A`` instance from a ``!Aclass`` scalar node.

        Args:
            constructor: the constructor performing the load (unused).
            node: scalar node whose ``value`` is the class name, either
                bare or qualified as ``module.Name``.

        Returns:
            A new ``A`` wrapping the class the name resolves to.

        Raises:
            KeyError: if the module is not loaded, or the bare name is not
                in this module's globals.
            AttributeError: if the module has no attribute of that name.
        """
        # NOTE(review): the name comes straight from the YAML document, so
        # untrusted input can select any attribute of any loaded module.
        value = node.value
        if '.' in value:  # in some other module
            module_name, attr_name = value.rsplit('.', 1)
            # Only modules that are already imported are consulted; nothing
            # is imported on behalf of the document.
            return cls(getattr(sys.modules[module_name], attr_name))
        return cls(globals()[value])
class T1(object):
    """Referenced class that is not registered with the YAML instance."""


@yaml_object(Y)
class T2(object):
    """Referenced class registered through the ``yaml_object`` decorator."""


class T3(object):
    """Referenced class registered explicitly via ``register_class``."""


Y.register_class(T3)
# Dump an A wrapping each class, show the YAML text, then load it back and
# show the class that was recovered.
for klass in (T1, T2, T3, DoubleQuotedScalarString):
    print('----------------------')
    buf = StringIO()
    original = A(klass)
    print('s', original.type)
    Y.dump(original, buf)
    print(buf.getvalue())
    loaded = Y.load(buf.getvalue())
    print('d', loaded.type)
给出:
----------------------
s <class '__main__.T1'>
!Aclass T1
...
d <class '__main__.T1'>
----------------------
s <class '__main__.T2'>
!Aclass T2
...
d <class '__main__.T2'>
----------------------
s <class '__main__.T3'>
!Aclass T3
...
d <class '__main__.T3'>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...
d <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
如果 A() 上还有其他需要转储/加载的属性,您应该创建一个字典(其中 .type 已转换为字符串),然后转储/加载该字典。
我不认为您发现了一个真正的错误;您看到的是在出错之后继续使用同一实例带来的副作用:Y 对象(及其组件)此时处于未定义状态。捕获错误之后,不应再重用该 YAML() 实例。这一点应该在文档中写得更清楚。因此,如果您想在 for 循环中使用 try/except,就应该把 Y = YAML(typ='safe', pure=True) 移到 try 部分之内。
答案 1 :(得分:0)
作为对 Anthon 回答的补充:我开始修改 A.from_yaml 以使其更安全,但 _check_registered() 尚未覆盖所有情况。思路是:只允许加载 Y 允许为其加载实例的那些类型(即已注册的类型),并阻止其他所有类型。请将以下代码视为尚未完成的工作(WIP):
import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString
Y = YAML(typ="safe", pure=True)
# ==============
@yaml_object(Y)
class A(object):
    """Wrapper around a class reference, with a basic registration check.

    The referenced class is dumped as a ``!Aclass`` scalar holding its
    ``__name__``. On load the name is resolved and then passed through
    ``_check_registered`` before the wrapper is created.

    Attributes:
        type: the class (not an instance) handed to the constructor.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent ``node`` (the ``A`` instance being dumped) as a scalar."""
        class_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, class_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Resolve the class named by ``node.value``, check it, and wrap it."""
        module_name, dot, attr_name = node.value.rpartition('.')
        if dot:
            # Qualified name: the class lives in some other loaded module.
            resolved = getattr(sys.modules[module_name], attr_name)
        else:
            resolved = globals()[attr_name]
        cls._check_registered(resolved, constructor, node)
        return cls(resolved)

    @classmethod
    def _check_registered(cls, t, constructor, node):
        """Raise unless ``t`` carries a ``yaml_tag`` known to ``constructor``.

        Only the tag string is compared against
        ``constructor.yaml_constructors``; this is a very basic check and
        ideally should be made more secure.
        """
        if not hasattr(t, "yaml_tag"):
            raise Exception("Error: No attribute 'yaml_tag'!")
        if t.yaml_tag not in constructor.yaml_constructors:
            raise Exception("Error: Tag not registered!")
class T1(object):
    """Referenced class that has a tag but is never registered."""

    yaml_tag = u"!T1"


@yaml_object(Y)
class T2(object):
    """Referenced class registered through the ``yaml_object`` decorator."""

    yaml_tag = u"!T2"

    def __init__(self):
        print("Initializing...")


class T2_bad(object):
    """Malicious class impersonating T2 by reusing its tag."""

    # Note: this class itself is not registered.
    yaml_tag = u"!T2"

    def __init__(self):
        print("Evil code here!")


class T3(object):
    """Referenced class registered explicitly via ``register_class``."""

    yaml_tag = u"!T3"


Y.register_class(T3)
# Round-trip an A wrapping each class and instantiate the loaded type; any
# failure is printed and the loop moves on to the next class.
for klass in (T1, T2, T2_bad, T3, DoubleQuotedScalarString):
    try:
        print('----------------------')
        buf = StringIO()
        original = A(klass)
        print('s', original.type)
        Y.dump(original, buf)
        print(buf.getvalue())
        loaded = Y.load(buf.getvalue())
        print('d', loaded.type)
        # Left as a bare expression so an interactive session echoes it.
        loaded.type()
    except Exception as e:
        print(e)
返回:
----------------------
s <class '__main__.T1'>
!Aclass T1
...
Error: Tag not registered!
----------------------
s <class '__main__.T2'>
!Aclass T2
...
d <class '__main__.T2'>
Initializing...
<__main__.T2 object at 0x0000015B8EC82F60>
----------------------
s <class '__main__.T2_bad'>
!Aclass T2_bad
...
d <class '__main__.T2_bad'>
Evil code here!
<__main__.T2_bad object at 0x0000015B8EC82EF0>
----------------------
s <class '__main__.T3'>
!Aclass T3
...
d <class '__main__.T3'>
<__main__.T3 object at 0x0000015B8EC82E10>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...
Error: No attribute 'yaml_tag'!
可以看出,它仍然不安全(“邪恶代码”被执行了),也无法处理没有定义 yaml_tag 的类型。欢迎自行修改以解决这些问题。