在迭代复杂字典时替换其中的字节值

时间:2016-04-13 14:11:44

标签: python list dictionary reference assign

如果有人能帮我解决这个小问题,我将不胜感激。我想迭代复杂的Python数据结构(dict,list,tuple,strings,bytes,...)并用base64编码版本替换所有字节(字节字符串)。这是将原始复杂数据结构转换为JSON(例如json.dumps(complex_data_structure))所必需的,因为JSON不支持二进制数据。我的代码已经做了正确的事情,但有一个特定于Python的问题。这是我的代码:

import sys
import json
import base64


def iter_object(obj):
    """Inspect ``obj`` and dispatch on its exact type.

    Tuples, dicts and lists are handed to ``iter_tuple``, ``iter_dict`` and
    ``iter_list`` respectively. For a ``bytes`` value the base64 text is
    computed, but it is only bound to the local name ``obj``: the container
    holding the original value is not updated and nothing is returned.
    Every other type is ignored.
    """
    kind = type(obj)  # exact type match; subclasses are not dispatched

    if kind is tuple:
        iter_tuple(obj)
        return
    if kind is dict:
        iter_dict(obj)
        return
    if kind is list:
        iter_list(obj)
        return

    # Non-container values: only bytes is of interest.
    if kind is bytes:
        # THE PROBLEM: this rebinds the local name only, so the encoded
        # string is discarded when the function returns.
        obj = base64.b64encode(obj).decode("ascii")
    # Any other data type is deliberately left untouched.


def iter_tuple(obj_tuple):
    """Call ``iter_object`` on each element of ``obj_tuple``, in order."""
    for element in obj_tuple:
        iter_object(element)


def iter_list(obj_list):
    """Call ``iter_object`` on each item of ``obj_list``, in order."""
    for item in obj_list:
        iter_object(item)


def iter_dict(obj_dict):
    """Call ``iter_object`` on each value of ``obj_dict``; keys are not visited."""
    for value in obj_dict.values():
        iter_object(value)


def main():
    """Build a nested sample structure, run ``iter_object`` on it and print it as JSON.

    The sample mixes lists, tuples and dicts and holds ``bytes`` values at
    several nesting levels. ``json.dumps`` is called on the structure after
    ``iter_object`` has walked it; any ``bytes`` value still present at that
    point makes ``json.dumps`` raise ``TypeError``.

    Returns:
        0, used by the caller as the process exit status.
    """
    # Assemble the structure from the innermost container outwards.
    deepest = (1, [b"\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43"])
    blu = [
        1,
        3,
        b"\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42",
        deepest,
    ]
    blub = {
        "bla": b"\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41",
        "ble": {"blu": blu},
    }
    test_dict = {
        "foo": [1, 3, 4, 5, 6, 7],
        "bar": 1,
        "baz": (1, 2),
        "blub": blub,
    }

    iter_object(test_dict)

    serialized = json.dumps(test_dict)
    print(serialized)

    return 0


if __name__ == "__main__":
    # Run the demo and pass main()'s return value on as the exit status.
    exit_status = main()
    sys.exit(exit_status)

问题出在 obj = base64.b64encode(obj).decode("ascii") 这一行,因为它操作的是副本而不是引用(用C++的说法)。我的问题是:有没有办法让上面的代码正常工作?

非常感谢!

2 个答案:

答案 0 :(得分:1)

操作的是副本?并不是。实际情况是,该函数返回一个新值,而不是就地修改原对象,因为字节字符串是不可变的。Python中没有按值传递或按引用传递的概念:变量不是存放对象的盒子,而是某个对象的名称。对象可以分为:

  1. 可变 - list、set、dict
  2. 不可变 - tuple、str、bytes

因此,如果函数作用于不可变(immutable)对象,就必须返回另一个对象。这种方式下内存使用是经过优化的。像Haskell这样的语言也是这样做的。

答案 1 :(得分:0)

我找到了解决问题的方法:

import sys
import json
import base64

class BinaryToBase64Encoder(json.JSONEncoder):
    """JSON encoder that serializes binary values as base64 text.

    JSON has no binary type, so bytes-like values that ``json`` cannot
    serialize on its own are emitted as ASCII base64 strings. Use it via
    ``json.dumps(obj, cls=BinaryToBase64Encoder)``.
    """

    def default(self, o):
        """Return a JSON-serializable replacement for ``o``.

        Args:
            o: An object the standard encoder could not serialize.

        Returns:
            The base64 encoding of ``o`` as a ``str`` when ``o`` is
            ``bytes``, ``bytearray`` or ``memoryview``.

        Raises:
            TypeError: Raised by ``json.JSONEncoder.default`` for any other
                unsupported type.
        """
        # b64encode accepts any bytes-like object, so the mutable and
        # view variants are handled the same way as plain bytes.
        if isinstance(o, (bytes, bytearray, memoryview)):
            return base64.b64encode(o).decode("ascii")
        return super().default(o)


def main():
    """Serialize a nested sample structure containing bytes to JSON and print it.

    The sample mixes lists, tuples and dicts and holds ``bytes`` values at
    several nesting levels. It is passed to ``json.dumps`` with
    ``BinaryToBase64Encoder`` as the encoder class.

    Returns:
        0, used by the caller as the process exit status.
    """
    # Assemble the structure from the innermost container outwards.
    deepest = (1, [b"\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43\x43"])
    blu = [
        1,
        3,
        b"\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42",
        deepest,
    ]
    blub = {
        "bla": b"\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41\x41",
        "ble": {"blu": blu},
    }
    test_dict = {
        "foo": [1, 3, 4, 5, 6, 7],
        "bar": 1,
        "baz": (1, 2),
        "blub": blub,
    }

    serialized = json.dumps(test_dict, cls=BinaryToBase64Encoder)
    print(serialized)

    return 0


if __name__ == "__main__":
    # Run the demo and pass main()'s return value on as the exit status.
    exit_status = main()
    sys.exit(exit_status)

JSON输出为:

{
    "foo": [1, 3, 4, 5, 6, 7],
    "baz": [1, 2],
    "bar": 1,
    "blub": {
        "ble": {
            "blu": [
                1,
                3,
                "QkJCQkJCQkJCQkJCQkJCQkJCQkI=",
                [
                    1,
                    ["Q0NDQ0NDQ0NDQ0NDQ0NDQ0NDQ0M="]
                ]
            ]
        },
        "bla": "QUFBQUFBQUFBQUFBQUFBQUFBQUE="
    }
}