自定义格式分析:配对列表中的键和值以构建字典

时间:2018-01-29 11:26:49

标签: python parsing

我正在尝试以自定义格式解析文件(来自max3ds,以防有人遇到同样的问题)。

我有一个如下所示的节点列表:

[['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]

我想把它变成一个字典。列表中以星号开头的每个元素都是其邻居的键,直到找到下一个键。还有嵌套元素。这可以使用自然python来完成吗?

这是我目前的尝试:


class AllCameras:

    def __init__(self, file_string, line_ending='\n'):
        self.file_string = file_string
        self.output_dict = dict()
        self.line_ending = line_ending

    def __find_node_name(self, node):

        node_lines = node.split(self.line_ending)

        name_lines = [
            line for line in node_lines if "*NODE_NAME" in line.strip()]
        leading_name = name_lines[0].strip()
        leading_name = leading_name.split(" \"")[-1].replace("\"", "")
        return leading_name

    def __get_nodes(self):
        return self.file_string.split("*CAMERAOBJECT ")

    def __get_node_values(self):
        parse_nodes = []
        for node in self.__get_nodes():
            open_brace_pos = node.find('{')
            if open_brace_pos >= 0:
                parse_nodes.append(pyparsing.nestedExpr(
                    '{', '}').parseString(node[open_brace_pos:]).asList())
        return parse_nodes

    def parse_node(self, node):

        for n in node:
            if type(n) is list:
                self.parse_node(n)

            if type(n) is str and n.startswith('*'):
                print n, "IS KEY"

    def parse(self):
        nodes = self.__get_node_values()

        for node in nodes:

            self.parse_node(node)

我可以隔离节点中的所有键,但我不确定将键值配对的最佳方法,特别是因为它是一个递归问题。

如果某个键旁边有多个值,则应将它们分组到一个列表中。例如"*INHERIT_POS", "0", "0", "0" -> "*INHERIT_POS":[0, 0, 0]

1 个答案:

答案 0 :(得分:2)

您可以使用递归来遍历嵌套列表以创建字典:

def get_dict(d):
   return {d[i]:d[i+1] if isinstance(d[i+1], str) else get_dict(d[i+1]) for i in range(0, len(d), 2)}
s = [['*NODE_NAME', '"30deg017"', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]
print(get_dict(s[0]))

输出:

{'*NODE_NAME': '"30deg017"', '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}

编辑:

关于可能需要分组的多个“运行”数据的事实,您可以尝试这样做:

s = [['*NODE_NAME', '"30deg017"', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*CAMERA_TYPE', 'Target', '*NODE_TM', ['*NODE_NAME', '"30deg017"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '1', '1', '1', '*TM_ROW0', '0.0939', '-0.9815', '-0.1668', '*TM_ROW1', '0.8819', '0.0043', '0.4714', '*TM_ROW2', '-0.4619', '-0.1913', '0.8660', '*TM_ROW3', '-230.9698', '-95.6709', '433.0127', '*TM_POS', '-230.9698', '-95.6709', '433.0127', '*TM_ROTAXIS', '-0.3314', '0.1476', '0.9319', '*TM_ROTANGLE', '1.5887', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.9925', '-0.1222', '-0.0000', '*TM_SCALEAXISANG', '0.1253'], '*NODE_TM', ['*NODE_NAME', '"30deg017.Target"', '*INHERIT_POS', '0', '0', '0', '*INHERIT_ROT', '0', '0', '0', '*INHERIT_SCL', '0', '0', '0', '*TM_ROW0', '1.0000', '0.0000', '0.0000', '*TM_ROW1', '0.0000', '1.0000', '0.0000', '*TM_ROW2', '0.0000', '0.0000', '1.0000', '*TM_ROW3', '0.0000', '0.0000', '0.0000', '*TM_POS', '0.0000', '0.0000', '0.0000', '*TM_ROTAXIS', '0.0000', '0.0000', '0.0000', '*TM_ROTANGLE', '0.0000', '*TM_SCALE', '1.0000', '1.0000', '1.0000', '*TM_SCALEAXIS', '0.0000', '0.0000', '0.0000', '*TM_SCALEAXISANG', '0.0000'], '*CAMERA_SETTINGS', ['*TIMEVALUE', '0', '*CAMERA_NEAR', '0.0000', '*CAMERA_FAR', '1000000.0000', '*CAMERA_FOV', '1.1752', '*CAMERA_TDIST', '500.0000']]]

import itertools
def create_dict(f):
   def wrapper(d):
      def make_d(s):
         return {s[i]:s[i+1] if all(not c.startswith('*') for c in s[i+1]) or isinstance(s[i+1], str) else make_d(s[i+1]) for i in range(0, len(s), 2)}
      return make_d(f(d))
   return wrapper

@create_dict
def group_data(d):
   data = [(a, list(b)) for a, b in itertools.groupby(d, key=lambda x:isinstance(x, list) or not x.startswith('*'))]
   return [b[0] if not a else b if all(isinstance(i, str) for i in b) and len(b) > 1 else b[0] for a, b in data]

print(group_data(s[0]))

输出:

{'*NODE_NAME': '"30deg017"', '*TM_ROW0': ['1.0000', '0.0000', '0.0000'], '*CAMERA_TYPE': 'Target', '*NODE_TM': {'*NODE_NAME': '"30deg017.Target"', '*INHERIT_POS': '0', '0': '0', '*INHERIT_ROT': '0', '*INHERIT_SCL': '0', '*TM_ROW0': '1.0000', '0.0000': '0.0000', '*TM_ROW1': '0.0000', '1.0000': '1.0000', '*TM_ROW2': '0.0000', '*TM_ROW3': '0.0000', '*TM_POS': '0.0000', '*TM_ROTAXIS': '0.0000', '*TM_ROTANGLE': '0.0000', '*TM_SCALE': '1.0000', '*TM_SCALEAXIS': '0.0000', '*TM_SCALEAXISANG': '0.0000'}, '*CAMERA_SETTINGS': {'*TIMEVALUE': '0', '*CAMERA_NEAR': '0.0000', '*CAMERA_FAR': '1000000.0000', '*CAMERA_FOV': '1.1752', '*CAMERA_TDIST': '500.0000'}}