Question

给定一个由元组列表组成的输入列表：

# Example 1 input: a list of inner lists of (text, tag) tuples.
# Only the second inner list contains a tuple tagged 'Punct'.
input_lol = [ [('x', 'AA'), ('y', 'AB')], 
              [('yy', 'AB'), ('..', 'Punct'), ('foo', 'ZZ')], 
              [('y', 'AB')] 
            ]

所需的输出是对内部的元组列表重新分组：第二个元素为 'Punct' 的元组被单独挑出来；同一内部列表中位于该 'Punct' 元组之前的元组，则并入它前面的那个分组。

例如，期望的输出：

# Example 1 expected output: a group (list of inner lists) before the
# 'Punct' tuple, the bare 'Punct' tuple itself, then a group after it.
desired_lolol = [ [[('x', 'AA'), ('y', 'AB')], [('yy', 'AB')]], 
                 ('..', 'Punct'), 
                [[('foo', 'ZZ')], [('y', 'AB')]] 
              ]

另一个例子：

# Example 2 input: two inner lists contain a 'Punct' tuple (the second
# and the fifth); the fifth one ends with it.
input_lol = [ [('x', 'AA'), ('y', 'AB')], 
              [('yy', 'AB'), ('..', 'Punct'), ('foo', 'ZZ')], 
              [('y', 'AB')], 
              [('ybar', 'CC')],
              [('z', 'NJ'), ('!', 'Punct')],
              [('pals', 'AJB')], 
            ]

# Example 2 expected output.
# NOTE(review): unlike example 1, the inner lists between the two 'Punct'
# tuples are not all wrapped in one group here ([('ybar', 'CC')] and
# [('z', 'NJ')] sit at the top level), and [('pals', 'AJB')] is not wrapped
# in a group at all -- presumably a bracket slip; confirm intended nesting.
desired_lolol = [ [[('x', 'AA'), ('y', 'AB')], [('yy', 'AB')]], 
                  ('..', 'Punct'), 
                  [[('foo', 'ZZ')], [('y', 'AB')]], [('ybar', 'CC')], [('z', 'NJ')],
                  ('!', 'Punct'),
                  [('pals', 'AJB')], 
              ]

另一个例子：

# Example 3 input: the second inner list contains two 'Punct' tuples with
# a non-'Punct' tuple between them.
input_lol = [ [('x', 'AA'), ('y', 'AB')], 
              [('yy', 'AB'), ('..', 'Punct'), ('bar', 'YY'), ('..', 'Punct'), ('foo', 'ZZ')], 
              [('y', 'AB')], 
              [('ybar', 'CC')],
              [('z', 'NJ'), ('!', 'Punct')],
              [('pals', 'AJB')], 
            ]

# Example 3 expected output: each 'Punct' tuple appears on its own, with the
# tuples between two 'Punct' tuples kept as their own list.
# NOTE(review): as in example 2, [('bar', 'YY')], [('ybar', 'CC')],
# [('z', 'NJ')] and [('pals', 'AJB')] are not wrapped in a group the way
# example 1's lists are -- confirm the intended nesting.
desired_lolol = [ [[('x', 'AA'), ('y', 'AB')], [('yy', 'AB')]], 
                  ('..', 'Punct'), 
                  [('bar', 'YY')], 
                  ('..', 'Punct'), 
                  [[('foo', 'ZZ')], [('y', 'AB')]], [('ybar', 'CC')], [('z', 'NJ')],
                  ('!', 'Punct'),
                  [('pals', 'AJB')], 
              ]

我试过了：

# First attempt: walk every inner list and split it at its 'Punct' tuples.
desired_lolol = []
output_inner_lol = []  # not used anywhere in this snippet

for inner_list_of_tuple in input_lol:
    if any(tag == 'Punct' for s, tag in inner_list_of_tuple):
        pending = []  # non-'Punct' tuples seen since the last 'Punct'
        for s, tag in inner_list_of_tuple:
            if tag == 'Punct':
                # Emit what preceded the punctuation (even when that list is
                # empty), then the punctuation tuple itself.
                desired_lolol.append(pending)
                desired_lolol.append((s,tag))
                pending = []
            else:
                pending.append((s,tag))
        if pending:
            # NOTE(review): this appends only the last (s, tag) pair as a
            # bare tuple rather than the `pending` list, which is why
            # ('foo', 'ZZ') shows up unwrapped in the printed result.
            desired_lolol.append((s,tag))
    else:
        # No punctuation in this inner list: keep it unchanged.
        desired_lolol.append(inner_list_of_tuple)

得到了这个：

[[('x', 'AA'), ('y', 'AB')],
 [('yy', 'AB')],
 ('..', 'Punct'),
 ('foo', 'ZZ'),
 [('y', 'AB')]]

但是 [('x', 'AA'), ('y', 'AB')] 和 [('yy', 'AB')] 并没有被归入同一个列表，所以我不得不再做一些后期处理：

# Pass 1: split every inner list at its 'Punct' tuples.
# `desired_lolol` ends up as a flat sequence of non-empty lists of tuples
# interleaved with bare 'Punct' tuples.
desired_lolol = []

for inner_list_of_tuple in input_lol:
    pending = []  # non-'Punct' tuples seen since the last 'Punct'
    for s, tag in inner_list_of_tuple:
        if tag == 'Punct':
            # Only emit the preceding run when there is one, so a leading
            # 'Punct' does not produce an empty list.
            if pending:
                desired_lolol.append(pending)
            desired_lolol.append((s, tag))
            pending = []
        else:
            pending.append((s, tag))
    if pending:
        # Emit the whole trailing run, not just its last tuple.
        desired_lolol.append(pending)

# Pass 2: gather the lists between two 'Punct' tuples into one group.
really_desired_lolol = []
pending = []  # lists collected since the last 'Punct'
for x in desired_lolol:
    if isinstance(x, tuple) and x[1] == 'Punct':
        if pending:
            really_desired_lolol.append(pending)
        really_desired_lolol.append(x)
        pending = []
    else:
        # append (not +=) keeps each list intact inside the group instead of
        # flattening its tuples into the group.
        pending.append(x)

if pending:
    really_desired_lolol.append(pending)

是否有更简单的方法来获取desired_lolol？

Answer 1

您可以对任意深度的数据使用递归解决方案：

def get_punc(current, punct_tag='Punct'):
    """Regroup a list of tuple lists around its punctuation tuples.

    Every ``(text, tag)`` tuple whose tag equals ``punct_tag`` is placed on
    its own in the result. The runs of items between two punctuation tuples
    are collected into one group (a list of lists): a run that starts an
    inner list joins the group that is still open, and a run that follows a
    punctuation tuple opens a new group.

    Args:
        current: list of inner lists of ``(text, tag)`` tuples. An inner list
            that itself contains lists is regrouped recursively first.
        punct_tag: tag value that marks a punctuation tuple.

    Returns:
        A new list whose elements are either groups (lists of lists) or bare
        punctuation tuples. Empty inner lists contribute nothing.
    """
    final_data = []

    def is_punct(elem):
        # Only 2-tuples can be punctuation; nested lists never are.
        return (isinstance(elem, tuple) and len(elem) == 2
                and elem[1] == punct_tag)

    def add_member(member):
        # A group stays open until a punctuation tuple is appended after it.
        if final_data and isinstance(final_data[-1], list):
            final_data[-1].append(member)
        else:
            final_data.append([member])

    for item in current:
        if any(isinstance(c, list) for c in item):
            item = get_punc(item, punct_tag)
        if not item:
            # Skip empty inner lists instead of ending the whole traversal.
            continue

        punct_positions = [i for i, elem in enumerate(item) if is_punct(elem)]
        if not punct_positions:
            add_member(item)
            continue

        # Split at every punctuation tuple, not only the first one.
        start = 0
        for pos in punct_positions:
            if pos > start:
                add_member(item[start:pos])
            final_data.append(item[pos])
            start = pos + 1
        if start < len(item):
            add_member(item[start:])

    return final_data

输出：

[[[('x', 'AA'), ('y', 'AB')], [('yy', 'AB')]], ('..', 'Punct'), [[('foo', 'ZZ')], [('y', 'AB')]]]
--------------------
[[[('x', 'AA'), ('y', 'AB')], [('yy', 'AB')]], ('..', 'Punct'), [[('foo', 'ZZ')], [('y', 'AB')], [('ybar', 'CC')], [('z', 'NJ')]], ('..', 'Punct'), [('pals', 'AJB')]]
--------------------

如何通过检查其中一个元组值重新组合元组列表列表中的元组？

1 个答案: