# Raw input rows. Every field is a string: index 9 holds a
# 'Total=.. (n)=..;' summary, and the items after index 11 are number
# strings interleaved with empty strings.
UNCLEANED = [
    [
        '1 - 32/', 'Highway', '403',
        '43.167233', '-80.275567',
        '1965', '2014', '2009', '4',
        'Total=64 (1)=12;(2)=19;(3)=21;(4)=12;',
        '65', '04/13/2012',
        '72.3', '', '72.3', '', '69.5', '', '70', '', '70.3', '', '70.5', '',
        '70.7', '72.9', '',
    ],
    [
        '1 - 43/', 'WEST', '403',
        '43.164531', '-80.251582',
        '1963', '2014', '2007', '4',
        'Total=60.4 (1)=12.2;(2)=18;(3)=18;(4)=12.2;',
        '61', '04/13/2012',
        '71.5', '', '71.5', '', '68.1', '', '69', '', '69.4', '', '69.4', '',
        '70.3', '73.3', '',
    ],
    [
        '2 - 4/', 'STOKES', '6',
        '45.036739', '-81.33579',
        '1958', '2013', '', '1',
        'Total=16 (1)=16;',
        '18.4', '08/28/2013',
        '85.1', '85.1', '', '67.8', '', '67.4', '', '69.2', '70', '70.5', '',
        '75.1', '', '90.1', '',
    ],
]
上面是一个包含三个子列表的列表的原始(未清理)版本……我需要把它转换成下面这种更整洁的版本:
# Target shape for the cleaned data: 13 items per row, with the two
# multi-value fields (index 9 and index 12) turned into lists of floats.
CLEANED = [
    [
        1, 'Highway', '403', 43.167233, -80.275567,
        '1965', '2014', '2009', 4,
        [12.0, 19.0, 21.0, 12.0], 65.0, '04/13/2012',
        [72.3, 69.5, 70.0, 70.3, 70.5, 70.7, 72.9],
    ],
    [
        2, 'WEST', '403', 43.164531, -80.251582,
        '1963', '2014', '2007', 4,
        [12.2, 18.0, 18.0, 12.2], 61.0, '04/13/2012',
        [71.5, 68.1, 69.0, 69.4, 69.4, 70.3, 73.3],
    ],
    [
        3, 'STOKES', '6', 45.036739, -81.33579,
        '1958', '2013', '', 1,
        [16.0], 18.4, '08/28/2013',
        [85.1, 67.8, 67.4, 69.2, 70.0, 70.5, 75.1, 90.1],
    ],
]
我发现的规律是:对于未清理版本的 index[0],只保留第一个字符;index[1]、[2] 保持不变;将 index[3] 和 [4] 转换为数字(按上面的 CLEANED 示例应为 float,而不是 int)……
然后到了 index[9],我必须忽略总数(Total),只提取其余的数字并放入一个子列表中……
最后一件事是将日期之后的数字放入子列表中,并且还要排除第一个数字。
我不清楚该如何循环处理,直到把 UNCLEANED 中的所有内容都“清理”完。
如果 UNCLEANED 不只有这三个元素,该怎么办?如果它很长,我该如何遍历它?
非常感谢您的帮助
答案 0 :(得分:0)
下面是实现上述转换的解决方案,它只是一个简单的 for 循环:
# Raw input rows. Every field is a string: index 9 holds a
# 'Total=.. (n)=..;' summary, and the items after index 11 are number
# strings interleaved with empty strings.
UNCLEANED = [
    [
        '1 - 32/', 'Highway', '403',
        '43.167233', '-80.275567',
        '1965', '2014', '2009', '4',
        'Total=64 (1)=12;(2)=19;(3)=21;(4)=12;',
        '65', '04/13/2012',
        '72.3', '', '72.3', '', '69.5', '', '70', '', '70.3', '', '70.5', '',
        '70.7', '72.9', '',
    ],
    [
        '1 - 43/', 'WEST', '403',
        '43.164531', '-80.251582',
        '1963', '2014', '2007', '4',
        'Total=60.4 (1)=12.2;(2)=18;(3)=18;(4)=12.2;',
        '61', '04/13/2012',
        '71.5', '', '71.5', '', '68.1', '', '69', '', '69.4', '', '69.4', '',
        '70.3', '73.3', '',
    ],
    [
        '2 - 4/', 'STOKES', '6',
        '45.036739', '-81.33579',
        '1958', '2013', '', '1',
        'Total=16 (1)=16;',
        '18.4', '08/28/2013',
        '85.1', '85.1', '', '67.8', '', '67.4', '', '69.2', '70', '70.5', '',
        '75.1', '', '90.1', '',
    ],
]
# Function that performs the conversion described above.
def cleanElement(elem):
    """Return a cleaned copy of one raw row; ``elem`` itself is not modified.

    Conversions, by position in ``elem``:

    * 0       -- keep only the text before ``' - '`` (result stays a string)
    * 3, 4    -- ``float``
    * 8       -- ``int``
    * 9       -- ``'Total=.. (1)=a;(2)=b;'`` becomes ``[a, b]`` as floats;
                 the ``Total`` part is dropped
    * 10      -- ``float``
    * 13 on   -- every non-empty string becomes a float; they are collected
                 into one list stored as the last item of the result (the
                 value at index 12 is skipped)

    Positions 1, 2, 5, 6, 7 and 11 are copied unchanged.

    A missing or malformed numeric field raises ``IndexError`` or
    ``ValueError``.
    """
    # Work on a copy: assigning into ``elem`` and popping from it corrupted
    # the caller's data and made a second pass over the same rows fail.
    row = list(elem[:12])
    row[0] = row[0].split(' - ')[0]
    row[3] = float(row[3])
    row[4] = float(row[4])
    row[8] = int(row[8])
    # 'Total=64 (1)=12;(2)=19;' -> '(1)=12;(2)=19;' -> ['(1)=12', '(2)=19', '']
    parts = elem[9].split(' ')[1].split(';')
    row[9] = [float(part.split('=')[1]) for part in parts if part]
    row[10] = float(row[10])
    row.append([float(value) for value in elem[13:] if value])
    return row
# Function that loops in the uncleaned list and performs the conversion for each element.
def cleanList(uncleaned):
    """Return a new list with ``cleanElement`` applied to each row, in order."""
    return [cleanElement(row) for row in uncleaned]
# Clean every row, then print the result one row per line.
cleaned = cleanList(UNCLEANED)
for cleaned_row in cleaned:
    print(cleaned_row)
输出:
['1', 'Highway', '403', 43.167233, -80.275567, '1965', '2014', '2009', 4, [12.0, 19.0, 21.0, 12.0], 65.0, '04/13/2012', [72.3, 69.5, 70.0, 70.3, 70.5, 70.7, 72.9]]
['1', 'WEST', '403', 43.164531, -80.251582, '1963', '2014', '2007', 4, [12.2, 18.0, 18.0, 12.2], 61.0, '04/13/2012', [71.5, 68.1, 69.0, 69.4, 69.4, 70.3, 73.3]]
['2', 'STOKES', '6', 45.036739, -81.33579, '1958', '2013', '', 1, [16.0], 18.4, '08/28/2013', [85.1, 67.8, 67.4, 69.2, 70.0, 70.5, 75.1, 90.1]]
答案 1 :(得分:0)
这是使用函数集合清理列表列表的另一种方法。
棘手的是要考虑列表最后一部分的切片情况,其中必须将交替的字符串收集到数组中并过滤掉空字符串。
我假设每个子列表尾部前 3 项中的非空字符串就是所需的值。arrange 负责把这前 3 项整理成一致的顺序后返回。
恕我直言,这种方式的优势在于,如果您想对任何特定项目做任何不同的事情,更改代码会变得更加容易。
import itertools as it
def get_first_char_int(item):
    """Return the first character of ``item`` converted with ``int``.

    Only the very first character is used, so ``'1 - 32/'`` gives ``1``.
    """
    (leading, *_rest) = item
    return int(leading)
def identity(item):
    """Return ``item`` unchanged; used for columns that need no conversion."""
    return item
def get_floats(item):
    """Return the per-part values of a ``'Total=.. (1)=a;(2)=b;'`` string.

    The leading ``Total=..`` token is discarded; each remaining
    ``(n)=value;`` entry contributes ``float(value)``, in order. An empty
    string gives ``[]``.
    """
    # split() with no argument treats any run of whitespace as one separator,
    # so the Total token is dropped whether one or several spaces follow it.
    # (The former split(' ')[2:] returned [] for single-space input.)
    entries = ''.join(item.split()[1:])
    # '(1)=12;(2)=19;' -> ['(1)', '12;(2)', '19;']; drop the first piece,
    # then keep the text before each ';'.
    tokens = entries.split('=')[1:]
    return [float(token.split(';')[0]) for token in tokens]
def get_float(item):
    """Convert ``item`` with ``float``; a falsy ``item`` is returned as is."""
    if not item:
        return item
    return float(item)
# Raw input rows. Every field is a string: index 9 holds a
# 'Total=.. (n)=..;' summary, and the items after index 11 are number
# strings interleaved with empty strings.
UNCLEANED = [
    [
        '1 - 32/', 'Highway', '403',
        '43.167233', '-80.275567',
        '1965', '2014', '2009', '4',
        'Total=64 (1)=12;(2)=19;(3)=21;(4)=12;',
        '65', '04/13/2012',
        '72.3', '', '72.3', '', '69.5', '', '70', '', '70.3', '', '70.5', '',
        '70.7', '72.9', '',
    ],
    [
        '1 - 43/', 'WEST', '403',
        '43.164531', '-80.251582',
        '1963', '2014', '2007', '4',
        'Total=60.4 (1)=12.2;(2)=18;(3)=18;(4)=12.2;',
        '61', '04/13/2012',
        '71.5', '', '71.5', '', '68.1', '', '69', '', '69.4', '', '69.4', '',
        '70.3', '73.3', '',
    ],
    [
        '2 - 4/', 'STOKES', '6',
        '45.036739', '-81.33579',
        '1958', '2013', '', '1',
        'Total=16 (1)=16;',
        '18.4', '08/28/2013',
        '85.1', '85.1', '', '67.8', '', '67.4', '', '69.2', '70', '70.5', '',
        '75.1', '', '90.1', '',
    ],
]
# One converter per leading column, matched to the row items by position.
functions = [
    get_first_char_int,
    identity, identity,
    float, float,
    identity, identity, identity,
    int,
    get_floats,
    float,
    identity,
]
# Number of leading columns that have a dedicated converter.
end = len(functions)
# Single-target unpacking of the set of row lengths: this raises ValueError
# unless every row in UNCLEANED has the same length.
(item_length,) = {len(row) for row in UNCLEANED}
# Number of trailing items per row that have no dedicated converter.
extra_count = item_length - end
# Give every trailing position the get_float converter.
functions.extend([get_float] * extra_count)

# Lazily convert the first `end` items of each row: one result list per row.
head_results = (
    [convert(value) for convert, value in zip(functions[:end], row[:end])]
    for row in UNCLEANED
)
def arrange(items):
    """Overwrite the first three entries of ``items`` with ``[first, '', first]``.

    ``items`` is changed in place and also returned, so rows whose tail
    starts ``[x, '', x]`` and rows whose tail starts ``[x, x, '']`` end up
    with the same leading layout.
    """
    (lead, *_rest) = items
    items[:3] = (lead, '', lead)
    return items
#
# Apply arrange to the tail of each sublist and flatten all tails into ONE
# shared iterator; the per-row zips below each draw their share from it.
collection_ = it.chain.from_iterable(
    arrange(collection[end:]) for collection in UNCLEANED
)

# Handle items starting with alternating strings and empty strings.
# zip() stops as soon as functions[end:] is exhausted, so each pass takes
# len(functions[end:]) items from collection_; islice(..., 2, ...) then skips
# the first two pairs of that tail.
tail_results = (
    [f(item)
     for f, item
     in it.islice(zip(functions[end:], collection_), 2, item_length)]
    for collection in UNCLEANED
)

# Pair each converted head with its tail values. get_float passes '' through
# unchanged, so compare against '' explicitly: a plain truthiness test would
# also discard a legitimate 0.0 value.
results = [[head, [item for item in tail if item != '']]
           for head, tail in zip(head_results, tail_results)]

for item in results:
    print(item)
输出:
[[1, 'Highway', '403', 43.167233, -80.275567, '1965', '2014', '2009', 4, [12.0, 19.0, 21.0, 12.0], 65.0, '04/13/2012'], [72.3, 69.5, 70.0, 70.3, 70.5, 70.7, 72.9]]
[[1, 'WEST', '403', 43.164531, -80.251582, '1963', '2014', '2007', 4, [12.2, 18.0, 18.0, 12.2], 61.0, '04/13/2012'], [71.5, 68.1, 69.0, 69.4, 69.4, 70.3, 73.3]]
[[2, 'STOKES', '6', 45.036739, -81.33579, '1958', '2013', '', 1, [16.0], 18.4, '08/28/2013'], [85.1, 67.8, 67.4, 69.2, 70.0, 70.5, 75.1, 90.1]]
答案 2 :(得分:0)
创建一个 clean_row(row) 函数,并在其中调用所有的“清理规则”。然后就可以执行 CLEANED = [clean_row(uncleaned) for uncleaned in UNCLEANED]。