我有一个嵌套列表(列表的列表),其中每个子列表包含日志文件中报告的一行的值。每个数据集的值跨越报告中的n行,这些值将被插入到一个sqlite3表中。在同一数据集的各行之间穿插着其他数据集的行(数据集总数为num_datasets),这些数据集同样各跨越报告中的n行。最后,日志报告的最后x行将被插入到另一个sqlite3表中。
日志文件中的报告示例,其中n = 3且x = 3:
Line 1: 300, 0, 1, 798, 53928, 283
Line 2: 300, 0, 2, 210, 36160, 207
Line 3: 300, 0, 3, 78, 12620, 217
Line 4: 300, 0, 4, 67, 11364, 228
Line 5: 300, 0, 1, 34304, 0, 0
Line 6: 300, 0, 2, 27808, 0, 0
Line 7: 300, 0, 3, 30108, 0, 0
Line 8: 300, 0, 4, 31180, 0, 0
Line 9: 300, 0, 1, 0, 0, 0, 50
Line 10: 300, 0, 2, 0, 0, 0, 99
Line 11: 300, 0, 3, 0, 0, 0, 212
Line 12: 300, 0, 4, 0, 0, 0, 111
Line 13: 2287, 225372, 1324, 173308
Line 14: 0, 0, 0, 0
Line 15: 0, 2404
此数据将存储在以下列表中:
sourcelist = [['300', '0', '1', '798', '53928', '283'],
['300', '0', '2', '210', '36160', '207'],
['300', '0', '3', '78', '12620', '217'],
['300', '0', '4', '67', '11364', '228'],
['300', '0', '1', '34304', '0', '0'],
['300', '0', '2', '27808', '0', '0'],
['300', '0', '3', '30108', '0', '0'],
['300', '0', '4', '31180', '0', '0'],
['300', '0', '1', '0', '0', '0', '50'],
['300', '0', '2', '0', '0', '0', '99'],
['300', '0', '3', '0', '0', '0', '212'],
['300', '0', '4', '0', '0', '0', '111'],
['2287', '225372', '1324', '173308'],
['0', '0', '0', '0'],
['0', '2404']]
我正在寻找一种有效且快速的方法来组合相应的行,以便将它们插入到一个sqlite3表中。 我想出了以下python代码:
>>> sourcelist = [['300', '0', '1', '798', '53928', '283'],
... ['300', '0', '2', '210', '36160', '207'],
... ['300', '0', '3', '78', '12620', '217'],
... ['300', '0', '4', '67', '11364', '228'],
... ['300', '0', '1', '34304', '0', '0'],
... ['300', '0', '2', '27808', '0', '0'],
... ['300', '0', '3', '30108', '0', '0'],
... ['300', '0', '4', '31180', '0', '0'],
... ['300', '0', '1', '0', '0', '0', '50'],
... ['300', '0', '2', '0', '0', '0', '99'],
... ['300', '0', '3', '0', '0', '0', '212'],
... ['300', '0', '4', '0', '0', '0', '111'],
... ['2287', '225372', '1324', '173308'],
... ['0', '0', '0', '0'],
... ['0', '2404']]
>>> # n: report lines per dataset; x: trailing report lines destined for the other table.
>>> n = 3
>>> x = 3
>>> # Number of interleaved datasets among the non-trailing lines.
>>> num_datasets = (len(sourcelist) - x) // n
>>> # Chunk the dataset lines into consecutive tuples of num_datasets rows each.
>>> # NOTE(review): the print statements in this transcript suggest Python 2, where zip() returns a list; the repeated iteration and the indexing below depend on that.
>>> parts = zip(*[iter(sourcelist[:-x])]*num_datasets)
>>> combine_parts_per_dataset = []
>>> # Collect the i-th row of every chunk so that each dataset's rows become adjacent.
>>> for i in range(num_datasets):
... for e in parts:
... combine_parts_per_dataset.append(e[i])
...
>>> # Regroup the flat list into one n-tuple of rows per dataset.
>>> dataset_list = zip(*[iter(combine_parts_per_dataset)]*n)
>>> dataset_list[0]
(['300', '0', '1', '798', '53928', '283'], ['300', '0', '1', '34304', '0', '0'], ['300', '0', '1', '0', '0', '0', '50'])
>>> dataset_list[1]
(['300', '0', '2', '210', '36160', '207'], ['300', '0', '2', '27808', '0', '0'], ['300', '0', '2', '0', '0', '0', '99'])
>>> dataset_list[2]
(['300', '0', '3', '78', '12620', '217'], ['300', '0', '3', '30108', '0', '0'], ['300', '0', '3', '0', '0', '0', '212'])
>>> dataset_list[3]
(['300', '0', '4', '67', '11364', '228'], ['300', '0', '4', '31180', '0', '0'], ['300', '0', '4', '0', '0', '0', '111'])
现在我可以取出每个数据集中感兴趣的字段,以便将它们插入到sqlite3表中:
>>> for e in dataset_list:
... print e[0][0], e[0][2], e[0][4], e[1][3], e[2][6]
...
300 1 53928 34304 50
300 2 36160 27808 99
300 3 12620 30108 212
300 4 11364 31180 111
>>> print sourcelist[-3][0], sourcelist[-3][1], sourcelist[-2][2], sourcelist[-1][1]
2287 225372 0 2404
以下代码正在做同样的事情:
>>> x = 3
>>> n = 3
>>> num_datasets = (len(sourcelist) - x) // n
>>> l = []
>>> for s in range(0, n+1):
... for i in range(s, len(sourcelist) - x, num_datasets):
... l.append(sourcelist[i])
...
>>> dataset_list = zip(*[iter(l)]*n)
>>> for e in dataset_list:
... print e[0][0], e[0][2], e[0][4], e[1][3], e[2][6]
...
300 1 53928 34304 50
300 2 36160 27808 99
300 3 12620 30108 212
300 4 11364 31180 111
但是,我认为它可以更简单,更有效。有什么建议吗? 感谢。
答案 0 :(得分:0)
以下是我使用itemgetter和groupby的做法:
>>> from operator import itemgetter
>>> from itertools import groupby
>>> sourcelist = [['300', '0', '1', '798', '53928', '283'],
... ['300', '0', '2', '210', '36160', '207'],
... ['300', '0', '3', '78', '12620', '217'],
... ['300', '0', '4', '67', '11364', '228'],
... ['300', '0', '1', '34304', '0', '0'],
... ['300', '0', '2', '27808', '0', '0'],
... ['300', '0', '3', '30108', '0', '0'],
... ['300', '0', '4', '31180', '0', '0'],
... ['300', '0', '1', '0', '0', '0', '50'],
... ['300', '0', '2', '0', '0', '0', '99'],
... ['300', '0', '3', '0', '0', '0', '212'],
... ['300', '0', '4', '0', '0', '0', '111'],
... ['2287', '225372', '1324', '173308'],
... ['0', '0', '0', '0'],
... ['0', '2404']]
>>> # Here n is the number of trailing report lines to split off (the question calls this x).
>>> n = 3
>>> n_list = []
>>> # pop(-1) removes rows from the end of sourcelist in place, so n_list holds them in reverse order.
>>> for last in range(n):
... n_list.append(sourcelist.pop(-1))
...
>>> print(n_list)
[['0', '2404'], ['0', '0', '0', '0'], ['2287', '225372', '1324', '173308']]
>>> print(sourcelist)
[['300', '0', '1', '798', '53928', '283'],
['300', '0', '2', '210', '36160', '207'],
['300', '0', '3', '78', '12620', '217'],
['300', '0', '4', '67', '11364', '228'],
['300', '0', '1', '34304', '0', '0'],
['300', '0', '2', '27808', '0', '0'],
['300', '0', '3', '30108', '0', '0'],
['300', '0', '4', '31180', '0', '0'],
['300', '0', '1', '0', '0', '0', '50'],
['300', '0', '2', '0', '0', '0', '99'],
['300', '0', '3', '0', '0', '0', '212'],
['300', '0', '4', '0', '0', '0', '111']]
>>> # Column index 2 appears to hold the dataset number in the sample rows ('1'..'4').
>>> groupby_column = 2
>>> # Keys are strings, so the ordering is lexicographic; sorted() is stable, so rows sharing a key keep their original relative order.
>>> s = sorted(sourcelist,key=itemgetter(groupby_column))
>>> print(s)
[['300', '0', '1', '798', '53928', '283'],
['300', '0', '1', '34304', '0', '0'],
['300', '0', '1', '0', '0', '0', '50'],
['300', '0', '2', '210', '36160', '207'],
['300', '0', '2', '27808', '0', '0'],
['300', '0', '2', '0', '0', '0', '99'],
['300', '0', '3', '78', '12620', '217'],
['300', '0', '3', '30108', '0', '0'],
['300', '0', '3', '0', '0', '0', '212'],
['300', '0', '4', '67', '11364', '228'],
['300', '0', '4', '31180', '0', '0'],
['300', '0', '4', '0', '0', '0', '111']]
>>> # groupby() only merges adjacent rows with equal keys, which is why s was sorted by the same column first.
>>> for key, group in groupby(s, lambda x: x[groupby_column]):
... print(key,list(group))
...
('1', [['300', '0', '1', '798', '53928', '283'], ['300', '0', '1', '34304', '0', '0'], ['300', '0', '1', '0', '0', '0', '50']])
('2', [['300', '0', '2', '210', '36160', '207'], ['300', '0', '2', '27808', '0', '0'], ['300', '0', '2', '0', '0', '0', '99']])
('3', [['300', '0', '3', '78', '12620', '217'], ['300', '0', '3', '30108', '0', '0'], ['300', '0', '3', '0', '0', '0', '212']])
('4', [['300', '0', '4', '67', '11364', '228'], ['300', '0', '4', '31180', '0', '0'], ['300', '0', '4', '0', '0', '0', '111']])
之后,你可以根据感兴趣的数据,按另一列再做一次排序(sorted)和/或分组(groupby)。