我有一个嵌套列表,需要验证其中的数据先按第 2 列(工单的严重性 Severity)排序,然后在每个 Severity 组内按日期逆序(从新到旧)排序。
我认为用 groupby 已经很接近正确的解法了,但不知为何,groupby 会丢掉每个组中的第一个元素。喝再多咖啡,我也想不明白这是为什么。
# Sample ticket records used by the script below, one tuple per case.
# The script reads only two positions: index 1 is the Severity (grouping /
# primary sort key) and index 3 is the timestamp (sorted newest-first within
# a Severity). Index 0 appears to be a running case number; the remaining
# fields look like a ticket id, a status and a user name, but nothing in this
# example depends on them.
# NOTE: the literal needs `from datetime import datetime`, which is not shown
# in this snippet.
all_case_data = [
('01', 2, '2253415', datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1'),
('02', 3, '3824819', datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'),
('04', 3, '3824707', datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'),
('05', 3, '3824549', datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'),
('06', 3, '3824061', datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'),
('07', 3, '3822989', datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'),
('08', 3, '3822385', datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'),
('09', 3, '3822377', datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'),
('10', 3, '3820965', datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'),
('11', 3, '3820963', datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'),
('12', 3, '3767961', datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'),
('13', 3, '3767841', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('14', 3, '3767839', datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'),
('15', 3, '3767837', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('16', 3, '3767835', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('17', 3, '3767833', datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'),
('18', 3, '3767831', datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'),
('19', 3, '3767803', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('20', 3, '3767809', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('21', 3, '3767801', datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'),
('22', 3, '3767807', datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'),
('23', 3, '3767805', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('24', 3, '2257019', datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'),
('25', 3, '2256663', datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'),
('26', 3, '2252573', datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'),
('27', 3, '2252571', datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'),
('28', 3, '2252569', datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'),
('29', 3, '2252531', datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'),
('30', 3, '2252533', datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'),
('31', 3, '2252535', datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'),
('32', 3, '2252539', datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'),
('33', 3, '2252567', datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'),
('34', 3, '2252565', datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'),
('35', 3, '2252563', datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'),
('36', 3, '2252537', datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'),
('37', 3, '1168027', datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1'),
('38', 4, '3824817', datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'),
('39', 4, '3824717', datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'),
('40', 4, '3824709', datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'),
('41', 4, '3824065', datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'),
('42', 4, '3824063', datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1'),
]
from itertools import groupby
import operator
# Script under discussion: group the cases by Severity (index 1) and, inside
# each group, order them newest-first by timestamp (index 3). Python 2 syntax.
# NOTE(review): the indentation of the loops was lost in this copy. Judging by
# the printed output, the inner `for` is nested in the outer one and
# groups.append(...) sits inside the inner `for` -- confirm against the
# original post.
# Create new list composed of initial list, but sorted by Severity
all_case_data_sorted_by_severity = sorted(all_case_data, key=operator.itemgetter(1))
# Leveraging groupby, create a new list composed of sorted data, sorted in reverse chronological order within each group
groups = [] # Contains list of sub-lists grouped by the unique key (Severity)
uniquekeys = [] # Contains concise list of all of the unique keys (Severity)
# Note: groupby requires the list to first be sorted by the key (Severity) so use all_case_data_sorted_by_severity
for key, group in groupby(all_case_data_sorted_by_severity, operator.itemgetter(1)): # x[1] = Severity
# NOTE(review): `group` is a one-shot iterator. This loop binds its first item
# to `thing` (which is never used), so the list(group) below only sees the
# remaining items; that is why the first case of every Severity is missing
# from `groups`. Sorting `group` directly, without this loop, keeps all items.
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
print "Uniquekeys:", uniquekeys
print "Groups:", groups
这是输出。请注意,groups 中少了三个元素:每个唯一键对应的第一个元素都丢失了,即严重性 2 的第一个案例(案例#01)、严重性 3 的第一个案例(案例#02)和严重性 4 的第一个案例(案例#38)。
Uniquekeys: [2, 3, 4]
Groups: [[], [('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 
'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]
答案 0 :(得分:0)
“ ...我需要验证数据是按类别显示的...... ”——要做到这一点,您不需要 groupby,只需要对数据进行排序:
# Single sort with a composite key: Severity ascending first, then the distance
# from datetime.max to the timestamp. That distance grows as the timestamp gets
# older, so an ascending sort on it puts the newest case first within each
# Severity. Assumes `datetime` is the datetime class (from datetime import
# datetime), as used when building all_case_data.
all_case_data_sorted = sorted(
    all_case_data,
    key=lambda case: (case[1], datetime.max - case[3]),
)
如果由于某种原因,您无法像上面那样通过对键取反来实现逆序,则可以按每个键分别排序一次(先按次要键,再按主要键)。这样做是可行的,因为 Python 的排序是稳定的:
# Two stable passes over a copy of the data: order by the secondary key first
# (timestamp, newest first), then by the primary key (Severity). Because the
# sort is stable, cases with equal Severity keep the newest-first order
# established by the first pass.
all_case_data_sorted = list(all_case_data)
all_case_data_sorted.sort(key=lambda case: case[3], reverse=True)
all_case_data_sorted.sort(key=lambda case: case[1])
答案 1 :(得分:0)
如果循环体中根本没有用到迭代变量,这通常说明你的逻辑有问题:
# Quoted from the question: `thing` is never referenced in the loop body, so
# iterating `group` here only serves to consume its first element.
for thing in group:
groups.append(sorted(list(group), key=operator.itemgetter(3), reverse=True)) # Within each group, sort by date
uniquekeys.append(key)
为什么要遍历 group?如果删掉那个循环,看起来你就能得到想要的结果。这也正是你丢失第一个元素的原因:group 是一次性的迭代器,for 循环把第一个元素绑定到 thing 时就已经把它消耗掉了,之后的 list(group) 只能拿到剩余的元素。去掉该循环后,我得到(为便于阅读调整了缩进):
Uniquekeys: [2, 3, 4]
Groups: [[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')],
[('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'),
('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), [and so on]
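[旁白:使用 pandas 通常可以更轻松地处理表格数据;按严重性升序、日期降序排序可以写成 data.sort(["severity", "date"], ascending=[True, False])(在较新版本的 pandas 中,DataFrame.sort 已被移除,应改用 data.sort_values(["severity", "date"], ascending=[True, False])),依此类推。]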
答案 2 :(得分:0)
作为此类问题的另一种更 Pythonic 的解法,您可以使用 collections.defaultdict:
>>> from collections import defaultdict
>>> d=defaultdict(list)
>>> for i in all_case_data :
... d[i[1]].append(i)
...
>>> d.keys()
[2, 3, 4]
>>> d.values()
[[('01', 2, '2253415', datetime.datetime(2015, 1, 14, 8, 8, 18), 'New', 'user1')], [('02', 3, '3824819', datetime.datetime(2015, 4, 9, 14, 38, 54), 'New', 'user1'), ('03', 3, '3824715', datetime.datetime(2015, 4, 9, 6, 6, 7), 'New', 'user1'), ('04', 3, '3824707', datetime.datetime(2015, 4, 9, 5, 55, 27), 'New', 'user1'), ('05', 3, '3824549', datetime.datetime(2015, 4, 8, 6, 7, 7), 'New', 'user1'), ('06', 3, '3824061', datetime.datetime(2015, 4, 7, 15, 31, 26), 'Updated', 'user1'), ('07', 3, '3822989', datetime.datetime(2015, 3, 24, 5, 29, 50), 'New', 'user1'), ('08', 3, '3822385', datetime.datetime(2015, 3, 20, 6, 2, 44), 'New', 'user1'), ('09', 3, '3822377', datetime.datetime(2015, 3, 20, 5, 54, 33), 'New', 'user1'), ('10', 3, '3820965', datetime.datetime(2015, 3, 6, 18, 52, 43), 'New', 'user1'), ('11', 3, '3820963', datetime.datetime(2015, 3, 6, 18, 50, 10), 'New', 'user1'), ('12', 3, '3767961', datetime.datetime(2015, 2, 18, 9, 9, 12), 'Updated', 'user1'), ('13', 3, '3767841', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('14', 3, '3767839', datetime.datetime(2014, 11, 20, 6, 27, 16), 'New', 'user1'), ('15', 3, '3767837', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('16', 3, '3767835', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('17', 3, '3767833', datetime.datetime(2014, 11, 20, 6, 0, 25), 'New', 'user1'), ('18', 3, '3767831', datetime.datetime(2014, 11, 20, 5, 57, 11), 'New', 'user1'), ('19', 3, '3767803', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('20', 3, '3767809', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('21', 3, '3767801', datetime.datetime(2014, 11, 20, 6, 18, 10), 'New', 'user1'), ('22', 3, '3767807', datetime.datetime(2014, 11, 20, 5, 50, 40), 'New', 'user1'), ('23', 3, '3767805', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('24', 3, '2257019', datetime.datetime(2015, 2, 10, 8, 36, 13), 'New', 'user1'), ('25', 3, '2256663', 
datetime.datetime(2015, 2, 8, 18, 47, 48), 'New', 'user1'), ('26', 3, '2252573', datetime.datetime(2014, 11, 20, 6, 32, 12), 'Pending', 'user1'), ('27', 3, '2252571', datetime.datetime(2014, 11, 20, 6, 27, 31), 'Pending', 'user1'), ('28', 3, '2252569', datetime.datetime(2014, 11, 20, 6, 21, 24), 'Pending', 'user1'), ('29', 3, '2252531', datetime.datetime(2014, 11, 20, 6, 21, 27), 'Pending', 'user1'), ('30', 3, '2252533', datetime.datetime(2014, 11, 20, 6, 16, 41), 'Pending', 'user1'), ('31', 3, '2252535', datetime.datetime(2014, 11, 20, 6, 11, 27), 'Pending', 'user1'), ('32', 3, '2252539', datetime.datetime(2014, 11, 20, 6, 7, 45), 'Pending', 'user1'), ('33', 3, '2252567', datetime.datetime(2014, 11, 20, 6, 5, 48), 'Pending', 'user1'), ('34', 3, '2252565', datetime.datetime(2014, 11, 20, 6, 1, 7), 'Pending', 'user1'), ('35', 3, '2252563', datetime.datetime(2014, 11, 20, 5, 57, 29), 'Pending', 'user1'), ('36', 3, '2252537', datetime.datetime(2014, 11, 20, 5, 50, 59), 'Pending', 'user1'), ('37', 3, '1168027', datetime.datetime(2014, 9, 7, 10, 4, 4), 'New', 'user1')], [('38', 4, '3824817', datetime.datetime(2015, 4, 9, 14, 35, 36), 'New', 'user1'), ('39', 4, '3824717', datetime.datetime(2015, 4, 9, 6, 14, 6), 'New', 'user1'), ('40', 4, '3824709', datetime.datetime(2015, 4, 9, 5, 56, 55), 'New', 'user1'), ('41', 4, '3824065', datetime.datetime(2015, 4, 7, 15, 37, 45), 'Updated', 'user1'), ('42', 4, '3824063', datetime.datetime(2015, 4, 2, 8, 42, 43), 'New', 'user1')]]