Question

假设有一个包含这样的子列表的列表

[[2013, 'Patric', 'M', 1356], [2013, 'Helena', 'F', 202], [2013, 'Patric', 'F', 6],[1993, 'Patric', 'F', 7]......]

这是 def list_of_names() 的输出，其中 2013 是年份，M 是性别，1356 是该年出生的男性人数，依此类推。

我想创建一个字典，以名字作为键，值为由元组 (year, number_of_males, number_of_females) 组成的列表。例如：

{ ..., 'Patric': [..., (1993, 0, 7), (2013, 1356, 6), ...], ... }

也就是说，1993 是年份，0 是男性人数，7 是女性人数，并且元组应按年份排序。

我仍然卡在如何将这些信息添加到字典中：

def name_Index(names):
    d = dict()
    L = readNames() #the list with from previous def which outputs different names and info as above
    newlist = []
    for sublist in L:

Answer 1

from collections import defaultdict

def list_of_names():
    """Return the sample birth records.

    Each row is ``[year, name, sex, count]``. A new list is built on every
    call.
    """
    rows = [
        [2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7],
    ]
    return rows

def name_Index():
    """Build a per-name index of yearly birth counts.

    Reads ``[year, name, sex, count]`` rows from ``list_of_names()`` and
    returns a dict mapping each name to a list of
    ``(year, males, females)`` tuples sorted by ascending year.

    Rows whose sex is not ``'M'`` are counted as female.
    """
    # name -> year -> [males, females]
    tmp = defaultdict(lambda: defaultdict(lambda: [0, 0]))

    for year, name, sex, N in list_of_names():
        i = 0 if sex == 'M' else 1
        tmp[name][year][i] += N

    d = {}
    for name, entries in tmp.items():
        # Years are unique dict keys, so sorting the items orders purely by
        # year; without the sort the tuples come out in insertion order.
        d[name] = [(year, M, F) for (year, (M, F)) in sorted(entries.items())]

    return d

# Call form works on both Python 2 and Python 3 (the bare print statement
# is a SyntaxError on Python 3).
print(name_Index())

Answer 2

这是我对这个问题的尝试：

from collections import defaultdict, namedtuple
from itertools import groupby

# Sample records: [year, name, gender, number_of_births].
data = [[2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7]]

# Bucket the records by name. A plain loop is used here: itertools.groupby
# only merges *adjacent* items with equal keys, and the input is not sorted
# by name.
names = defaultdict(list)
datum = namedtuple('datum', 'year gender number')
for year, name, gender, number in data:
    names[name].append(datum(year, gender, number))

# For each name, collapse the records into one (year, males, females) tuple
# per year, in ascending year order.
final_dict = defaultdict(list)
for n in names:
    # groupby requires its input to be sorted by the grouping key; sorting
    # also gives the year-ordered output the question asks for.
    by_year = sorted(names[n], key=lambda rec: rec.year)
    for k, g in groupby(by_year, key=lambda rec: rec.year):
        males = 0
        females = 0
        for rec in g:
            if rec.gender == 'M':
                males += rec.number
            elif rec.gender == 'F':
                females += rec.number
        final_dict[n].append((k, males, females))

print(final_dict)

Answer 3

最方便的做法是使用 collections.defaultdict。它返回一个类似字典的对象，在找不到键时会返回默认值。在您的情况下，可以用 list 作为默认值，并在循环中向其追加元组：

from collections import defaultdict

# Sample records: [year, name, sex, births].
names = [
    [2013, 'Patric', 'M', 1356],
    [2013, 'Helena', 'F', 202],
    [2013, 'Patric', 'F', 6],
    [1993, 'Patric', 'F', 7],
]


def name_Index(data):
    """Index birth records by name.

    Args:
        data: iterable of ``[year, name, sex, births]`` rows, where ``sex``
            is ``'M'`` or ``'F'``.

    Returns:
        A dict mapping each name to a list of ``(year, males, females)``
        tuples sorted by ascending year.

    Raises:
        KeyError: if a row's ``sex`` is neither ``'M'`` nor ``'F'``.
    """
    # name => year => sex => births
    counts = defaultdict(lambda: defaultdict(lambda: {'F': 0, 'M': 0}))
    for year, name, sex, births in data:
        counts[name][year][sex] += births

    # Collect plain tuples, ordered by year (dict iteration order alone is
    # insertion order, not year order).
    result = {}
    for name, by_year in counts.items():
        result[name] = [
            (year, by_year[year]['M'], by_year[year]['F'])
            for year in sorted(by_year)
        ]
    return result


print(name_Index(names))
# {'Patric': [(1993, 0, 7), (2013, 1356, 6)], 'Helena': [(2013, 0, 202)]}
# (key order of the outer dict may differ on older interpreters)

Answer 4

我不明白你为什么把 names 作为 name_Index 函数的参数，然后又在函数里调用 readNames；想必你这样做有自己的理由。因此，我只是放了一个虚拟的 readNames 函数，并将 None 作为参数传给 name_Index。使用类是处理复杂数据结构的好方法。顺便说一下，我必须承认，这是一个写得很好的问题。

def readNames():
    """Dummy reader: return sample ``[year, name, sex, count]`` rows."""
    records = [
        [2013, 'Patric', 'M', 1356],
        [2013, 'Helena', 'F', 202],
        [2013, 'Patric', 'F', 6],
        [1993, 'Patric', 'F', 7],
    ]
    return records

class YearOb(object):
    """Running male/female birth totals for one year."""

    def __init__(self):
        # Both counters start at zero.
        self.male = self.female = 0

    def add_birth_data(self, gender, birth_count):
        """Add ``birth_count`` to the matching counter.

        ``gender == "M"`` goes to ``male``; every other value goes to
        ``female``.
        """
        target = "male" if gender == "M" else "female"
        setattr(self, target, getattr(self, target) + birth_count)

class NameOb(object):
    """Per-name collection of yearly birth totals, keyed by year."""

    def __init__(self):
        # year -> YearOb holding that year's male/female counters
        self.yearobs = dict()

    def add_record(self, year, gender, birth_count):
        """Add one record's birth count to the totals for ``year``.

        A ``YearOb`` is created the first time a year is seen.
        """
        if year not in self.yearobs:
            self.yearobs[year] = YearOb()

        self.yearobs[year].add_birth_data(gender, birth_count)

    def get_as_list(self):
        """Return ``(year, male, female)`` tuples sorted by ascending year."""
        # Iterate the years in sorted order; plain dict iteration would give
        # insertion order, not year order.
        return [
            (year, self.yearobs[year].male, self.yearobs[year].female)
            for year in sorted(self.yearobs)
        ]

def name_Index(names):
    """Build a dict mapping each name to its yearly birth totals.

    Args:
        names: unused; the records are always read from ``readNames()``.
            The parameter is kept so existing callers keep working.

    Returns:
        A dict mapping each name to a list of ``(year, male, female)``
        tuples sorted by ascending year.
    """
    per_name = dict()
    for year, name, gender, birth_count in readNames():
        if name not in per_name:
            per_name[name] = NameOb()
        per_name[name].add_record(year, gender, birth_count)

    # Build the result in a fresh dict instead of rebinding values of the
    # dict being iterated. Sorting the tuples orders them by year (the first
    # element), whatever order get_as_list() produces.
    d = dict()
    for name, nameob in per_name.items():
        d[name] = sorted(nameob.get_as_list())

    return d


# Demo call: the argument is a placeholder, name_Index reads its data from
# readNames() itself.
print(name_Index(None))

将元组形式的列表添加到字典中

4 个答案: