我正在学习 MapReduce……但我没有 Python 方面的经验……我想实现以下两点:1)如果城市相同,就把该城市的女性合同数量加总;2)找出女性合同数量最多的城市。(我使用的是 Dumbo。)这是我的代码:
import csv
from dumbo import main
def load_contratos_mapper(contratos_files):
    """Load the contracts table from a ';'-delimited CSV file into a dict.

    The first row is treated as a header and skipped. For every remaining
    row, the first five fields form the dictionary key (as a tuple) and the
    sixth field is stored as the value, unchanged (still a string).

    Args:
        contratos_files: Path of the CSV file to read.

    Returns:
        A dict mapping 5-tuples of strings to the sixth field of the same
        row. Rows with fewer than six fields are skipped. If the file cannot
        be opened or parsed, whatever was read so far (possibly nothing) is
        returned, so loading stays best-effort.
    """
    contratos = {}
    try:
        with open(contratos_files) as f:
            reader = csv.reader(f, delimiter=';', quotechar='"', doublequote=False)
            # Built-in next() works on both Python 2.6+ and Python 3; the
            # default keeps an empty file from raising StopIteration.
            next(reader, None)
            for line in reader:
                if len(line) < 6:
                    # Incomplete row: nothing usable to store.
                    continue
                contratos[tuple(line[:5])] = line[5]
    except (IOError, OSError, csv.Error):
        # Best-effort load: a missing or malformed file yields the entries
        # collected so far instead of aborting the job. Only I/O and CSV
        # errors are tolerated so that programming errors stay visible.
        pass
    return contratos
class Join_contratos_reduce:
    """Reducer that totals the contract counts received for one key.

    On construction the contracts CSV is loaded into ``self.contratos``.
    Calling the instance with a key and an iterable of counts yields a
    single ``(key, total)`` pair.
    """

    def __init__(self):
        # `load_contratos_mapper` is the loader defined in this file.
        self.contratos = load_contratos_mapper('./Contratos_por_municipio.csv')

    def __call__(self, key, values):
        """Sum the integer counts in *values* and yield ``(key, total)``.

        Args:
            key: Grouping key handed to the reducer (presumably the city
                name; depends on what the map step emits).
            values: Iterable of counts, as ints or integer strings.

        Yields:
            One ``(key, total)`` tuple. Values that cannot be converted with
            ``int()`` are ignored, keeping the reducer best-effort.
        """
        total = 0
        for value in values:
            try:
                total += int(value)
            except (TypeError, ValueError):
                # Not an integer count (e.g. header text or None): skip it.
                continue
        yield key, total
def _contratos_mapper(key, value):
    """Emit ``(fields[1], int(fields[5]))`` for one ';'-delimited text line.

    Args:
        key: Record key supplied by the framework; not used.
        value: One line of the input file.

    Yields:
        At most one pair: the second field and the sixth field converted to
        int. NOTE(review): assumes field 1 is the city and field 5 the
        contract count -- confirm against the CSV layout. Lines that cannot
        be parsed, have fewer than six fields, or whose sixth field is not
        an integer (such as a header line) produce nothing.
    """
    try:
        rows = csv.reader([value], delimiter=';', quotechar='"', doublequote=False)
        fields = next(rows, [])
    except csv.Error:
        return
    if len(fields) < 6:
        return
    try:
        count = int(fields[5])
    except ValueError:
        # Header line or non-numeric count.
        return
    yield fields[1], count


def runner(job):
    """Register the single map/reduce iteration on *job*.

    The map step must be a ``(key, value)`` generator, so a per-line mapper
    is registered rather than the CSV-loading helper, which takes a file
    path and returns a dict.

    Args:
        job: Dumbo job object exposing ``additer``.
    """
    inout_opts = [("inputformat", "text"), ("outputformat", "text")]
    job.additer(_contratos_mapper, Join_contratos_reduce, opts=inout_opts)
# Script entry point: pass runner to the main() imported from dumbo.
if __name__ == '__main__':
    main(runner)