Map Reduce - 如何求和并获取最大值

时间:2018-05-21 12:31:56

标签: python mapreduce bigdata

我正在学习 Map Reduce……但我没有 Python 经验……我想实现: 1. 如果城市相同,则对女性合同的数量求和。 2. 找出女性合同数量最多的城市。 (我使用的是 Dumbo) 这是我的代码:

import csv

from dumbo import main


def load_contratos_mapper(contratos_files):
    """Load the contracts CSV file into a dictionary.

    The file is parsed as a ``;``-delimited CSV. The first row is skipped
    (treated as a header). For every remaining row, the first five columns
    form the key tuple and the sixth column is stored, as the raw string,
    as the value.

    Args:
        contratos_files: Path of the CSV file to read.

    Returns:
        A dict mapping ``(col0, col1, col2, col3, col4)`` to ``col5``.
        The dict is empty when the file cannot be opened, and holds only
        the rows read so far if reading or parsing fails part-way through.
        Rows with fewer than six columns are skipped.
    """
    contratos = {}
    try:
        with open(contratos_files) as f:
            reader = csv.reader(f, delimiter=';', quotechar='"', doublequote=False)
            # Skip the header row. The next() builtin works on both Python 2
            # and Python 3, and the default avoids StopIteration on an
            # empty file.
            next(reader, None)
            for line in reader:
                if len(line) < 6:
                    # Malformed/short row: ignore it instead of aborting
                    # the whole load.
                    continue
                contratos[tuple(line[:5])] = line[5]
    except (IOError, OSError, UnicodeError, csv.Error):
        # Best-effort load: an unreadable or malformed file yields whatever
        # was parsed so far. Programming errors (NameError, etc.) are no
        # longer hidden.
        pass

    return contratos


class Join_contratos_reduce:
    """Reducer that sums the values received for a key.

    NOTE(review): assumes the map phase emits ``(city, count)`` pairs so
    that ``values`` is an iterable of numeric counts (ints or numeric
    strings) for one city -- confirm against the actual mapper output.
    """

    def __init__(self):
        # The loader defined in this file is load_contratos_mapper; the
        # previous call to load_contratos referenced an undefined name.
        self.contratos = load_contratos_mapper('./Contratos_por_municipio.csv')

    def __call__(self, key, values):
        """Yield a single ``(key, total)`` pair for the given key.

        Args:
            key: The grouping key handed to the reducer.
            values: Iterable of counts for that key.

        Yields:
            ``(key, total)`` where ``total`` is the integer sum of every
            value that can be converted with ``int()``.
        """
        total = 0
        for value in values:
            try:
                total += int(value)
            except (TypeError, ValueError):
                # Non-numeric value: skip it so one bad record does not
                # drop the whole key.
                continue
        yield key, total

def runner(job):
    """Register one iteration on ``job`` using text input and output formats.

    The iteration pairs ``load_contratos_mapper`` with
    ``Join_contratos_reduce``.
    """
    text_io_opts = [
        ("inputformat", "text"),
        ("outputformat", "text"),
    ]
    job.additer(load_contratos_mapper, Join_contratos_reduce, opts=text_io_opts)


# Script entry point: when run directly, pass the runner to dumbo's main().
if __name__ == "__main__":
    main(runner)

0 个答案:

没有答案