Question

    cust_id = semi_final_df['0_x'].tolist()
    date = semi_final_df[1].tolist()
    total_amount = semi_final_df[0].tolist()
    prod_num = semi_final_df['0_y'].tolist()

    prod_deduped = []
    quant_cleaned = []
    product_net_amount = []
    cust_id_final = []
    date_final = []
    for row in total_amount:
        quant_cleaned.append(float(row))  

    for unique_prodz in prod_num:
        if unique_prodz not in prod_deduped:
            prod_deduped.append(unique_prodz)

    for unique_product in prod_deduped:
        indices = [i for i, x in enumerate(prod_num) if x == unique_product]
        product_total = 0
        for index in indices:
            product_total += quant_cleaned[index]
        product_net_amount.append(product_total)
        first_index = prod_num.index(unique_product)
        cust_id_final.append(cust_id[first_index])
        date_final.append(date[first_index])

以上代码按一种条件计算总和，以便对发票进行总和。数据有多行，但共享相同的发票/产品编号。

问题：我需要修改以下代码，以便可以按唯一产品和唯一日期进行汇总。

我已经试过了，但出现值错误-

说x，y不在列表中

据我所知，问题在于我将两个不同长度的重复数据删除列表压缩在一起，然后试图循环遍历结果内联。

此行会导致错误

对于枚举（压缩列表）中的[x，y]：

衷心感谢您的帮助。这是第二批带有注释的代码。

    from itertools import zip_longest

    #I have not included the code for the three lists below but you can assume they are populated as these are the lists that I will be #working off of. They are of the same length.
    prod_numbers = []
    datesz = []
    converted_quant = []
    #Code to dedupe date and product which will end up being different lengths. These two lists are populated by the two for loops below
    prod_deduped = []
    dates_deduped = []

    for unique_prodz in prod_numbers:
        if unique_prodz not in prod_deduped:
            prod_deduped.append(unique_prodz)

    for unique_date in datesz:
        if unique_date not in dates_deduped:
            dates_deduped.append(unique_date)


    #Now for the fun part. Time to sum by date and product. The three lists below are empty until we run the code
    converted_net_amount = []
    prod_id_final = []
    date_final = []

    #I zipped the list together using itertools which I imported at the top
    for unique_product, unique_date in zip_longest(prod_deduped, dates_deduped, fillvalue = ''):
        indices = []
        zipped_object = zip(prod_numbers, datesz)
        zipped_list = list(zipped_object)
        for i,[x, y] in enumerate(zipped_list):
            if x == unique_product and y == unique_date:
                indices.append(i)
        converted_total = 0
        for index in indices:
            converted_total += converted_quant[index]
        converted_net_amount.append[converted_total]
        first_index = zipped_list.index([unique_product, unique_date])
        prod_id_final.append(prod_numbers[first_index])
        date_final.append(datesz[first_index])

Answer 1

from collections import defaultdict
summed_dictionary = defaultdict(int)
for x, y, z in list:
    summed_dictionary[(x,y)] += z

使用defaultdict应该可以解决您的问题，并且比上面的所有代码容易得多。我今天早上在reddit上看到了这个，并认为你是交叉张贴的。归功于/ r / learnpython上的reddit上的家伙

清单清单中两个条件的总和

1 个答案: