cust_id = semi_final_df['0_x'].tolist()
date = semi_final_df[1].tolist()
total_amount = semi_final_df[0].tolist()
prod_num = semi_final_df['0_y'].tolist()
prod_deduped = []
quant_cleaned = []
product_net_amount = []
cust_id_final = []
date_final = []
for row in total_amount:
quant_cleaned.append(float(row))
for unique_prodz in prod_num:
if unique_prodz not in prod_deduped:
prod_deduped.append(unique_prodz)
for unique_product in prod_deduped:
indices = [i for i, x in enumerate(prod_num) if x == unique_product]
product_total = 0
for index in indices:
product_total += quant_cleaned[index]
product_net_amount.append(product_total)
first_index = prod_num.index(unique_product)
cust_id_final.append(cust_id[first_index])
date_final.append(date[first_index])
以上代码按一种条件计算总和,以便对发票进行总和。 数据有多行,但共享相同的发票/产品编号。
问题: 我需要修改以下代码,以便可以按唯一产品和唯一日期进行汇总。
我已经试过了,但出现值错误-
说x,y不在列表中
据我所知,问题在于我将两个不同长度的重复数据删除列表压缩在一起,然后试图循环遍历结果内联。
此行会导致错误
对于枚举(压缩列表)中的[x,y]:
衷心感谢您的帮助。这是第二批带有注释的代码。
from itertools import zip_longest
#I have not included the code for the three lists below but you can assume they are populated as these are the lists that I will be #working off of. They are of the same length.
prod_numbers = []
datesz = []
converted_quant = []
#Code to dedupe date and product which will end up being different lengths. These two lists are populated by the two for loops below
prod_deduped = []
dates_deduped = []
for unique_prodz in prod_numbers:
if unique_prodz not in prod_deduped:
prod_deduped.append(unique_prodz)
for unique_date in datesz:
if unique_date not in dates_deduped:
dates_deduped.append(unique_date)
#Now for the fun part. Time to sum by date and product. The three lists below are empty until we run the code
converted_net_amount = []
prod_id_final = []
date_final = []
#I zipped the list together using itertools which I imported at the top
for unique_product, unique_date in zip_longest(prod_deduped, dates_deduped, fillvalue = ''):
indices = []
zipped_object = zip(prod_numbers, datesz)
zipped_list = list(zipped_object)
for i,[x, y] in enumerate(zipped_list):
if x == unique_product and y == unique_date:
indices.append(i)
converted_total = 0
for index in indices:
converted_total += converted_quant[index]
converted_net_amount.append[converted_total]
first_index = zipped_list.index([unique_product, unique_date])
prod_id_final.append(prod_numbers[first_index])
date_final.append(datesz[first_index])
答案 0 :(得分:0)
from collections import defaultdict
summed_dictionary = defaultdict(int)
for x, y, z in list:
summed_dictionary[(x,y)] += z
使用defaultdict应该可以解决您的问题,并且比上面的所有代码容易得多。我今天早上在reddit上看到了这个,并认为你是交叉张贴的。归功于/ r / learnpython上的reddit上的家伙