我有一个这样的元组列表:
# Sample data: each tuple is (grade name, quantity).
tup_list = [('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 21.325), ('UL00628', 6.675), ('UL00628', 22.5), ('UL00628', 5.5), ('UL00628', 15.525), ('UL00628', 12.475), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00428', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00628', 28.0), ('UL00428-OGA', 28.0)]
我想统计列表中相同项目(如 UL00628、UL00428)各自的数量,应该用什么方式进行迭代?
顺便说一下,tup_list来自excel文件。代码如下:
import xlrd

# Script: read the load sheet, keep the rows whose load date falls inside the
# requested (month, day) range and collect their (grade, quantity) pairs.
load_all = {}
file_name = '***.xls'
# Raw string: the backslash is a literal path separator, not an escape.
wb = xlrd.open_workbook(r'd:\**%s' % file_name)
table = wb.sheet_by_name('***')

date_start_month = int(input('Pls enter the date of start month:'))
date_start_day = int(input('Pls enter the date of start day:'))
date_end_month = int(input('Pls enter the date of end month:'))
date_end_day = int(input('Pls enter the date of end day:'))

count = 0          # number of rows that matched the date range
tup_list = []      # collected (grade, quantity) pairs
tup = ()
nrows = table.nrows

# Compare (month, day) pairs rather than month and day independently, so that
# a range crossing a month boundary (e.g. 1/25 - 2/5) is handled correctly.
range_start = (date_start_month, date_start_day)
range_end = (date_end_month, date_end_day)

# Only process the sheet when the header cell of the date column matches.
if table.cell(1, 11).value == '****':
    for num in range(2, nrows):
        date_of_load = table.cell(num, 11).value  # Excel serial date value
        # Use the workbook's own datemode (1900- vs 1904-based dates).
        year, month, day, hour, minute, second = xlrd.xldate.xldate_as_tuple(
            date_of_load, wb.datemode
        )
        if range_start <= (month, day) <= range_end:
            grade_name = table.cell(num, 3).value  # grade
            grade_num = table.cell(num, 5).value   # quantity
            tup = (grade_name, grade_num)
            tup_list.append(tup)
            count += 1
下面是原始数据:
NO Grade quantity Loadday
9 UL00628 28.0000 2018/2/7
10 UL00628 28.0000 2018/2/7
11 UL00628 28.0000 2018/2/7
12 EVA-OGC 28.0000 2018/2/7
13 EVA-OGC 28.0000 2018/2/7
14 UL00628 28.0000 2018/2/8
15 UL00628 28.0000 2018/2/8
16 UL00628 28.0000 2018/2/19
17 UL00628 28.0000 2018/2/19
18 UL00628 28.0000 2018/2/19
19 UL00628 28.0000 2018/2/19
20 UL00628 28.0000 2018/2/19
21 UL00628 28.0000 2018/2/19
22 UL00628 28.0000 2018/2/19
23 UL00628 28.0000 2018/2/19
24 UL00628 28.0000 2018/2/20
25 UL00628 28.0000 2018/2/20
26 UL00628 28.0000 2018/2/20
27 UL00628 28.0000 2018/2/20
28 UL00628 28.0000 2018/2/20
我需要先筛选出符合条件的装载日期,然后取出对应的等级和数量,并按等级进行统计。
答案 0 :(得分:0)
对于数据处理,特别是涉及 Excel 或 CSV 文件时,我会使用 pandas,而不是直接使用 openpyxl(或问题中用到的 xlrd)。
除此之外,一旦你有了元组列表,就可以使用 defaultdict 按等级分组:
from collections import defaultdict

# Bucket every quantity under the grade it belongs to.
results = defaultdict(list)
for name, amount in tup_list:
    results[name].append(amount)
我还会做的其他改动有:
- 处理输入不是 int 值的情况;
- 把各项输入放进一个 dict 中,这样如果将来所需数据改由另一个脚本或程序的其他部分提供,就可以轻松复用;
- 使用 with 语句;
- 使用 pathlib.Path 处理文件和文件名;
- 加上 if __name__ == "__main__" 测试。
from collections import defaultdict
from pathlib import Path
import xlrd
def get_int_inputs(questions, read=input):
    """Ask each question until it is answered with an integer.

    Args:
        questions: Mapping of result key -> prompt text. Questions are asked
            in the mapping's iteration order.
        read: Callable that takes the prompt text and returns the raw answer
            string. Defaults to the builtin ``input``.

    Yields:
        ``(key, answer)`` pairs, exactly one per question, where ``answer``
        is the parsed ``int``.
    """
    for key, msg in questions.items():
        # Loop until the answer parses. Testing the answer's truthiness
        # instead would treat a valid 0 as "no answer" and ask again.
        while True:
            try:
                answer = int(read(msg))
            except ValueError:
                # Not an integer: repeat the same question.
                continue
            yield key, answer
            break
def parse_file(filename, inputs):
    """Yield ``(grade_name, quantity)`` for rows loaded inside a date range.

    Args:
        filename: Path of the workbook to open with ``xlrd``.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_start_day``, ``date_end_month`` and ``date_end_day``.
            Both ends of the range are inclusive.

    Yields:
        The values of columns 3 and 5 for every data row (row index 2 and
        up) whose date in column 11 lies inside the range. Only month and
        day are compared; the year of the date is ignored. Nothing is
        yielded when the header cell (1, 11) does not match.
    """
    # Compare (month, day) pairs so a range that crosses a month boundary
    # works; filtering month and day separately would reject e.g. 1/25-2/5.
    range_start = (inputs['date_start_month'], inputs['date_start_day'])
    range_end = (inputs['date_end_month'], inputs['date_end_day'])

    with xlrd.open_workbook(filename) as wb:
        table = wb.sheet_by_name('***')
        if table.cell(1, 11).value != '****':
            return
        for num in range(2, table.nrows):
            date_of_load = table.cell(num, 11).value  # Excel serial date
            # The workbook's datemode selects the 1900- or 1904-based epoch.
            year, month, day, *_ = xlrd.xldate.xldate_as_tuple(
                date_of_load, wb.datemode
            )
            if not range_start <= (month, day) <= range_end:
                continue
            grade_name = table.cell(num, 3).value
            grade_num = table.cell(num, 5).value
            yield grade_name, grade_num
def aggregate(quantities):
    """Sum the quantities per grade.

    Args:
        quantities: Iterable of ``(grade_name, quantity)`` pairs; it is
            consumed once, so a generator is fine.

    Returns:
        A plain ``dict`` mapping each grade name to the total of its
        quantities, with keys in order of first appearance.
    """
    # Keep a running total per grade instead of collecting lists to sum
    # afterwards. Starting from int 0 matches what sum() would produce.
    totals = defaultdict(int)
    for grade_name, grade_num in quantities:
        totals[grade_name] += grade_num
    return dict(totals)
if __name__ == '__main__':
    # Prompt text for every value the date filter needs, in asking order.
    wanted_input = dict(
        date_start_month='Pls enter the date of start month:',
        date_start_day='Pls enter the date of start day:',
        date_end_month='Pls enter the date of end month:',
        date_end_day='Pls enter the date of end day:',
    )
    inputs = dict(get_int_inputs(wanted_input))

    # Workbook location; parse_file streams the matching rows lazily.
    filename = Path('D:/') / '***.xls'
    quantities = parse_file(filename, inputs)

    # Total quantity per grade.
    result = aggregate(quantities)
没有样本数据,我无法测试代码,因此可能充满了错误
另一种方法是使用 pandas 进行数据处理,这样代码大致如下:
from pathlib import Path
import pandas as pd
def parse_data(
    df,
    inputs,
    date_column='<date_column_label>',
    grade_column='<grade_name_label>',
    quantity_column='<quantity_label>',
):
    """Sum the quantity per grade for rows loaded inside a date range.

    Args:
        df: DataFrame read from the load sheet.
        inputs: Mapping with the integer keys ``date_start_month``,
            ``date_start_day``, ``date_end_month`` and ``date_end_day``.
            Both ends of the range are inclusive.
        date_column: Label of the column holding the load date.
        grade_column: Label of the column holding the grade name.
        quantity_column: Label of the column holding the quantity.
            The three defaults are placeholders; pass the real labels.

    Returns:
        A Series indexed by grade with the summed quantity, or ``None``
        when the label of column 11 is not the expected one.
    """
    # index might be different, depending on whether there is an index-col
    # and 0- or 1-based indexing
    if df.columns[11] != '****':
        return None

    # Converting is a no-op when the column already holds datetimes.
    dates = pd.to_datetime(df[date_column])

    # Encode (month, day) as month * 100 + day so a single inclusive range
    # check also works for ranges that cross a month boundary. Only month
    # and day are compared; the year is ignored.
    month_day = dates.dt.month * 100 + dates.dt.day
    date_correct = month_day.between(
        inputs['date_start_month'] * 100 + inputs['date_start_day'],
        inputs['date_end_month'] * 100 + inputs['date_end_day'],
    )
    return df[date_correct].groupby(grade_column)[quantity_column].sum()
if __name__ == '__main__':
    # NOTE: get_int_inputs is the prompt helper from the xlrd version of this
    # script; it has to be defined or imported for this block to run.
    wanted_input = {
        'date_start_month': 'Pls enter the date of start month:',
        'date_start_day': 'Pls enter the date of start day:',
        'date_end_month': 'Pls enter the date of end month:',
        'date_end_day': 'Pls enter the date of end day:',
    }
    inputs = dict(get_int_inputs(wanted_input))
    filename = Path('D:/', '***.xls')
    # '***' is a placeholder for the real sheet name; an empty sheet name can
    # never match a sheet.
    # NOTE(review): the xlrd version reads its header from row index 1, so
    # header=1 may be what is needed here - confirm against the workbook.
    df = pd.read_excel(filename, sheet_name='***', header=0)
    # parse_data needs the date range as its second argument.
    result = parse_data(df, inputs)