我需要从多个文件的多个工作表中提取若干列,但其中某些列只存在于部分文件中,在其他文件中并不存在。因此我必须先检查该列是否存在,再根据结果决定如何处理。
# Collect one output record per data row from every recognised worksheet of
# the workbook at join(data_path, _file), then build `output_dataframe`.
#
# Plan-review sheets and building-permit sheets share the same layout except
# for the column holding the owner's name, so the sheet name only selects
# which owner column to read.

# Sheets whose owner column is 'PROP OWNER'.
PLAN_REVIEW_SHEETS = frozenset({
    'PLAN REVIEW 02', 'ANNUAL PR 2002', 'Plan Review', 'Annual Reviews',
    'PR 2004', 'Annual PR 2004', 'PR 2005', 'Annual PR 05',
    'PR 2006', 'Annual PR', 'PR 2007', 'PR 2008', 'PR 2009',
    'PR 2010', 'PR 2012', 'PR 2013',
})

# Sheets whose owner column is 'OWNERS NAME'.
# NOTE(review): the original condition listed 'Annual BP 2009', 'BP 2010' and
# 'Annual BP 2010' twice; possibly other sheet names were intended -- confirm.
BUILDING_PERMIT_SHEETS = frozenset({
    'BP 2002', 'ANNUAL BP 02', 'Building Permit', 'Annual Permits',
    'BP 2004', 'Annual BP 2004', 'BP 2005', 'BP 2006',
    'Annual BP 2006', 'BP 2007', 'Annual BP 2007', 'BP 2008', 'Annual BP',
    'BP 2009', 'Annual BP 2009', 'BP 2010', 'Annual BP 2010',
    'BP 2013',
})

pd_file = pd.ExcelFile(join(data_path, _file))
for sheet in pd_file.sheet_names:
    if sheet in PLAN_REVIEW_SHEETS:
        owner_column = 'PROP OWNER'
    elif sheet in BUILDING_PERMIT_SHEETS:
        owner_column = 'OWNERS NAME'
    else:
        # Unrecognised sheet: nothing to extract, so do not even parse it.
        continue

    # The column headers are on the fourth row of each sheet (header=3).
    df = pd.read_excel(pd_file, sheet_name=sheet, header=3)

    for _, row in df.iterrows():
        # NOTE(review): the status is read from 'FEE', same as the fee below;
        # this looks like a copy-paste slip -- confirm the intended column.
        current_permit_status = row['FEE']
        current_permit_description = row['PROJECT DESCRIPTION']
        # Series.get returns None instead of raising KeyError when this
        # sheet does not have the expected owner column.
        current_house_owner_name = row.get(owner_column)
        current_permit_fee = row['FEE']
        current_house_street_address = (
            pre_process_text(row['ST#']) + ' ' + pre_process_text(row['STREET'])
        )

        # NOTE(review): current_permit_number, current_permit_date,
        # current_cost, current_contractor_name and current_phone are not
        # assigned anywhere in this snippet; they must be set per row before
        # this point, otherwise the same stale value is repeated for each row.

        # Append inside the row loop so that every row yields one record and
        # all lists in `output` stay the same length for pd.DataFrame.
        output['permit_number'].append(current_permit_number)
        output['permit_date'].append(current_permit_date)
        output['house_owner_name'].append(current_house_owner_name)
        output['house_street_address'].append(current_house_street_address)
        output['permit_description'].append(current_permit_description)
        output['cost'].append(current_cost)
        output['contractor_name'].append(current_contractor_name)
        output['phone'].append(current_phone)
        output['permit_fee'].append(current_permit_fee)

output_dataframe = pd.DataFrame(output)
我不确定 for 循环所处的位置(嵌套层级)是否正确。另外,各列也没有被正确地输出,有人知道该如何解决这个问题吗?