我需要更新一个现有的 Python Pandas 合并脚本。该脚本目前借助列映射(字典)把 30 多个 MS Excel 工作簿拼接(concat)成一个数据框。目前脚本只读取每个工作簿中的一个工作表,该工作表的名称在 pd.read_excel 语句中指定。我需要保持现有的列映射完整不变,同时把另一个工作表中的一列也带入最终的拼接结果,所以我想我需要在每个工作簿中再读取(解析)一个工作表。我知道这可能很简单,但我还是被它难住了。现有工作表的名称是“Loan Level Detail”(贷款明细),我需要从另一个工作表“S214 Daily Payoffs”中引入一列“Payoff Date”(结清日期)。当前代码如下。预先感谢您提供的任何帮助。
import os, pandas as pd
# Concatenate the 'Loan Level Detail' sheet of every servicer workbook in the
# monthly input folder into one frame and export it as a single workbook.

# Reporting period (YYYYMM). It drives the input folder, the output file name
# and the ReportDate column, so the three can never drift apart.
REPORT_PERIOD = 201908
INPUT_DIR = r'M:\Operations\ABC Database\Accounting FMV Recon\Monthly Report Templates Input\Servicer Reports\{}\XYZ'.format(REPORT_PERIOD)
OUTPUT_FILE = r'M:\Operations\ABC Database\Accounting FMV Recon\Monthly Report Templates Output\Servicer Reports\XYZ Concatenated {}.xlsx'.format(REPORT_PERIOD)
SHEET_NAME = 'Loan Level Detail'
# Extensions pandas.read_excel can open; everything else in the folder is skipped.
EXCEL_SUFFIXES = ('.xls', '.xlsx', '.xlsm', '.xlsb')

# The loop below reads files by bare name, so work from inside the input folder.
os.chdir(INPUT_DIR)

df_list = []

# Source column header -> output column name. Only these columns are kept,
# in this order.
col_map = {
    'XYZ Msp Bank': 'XYZMspBank',
    'XYZ Loan No': 'XYZLoanNo',
    'Prior Loan': 'AOLoanNo',
    'Borrowers Name': 'BorrName',
    'MBA Delinquency Status': 'MBADelqStatus',
    'Note Prin': 'NotePrincipal',
    'Interest Rate': 'IntRate',
    'Current P&I': 'CurrentPI',
    'Beginning UPB': 'BegUPB',
    'Ending UPB': 'EndUPB',
    'Ending Deferred Principal': 'EndDefPrincipal',
    'Boarded Principal': 'BoardedPrincipal',
    'UPB Transfer In': 'UPBTransferIn',
    'UPB Transfer Out': 'UPBTransferOut',
    'Non-Monetary Adjustments': 'NonMonetaryAdj',
    'Next Payment': 'NextPayment',
    'S215 Principal Collected': 'S215PrinCollected',
    'S215 Interest Collected': 'S215IntCollected',
    'S213 Curtailment Collected': 'S213CurtCollected',
    'S214 PIF Principal Collected': 'S214PIFPrinCollected',
    'S214 PIF Servicing Fee Collected': 'S214PIFServicingFeeCollected',
    'S214 PIF Interest Collected': 'S214PIFIntCollected',
    'S214 PIF Prepmt Penalty Collected': 'S214PIFPrepmtPenaltyCollected',
    'S214 Daily Payoffs (Non Securitized)': 'S214DailyPayoffsNonSec',
    'S214 Daily Payoffs (Securitized)': 'S214DailyPayoffsSec',
    'Total Remittance': 'TotalRemittance',
}

for f in os.listdir():
    # os.listdir() returns every entry in the folder. Skip Excel's "~$" lock
    # files (present while a workbook is open) and anything that is not a
    # workbook, instead of crashing inside read_excel.
    if f.startswith('~$') or not f.lower().endswith(EXCEL_SUFFIXES):
        continue

    temp = pd.read_excel(f, sheet_name=SHEET_NAME)

    # Keep only rows that carry a loan number, and only the mapped columns.
    # The filter column must be the same 'XYZ Loan No' header that col_map
    # selects (the original filtered on the misspelled 'ZYZ Loan No').
    temp = temp.loc[temp['XYZ Loan No'].notnull(), list(col_map)]

    # rename() returns a new frame, so the assignment below never writes into
    # a slice of the sheet that was read (avoids SettingWithCopyWarning).
    temp = temp.rename(columns=col_map)
    temp['SourceFile'] = f  # remember which workbook each row came from
    df_list.append(temp)

# NOTE: pd.concat raises ValueError if no workbook was found in the folder.
df = pd.concat(df_list, ignore_index=True)
df = df.drop_duplicates()
df['ReportDate'] = REPORT_PERIOD
df.to_excel(OUTPUT_FILE, index=False)
答案 0 :(得分:0)
下面是一段未经测试的大致示意代码(因为您没有提供可用于复现的数据)……
在 for 循环内部:
# Body of the per-file loop (`for f in os.listdir():`). It relies on `f`,
# `col_map` and `df_list` being defined by the surrounding script.

# existing code: read the main sheet, keep rows with a loan number, keep and
# rename only the mapped columns, and tag each row with its source file.
temp = pd.read_excel(f, sheet_name='Loan Level Detail')
# Filter on the same 'XYZ Loan No' header that col_map selects (the original
# used the misspelled 'ZYZ Loan No').
temp = temp[temp['XYZ Loan No'].notnull()]
temp = temp[list(col_map)]
temp = temp.rename(columns=col_map)
temp['SourceFile'] = f

# Following the same approach for a second sheet: only the join key and the
# extra column are needed from it.
col_map2 = {
    'XYZ Loan No': 'XYZLoanNo',
    'Payoff Date': 'PayoffDate',
}
temp2 = pd.read_excel(f, sheet_name='S214 Daily Payoffs')
temp2 = temp2[temp2['XYZ Loan No'].notnull()]
# list(col_map2) yields the column labels; the original passed the bound
# method `col_map2.keys` (no call), which is not a valid column selector.
temp2 = temp2[list(col_map2)]
temp2 = temp2.rename(columns=col_map2)

# Assumes 'XYZ Loan No' appears in both sheets. Both frames have already
# renamed it to 'XYZLoanNo', so that is the key to join on. A left join keeps
# every loan from the main sheet; loans without a payoff get NaT/NaN.
# NOTE(review): if a loan can appear more than once on the payoff sheet, the
# left join will repeat its main-sheet row once per match - confirm with data.
temp3 = temp.merge(temp2, how='left', on='XYZLoanNo')
df_list.append(temp3)