Question

我需要更新现有的Python Pandas连接脚本，该脚本当前使用列映射（字典）连接30多个MS Excel电子表格。当前，该脚本仅使用1个选项卡进行连接，该选项卡在pd.read_excel语句中命名。我需要保持完整的列映射，并将另一个选项卡中的另一列引入整个串联中，所以我想我需要解析数据框中的工作表。我意识到这可能很简单，但仍然可以拔掉头发。现有的Excel选项卡名称为“贷款水平明细”，我需要从另一个选项卡中引入另一列（“还款日期”）（“ S214每日收益”）。当前代码发布在下面。预先感谢您提供的任何帮助。

import os, pandas as pd

os.chdir(r'M:\Operations\ABC Database\Accounting FMV Recon\Monthly Report Templates Input\Servicer Reports\201908\XYZ')
df_list = []

col_map = {'XYZ Msp Bank': 'XYZMspBank',
           'XYZ Loan No' : 'XYZLoanNo',
           'Prior Loan' : 'AOLoanNo',
           'Borrowers Name' : 'BorrName',
           'MBA Delinquency Status' : 'MBADelqStatus',
           'Note Prin' : 'NotePrincipal',
           'Interest Rate' : 'IntRate',
           'Current P&I' : 'CurrentPI',
           'Beginning UPB' : 'BegUPB',
           'Ending UPB' : 'EndUPB',
           'Ending Deferred Principal' : 'EndDefPrincipal',
           'Boarded Principal' : 'BoardedPrincipal',
           'UPB Transfer In' : 'UPBTransferIn',
           'UPB Transfer Out' : 'UPBTransferOut',
           'Non-Monetary Adjustments' : 'NonMonetaryAdj',
           'Next Payment' : 'NextPayment',
           'S215 Principal Collected' : 'S215PrinCollected',
           'S215 Interest Collected' : 'S215IntCollected',
           'S213 Curtailment Collected' : 'S213CurtCollected',
           'S214 PIF Principal Collected' : 'S214PIFPrinCollected',
           'S214 PIF Servicing Fee Collected' : 'S214PIFServicingFeeCollected',
           'S214 PIF Interest Collected' : 'S214PIFIntCollected',
           'S214 PIF Prepmt Penalty Collected' : 'S214PIFPrepmtPenaltyCollected',
           'S214 Daily Payoffs (Non Securitized)' : 'S214DailyPayoffsNonSec',
           'S214 Daily Payoffs (Securitized)' : 'S214DailyPayoffsSec',
           'Total Remittance' : 'TotalRemittance'
           }

for f in os.listdir():

    temp = pd.read_excel(f, sheet_name='Loan Level Detail')
    temp = temp[temp['ZYZ Loan No'].notnull()]
    temp = temp[col_map.keys()]
    temp.rename(columns=col_map, inplace=True)
    temp['SourceFile'] = f
    df_list.append(temp)

df = pd.concat(df_list, ignore_index=True)
df.drop_duplicates(inplace=True)
df['ReportDate'] = 201908
df.to_excel(r'M:\Operations\ABC Database\Accounting FMV Recon\Monthly Report Templates Output\Servicer Reports\XYZ Concatenated 201908.xlsx', index=False)

Answer 1

大致使用未经测试的伪代码（因为您没有要复制的数据）...

在循环之内

# existing code
temp = pd.read_excel(f, sheet_name='Loan Level Detail')
temp = temp[temp['ZYZ Loan No'].notnull()]
temp = temp[col_map.keys()]
temp.rename(columns=col_map, inplace=True)
temp['SourceFile'] = f

# Following the same approach for a second sheet
col_map2 = {
       'XYZ Loan No' : 'XYZLoanNo',
       'Payoff Date' : 'PayoffDate'}
temp2 = pd.read_excel(f, sheet_name='S214 Daily Payoffs')
temp2 = temp2[temp2['ZYZ Loan No'].notnull()]
temp2 = temp2[col_map2.keys]
temp2.rename(columns=col_map2, inplace=True)


# assume that 'ZYZ Loan No' appears in both sheets, so join
temp3 = temp.merge(temp2, how='left', on='ZYZ Loan No')

df_list.append(temp3)

具有2个电子表格标签的Python串联

1 个答案: