使用xlswriter在python中模拟自动拟合

时间:2019-08-27 16:40:55

标签: python xlsxwriter

我尝试遵循建议的here。但是,当我尝试以下任一功能时,似乎只有几列会生效:

def get_col_widths(dataframe):
    # First we find the maximum length of the index column   
    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
    # Then, we concatenate this to the max of the lengths of column name and its values for each column, left to right
    return [idx_max] + [max([len(str(s)) for s in dataframe[col].values] + [len(col)]) for col in dataframe.columns]

for i, width in enumerate(get_col_widths(df)):
    worksheet.set_column(i, i, width)

def set_column_width(df):
    length_list = [len(x) for x in df.columns]
    for i, width in enumerate(length_list):
        worksheet.set_column(i, i, width)

我可能没有正确使用上述功能。因此,这是我的整个代码:

import openpyxl
import sql_utils as sql
from datetime import date
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from pandas.tseries.holiday import USFederalHolidayCalendar
import pyodbc
import pandas as pd
import datetime
import numpy as np
import xlsxwriter
from openpyxl import load_workbook
import sys
import win32com.client as win32
from win32com.client import Dispatch

cal = USFederalHolidayCalendar()

# pylint: disable=no-member
cnxn = pyodbc.connect(sql.connection_string)

# Set dates for the queries
cutoff_date = datetime.date(2019, 6, 30)

output_file = ".\\pa-jpm-reporting\\output\\jpm_morgans_report.xlsx"

if len(sys.argv) > 1:    
    cutoff_date = datetime.datetime.strptime(sys.argv[1], '%Y-%m-%d').date()
    output_file = sys.argv[2]

writer = pd.ExcelWriter(output_file)

def get_first_business_day_of_month(start_date, end_date):
    return [
        get_business_day(d).date()
        for d in pd.date_range(start_date, end_date, freq="BMS")
    ]

def get_business_day(date):
    while date.isoweekday() > 5 or date in cal.holidays():
        date += timedelta(days=1)
    return date

# Get as_of_date and archive_date
archive_date = get_first_business_day_of_month(cutoff_date, cutoff_date + relativedelta(days=+10))[0]
as_of_date = datetime.date(archive_date.year, 1, 1)

# Pull date
def get_sql(cutoff_date, archive_date, as_of_date):
    return sql.jpm_rate_query.format(cutoff_date, archive_date, as_of_date)

def get_sql2(utoff_date, archive_date, as_of_date):
    return sql.jpm_query.format(cutoff_date, archive_date, as_of_date)

# Get data into a pandas dataframe
def get_dataframe(cutoff_date, archive_date, as_of_date):
    cnxn.execute(get_sql(cutoff_date, archive_date, as_of_date)) 
    data = pd.read_sql(get_sql2(cutoff_date, archive_date, as_of_date), cnxn)
    return data

df = get_dataframe(cutoff_date, archive_date, as_of_date)
df['ModEffectiveDate'] = pd.to_datetime(df['ModEffectiveDate'])
df['StepDate1'] = pd.to_datetime(df['StepDate1'])

# Fix step date
def date_check(date1, date2):
    if date1 > date2:
        return 'X'
    else:
        return ' '

df['Step date prior to mod'] = df.apply(lambda x: date_check(x['ModEffectiveDate'], x['StepDate1']), axis=1)


# Fix step check 
cols = df.filter(regex='StepRate').columns
df['Later Step Rate not higher than previous'] = ' '
for i, col in enumerate(cols):
    if i <= len(cols) - 2:
        df['Later Step Rate not higher than previous'] = np.where(df[col] > df[cols[i+1]],'X',df['Later Step Rate not higher than previous'])
    else:
        break

# Format the Dates
df['CutoffDate'].astype('datetime64[ns]')
df['CutoffDate'] = pd.to_datetime(df['CutoffDate']).dt.strftime("%m/%d/%Y")
df['ModEffectiveDate'].astype('datetime64[ns]')
df['ModEffectiveDate'] = pd.to_datetime(df['ModEffectiveDate']).dt.strftime("%m/%d/%Y")
df['StepDate1'].astype('datetime64[ns]')
df['StepDate1'] = pd.to_datetime(df['StepDate1']).dt.strftime("%m/%d/%Y")
df = df.replace('NaT', '', regex=True)

# clean up (remove) zero step rates that follow the last true step
cols = df.filter(regex='StepRate').columns
for i, col in enumerate(cols):
    df[col] = df[col].replace(0, '', regex=True)


# remove repeated loan number shown immediately before the step dates
df = df.drop(['loanid'], axis=1)


# Add color to column
def highlight(s):
    same = s == df['CutoffDate']
    return ['background-color: lightblue' if x else '' for x in same] 
df.style.highlight_null(null_color='green')

df.to_excel(writer, index=False)

workbook = writer.book
worksheet = writer.sheets['Sheet1']

# Freeze 1st row
worksheet.freeze_panes(1,0)

# Place dollar amounts where relevant
num_format = workbook.add_format({'num_format': '#,##0.00'})
# Find the columns (numbers) with float64 type and set the number format
nametypemap = df.dtypes.apply(lambda x: x.name).to_dict()
for i, (k, v) in enumerate(nametypemap.items()):
    # print("index: {}, key: {}, value: {}".format(i, k, v))
    if v == 'float64':   
        worksheet.set_column(i, i, 12, num_format)


def get_col_widths(dataframe):
    # First we find the maximum length of the index column   
    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
    # Then, we concatenate this to the max of the lengths of column name and its values for each column, left to right
    return [idx_max] + [max([len(str(s)) for s in dataframe[col].values] + [len(col)]) for col in dataframe.columns]

for i, width in enumerate(get_col_widths(df)):
    worksheet.set_column(i, i, width)

def set_column_width(df):
    length_list = [len(x) for x in df.columns]
    for i, width in enumerate(length_list):
        worksheet.set_column(i, i, width)

set_column_width(df)

writer.save()

请告知我是否有办法。我也很难在数据框中突出显示一列。但是我将为此另外发表一篇文章。

0 个答案:

没有答案