Pandas包提供DataFrame.to_html()方法。此方法以表格格式获取数据并将其显示为html表。
我想修改此方法的行为。但是,我不想更改包的源代码,我想扩展它。
为了澄清这一点,我想保持方法DataFrame.to_html()不变并创建一个新方法DataFrame.to_html2()。
这里的问题是,此方法的行为是在类的另一个方法中定义的,该类是DataFrame的间接超类。
这是结构:
类DataFrame
方法to_html#在我的脚本中调用此方法
类DataFrameFormatter的实例 # 此实例在方法to_html中创建
请忽略class和def的缩进:
class DataFrame(NDFrame):
    @Appender(fmt.docstring_to_string, indents=1)
    def to_html(self, buf=None, columns=None, col_space=None, colSpace=None,
                header=True, index=True, na_rep='NaN', formatters=None,
                float_format=None, sparsify=None, index_names=True,
                justify=None, force_unicode=None, bold_rows=True,
                classes=None, escape=True):
        """
        to_html-specific options

        bold_rows : boolean, default True
            Make the row labels bold in the output
        classes : str or list or tuple, default None
            CSS class(es) to apply to the resulting html table
        escape : boolean, default True
            Convert the characters <, >, and & to HTML-safe sequences.

        Render a DataFrame as an HTML table.
        """
        import warnings

        # Deprecated keywords are still accepted, but each one warns.
        if force_unicode is not None:  # pragma: no cover
            warnings.warn("force_unicode is deprecated, it will have no "
                          "effect", FutureWarning)
        if colSpace is not None:  # pragma: no cover
            warnings.warn("colSpace is deprecated, use col_space",
                          FutureWarning)
            # The legacy spelling overrides whatever col_space was given.
            col_space = colSpace

        # Everything except ``classes`` is forwarded to the formatter's
        # constructor; ``classes`` is handed to its to_html() call instead.
        formatter_options = dict(
            buf=buf,
            columns=columns,
            col_space=col_space,
            na_rep=na_rep,
            formatters=formatters,
            float_format=float_format,
            sparsify=sparsify,
            justify=justify,
            index_names=index_names,
            header=header,
            index=index,
            bold_rows=bold_rows,
            escape=escape,
        )
        table_formatter = fmt.DataFrameFormatter(self, **formatter_options)
        table_formatter.to_html(classes=classes)

        # When the caller supplied a buffer the output went there and nothing
        # is returned; otherwise hand back the text the formatter collected.
        if buf is not None:
            return None
        return table_formatter.buf.getvalue()
类DataFrameFormatter
类DataFrameFormatter的方法to_html
类HTMLFormatter的实例 # 此实例在方法to_html中创建
class DataFrameFormatter(TableFormatter):
def to_html(self, classes=None):
    """
    Render a DataFrame to a html table.

    The table is written to ``self.buf``: directly when it has a ``write``
    attribute, or to a file opened at that path when it is a string.

    classes : passed through unchanged to HTMLFormatter.

    Raises TypeError when ``self.buf`` is neither of the above.
    """
    renderer = HTMLFormatter(self, classes=classes)
    target = self.buf

    # File-like target: write straight into it.
    if hasattr(target, 'write'):
        renderer.write_result(target)
        return

    # Anything that is not a string at this point cannot be used as a path.
    if not isinstance(target, basestring):
        raise TypeError('buf is not a file name and it has no write '
                        ' method')

    with open(target, 'w') as handle:
        renderer.write_result(handle)
def _get_formatted_column_labels(self):
    """
    Build the column header labels as a list of lists of strings.

    A label gets a single leading space when its column has a numeric dtype
    and no custom formatter applies to it. When index names are shown and
    present, an empty string is appended to every inner list.
    """
    from pandas.core.index import _sparsify

    def is_numeric_dtype(dtype):
        # True when the dtype's scalar type derives from np.number.
        return issubclass(dtype.type, np.number)

    if isinstance(self.columns, MultiIndex):
        fmt_columns = self.columns.format(sparsify=False, adjoin=False)
        # Transpose the formatted output; presumably this turns per-level
        # label lists into one tuple per column -- TODO confirm.
        fmt_columns = zip(*fmt_columns)
        dtypes = self.frame.dtypes.values
        # Maps each transposed entry to its numeric-dtype flag.
        # NOTE(review): fmt_columns is iterated here and again below, which
        # relies on zip() returning a list (Python 2 behaviour).
        need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
        # Pad each label of a numeric column unless that label is a key in
        # self.formatters, then transpose back.
        str_columns = zip(*[[' ' + y
                             if y not in self.formatters and need_leadsp[x]
                             else y for y in x]
                            for x in fmt_columns])
        if self.sparsify:
            str_columns = _sparsify(str_columns)
        # Transpose once more and make every entry a mutable list so the
        # index-name placeholder can be appended below.
        str_columns = [list(x) for x in zip(*str_columns)]
    else:
        fmt_columns = self.columns.format()
        dtypes = self.frame.dtypes
        # Keyed by the formatted label text.
        need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
        # One single-element list per column; the formatter lookup is by
        # position i, and `col` itself is not used.
        str_columns = [[' ' + x
                        if not self._get_formatter(i) and need_leadsp[x]
                        else x]
                       for i, (col, x) in
                       enumerate(zip(self.columns, fmt_columns))]
    if self.show_index_names and self.has_index_names:
        # Add one empty trailing entry to every column's label list.
        for x in str_columns:
            x.append('')
    return str_columns
类HTMLFormatter
方法write_td#这是我必须修改的方法
class HTMLFormatter(TableFormatter):
    """Collects the pieces of an HTML table for a DataFrameFormatter."""

    indent_delta = 2

    def __init__(self, formatter, classes=None):
        """
        Keep a reference to ``formatter`` and copy the settings used later.

        formatter : object providing ``frame``, ``columns`` and ``kwds``
        classes : stored as-is on the instance
        """
        self.fmt = formatter
        self.classes = classes
        self.frame = formatter.frame
        self.columns = formatter.columns
        self.elements = []

        # Rendering switches come from the formatter's keyword options.
        options = formatter.kwds
        self.bold_rows = options.get('bold_rows', False)
        self.escape = options.get('escape', True)

    def write(self, s, indent=0):
        """Append ``s`` to ``self.elements``, prefixed by ``indent`` spaces."""
        text = com.pprint_thing(s)
        padding = ' ' * indent
        self.elements.append(padding + text)

    def write_th(self, s, indent=0, tags=None):
        """
        Write a ``th`` cell, adding a min-width style when the formatter
        has a positive ``col_space``.
        """
        col_space = self.fmt.col_space
        if col_space is not None and col_space > 0:
            min_width = 'style="min-width: %s;"' % col_space
            # The style attribute is concatenated directly onto any tags
            # the caller already passed in.
            tags = (tags or "") + min_width
        return self._write_cell(s, kind='th', indent=indent, tags=tags)

    def write_td(self, s, indent=0, tags=None):
        """Write a ``td`` cell by delegating to ``_write_cell``."""
        return self._write_cell(s, kind='td', indent=indent, tags=tags)
我的问题是:应该如何处理这种情况?
我正在考虑为所涉及的每个类创建第二个版本,并修改其中负责创建实例的那部分代码。
这是处理此问题的最有效方法吗?
答案 0 :(得分:2)
您可以在调用to_html2时,将pandas.core.format.HTMLFormatter类替换为您自己的类:
import pandas as pd
import functools
class HTMLFormatter2(pd.core.format.HTMLFormatter):
    """HTML formatter whose header cells are written in upper case."""

    def write_th(self, s, indent=0, tags=None):
        """
        Write a header cell with ``s`` upper-cased.

        s : header text; must support ``.upper()``
        indent, tags : forwarded unchanged to the parent implementation

        Returns whatever the parent ``write_th`` returns, so the override
        keeps the same contract as the method it replaces.
        """
        return super(HTMLFormatter2, self).write_th(s.upper(), indent, tags)
def replace_html_formatter(func, formatter):
    """
    Wrap ``func`` so that it runs with a substitute HTML formatter class.

    For the duration of each call, ``pd.core.format.HTMLFormatter`` is set
    to ``formatter``; the previous value is put back afterwards, whether
    ``func`` returns or raises.

    func : callable to wrap
    formatter : object installed as ``pd.core.format.HTMLFormatter``

    Returns the wrapping function, which carries ``func``'s metadata.
    """
    @functools.wraps(func)
    def wrapped_f(*args, **kw):
        # Capture and swap *before* entering ``try``: if the lookup fails
        # there is nothing to restore, and the ``finally`` clause must not
        # hide that error behind an unbound-local failure.
        original = pd.core.format.HTMLFormatter
        pd.core.format.HTMLFormatter = formatter
        try:
            return func(*args, **kw)
        finally:
            pd.core.format.HTMLFormatter = original
    return wrapped_f
# Expose the patched renderer as an extra DataFrame method; the stock
# to_html is left as it was.
pd.DataFrame.to_html2 = replace_html_formatter(pd.DataFrame.to_html, HTMLFormatter2)

df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
# Single-argument print(...) gives the same output on Python 2 and also
# parses on Python 3.
print(df.to_html2())