我遇到了下面列出的错误。我正在尝试实现以下代码,并把返回的排名结果映射回原始的 Av_ret_rank df,以得到:
Top1-XLE(1)
Top2-XLK(1)
该日期所在行的其余所有列均为0
我的目标是为每个datetimeindex行填充Av_ret_rank2 df:
XLP XLE XLV XLI XLB XLK IYT XLU
Date
2018-06-10 0 1 0 0 0 1 0 0
在问题“Find names of top-n highest-value columns in each pandas dataframe row”中,给出的结果形式如下:
top1 top2 top3
id
1 p2 p4 p3
2 p4 p3 p2
3 p3 p4 p2
4 p2 p3 p1
5 p4 p3 p2
我的代码不起作用。原始的 Av_ret_rank df 如下:
XLP XLE XLV XLI XLB XLK IYT XLU
Date
2018-06-10 -6.7 15.4 3.1 6.0 4.2 13.2 10.9 -2.3
2018-07-10 -1.7 11.4 3.7 0.3 3.2 13.2 2.2 2.3
........
我的代码:
# Number of top-ranked column names to keep for every row.
nlargest=3
# Negating the values makes argsort (ascending) list the largest entries
# first; the slice keeps the first `nlargest` column positions of each row.
order= np.argsort(-Av_ret_rank.values,axis=1)[:,:nlargest]
# Build a frame of column names with headers top1..top<nlargest>.
# NOTE(review): `index=Av_ret_rank` passes the whole DataFrame as the index
# instead of `Av_ret_rank.index`; the ValueError reported below is raised
# while the index is being formatted for display, so this argument is the
# likely cause - confirm by passing `Av_ret_rank.index`.
result=pd.DataFrame(Av_ret_rank.columns[order],columns=['top{}'.format(i) for i in range(1,nlargest+1)],index=Av_ret_rank)
# Displaying the frame in the notebook is what triggers the repr call.
result
这是我的错误:
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
C:\ProgramData\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
398 if cls is not object \
399 and callable(cls.__dict__.get('__repr__')):
--> 400 return _repr_pprint(obj, self, cycle)
401
402 return _default_pprint(obj, self, cycle)
C:\ProgramData\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
693 """A pprint that just redirects to the normal repr function."""
694 # Find newlines and replace them with p.break_()
--> 695 output = repr(obj)
696 for idx,output_line in enumerate(output.splitlines()):
697 if idx:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\base.py in __repr__(self)
80 Yields Bytestring in Py2, Unicode String in py3.
81 """
---> 82 return str(self)
83
84
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\base.py in __str__(self)
59
60 if compat.PY3:
---> 61 return self.__unicode__()
62 return self.__bytes__()
63
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __unicode__(self)
661 width = None
662 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 663 line_width=width, show_dimensions=show_dimensions)
664
665 return buf.getvalue()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, line_width, max_rows, max_cols, show_dimensions)
1966 max_cols=max_cols,
1967 show_dimensions=show_dimensions)
-> 1968 formatter.to_string()
1969
1970 if buf is None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_string(self)
609 else:
610
--> 611 strcols = self._to_str_columns()
612 if self.line_width is None: # no need to wrap around just print
613 # the whole frame
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _to_str_columns(self)
518 # may include levels names also
519
--> 520 str_index = self._get_formatted_index(frame)
521
522 if not is_list_like(self.header) and not self.header:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _get_formatted_index(self, frame)
822 names=show_index_names, formatter=fmt)
823 else:
--> 824 fmt_index = [index.format(name=show_index_names, formatter=fmt)]
825 fmt_index = [tuple(_make_fixed_width(list(x), justify='left',
826 minimum=(self.col_space or 0),
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in format(self, name, formatter, **kwargs)
2363 return header + list(self.map(formatter))
2364
-> 2365 return self._format_with_header(header, **kwargs)
2366
2367 def _format_with_header(self, header, na_rep='NaN', **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _format_with_header(self, header, na_rep, **kwargs)
2388
2389 else:
-> 2390 result = _trim_front(format_array(values, None, justify='left'))
2391 return header + result
2392
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal)
890 space=space, justify=justify, decimal=decimal)
891
--> 892 return fmt_obj.get_result()
893
894
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result(self)
910
911 def get_result(self):
--> 912 fmt_values = self._format_strings()
913 return _make_fixed_width(fmt_values, self.justify)
914
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _format_strings(self)
1098 return [self.formatter(x) for x in self.values]
1099
-> 1100 return list(self.get_result_as_array())
1101
1102
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result_as_array(self)
1062 float_format = lambda value: self.float_format % value
1063
-> 1064 formatted_values = format_values_with(float_format)
1065
1066 if not self.fixed_width:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in format_values_with(float_format)
1047
1048 if self.fixed_width:
-> 1049 return _trim_zeros(values, self.na_rep)
1050
1051 return values
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _trim_zeros(str_floats, na_rep)
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
1402
-> 1403 while _cond(trimmed):
1404 trimmed = [x[:-1] if x != na_rep else x for x in trimmed]
1405
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _cond(values)
1397
1398 def _cond(values):
-> 1399 non_na = [x for x in values if x != na_rep]
1400 return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <listcomp>(.0)
1397
1398 def _cond(values):
-> 1399 non_na = [x for x in values if x != na_rep]
1400 return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in _repr_html_(self)
694
695 return self.to_html(max_rows=max_rows, max_cols=max_cols,
--> 696 show_dimensions=show_dimensions, notebook=True)
697 else:
698 return None
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in to_html(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, bold_rows, classes, escape, max_rows, max_cols, show_dimensions, notebook, decimal, border, table_id)
2032 decimal=decimal, table_id=table_id)
2033 # TODO: a generic formatter wld b in DataFrameFormatter
-> 2034 formatter.to_html(classes=classes, notebook=notebook, border=border)
2035
2036 if buf is None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in to_html(self, classes, notebook, border)
749 table_id=self.table_id)
750 if hasattr(self.buf, 'write'):
--> 751 html_renderer.write_result(self.buf)
752 elif isinstance(self.buf, compat.string_types):
753 with open(self.buf, 'w') as f:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\html.py in write_result(self, buf)
178 indent += self.indent_delta
179 indent = self._write_header(indent)
--> 180 indent = self._write_body(indent)
181
182 self.write('</table>', indent)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\html.py in _write_body(self, indent)
341 self._write_hierarchical_rows(fmt_values, indent)
342 else:
--> 343 self._write_regular_rows(fmt_values, indent)
344 else:
345 for i in range(min(len(self.frame), self.max_rows)):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\html.py in _write_regular_rows(self, fmt_values, indent)
363 index_values = self.fmt.tr_frame.index.map(fmt)
364 else:
--> 365 index_values = self.fmt.tr_frame.index.format()
366
367 row = []
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in format(self, name, formatter, **kwargs)
2363 return header + list(self.map(formatter))
2364
-> 2365 return self._format_with_header(header, **kwargs)
2366
2367 def _format_with_header(self, header, na_rep='NaN', **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _format_with_header(self, header, na_rep, **kwargs)
2388
2389 else:
-> 2390 result = _trim_front(format_array(values, None, justify='left'))
2391 return header + result
2392
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify, decimal)
890 space=space, justify=justify, decimal=decimal)
891
--> 892 return fmt_obj.get_result()
893
894
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result(self)
910
911 def get_result(self):
--> 912 fmt_values = self._format_strings()
913 return _make_fixed_width(fmt_values, self.justify)
914
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _format_strings(self)
1098 return [self.formatter(x) for x in self.values]
1099
-> 1100 return list(self.get_result_as_array())
1101
1102
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in get_result_as_array(self)
1062 float_format = lambda value: self.float_format % value
1063
-> 1064 formatted_values = format_values_with(float_format)
1065
1066 if not self.fixed_width:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in format_values_with(float_format)
1047
1048 if self.fixed_width:
-> 1049 return _trim_zeros(values, self.na_rep)
1050
1051 return values
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _trim_zeros(str_floats, na_rep)
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
1402
-> 1403 while _cond(trimmed):
1404 trimmed = [x[:-1] if x != na_rep else x for x in trimmed]
1405
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in _cond(values)
1397
1398 def _cond(values):
-> 1399 non_na = [x for x in values if x != na_rep]
1400 return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py in <listcomp>(.0)
1397
1398 def _cond(values):
-> 1399 non_na = [x for x in values if x != na_rep]
1400 return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and
1401 not (any(('e' in x) or ('E' in x) for x in non_na)))
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
答案 0 :(得分:0)
您参考的问题中提出的解决方案效果很好。之后只需要两个步骤:
第一步,使用 sklearn 库(MultiLabelBinarizer)对每行的前两名进行独热编码(one-hot encoding);
第二步,按原始 df 的列顺序重新索引(reindex),未入选的列填 0。这里假设出于某种原因,您需要在更早的步骤中保存“前3个”值。这是一个可运行的示例:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
# Sample returns: one column per ticker, one row per date. The values mirror
# the Av_ret_rank frame shown in the question (XLP on 2018-07-10 is -1.7).
df = pd.DataFrame({'XLP': [-6.7, -1.7], 'XLE': [15.4, 11.4],
                   'XLV': [3.1, 3.7], 'XLI': [6.0, 0.3],
                   'XLB': [4.2, 3.2], 'XLK': [13.2, 13.2],
                   'IYT': [10.9, 2.2], 'XLU': [-2.3, 2.3]},
                  index=['2018-06-10', '2018-07-10'])

nlargest = 3   # how many ranked column names are stored per row
n_flagged = 2  # how many of those leading names are marked with 1

# Negating the values makes argsort (ascending) put the largest entries
# first; keep the first `nlargest` column positions of each row.
order = np.argsort(-df.values, axis=1)[:, :nlargest]

# Translate positions into column labels. Index the underlying NumPy array
# rather than the pandas Index: indexing an Index with a 2-D array is
# deprecated in pandas 1.x and raises ValueError in pandas 2.x.
result = pd.DataFrame(df.columns.values[order],
                      columns=['top{}'.format(i)
                               for i in range(1, nlargest + 1)],
                      index=df.index)

# One-hot encode the leading `n_flagged` names of every row. The binarizer
# only emits columns for labels it has actually seen.
mlb = MultiLabelBinarizer()
res = pd.DataFrame(mlb.fit_transform(result.iloc[:, :n_flagged].values),
                   columns=mlb.classes_,
                   index=result.index)

# Restore the original column order; tickers that never reached the top
# `n_flagged` get an all-zero column. fill_value avoids a NaN round trip.
res = res.reindex(columns=df.columns, fill_value=0).astype(int)
print(res)
XLP XLE XLV XLI XLB XLK IYT XLU
2018-06-10 0 1 0 0 0 1 0 0
2018-07-10 0 1 0 0 0 1 0 0