该函数可将数据帧df的行和列的总数相加,并确定变量的哪一列
def Find_Total_CrossTab(df,Variable_name_index):
df.loc[:,'Total'] = df.sum(numeric_only=True, axis=1)
df=df.reset_index()
df=df.append(df.sum(), ignore_index=True).set_index(Variable_name_index)
return df
这将继续返回错误代码:<built-in function _abc_instancecheck> returned a result with an error set
。有什么我可以解决的吗?
我的数据集非常大。我一直在使用交叉列表,因此使用了以下内容:
# Defining the dataframe
df_1=pd.crosstab(index=df["Interval of Sales"], columns=df['Type of Customer'])
# Defining the index
name_of_index='Interval of Sales'
# Using the function
df_Cross_Table=Find_Total_CrossTab(df_1, name_of_index)
df_Cross_Table
我得到的错误代码是:
TypeError Traceback (most recent call last)
TypeError: unsupported operand type(s) for +: 'pandas._libs.interval.Interval' and 'pandas._libs.interval.Interval'
The above exception was the direct cause of the following exception:
SystemError Traceback (most recent call last)
pandas/_libs/interval.pyx in pandas._libs.interval.Interval.__add__()
~/opt/anaconda3/lib/python3.8/abc.py in __instancecheck__(cls, instance)
97 """Override for isinstance(instance, cls)."""
---> 98 return _abc_instancecheck(cls, instance)
99
SystemError: <built-in function _abc_instancecheck> returned a result with an error set
The above exception was the direct cause of the following exception:
SystemError Traceback (most recent call last)
pandas/_libs/interval.pyx in pandas._libs.interval.Interval.__add__()
~/opt/anaconda3/lib/python3.8/abc.py in __instancecheck__(cls, instance)
97 """Override for isinstance(instance, cls)."""
---> 98 return _abc_instancecheck(cls, instance)
99
SystemError: <built-in function _abc_instancecheck> returned a result with an error set
The above exception was the direct cause of the following exception:
SystemError Traceback (most recent call last)
<ipython-input-33-bbc580caf84c> in <module>
6
7 # Using the function
----> 8 df_Cross_Table=Find_Total_CrossTab(df_1, name_of_index)
9 df_Cross_Table
<ipython-input-32-188c1525f216> in Find_Total_CrossTab(df, Variable_name_index)
3 df.loc[:,'Total'] = df.sum(numeric_only=True, axis=1)
4 df=df.reset_index()
----> 5 df=df.append(df.sum(), ignore_index=True).set_index(Variable_name_index)
6 return df
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py in stat_func(self, axis, skipna, level, numeric_only, min_count, **kwargs)
11174 name, axis=axis, level=level, skipna=skipna, min_count=min_count
11175 )
> 11176 return self._reduce(
11177 f,
11178 name,
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in _reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
7911 values = self.values
7912 try:
-> 7913 result = f(values)
7914
7915 if filter_type == "bool" and is_object_dtype(values) and axis is None:
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in f(x)
7863
7864 def f(x):
-> 7865 return op(x, axis=axis, skipna=skipna, **kwds)
7866
7867 def _get_data(axis_matters):
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/nanops.py in _f(*args, **kwargs)
67 try:
68 with np.errstate(invalid="ignore"):
---> 69 return f(*args, **kwargs)
70 except ValueError as e:
71 # we want to transform an object array
~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/nanops.py in nansum(values, axis, skipna, min_count, mask)
491 elif is_timedelta64_dtype(dtype):
492 dtype_sum = np.float64
--> 493 the_sum = values.sum(axis, dtype=dtype_sum)
494 the_sum = _maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count)
495
~/opt/anaconda3/lib/python3.8/site-packages/numpy/core/_methods.py in _sum(a, axis, dtype, out, keepdims, initial, where)
36 def _sum(a, axis=None, dtype=None, out=None, keepdims=False,
37 initial=_NoValue, where=True):
---> 38 return umr_sum(a, axis, dtype, out, keepdims, initial, where)
39
40 def _prod(a, axis=None, dtype=None, out=None, keepdims=False,
pandas/_libs/interval.pyx in pandas._libs.interval.Interval.__add__()
~/opt/anaconda3/lib/python3.8/abc.py in __instancecheck__(cls, instance)
96 def __instancecheck__(cls, instance):
97 """Override for isinstance(instance, cls)."""
---> 98 return _abc_instancecheck(cls, instance)
99
100 def __subclasscheck__(cls, subclass):
SystemError: <built-in function _abc_instancecheck> returned a result with an error set
答案 0 :(得分:0)
我使用一些示例数据运行了代码,并且对我有用。看看我做了什么。您能否提供数据集样本并提及如何调用该函数?我假设Variable_name_index
应该是带有标题名称的列表。如果不能,您能否说出您的打算?
import pandas as pd
df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
'num_wings': [2, 0, 0, 0],
'num_specimen_seen': [10, 2, 1, 8]},
index=['falcon', 'dog', 'spider', 'fish'])
df
Out[1]
num_legs num_wings num_specimen_seen Total
falcon 2 2 10 14
dog 4 0 2 6
spider 8 0 1 9
fish 0 0 8 8
def Find_Total_CrossTab(df,Variable_name_index):
df.loc[:,'Total'] = df.sum(numeric_only=True, axis=1)
df=df.reset_index()
df=df.append(df.sum(), ignore_index=True).set_index(Variable_name_index)
return df
运行它后,将得到以下输出:
Find_Total_CrossTab(df, ['num_legs', 'num_wings'])
Out[2]:
index num_specimen_seen Total
num_legs num_wings
2 2 falcon 10 14
4 0 dog 2 6
8 0 spider 1 9
0 0 fish 8 8
14 2 falcondogspiderfish 21 37