我正在尝试根据数据确定模型的保形预测。但这给了我在icp.calibrate发生的以下错误:
Exception: Data must be 1-dimensional
下面您可以找到有关此问题的最新追溯错误。不幸的是,根据上面的代码,我不确定这实际上是什么。我为此使用了一个熊猫数据框。
代码:
from sklearn.tree import DecisionTreeRegressor
from nonconformist.cp import IcpRegressor
from nonconformist.base import RegressorAdapter
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, RegressorNormalizer, NcFactory
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# -----------------------------------------------------------------------------
# Setup training, calibration and test data
# -----------------------------------------------------------------------------
df = pd.read_csv ("prepared_data.csv")
# Initial split into train/test data
train = df.loc[df['split']== 'train']
valid = df.loc[df['split']== 'valid']
# Proper Validation Set (Split the Validation set into features and target)
X_valid = valid.drop(['expression'], axis = 1)
y_valid = valid.drop(columns = ['new_host', 'split', 'sequence'])
# Create Training Set (Split the Training set into features and target)
X_train = valid.drop(['expression'], axis = 1)
y_train = valid.drop(columns = ['new_host', 'split', 'sequence'])
# Split Training set into further training set and calibration set
X_train, X_cal, y_train, y_cal = train_test_split(X_train, y_train, test_size =0.2)
# -----------------------------------------------------------------------------
# Train and calibrate underlying model
# -----------------------------------------------------------------------------
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
print("Underlying model loaded")
model = RegressorAdapter(underlying_model)
nc = RegressorNc(model, AbsErrorErrFunc())
print("Nonconformity Function Applied")
icp = IcpRegressor(nc) # Create an inductive conformal Regressor
print("ICP Regressor Created")
#Dataset Review
print('{} instances, {} features, {} classes'.format(y_train.size,
X_train.shape[1],
np.unique(y_train).size))
icp.fit(X_train, y_train)
icp.calibrate(X_cal, y_cal)
#Example数据框
new_host split sequence expression
FALSE train AQVPYGVS 0.039267878
FALSE train ASVPYGVSI 0.039267878
FALSE train STNLYGSGR 0.261456561
FALSE valid NLYGSGLVR 0.265188519
FALSE valid SLGPSNLYG 0.419680588
FALSE valid ATSLGTTNG 0.145710993
我曾尝试以多种方式拆分数据集,但与此同时,我仍然遇到麻烦。在这种情况下,我想根据观察值的“数据拆分”值将数据拆分为训练集和测试集。之后,我将在第二步中将火车设置为火车和校准。我的功能X_train和我的目标y_train
#Traceback错误
Traceback (most recent call last)
<ipython-input-68-083e5dd0b0b6> in <module>
4 print(type(y_cal))
5 print(y_cal.index)
----> 6 icp.calibrate(X_cal, y_cal)
7 print("ICP Calibrated")
~/.local/lib/python3.8/site-packages/nonconformist/icp.py in calibrate(self, x, y, increment)
102 else:
103 self.categories = np.array([0])
--> 104 cal_scores = self.nc_function.score(self.cal_x, self.cal_y)
105 self.cal_scores = {0: np.sort(cal_scores)[::-1]}
106
~/.local/lib/python3.8/site-packages/nonconformist/nc.py in score(self, x, y)
370 norm = np.ones(n_test)
371
--> 372 return self.err_func.apply(prediction, y) / norm
373
374
~/.local/lib/python3.8/site-packages/nonconformist/nc.py in apply(self, prediction, y)
156
157 def apply(self, prediction, y):
--> 158 return np.abs(prediction - y)
159
160 def apply_inverse(self, nc, significance):
~/.local/lib/python3.8/site-packages/pandas/core/series.py in __array_ufunc__(self, ufunc, method, *inputs, **kwargs)
633
634 # for binary ops, use our custom dunder methods
--> 635 result = ops.maybe_dispatch_ufunc_to_dunder_op(
636 self, ufunc, method, *inputs, **kwargs
637 )
pandas/_libs/ops_dispatch.pyx in pandas._libs.ops_dispatch.maybe_dispatch_ufunc_to_dunder_op()
~/.local/lib/python3.8/site-packages/pandas/core/ops/common.py in new_method(self, other)
62 other = item_from_zerodim(other)
63
---> 64 return method(self, other)
65
66 return new_method
~/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py in wrapper(left, right)
503 result = arithmetic_op(lvalues, rvalues, op, str_rep)
504
--> 505 return _construct_result(left, result, index=left.index, name=res_name)
506
507 wrapper.__name__ = op_name
~/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py in _construct_result(left, result, index, name)
476 # We do not pass dtype to ensure that the Series constructor
477 # does inference in the case where `result` has object-dtype.
--> 478 out = left._constructor(result, index=index)
479 out = out.__finalize__(left)
480
~/.local/lib/python3.8/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
303 data = data.copy()
304 else:
--> 305 data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
306
307 data = SingleBlockManager(data, index, fastpath=True)
~/.local/lib/python3.8/site-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
480 elif subarr.ndim > 1:
481 if isinstance(data, np.ndarray):
--> 482 raise Exception("Data must be 1-dimensional")
483 else:
484 subarr = com.asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
答案 0 :(得分:1)
pandas.DataFrame.drop()返回本质上是二维的pandas.DataFrame对象。因此,当您分配y_train = valid.drop()
时,您仍然拥有一个二维数组(尽管仅包含1列)。另一方面,pandas.Series对象是一维的,您可以通过引用特定的列来获得pandas.Series(即valid['expression']
将返回一维的pandas.Series)。
将y_train = valid.drop()
更改为y_train = valid['expression']
,应该没问题。
另外,仅供参考,您正在使用 valid DataFrame作为X_train,y_train(我想您可能想使用 train DataFrame)