
时间:2020-06-23 13:29:02

标签: python pandas machine-learning scikit-learn


Exception: Data must be 1-dimensional



from sklearn.tree import DecisionTreeRegressor
from nonconformist.cp import IcpRegressor
from nonconformist.base import RegressorAdapter
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, RegressorNormalizer, NcFactory
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# -----------------------------------------------------------------------------
# Setup training, calibration and test data
# -----------------------------------------------------------------------------
# Load the prepared dataset from the current working directory.
df = pd.read_csv ("prepared_data.csv")

# Initial split into train/test data, driven by the value of the 'split' column.
train = df.loc[df['split']== 'train']
valid = df.loc[df['split']== 'valid']

# Proper Validation Set (Split the Validation set into features and target)
# NOTE(review): DataFrame.drop returns a DataFrame, so y_valid stays
# two-dimensional even if only the 'expression' column is left; selecting
# valid['expression'] would yield a one-dimensional Series instead.
X_valid = valid.drop(['expression'], axis = 1)
y_valid = valid.drop(columns = ['new_host', 'split', 'sequence'])

# Create Training Set (Split the Training set into features and target)
# NOTE(review): both frames below are built from `valid`, not `train`, so the
# `train` subset selected above is never used -- presumably `train` was
# intended here; confirm.
# NOTE(review): y_train is a DataFrame for the same reason as y_valid above.
# This appears to be what triggers "Data must be 1-dimensional" inside
# icp.calibrate (prediction - y with a 2-D y) -- see the traceback that
# accompanies this script.
X_train = valid.drop(['expression'], axis = 1)
y_train = valid.drop(columns = ['new_host', 'split', 'sequence'])

# Split Training set into further training set and calibration set
# (test_size=0.2: the held-out 20% becomes X_cal / y_cal; no random_state is
# passed, so the partition differs between runs).
X_train, X_cal, y_train, y_cal = train_test_split(X_train, y_train, test_size =0.2)

# -----------------------------------------------------------------------------
# Train and calibrate underlying model
# -----------------------------------------------------------------------------
# Wrap a decision-tree regressor so it can be used by nonconformist.
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
print("Underlying model loaded")
# NOTE(review): underlying_model is already a RegressorAdapter, so this wraps
# an adapter in a second adapter -- looks redundant; confirm whether
# RegressorNc should receive underlying_model directly.
model = RegressorAdapter(underlying_model)
# Nonconformity scorer based on the absolute prediction error.
nc = RegressorNc(model, AbsErrorErrFunc())

print("Nonconformity Function Applied")
icp = IcpRegressor(nc)  # Create an inductive conformal Regressor
print("ICP Regressor Created")

#Dataset Review
# NOTE(review): the statement below is truncated in this listing -- the
# .format( call is never closed and two of its three arguments are missing --
# so the script does not parse as shown.
print('{} instances, {} features, {} classes'.format(y_train.size,

icp.fit(X_train, y_train)
icp.calibrate(X_cal, y_cal)


new_host  split     sequence    expression
FALSE     train     AQVPYGVS    0.039267878
FALSE     train     ASVPYGVSI   0.039267878
FALSE     train     STNLYGSGR   0.261456561
FALSE     valid     NLYGSGLVR   0.265188519
FALSE     valid     SLGPSNLYG   0.419680588
FALSE     valid     ATSLGTTNG   0.145710993



Traceback (most recent call last)
<ipython-input-68-083e5dd0b0b6> in <module>
      4 print(type(y_cal))
      5 print(y_cal.index)
----> 6 icp.calibrate(X_cal, y_cal)
      7 print("ICP Calibrated")

~/.local/lib/python3.8/site-packages/nonconformist/icp.py in calibrate(self, x, y, increment)
    102                 else:
    103                         self.categories = np.array([0])
--> 104                         cal_scores = self.nc_function.score(self.cal_x, self.cal_y)
    105                         self.cal_scores = {0: np.sort(cal_scores)[::-1]}

~/.local/lib/python3.8/site-packages/nonconformist/nc.py in score(self, x, y)
    370                         norm = np.ones(n_test)
--> 372                 return self.err_func.apply(prediction, y) / norm

~/.local/lib/python3.8/site-packages/nonconformist/nc.py in apply(self, prediction, y)
    157         def apply(self, prediction, y):
--> 158                 return np.abs(prediction - y)
    160         def apply_inverse(self, nc, significance):

~/.local/lib/python3.8/site-packages/pandas/core/series.py in __array_ufunc__(self, ufunc, method, *inputs, **kwargs)
    634         # for binary ops, use our custom dunder methods
--> 635         result = ops.maybe_dispatch_ufunc_to_dunder_op(
    636             self, ufunc, method, *inputs, **kwargs
    637         )

pandas/_libs/ops_dispatch.pyx in pandas._libs.ops_dispatch.maybe_dispatch_ufunc_to_dunder_op()

~/.local/lib/python3.8/site-packages/pandas/core/ops/common.py in new_method(self, other)
     62         other = item_from_zerodim(other)
---> 64         return method(self, other)
     66     return new_method

~/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py in wrapper(left, right)
    503         result = arithmetic_op(lvalues, rvalues, op, str_rep)
--> 505         return _construct_result(left, result, index=left.index, name=res_name)
    507     wrapper.__name__ = op_name

~/.local/lib/python3.8/site-packages/pandas/core/ops/__init__.py in _construct_result(left, result, index, name)
    476     # We do not pass dtype to ensure that the Series constructor
    477     #  does inference in the case where `result` has object-dtype.
--> 478     out = left._constructor(result, index=index)
    479     out = out.__finalize__(left)

~/.local/lib/python3.8/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    303                     data = data.copy()
    304             else:
--> 305                 data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
    307                 data = SingleBlockManager(data, index, fastpath=True)

~/.local/lib/python3.8/site-packages/pandas/core/construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
    480     elif subarr.ndim > 1:
    481         if isinstance(data, np.ndarray):
--> 482             raise Exception("Data must be 1-dimensional")
    483         else:
    484             subarr = com.asarray_tuplesafe(data, dtype=dtype)

Exception: Data must be 1-dimensional

1 个答案:

答案 0 :(得分:1)

pandas.DataFrame.drop() 返回的是 pandas.DataFrame 对象,而 DataFrame 本质上是二维的。因此,当您写 y_train = valid.drop(...) 时,得到的仍然是一个二维对象(尽管它只包含 1 列)。相比之下,pandas.Series 对象是一维的;通过引用某一列即可得到 pandas.Series(例如 valid['expression'] 会返回一维的 pandas.Series)。

将 y_train = valid.drop(...) 更改为 y_train = valid['expression'],应该就没问题了。

另外,仅供参考:您目前是用 valid DataFrame 来构造 X_train 和 y_train 的(我想您本意是使用 train DataFrame)。