我在Pipelines流程中的predict
期间遇到问题,每个管道步骤都有自定义类。
class MyFeatureSelector():
def __init__(self, features=5, method='pca'):
self.features = features
self.method = method
def fit(self, X, Y):
return self
def transform(self, X, Y=None):
try:
if self.features < X.shape[1]:
if self.method == 'pca':
selector = PCA(n_components=self.features)
elif self.method == 'rfe':
selector = RFE(estimator=LinearRegression(n_jobs=-1),
n_features_to_select=self.features,
step=1)
selector.fit(X, Y)
return selector.transform(X)
except Exception as err:
print('MyFeatureSelector.transform(): {}'.format(err))
return X
def fit_transform(self, X, Y=None):
self.fit(X, Y)
return self.transform(X, Y)
model = Pipeline([
("DATA_CLEANER", MyDataCleaner(demo='', mode='strict')),
("DATA_ENCODING", MyEncoder(encoder_name='code')),
("FEATURE_SELECTION", MyFeatureSelector(features=15, method='rfe')),
("HUBER_MODELLING", HuberRegressor())
])
因此,上面的代码在这里非常有用:
model.fit(X, _Y)
但我在这里有错误
prediction = model.predict(XT)
错误:形状(672,107)和(15)未对齐:107(暗淡1)!= 15(暗淡 0)
调试在此处显示此问题:selector.fit(X, Y)
,因为在MyFeatureSelector
步骤中创建了predict()
的新实例,此时Y
不存在。
我哪里错了?
答案 0 :(得分:2)
下面发布的工作版本:
class MyFeatureSelector():
def __init__(self, features=5, method='pca'):
self.features = features
self.method = method
self.selector = None
self.init_selector()
def init_selector():
if self.method == 'pca':
self.selector = PCA(n_components=self.features)
elif self.method == 'rfe':
self.selector = RFE(estimator=LinearRegression(n_jobs=-1),
n_features_to_select=self.features,
step=1)
def fit(self, X, Y):
return self
def transform(self, X, Y=None):
try:
if self.features < X.shape[1]:
if Y is not None:
self.selector.fit(X, Y)
return selector.transform(X)
except Exception as err:
print('MyFeatureSelector.transform(): {}'.format(err))
return X
def fit_transform(self, X, Y=None):
self.fit(X, Y)
return self.transform(X, Y)