我有三个特征:
feature_one -> number of tokens in the given sentence.
feature_two -> number of verbs in the given sentence.
feature_three -> number of tokens - number of verbs in the given sentence.
(feature_one - feature_two)
我已经为 feature_one 和 feature_two 编写了自定义转换器,现在希望为 feature_three 也编写一个自定义转换器,使它能够通过运行下面的管道来使用 feature_one 和 feature_two 的结果:
# Both feature_one and feature_two take the list of sentences as input;
# feature_three then receives the combined output of the FeatureUnion.
Pipeline(steps=[
    ("feature", FeatureUnion(transformer_list=[
        ("feature_one", feature_one_transformer()),
        ("feature_two", feature_two_transformer()),
    ])),
    ("feature_three", feature_three_transformer()),
])
feature_one_transformer:
class feature_one_transformer(BaseEstimator, TransformerMixin):
    """Transformer that counts the tokens in each sentence.

    The per-sentence count is delegated to ``compute_number_of_tokens``,
    which is not defined in this snippet and must be in scope.
    """

    def __init__(self):
        pass

    def fit(self, x, y=None):
        """Return ``self`` unchanged; this transformer learns nothing.

        ``y`` defaults to ``None`` so the transformer can also be fitted
        without targets.
        """
        return self

    def transform(self, sentence_list):
        """Return a DataFrame of token counts, one row per sentence.

        Args:
            sentence_list: Iterable of sentences.

        Returns:
            ``pandas.DataFrame`` built from the list of per-sentence counts.
        """
        number_of_tokens_in_sentence_list = [
            compute_number_of_tokens(sentence) for sentence in sentence_list
        ]
        return pandas.DataFrame(number_of_tokens_in_sentence_list)
feature_two_transformer:
class feature_two_transformer(BaseEstimator, TransformerMixin):
    """Transformer that counts the verbs in each sentence.

    The per-sentence count is delegated to
    ``compute_number_of_verbs_in_sentence``, which is not defined in this
    snippet and must be in scope.
    """

    def __init__(self):
        pass

    def fit(self, x, y=None):
        """Return ``self`` unchanged; this transformer learns nothing.

        ``y`` defaults to ``None`` so the transformer can also be fitted
        without targets.
        """
        return self

    def transform(self, sentence_list):
        """Return a DataFrame of verb counts, one row per sentence.

        Args:
            sentence_list: Iterable of sentences.

        Returns:
            ``pandas.DataFrame`` built from the list of per-sentence counts.
        """
        number_of_verbs_in_sentence_list = [
            compute_number_of_verbs_in_sentence(sentence)
            for sentence in sentence_list
        ]
        return pandas.DataFrame(number_of_verbs_in_sentence_list)
有人可以告诉我如何为 feature_three 编写自定义转换器,以及如何在管道中使用它,以便我可以使用 feature_one 和 feature_two 转换器的结果吗?谢谢。
答案 0 :(得分:1)
我不清楚你为什么要弄得这么复杂。我只会使用一个转换器来完成你想要的一切。像这样:
class features_transformer(BaseEstimator, TransformerMixin):
    """Add token count, verb count and their difference as new columns.

    The counts are delegated to ``compute_number_of_tokens`` and
    ``compute_number_of_verbs``, which are not defined in this snippet and
    must be in scope.

    Args:
        variable: Name of the column in ``X`` that holds the sentences.
    """

    def __init__(self, variable):
        self.variable = variable

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing.

        ``y`` is accepted (and ignored) so the transformer also works when
        it is fitted together with targets, e.g. inside a Pipeline.
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with three feature columns added.

        Args:
            X: Table indexed by column name whose ``self.variable`` column
                supports ``.apply`` (a pandas DataFrame in the usage shown).

        Returns:
            A copy of ``X`` with the columns ``number_of_tokens``,
            ``number_of_verbs`` and ``tokens_minus_verbs`` added.
        """
        # Work on a copy so the caller's DataFrame is not modified in place.
        X = X.copy()
        X['number_of_tokens'] = X[self.variable].apply(compute_number_of_tokens)
        X['number_of_verbs'] = X[self.variable].apply(compute_number_of_verbs)
        X['tokens_minus_verbs'] = X['number_of_tokens'] - X['number_of_verbs']
        return X
# Fit on X and compute the feature columns from its 'sentences' column.
new_X = features_transformer(variable='sentences').fit_transform(X)