Question

我为 FeatureUnion（特征联合）定义了一个类。Python 2.7 报错："AttributeError: 'module' object has no attribute 'TextTransformer'"。这段代码可以在 Kaggle 平台上运行，但在我本地的 IPython 中无法运行。

from sklearn.base import BaseEstimator, TransformerMixin
class TextTransformer(BaseEstimator, TransformerMixin):
    """Pick one entry out of the input by key and stringify its values.

    ``key`` is stored on the instance by the constructor; ``transform`` uses
    it to index the data it receives and applies ``str`` to the result.
    """

    def __init__(self, key):
        # Key used by transform() to index into the incoming data.
        self.key = key

    def fit(self, x, y=None):
        """Learn nothing; return this instance unchanged."""
        return self

    def transform(self, data_dict):
        """Return ``data_dict[self.key]`` after calling ``.apply(str)`` on it.

        NOTE(review): presumably ``data_dict`` is a pandas DataFrame and the
        selected entry is a Series -- confirm against the caller.
        """
        selected = data_dict[self.key]
        return selected.apply(str)

# Final regressor plus the text-vectorising steps referenced by the branches.
rfr = RandomForestRegressor()
tfidf = TfidfVectorizer()
tsvd = TruncatedSVD(n_components=10)

# One sub-pipeline per text column: select the column, run it through
# `tfidf`, then through `tsvd`.  All four branches reference the very same
# `tfidf` and `tsvd` objects created above.
branch_search_term = pipeline.Pipeline([
    ('s1', TextTransformer(key='search_term')),
    ('tfidf1', tfidf),
    ('tsvd1', tsvd),
])
branch_product_title = pipeline.Pipeline([
    ('s2', TextTransformer(key='product_title')),
    ('tfidf2', tfidf),
    ('tsvd2', tsvd),
])
branch_product_description = pipeline.Pipeline([
    ('s3', TextTransformer(key='product_description')),
    ('tfidf3', tfidf),
    ('tsvd3', tsvd),
])
branch_brand = pipeline.Pipeline([
    ('s4', TextTransformer(key='brand')),
    ('tfidf4', tfidf),
    ('tsvd4', tsvd),
])

# Combine the four branches, each with its own weight.
text_union = FeatureUnion(
    transformer_list=[
        ('txt1', branch_search_term),
        ('txt2', branch_product_title),
        ('txt3', branch_product_description),
        ('txt4', branch_brand),
    ],
    transformer_weights={
        'txt1': 0.5,
        'txt2': 0.25,
        'txt3': 0.25,
        'txt4': 0.5,
    },
    n_jobs=-1
)

# Full model: combined text features feeding the random forest.
clf = pipeline.Pipeline([('union', text_union), ('rfr', rfr)])

# The grid holds a single candidate value for each random-forest parameter.
param_grid = {'rfr__max_features': [10], 'rfr__max_depth': [20]}
model = grid_search.GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    n_jobs=-1,
    cv=10
)
model.fit(X_train, y_train)

Answer 1

你可能忘记了一些导入。试试这个，它对我有用。

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction import *
from sklearn.feature_extraction.text import *
from sklearn.decomposition import *
from sklearn.pipeline import *
from sklearn.grid_search import *

class TextTransformer(TransformerMixin, BaseEstimator):
    """Pick one entry out of the input by key and stringify its values.

    Inherits ``BaseEstimator`` in addition to ``TransformerMixin`` so that
    the class provides ``get_params``/``set_params``.  Composite estimators
    and parameter searches call these on their steps; without them some
    scikit-learn releases raise ``AttributeError`` when this transformer is
    nested inside a pipeline that is being searched.  The mixin is listed
    first, which is the base-class order scikit-learn recommends.

    Parameters
    ----------
    key
        Key used by ``transform`` to index into the data it receives.
    """

    def __init__(self, key):
        # Stored under the same name as the constructor argument so that
        # BaseEstimator.get_params() can read it back.
        self.key = key

    def fit(self, x, y=None):
        """Learn nothing; return this instance unchanged."""
        return self

    def transform(self, data_dict):
        """Return ``data_dict[self.key]`` after calling ``.apply(str)`` on it.

        NOTE(review): presumably ``data_dict`` is a pandas DataFrame and the
        selected entry is a Series -- confirm against the caller.
        """
        return data_dict[self.key].apply(str)

# Final regressor plus the text-vectorising steps referenced by the branches.
rfr = RandomForestRegressor()
tfidf = TfidfVectorizer()
tsvd = TruncatedSVD(n_components=10)

# (column, weight) for each text branch, in branch order txt1..txt4.
branch_specs = [
    ('search_term', 0.5),
    ('product_title', 0.25),
    ('product_description', 0.25),
    ('brand', 0.5),
]

# Build one sub-pipeline per column: select the column, run it through
# `tfidf`, then through `tsvd`.  Every branch references the same `tfidf`
# and `tsvd` objects created above.
branch_list = []
branch_weights = {}
for position, (column, weight) in enumerate(branch_specs, 1):
    suffix = str(position)
    branch = Pipeline([
        ('s' + suffix, TextTransformer(key=column)),
        ('tfidf' + suffix, tfidf),
        ('tsvd' + suffix, tsvd),
    ])
    branch_list.append(('txt' + suffix, branch))
    branch_weights['txt' + suffix] = weight

text_union = FeatureUnion(
    transformer_list=branch_list,
    transformer_weights=branch_weights,
    n_jobs=-1
)

# Full model: combined text features feeding the random forest.
clf = Pipeline([('union', text_union), ('rfr', rfr)])

# The grid holds a single candidate value for each random-forest parameter.
param_grid = {'rfr__max_features': [10], 'rfr__max_depth': [20]}
model = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    n_jobs=-1,
    cv=10
)
model.fit(X_train, y_train)

运行自定义函数时显示 "AttributeError: 'module' object has no attribute"

1 个答案: