Question

尝试使用Sci-Kit Learn和Tensorflow完成动手ML的第2章中的项目时，在构建模型之前尝试通过管道运行数据时遇到类型错误。

我不断收到TypeError告诉我fit_transform（）接受2个位置参数，但给出了3个。不确定我做错了什么，因为我会尽我最大的努力进行练习。请告知，让我知道，或者在我尝试遵循产生错误的最小代码量时，是否需要更多信息。感谢您可能提供的见解。

代码如下

#Load the Data
import os
import tarfile
from six.moves import urllib
download_root = "https://raw.githubusercontent.com/ageron/handson-ml/master/"
housing_path = os.path.join('datasets', 'housing')
housing_url = download_root + "datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=housing_url, housing_path = housing_path):
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, 'housing.tgz')
    urllib.request.urlretrieve(housing_url, tgz_path)
    housing_tgz = tarfile.open(tgz_path)
    housing_tgz.extractall(path=housing_path)
    housing_tgz.close()

# Pipeline Build
from sklearn.preprocessing import Imputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelBinarizer

from sklearn.base import BaseEstimator, TransformerMixin

class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names
        def fit(self, x, y=None):
            return self
        def transform(self, x):
            return x[self.attribute_names].values

from sklearn.base import BaseEstimator, TransformerMixin

rooms_ix, bedrooms_ix, population_ix, household_ix = 3, 4, 5, 6

class CombinedAttributesAdder(BaseEstimator, TransformerMixin):
    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        rooms_per_house = X[:, rooms_ix] / X[:, household_ix]
        pop_per_house = X[:, population_ix] / X[:, household_ix]
        if self.add_bedrooms_per_room:
            bedrooms_per_room = X[:, bedrooms_ix]/X[:, rooms_ix]
            return np.c_[X, rooms_per_house, pop_per_house,
                        bedrooms_per_room]
        else:
            return np.c_[X, rooms_per_house, pop_per_house]

num_attribs = list(house_num)
cat_attribs = ['ocean_proximity']

num_pipeline = Pipeline([
    ('selector', DataFrameSelector(num_attribs)),
    ('imputer', Imputer(strategy='median')),
    ('attribs_adder', CombinedAttributesAdder()),
    ('std_scaler', StandardScaler())
])

cat_pipeline = Pipeline([
    ('selector', DataFrameSelector(cat_attribs)),
    ('label_binarizer', LabelBinarizer())
])

from sklearn.compose import ColumnTransformer

full_pipeline = ColumnTransformer([
    ('num_pipeline', num_pipeline, num_attribs),
    ('cat_pipeline', cat_pipeline, cat_attribs),
])

Type Error encountered is as follows
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-50-925e65d2e69a> in <module>
----> 1 house_prep = full_pipeline.fit_transform(house)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
    474         self._validate_remainder(X)
    475 
--> 476         result = self._fit_transform(X, y, _fit_transform_one)
    477 
    478         if not result:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
    418                     message=self._log_message(name, idx, len(transformers)))
    419                 for idx, (name, trans, column, weight) in enumerate(
--> 420                         self._iter(fitted=fitted, replace_strings=True), 1))
    421         except ValueError as e:
    422             if "Expected 2D array, got 1D array instead" in str(e):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
    922                 self._iterating = self._original_iterator is not None
    923 
--> 924             while self.dispatch_one_batch(iterator):
    925                 pass
    926 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    714     with _print_elapsed_time(message_clsname, message):
    715         if hasattr(transformer, 'fit_transform'):
--> 716             res = transformer.fit_transform(X, y, **fit_params)
    717         else:
    718             res = transformer.fit(X, y, **fit_params).transform(X)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
    391                 return Xt
    392             if hasattr(last_step, 'fit_transform'):
--> 393                 return last_step.fit_transform(Xt, y, **fit_params)
    394             else:
    395                 return last_step.fit(Xt, y, **fit_params).transform(Xt)

TypeError: fit_transform() takes 2 positional arguments but 3 were given

TypeError，使用Sci-Kit进行ML学习回归

0 个答案: