我想训练一个模型来预测一个人的性别。我有两个特征 'name' 和 'randint',它们分别来自 pandas 数据框(DataFrame)的不同列。我想:1)通过管道(Pipeline)/特征联合(FeatureUnion)把它们组合在一起;2)把预测得到的标签添加回原始的 pandas 数据框中。但是在实现第 1)个目标时,我遇到了下面的错误:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegressionCV
from sklearn.cross_validation import train_test_split
from sklearn.base import TransformerMixin
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import FeatureUnion
import numpy as np
# Baseline pipeline: token counts fed into a cross-validated logistic
# regression (cv=2).
# NOTE(review): `clf` is reassigned further down before anything is fitted,
# so this first pipeline appears to be unused.
clf = make_pipeline(CountVectorizer(), LogisticRegressionCV(cv=2))
# Toy mapping of full name -> gender label.
# Several keys are repeated in the literal (e.g. 'Bruce Willis',
# 'Sarah Palin'); a dict keeps a single entry per key, so the 34 lines below
# collapse to 24 distinct names.
data = {
'Bruce Lee': 'Male',
'Bruce Banner': 'Male',
'Peter Parker': 'Male',
'Peter Poker': 'Male',
'Peter Springsteen': 'Male',
'Bruce Willis': 'Male',
'Sarah McLaughlin': 'Female',
'Sarah Silverman': 'Female',
'Sarah Palin': 'Female',
'Sarah Hyland': 'Female',
'Bruce Li': 'Male',
'Bruce Milk': 'Male',
'Bruce Springsteen': 'Male',
'Bruce Willis': 'Male',
'Sally Juice': 'Female',
'Sarah Silverwoman': 'Female',
'Sarah Palin': 'Female',
'Sarah Hyland': 'Female',
'Bruce Paul': 'Male',
'Bruce Lame': 'Male',
'Bruce Springsteen': 'Male',
'Bruce Willis': 'Male',
'Sarah Willis': 'Female',
'Sarah Goldman': 'Female',
'Sarah Palin': 'Female',
'Sally Hyland': 'Female',
'Bruce McDonald': 'Male',
'Bruce Lane': 'Male',
'Peter Springsteen': 'Male',
'Bruce Willis': 'Male',
'Sarah McLaughlin': 'Female',
'Sarah Goldwoman': 'Female',
'Sarah Palin': 'Female',
'Sarah Hylie': 'Female'
}
# Build one row per dict entry: the keys start out as the index and are moved
# into a regular column by reset_index(); the two columns are then renamed.
df = pd.DataFrame.from_dict(data, orient='index').reset_index()
df.columns = ['name', 'gender']
# Add an integer feature drawn at random from 1..5, one value per row.
# No seed is set in the visible code, so the values differ between runs.
df['randomInt'] = np.random.choice(range(1, 6), df.shape[0])
class ExtractNames(TransformerMixin):
    """Turn full-name strings into first/last-name feature dicts.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    only depends on its input. The dicts it produces are in the shape that
    ``DictVectorizer`` consumes.
    """

    def transform(self, X, *args):
        """Return one ``{'first': ..., 'last': ...}`` dict per name in *X*.

        *X* is any iterable of strings. Each name is split on whitespace;
        the first token becomes ``'first'`` and the last token ``'last'``
        (for a single-word name both are the same token). A blank name
        yields empty strings instead of raising ``IndexError``.
        Extra positional arguments are accepted and ignored.
        """
        features = []
        for name in X:
            parts = name.split()  # split once, reuse for both fields
            if parts:
                features.append({'first': parts[0], 'last': parts[-1]})
            else:
                # Nothing to extract from an empty/whitespace-only name.
                features.append({'first': '', 'last': ''})
        return features

    def fit(self, *args):
        """Do nothing and return ``self`` so calls can be chained."""
        return self
class ExtractRandInt(TransformerMixin):
    """Wrap each numeric value in a single-key feature dict.

    The transformer is stateless: ``fit`` learns nothing. The dicts it
    produces are in the shape that ``DictVectorizer`` consumes.
    """

    def transform(self, X2, *args):
        """Return one ``{'randInt': value}`` dict per element of *X2*.

        *X2* is any iterable; its elements are passed through unchanged as
        the dict values. Extra positional arguments are accepted and ignored.
        """
        return [{'randInt': num} for num in X2]

    def fit(self, *args):
        """Do nothing and return ``self`` so calls can be chained."""
        return self
# Instantiate the two dict-producing extractors defined above.
trans = ExtractNames()
trans2 = ExtractRandInt()
# NOTE(review): FeatureUnion is documented to take a list of
# (name, transformer) tuples; a list of bare transformer objects is passed
# here -- confirm against the installed scikit-learn version.
Combined = FeatureUnion([trans, trans2])
# `Combined` is already a FeatureUnion instance, so `Combined()` tries to call
# the instance itself. This is the statement the traceback below reports as
# "TypeError: 'FeatureUnion' object is not callable".
clf = make_pipeline(Combined(), DictVectorizer(), LogisticRegressionCV())
# Split the DataFrame into two halves using a fixed random seed.
# NOTE(review): train_test_split is imported from sklearn.cross_validation at
# the top of this snippet; newer scikit-learn releases provide it in
# sklearn.model_selection instead -- verify for the version in use.
df_train, df_test = train_test_split(df, train_size=0.5, random_state=68)
# NOTE(review): three positional arguments are passed (two feature columns and
# the labels), whereas Pipeline.fit is documented as fit(X, y) -- the two
# feature columns presumably need to be passed together as one X.
clf.fit(df_train['name'], df_train['randomInt'], df_train['gender'])
错误:
Traceback (most recent call last):
File "C:\Users\KubiK\Desktop\test5.py", line 74, in <module>
clf = make_pipeline(Combined(), DictVectorizer(), LogisticRegressionCV())
TypeError: 'FeatureUnion' object is not callable
答案 0 :(得分:0)
你不能对 Combined 对象使用 ()。对类可以这样调用,因为那是在调用类的构造函数;但 Combined 已经是一个 FeatureUnion 实例,而这个实例没有定义 __call__ 方法,
所以应该直接传入 Combined 本身,该行必须改为:
# Pass the FeatureUnion *instance* itself: `Combined()` tried to call an
# object that defines no __call__, which is what raised the TypeError.
#
# Removing the parentheses alone is not enough for the snippet to run, so the
# remaining problems are fixed here as well:
#   * FeatureUnion expects a list of (name, transformer) tuples;
#   * every branch of a FeatureUnion receives the same X, so each branch must
#     first pick its own column out of the DataFrame;
#   * the outputs of the branches are stacked side by side, so each branch
#     has to end in DictVectorizer (lists of dicts cannot be stacked);
#   * Pipeline.fit takes (X, y): both feature columns travel together as X.
class SelectColumn(TransformerMixin):
    """Pick a single column out of the DataFrame handed to the pipeline."""

    def __init__(self, column):
        self.column = column

    def fit(self, *args):
        return self

    def transform(self, X, *args):
        return X[self.column]


Combined = FeatureUnion([
    ('name', make_pipeline(SelectColumn('name'),
                           ExtractNames(),
                           DictVectorizer())),
    ('randint', make_pipeline(SelectColumn('randomInt'),
                              ExtractRandInt(),
                              DictVectorizer())),
])
clf = make_pipeline(Combined, LogisticRegressionCV())

# Replaces the question's `clf.fit(...)` call; df_train is the training half
# produced by the train_test_split call in the question.
clf.fit(df_train[['name', 'randomInt']], df_train['gender'])

# Second goal of the question: write the predicted labels back into the
# original DataFrame.
df['predicted'] = clf.predict(df[['name', 'randomInt']])