from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np
# Toy data set: column 0 holds a categorical string, columns 1-2 hold integers.
X = [['male', 0, 3], ['male', 1, 0], ['female', 2, 1], ['female', 0, 2]]

# Encode each string category as an integer code.
# `np.int` was deprecated in NumPy 1.20 and removed in 1.24; it was only an
# alias for the builtin `int`, so `dtype=int` is the equivalent spelling.
sex_enc = OrdinalEncoder(dtype=int)

# One-hot encoding: one indicator column per category.
# The keyword that turns off sparse output was renamed between scikit-learn
# releases (`sparse` -> `sparse_output`), so the encoder keeps its default
# here and dense output is requested on the ColumnTransformer below instead.
one_hot_enc = OneHotEncoder(handle_unknown='ignore', dtype=int)

# NOTE: ColumnTransformer does NOT chain its transformers. Each one is applied
# independently to the columns it is given, and the results are concatenated
# side by side. Both transformers below receive column 0 only, so the output
# has 1 ordinal column + 2 one-hot columns = 3 columns. Columns 1 and 2 are
# not listed for any transformer and are dropped (default remainder='drop').
col_transformer = ColumnTransformer(
    transformers=[
        ('sex_enc', sex_enc, [0]),
        ('one_hot_enc', one_hot_enc, [0]),
    ],
    # 0 means "always return a dense array", regardless of what the
    # individual transformers produce.
    sparse_threshold=0,
)

# Fit the encoders and transform the same data in one step.
X_trans = col_transformer.fit_transform(X)
print(X_trans)
[[1 0 1]
 [1 0 1]
 [0 1 0]
 [0 1 0]]
feature0 的取值只有 male 和 female 两种，为什么使用 ColumnTransformer 之后一次性输出了三列（cols）？