我的意图是对categorical
列表中的功能进行1-热编码。我的代码的问题在于,它对以前的迭代中已编码的功能保持1热编码。如何防止这种情况发生?
import pandas as pd
import numpy as np
data = {
'apples': [3, 2, 0, np.nan, 2],
'oranges': [0, 7, 7, 2, 7],
'figs':[1, np.nan, 10, np.nan, 10],
'key-customer':['N','Y','Y','N','N'],
'rating':['L','L','H','L','M'],
'frequent-cust':['Y', 'N', 'N', 'N', 'Y']
}
purchases = pd.DataFrame(data)
purchases = pd.DataFrame(data, index=['June', 'Robert', 'Lily', 'David', 'Bob'])
print(purchases)
#one-hot-encode just the features in a given subset
categorical = ['rating', 'key-customer']
for item in categorical:
d = pd.get_dummies(purchases[item], prefix=item)
purchases = pd.concat([purchases, d], axis=1)
purchases.drop(columns=item, inplace=True)
print(purchases)
答案 0 :(得分:1)
我会加入并加入:
for item in categorical:
d = pd.get_dummies(purchases[item], prefix=item)
purchases = purchases.drop(item, axis=1).join(d)
print(purchases)
输出:
apples oranges figs frequent-cust rating_H rating_L rating_M \
June 3.0 0 1.0 Y 0 1 0
Robert 2.0 7 NaN N 0 1 0
Lily 0.0 7 10.0 N 1 0 0
David NaN 2 NaN N 0 1 0
Bob 2.0 7 10.0 Y 0 0 1
key-customer_N key-customer_Y
June 1 0
Robert 0 1
Lily 0 1
David 1 0
Bob 1 0