在 pandas 中按行替换值

时间:2017-11-24 10:18:43

标签: python-3.x pandas

我编写了一个函数,用于对经过哑变量(dummy)编码的属性数据进行规范化。如果某一行中 1 的个数大于 0 的个数,我希望把该行的值互换:0 变为 1,1 变为 0:

def dummy_data(data, columns):
    """One-hot encode ``columns`` and swap 0/1 in rows where ones outnumber zeros.

    Each column named in ``columns`` is replaced by its dummy (indicator)
    columns, prefixed with the original column name and appended at the end
    of the frame. Afterwards, every row that holds more values equal to 1
    than values equal to 0 has its 0s and 1s exchanged.

    Note that both the counting and the swap look at every column of the
    resulting frame, so a non-encoded column that happens to hold 0 or 1
    takes part as well.

    Args:
        data: Input ``pandas.DataFrame``; it is not modified.
        columns: Iterable of column labels to encode.

    Returns:
        A new ``pandas.DataFrame`` with the encoded (and possibly swapped)
        values; the row index of ``data`` is preserved.
    """
    for column in columns:
        # Ask for integer dummies so the result really contains 0/1 values
        # regardless of the default dummy dtype of the installed pandas.
        dummies = pd.get_dummies(data[column], prefix=column, dtype=int)
        data = pd.concat([data, dummies], axis=1).drop(column, axis=1)

    n_zero = (data == 0).sum(axis=1)
    n_uno = (data == 1).sum(axis=1)

    # Boolean row mask, aligned on the index rather than on positions, so
    # frames with a non-default index are handled as well.
    flip_rows = n_uno > n_zero

    # Both substitutions must happen in a single step: applying 0 -> 1 and
    # then 1 -> 0 one after the other would not yield a swap.
    swapped = data.replace({0: 1, 1: 0})
    return data.mask(flip_rows, swapped)


# Placeholder list: substitute the real names of the columns to encode.
dummy_columns = ["ATTRIBUTE1",..."ATTRIBIUTE N"]
df=dummy_data(df, dummy_columns)

但这个函数并没有把 0 和 1 互换。

2 个答案:

答案 0 :(得分:1)

我认为你需要:

def dummy_data(data, columns):
    """One-hot encode ``columns`` and swap 0/1 in rows where ones outnumber zeros.

    The columns listed in ``columns`` are encoded together in one
    ``get_dummies`` call, appended to the frame, and the originals dropped.
    Every row holding more values equal to 1 than values equal to 0 then has
    its 0s and 1s exchanged. Counting and swapping consider all columns of
    the resulting frame, not only the dummy columns.

    Args:
        data: Input ``pandas.DataFrame``; it is not modified.
        columns: List of column labels to encode.

    Returns:
        A new ``pandas.DataFrame`` with the encoded (and possibly swapped)
        values.
    """
    # Encode all requested columns in one call. Integer dummies are requested
    # explicitly: recent pandas versions default to boolean dummies, while the
    # integer-keyed swap below is meant for 0/1 values.
    dummies = pd.get_dummies(data[columns], dtype=int)
    data = pd.concat([data, dummies], axis=1).drop(columns, axis=1)

    # Summing the boolean comparison already counts matches per row.
    n_zero = (data == 0).sum(axis=1)
    n_uno = (data == 1).sum(axis=1)

    # Row-wise condition; mask() replaces whole rows where it is True.
    m = n_uno > n_zero
    data = data.mask(m, data.replace({0: 1, 1: 0}))

    return data

示例:

# Small sample frame with four single-letter string columns.
df = pd.DataFrame({
    'A': ['a', 'b', 'b'],
    'B': ['b', 'b', 'b'],
    'C': ['b', 'a', 'a'],
    'D': ['a', 'a', 'a'],
})

print(df)
   A  B  C  D
0  a  b  b  a
1  b  b  a  a
2  b  b  a  a
def dummy_data(data, columns):
    data =  pd.concat([data, pd.get_dummies(data[columns])], axis=1).drop(columns, axis=1)
    print (data)

   D  A_a  A_b  B_b  C_a  C_b
0  a    1    0    1    0    1
1  a    0    1    1    1    0
2  a    0    1    1    1    0

    n_zero = (data == 0).sum(axis=1)
    n_uno = (data == 1).sum(axis=1)
    m = n_uno > n_zero
    print (m)

0    True
1    True
2    True
dtype: bool

    data = data.mask(m, data.replace({0:1,1:0}))

    return data

# Encode columns A, B and C of the sample frame and print the result.
dummy_columns = ['A','B', 'C']
df = dummy_data(df, dummy_columns)
print (df)

   D  A_a  A_b  B_b  C_a  C_b
0  a    0    1    0    1    0
1  a    1    0    0    0    1
2  a    1    0    0    0    1

答案 1 :(得分:0)

使用numpy logical_not:

快速查找和反转1和0的方法
def dummy_data(data_df, dummy_columns):
    """One-hot encode ``dummy_columns`` and invert rows dominated by ones.

    The columns in ``dummy_columns`` are replaced by integer 0/1 dummy
    columns. For every row whose dummy values contain more ones than zeros,
    the dummy values are logically inverted. Only the dummy columns take part
    in the counting and the inversion; all other columns are passed through
    unchanged.

    Args:
        data_df: Input ``pandas.DataFrame``; it is not modified.
        dummy_columns: List of column labels to encode.

    Returns:
        A new ``pandas.DataFrame`` made of the untouched columns (in their
        original order) followed by the dummy columns.
    """
    # Select the pass-through columns with a list rather than a set
    # difference so that their original order is preserved.
    encoded = set(dummy_columns)
    static_df = data_df[[col for col in data_df.columns if col not in encoded]]

    df = pd.get_dummies(data_df[dummy_columns], dtype=int)

    # Work on a private copy: the array behind ``.values`` may be shared
    # with the frame or be read-only, and it is modified in place below.
    vals = df.values.copy()
    ones_count = np.add.reduce(vals, axis=1)
    zeros_count = np.add.reduce(np.logical_not(vals), axis=1)
    idx = np.where(ones_count > zeros_count)[0]

    # Invert only the selected rows; the boolean result is stored back as 0/1.
    vals[idx, :] = np.logical_not(vals[idx, :])
    result_df = pd.concat(
        [static_df, pd.DataFrame(vals, index=df.index, columns=df.columns)],
        axis=1,
    )

    return result_df