我的数据框有超过200列虚拟变量:
Row1 Feature1 Feature2 Feature3 Feature4 Feature5
A 0 1 1 1 0
B 0 0 1 1 1
C 1 0 1 0 1
D 0 1 0 1 0
我想逐行处理并把各个特征分离开,从而额外创建3个数据框:df1 只保留每行中第一个取值为 1 的特征(保持为 1),并把它后面的所有列改为 0;df2 只保留每行中第二个取值为 1 的特征(保持为 1),并把它前面和后面的所有列都改为 0。
我已经创建了代码来完成它,但我认为必须有更好的方法来实现它。请帮助我更有效地解决这个问题。谢谢!
以下是我的代码:
# Keep only the first 1 in every row of hcit1: each feature column that comes
# after a row's first 1 is set to 0; columns up to and including that first 1
# are left untouched.
#
# Vectorized replacement for the row-by-row / column-by-column loop: a running
# count of the 1s seen so far in each row tells us, for every cell, whether a
# 1 already occurred to its left.
feature_cols = ["feature" + str(i) for i in range(1, 261)]
features = hcit1[feature_cols]
# 1 where the cell holds a 1, 0 elsewhere (int cast mirrors int(row[title]) == 1).
is_one = features.astype(int).eq(1).astype(int)
# Number of 1s strictly to the left of each cell within its row.
ones_before = is_one.cumsum(axis=1) - is_one
# Cells with no 1 to their left keep their value; all others become 0.
hcit1[feature_cols] = features.where(ones_before.eq(0), 0)
# Keep only the second 1 in every row of hcit2: that cell stays as it is and
# every other feature column in the row is set to 0. Rows containing fewer
# than two 1s therefore end up all zeros.
#
# The result is written to hcit2 itself, the frame being processed, so the
# frame holding the "first 1" result (hcit1) is not overwritten.
# NOTE(review): assumes the feature columns are 0/1 indicator variables.
feature_cols = ["feature" + str(i) for i in range(1, 261)]
features = hcit2[feature_cols]
# 1 where the cell holds a 1, 0 elsewhere.
is_one = features.astype(int).eq(1).astype(int)
# Number of 1s strictly to the left of each cell within its row.
ones_before = is_one.cumsum(axis=1) - is_one
# A cell is the row's second 1 when it is a 1 and exactly one 1 precedes it.
is_second_one = is_one.eq(1) & ones_before.eq(1)
hcit2[feature_cols] = features.where(is_second_one, 0)
# Keep only the third 1 in every row of hcit3: that cell stays as it is and
# every other feature column in the row is set to 0. Rows containing fewer
# than three 1s therefore end up all zeros.
#
# The result is written to hcit3 itself, the frame being processed, so the
# frame holding the "first 1" result (hcit1) is not overwritten.
# NOTE(review): assumes the feature columns are 0/1 indicator variables.
feature_cols = ["feature" + str(i) for i in range(1, 261)]
features = hcit3[feature_cols]
# 1 where the cell holds a 1, 0 elsewhere.
is_one = features.astype(int).eq(1).astype(int)
# Number of 1s strictly to the left of each cell within its row.
ones_before = is_one.cumsum(axis=1) - is_one
# A cell is the row's third 1 when it is a 1 and exactly two 1s precede it.
is_third_one = is_one.eq(1) & ones_before.eq(2)
hcit3[feature_cols] = features.where(is_third_one, 0)
# Keep only the fourth 1 in every row of hcit4: that cell stays as it is and
# every other feature column in the row is set to 0. Rows containing fewer
# than four 1s therefore end up all zeros.
#
# The result is written to hcit4 itself, the frame being processed, so the
# frame holding the "first 1" result (hcit1) is not overwritten.
# NOTE(review): assumes the feature columns are 0/1 indicator variables.
feature_cols = ["feature" + str(i) for i in range(1, 261)]
features = hcit4[feature_cols]
# 1 where the cell holds a 1, 0 elsewhere.
is_one = features.astype(int).eq(1).astype(int)
# Number of 1s strictly to the left of each cell within its row.
ones_before = is_one.cumsum(axis=1) - is_one
# A cell is the row's fourth 1 when it is a 1 and exactly three 1s precede it.
is_fourth_one = is_one.eq(1) & ones_before.eq(3)
hcit4[feature_cols] = features.where(is_fourth_one, 0)
答案 0 :(得分:0)
下面:
# Build one frame per feature: the rows of df where that feature equals 1,
# with every column zeroed and then the feature itself set back to 1.
#
# Each selection is copied explicitly so the in-place assignments that follow
# operate on an independent frame and do not raise pandas'
# SettingWithCopyWarning.
# NOTE(review): iloc[:, :] = 0 zeroes every column of the selection, including
# any non-feature column such as a row label stored as a regular column --
# confirm that is intended.
df1 = df[df['Feature1'] == 1].copy()
df1.iloc[:, :] = 0
df1.loc[:, 'Feature1'] = 1

df2 = df[df['Feature2'] == 1].copy()
df2.iloc[:, :] = 0
df2.loc[:, 'Feature2'] = 1

# df3 is selected on Feature3, the same column that is restored to 1 below.
df3 = df[df['Feature3'] == 1].copy()
df3.iloc[:, :] = 0
df3.loc[:, 'Feature3'] = 1
那应该是你要找的东西。