我的数据帧中只有 0 和 127 两种值。如示例所示,这些 127 聚集在一起。
# Sample frame: each column holds only 0s and 127s, and the 127s sit next to
# each other within a column.
df = DataFrame(
    {
        "f1": [0, 0, 0, 0, 0, 0],
        "f2": [0, 0, 0, 0, 0, 0],
        "f3": [0, 0, 127, 127, 0, 0],
        "f4": [0, 127, 127, 127, 0, 0],
        "f5": [0, 127, 127, 127, 127, 0],
        "f6": [0, 127, 127, 127, 127, 0],
        "f7": [0, 0, 127, 127, 127, 0],
        "f8": [0, 0, 127, 127, 0, 0],
        "f9": [0, 0, 127, 0, 0, 0],
        "f10": [0, 0, 0, 0, 0, 0],
    }
)
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 127 127 127 0 0 0 0
2 0 0 127 127 127 127 127 127 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 127 127 127 0 0 0
5 0 0 0 0 0 0 0 0 0 0
给定一个数字列表 num_of_cells_to_del,我想在每一列中清除相应数量的单元格,
并随机地从顶部或底部开始清除。
# One entry per column (f1..f10): how many cells to clear in that column.
num_of_cells_to_del = [0, 0, 0, 1, 1, 2, 2, 1, 0, 0]
结果:
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 127 0 0 0 0 0
2 0 0 127 127 127 0 0 0 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 0 127 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0
答案 0 :(得分:0)
我不太理解您的示例:您是想沿列从上到下置 0,还是沿行从左开始置 0?如果是前者,那么您给出的结果不正确;如果是后者,则 num_of_cells_to_del
中的值不够。无论是哪种情况,以下代码对这两种方式都适用:
import numpy as np
import pandas as pd
# Same sample frame as in the question: only 0s and 127s, with the 127s
# adjacent to each other inside every column.
df = pd.DataFrame(
    {
        "f1": [0, 0, 0, 0, 0, 0],
        "f2": [0, 0, 0, 0, 0, 0],
        "f3": [0, 0, 127, 127, 0, 0],
        "f4": [0, 127, 127, 127, 0, 0],
        "f5": [0, 127, 127, 127, 127, 0],
        "f6": [0, 127, 127, 127, 127, 0],
        "f7": [0, 0, 127, 127, 127, 0],
        "f8": [0, 0, 127, 127, 0, 0],
        "f9": [0, 0, 127, 0, 0, 0],
        "f10": [0, 0, 0, 0, 0, 0],
    }
)
print(df)
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 127 127 127 0 0 0 0
2 0 0 127 127 127 127 127 127 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 127 127 127 0 0 0
5 0 0 0 0 0 0 0 0 0 0
# Number of cells to clear from the top of each column, by column position.
num_of_cells_to_del = [0, 1, 1, 2, 2, 0]

# Walk column positions rather than rows: the counter is used as a column
# index, so it must be bounded by the number of columns. zip() stops at the
# shorter input, so columns without a matching count are left untouched.
for col_pos, n_cells in zip(range(df.shape[1]), num_of_cells_to_del):
    # iloc is positional and end-exclusive: exactly n_cells rows are cleared.
    df.iloc[:n_cells, col_pos] = 0
print(df)
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 127 0 0 0 0
2 0 0 127 127 127 127 127 127 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 127 127 127 0 0 0
5 0 0 0 0 0 0 0 0 0 0
# Pair every column label with its count; zip() stops at the shorter input,
# so columns without a matching count are left untouched.
for col, n_cells in zip(df.columns, num_of_cells_to_del):
    # Clear by position. A label slice such as df.loc[0:n, col] is
    # end-inclusive and would clear n + 1 rows (one row even when n == 0).
    df.iloc[:n_cells, df.columns.get_loc(col)] = 0
print(df)
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 127 0 0 0 0
2 0 0 127 0 0 127 127 127 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 127 127 127 0 0 0
5 0 0 0 0 0 0 0 0 0 0
# For every column, clear n_cells rows from either the top or the bottom of
# the column, chosen by a coin flip. Rows are cleared by position, whatever
# value they currently hold.
for col, n_cells in zip(df.columns, num_of_cells_to_del):
    if n_cells <= 0:
        # Nothing to clear; also avoids iloc[-0:], which selects every row.
        continue
    col_pos = df.columns.get_loc(col)
    if np.random.rand() > 0.5:
        # Top of the column; iloc is end-exclusive, so exactly n_cells rows.
        df.iloc[:n_cells, col_pos] = 0
    else:
        # Bottom of the column. This must be positional: a label slice such
        # as df.loc[-n:, col] on a default integer index matches every row.
        df.iloc[-n_cells:, col_pos] = 0
print(df)
f1 f2 f3 f4 f5 f6 f7 f8 f9 f10
0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0
2 0 0 127 127 127 0 0 127 127 0
3 0 0 127 127 127 127 127 127 0 0
4 0 0 0 0 127 127 127 0 0 0
5 0 0 0 0 0 0 0 0 0 0
答案 1 :(得分:0)
我的解决方案
# For each column, set `cells` randomly chosen 127 entries to 0.
for col, cells in zip(df.columns, num_of_cells_to_del):
    # Work on a private copy: the array behind the column may share memory
    # with the frame (or be read-only), so it must not be edited in place.
    col_vals = df[col].to_numpy(copy=True)
    non_zero = np.where(col_vals == 127)[0]  # row positions that hold 127
    if len(non_zero) < cells:  # can't delete more than what's present!
        raise ValueError('Not enough 127 in the column!')
    if len(non_zero) == 0:
        continue
    # Draw `cells` distinct positions among the 127s. Any of them may be
    # picked, not only the ones at the top or bottom of the cluster.
    replace_indices = np.random.choice(non_zero, size=cells, replace=False)
    col_vals[replace_indices] = 0
    df[col] = col_vals