我有以下数据框:
c1 e c2 d s scr_s scr_c1 scr_d scr_c2 scr_e r1 r2
12 6 13 3 2 11 22 19 5 5 19 scr_s scr_d
13 11 2 6 13 3 9 24 15 9 13 scr_c1 scr_s
我正在尝试获取 scr_s、scr_c1、scr_d、scr_c2、scr_e 这几列中取得最大值和最小值的列的名称。具有最高值的列名称将存储在 r1 中,具有最低值的列名称将存储在 r2 中。
要计算 r1 和 r2,我使用:
# Restrict the row-wise search to the score columns only.
score_columns = ['scr_s', 'scr_c1', 'scr_d', 'scr_c2', 'scr_e']
scores = data[score_columns]
# NOTE: when several columns share the extreme value, idxmax/idxmin
# report the first such column in `score_columns` order.
data['r1'] = scores.idxmax(axis=1)
data['r2'] = scores.idxmin(axis=1)
问题:当有两列同时取得最大值或最小值时,我必须在这些并列的列中,选择其对应的基础列('c1','e','c2','d','s' 之一)值最大的那一列的名称。
在这种情况下,就 r2 列而言,结果应该有所不同:
我该如何解决这个问题?
预期输出:
c1 e c2 d s scr_s scr_c1 scr_d scr_c2 scr_e r1 r2
12 6 13 3 2 11 22 19 5 5 19 scr_s *scr_c2*
13 11 2 6 13 3 9 24 15 9 13 scr_c1 *scr_c2*
答案 0 :(得分:3)
# Base columns and their matching score columns ('scr_' + base name).
base = ['c1', 'e', 'c2', 'd', 's']
extd = ['scr_' + c for c in base]

# One tuple per row, holding that row's values in `base` / `extd` order.
base_tups = list(zip(*(df.get(c) for c in base)))
extd_tups = list(zip(*(df.get(c) for c in extd)))

# Per row: a list of (score value, base value) pairs, one pair per column.
zvals = [
    list(zip(score_row, base_row))
    for score_row, base_row in zip(extd_tups, base_tups)
]
看起来像这样
[[(19, 6), (19, 13), (5, 3), (5, 2), (22, 11)],
[(24, 11), (13, 2), (9, 6), (15, 13), (9, 3)]]
idxmax
/ idxmin
# idxmax / idxmin style selection over the (score, base) pairs in zvals.
r = range(len(base))
rvals = {
    # Largest (score, base) pair wins; pairs compare element by element.
    'r1': [extd[max(r, key=x.__getitem__)] for x in zvals],
    # flip sign on second element to get what OP wants:
    # smallest score first, then the largest base value among ties.
    'r2': [extd[min(r, key=lambda i: (x[i][0], -x[i][1]))] for x in zvals],
}
df.assign(**rvals)
c1 e c2 d s scr_s scr_c1 scr_d scr_c2 scr_e r1 r2
12 6 13 3 2 11 22 19 5 5 19 scr_s scr_c2
13 11 2 6 13 3 9 24 15 9 13 scr_c1 scr_c2
答案 1 :(得分:1)
您可以这样开始:
r1
或r2
def fmax(r, ind):
    """Return the name (a string) of the column in ``ind`` with the largest value.

    Args:
        r: A row-like object indexable by column name (``r[name]``), holding
            both the score columns and their base columns.
        ind: Sequence of score column names; each is expected to be the base
            column name prefixed with ``'scr_'``.

    Returns:
        The score column name holding the maximum. When several score columns
        tie for the maximum, the one whose base column (name with ``'scr_'``
        removed) has the largest value is returned.

    Raises:
        ValueError: If ``ind`` is empty.
    """
    values = [r[col] for col in ind]
    top = max(values)
    tied = [col for col, val in zip(ind, values) if val == top]
    if len(tied) == 1:
        return tied[0]  # single max, no tie-break needed
    # Tie-break on the base column value, for any number of tied columns.
    # Scanning in reverse keeps the earlier two-way behaviour: if the base
    # values are equal as well, the later column in `ind` wins.
    return max(reversed(tied), key=lambda col: r[col.replace('scr_', '')])
def fmin(r, ind):
    """Return the name (a string) of the column in ``ind`` with the smallest value.

    Args:
        r: A row-like object indexable by column name (``r[name]``), holding
            both the score columns and their base columns.
        ind: Sequence of score column names; each is expected to be the base
            column name prefixed with ``'scr_'``.

    Returns:
        The score column name holding the minimum. When several score columns
        tie for the minimum, the one whose base column (name with ``'scr_'``
        removed) has the largest value is returned.

    Raises:
        ValueError: If ``ind`` is empty.
    """
    values = [r[col] for col in ind]
    bottom = min(values)
    tied = [col for col, val in zip(ind, values) if val == bottom]
    if len(tied) == 1:
        return tied[0]  # single min, no tie-break needed
    # Tie-break on the LARGEST base column value, for any number of tied
    # columns. Scanning in reverse keeps the earlier two-way behaviour: if
    # the base values are equal as well, the later column in `ind` wins.
    return max(reversed(tied), key=lambda col: r[col.replace('scr_', '')])
# Score columns searched for the row-wise maximum (r1) and minimum (r2).
score_cols = ['scr_s', 'scr_c1', 'scr_d', 'scr_c2', 'scr_e']

# apply(..., axis=1) hands each row to the picker; `args` supplies `ind`.
df['r1'] = df.apply(fmax, axis=1, args=(score_cols,))
df['r2'] = df.apply(fmin, axis=1, args=(score_cols,))
print(df)
c1 e c2 d s scr_s scr_c1 scr_d scr_c2 scr_e r1 r2
12 6 13 3 2 11 22 19 5 5 19 . .
13 11 2 6 13 3 9 24 15 9 13 . .
c1 e c2 d s scr_s scr_c1 scr_d scr_c2 scr_e r1 r2
12 6 13 3 2 11 22 19 5 5 19 scr_s scr_c2
13 11 2 6 13 3 9 24 15 9 13 scr_c1 scr_c2