我有一个包含浮点数的 4 列数据框,每列 10 行。这些列分别代表一个人过去做过的第一份、第二份、第三份和第四份工作。这些行代表工作类别 1-10,每份工作都属于其中一个类别。该表给出了数据库中的人员以工作类别 1-10 作为第一份工作、第二份工作等的概率。
我已经把第一份工作的概率放在一个数据框的第一行,其后各行是第二份工作的条件概率(取决于人们第一份工作所属的工作类别),例如在第一份工作类别为 3 的前提下,第二份工作属于类别 2 的概率,依此类推(参见第二个代码块)。
在第三步中,我想根据一个人在第一份、第二份和第三份工作中所属的工作类别,确定其第四份工作属于工作类别 1-10 的概率。我在第三个代码块中手动执行此操作,但希望对所有 1000 个组合“自动”执行此操作。
有人可以帮我解决这个问题吗?如果问题的解释不够清楚,请告诉我——这个问题很难用几行文字说清楚。非常感谢任何提示!
在第二步中,我创建了一个新的数据框
# Imports are placed here because the script uses `pd` (and originally a bare
# `nan`) without bringing either name into scope anywhere in the visible file.
import numpy as np
import pandas as pd

# Step 1 -- input table.
# Outer keys become the columns (prob_1 .. prob_4 = first .. fourth job);
# inner keys become the float row index 1.0 .. 10.0 (the job category).
# NaN marks a category/position pair for which no probability is given.
prob_all_dict = {
    'prob_1': {
        1.0: 0.03409090909090909,
        2.0: 0.022727272727272728,
        3.0: 0.045454545454545456,
        4.0: 0.5340909090909091,
        5.0: 0.06818181818181818,
        6.0: 0.011363636363636364,
        7.0: 0.13636363636363635,
        8.0: 0.06818181818181818,
        9.0: 0.045454545454545456,
        10.0: 0.03409090909090909,
    },
    'prob_2': {
        1.0: 0.045454545454545456,
        2.0: 0.011363636363636364,
        3.0: 0.03409090909090909,
        4.0: 0.4659090909090909,
        5.0: 0.11363636363636363,
        6.0: 0.045454545454545456,
        7.0: 0.1590909090909091,
        8.0: 0.045454545454545456,
        9.0: 0.03409090909090909,
        10.0: 0.045454545454545456,
    },
    'prob_3': {
        1.0: 0.1111111111111111,
        2.0: np.nan,
        3.0: 0.06349206349206349,
        4.0: 0.3968253968253968,
        5.0: 0.07936507936507936,
        6.0: np.nan,
        7.0: 0.19047619047619047,
        8.0: 0.1111111111111111,
        9.0: np.nan,
        10.0: 0.047619047619047616,
    },
    'prob_4': {
        1.0: np.nan,
        2.0: np.nan,
        3.0: 0.043478260869565216,
        4.0: 0.391304347826087,
        5.0: 0.13043478260869565,
        6.0: np.nan,
        7.0: 0.08695652173913043,
        8.0: 0.2608695652173913,
        9.0: np.nan,
        10.0: 0.08695652173913043,
    },
}
prob_all = pd.DataFrame.from_dict(prob_all_dict)

# Derive the number of categories from the table instead of hard-coding 10.
n_categories = len(prob_all)
p_first, p_second, p_third, p_fourth = (
    prob_all[column].to_numpy()
    for column in ('prob_1', 'prob_2', 'prob_3', 'prob_4')
)

# Step 2 -- products for the first three jobs, stacked into one frame.
# pair_products[a, b] = prob_1[a] * prob_2[b]
pair_products = np.outer(p_first, p_second)
# Row a * n + b holds prob_1[a] * prob_2[b] * prob_3[:], one column per
# category of the third job.
triple_products = pair_products.reshape(-1, 1) * p_third

# Layout of `out` (default integer row/column labels, columns = category - 1):
#   row 0                      : prob_1
#   rows 1 .. n                : prob_2 * prob_1[a]            (row 1 + a)
#   rows 1 + n .. n + n**2     : prob_3 * prob_1[a] * prob_2[b]
#                                (row 1 + n + a * n + b)
# This is the same 111 x 10 frame the chained `.join(...)` calls produced,
# built without duplicate column names or suffix workarounds.
out = pd.DataFrame(np.vstack([p_first, pair_products, triple_products]))

# Step 3 -- every "history x fourth job" product at once.
# Replaces the hand-written `out.iloc[r, c] * prob_all['prob_4'][m]` lines:
#   four_job_products[a, b, c, d]
#       == out.iloc[1 + n + a * n + b, c] * prob_all['prob_4'].iloc[d]
#       == prob_1[a] * prob_2[b] * prob_3[c] * prob_4[d]
# with zero-based category positions a, b, c, d. NaN inputs propagate.
history_rows = out.iloc[1 + n_categories:].to_numpy()
four_job_products = (
    history_rows[:, :, np.newaxis] * p_fourth
).reshape((n_categories,) * 4)

# The same values addressable by category label, e.g.
#   path_probability.loc[(1.0, 2.0, 1.0, 4.0)]
# is the product for job_1 = 1, job_2 = 2, job_3 = 1, job_4 = 4.
path_probability = pd.Series(
    four_job_products.ravel(),
    index=pd.MultiIndex.from_product(
        [prob_all.index] * 4,
        names=['job_1', 'job_2', 'job_3', 'job_4'],
    ),
    name='probability',
)