使用条件概率创建数据框并使用它来计算进一步的条件概率,python

时间:2021-07-05 15:23:15

标签: python conditional-statements

我有一个包含浮点数的 4 列数据框,每列 10 行。这些列代表一个人过去做过的第一份、第二份、第三份和第四份工作。这些行代表工作类别 1-10,所有工作都属于其中一个类别。该表说明了数据库中的人员将工作 1-10 作为第一份工作、第二份工作等的概率。

我已经把第一份工作的概率放在数据框的第一行,随后是第二份工作的条件概率,即在已知第一份工作类别的前提下,第二份工作属于某一类别的概率,例如在第一份工作类别为 3 的条件下,第二份工作类别为 2 的概率(参见第二个代码块)。

在第三步中,我想根据一个人第一份、第二份和第三份工作所属的类别,来确定其第四份工作属于类别 1-10 的概率。我在第三个代码块中手动执行此操作,但希望对全部 1000 种组合“自动”完成。

有人可以帮我解决这个问题吗?如果问题的解释不够直观,请告诉我,这个问题很难用几行文字说清楚。非常感谢任何提示!

在第二步中,我创建了一个新的数据框

# Per-job distribution over job categories 1-10: prob_all_dict["prob_k"][c]
# is the probability that a person's k-th job belongs to category c.
# NaN entries are kept as given (presumably categories with no observations
# for that job -- confirm against the underlying data).
import pandas as pd

# The literal below was pasted from a DataFrame dump, which spells missing
# values as a bare `nan`; bind that name so the snippet runs on its own.
nan = float("nan")

prob_all_dict = {
    "prob_1": {
        1.0: 0.03409090909090909,
        2.0: 0.022727272727272728,
        3.0: 0.045454545454545456,
        4.0: 0.5340909090909091,
        5.0: 0.06818181818181818,
        6.0: 0.011363636363636364,
        7.0: 0.13636363636363635,
        8.0: 0.06818181818181818,
        9.0: 0.045454545454545456,
        10.0: 0.03409090909090909,
    },
    "prob_2": {
        1.0: 0.045454545454545456,
        2.0: 0.011363636363636364,
        3.0: 0.03409090909090909,
        4.0: 0.4659090909090909,
        5.0: 0.11363636363636363,
        6.0: 0.045454545454545456,
        7.0: 0.1590909090909091,
        8.0: 0.045454545454545456,
        9.0: 0.03409090909090909,
        10.0: 0.045454545454545456,
    },
    "prob_3": {
        1.0: 0.1111111111111111,
        2.0: nan,
        3.0: 0.06349206349206349,
        4.0: 0.3968253968253968,
        5.0: 0.07936507936507936,
        6.0: nan,
        7.0: 0.19047619047619047,
        8.0: 0.1111111111111111,
        9.0: nan,
        10.0: 0.047619047619047616,
    },
    "prob_4": {
        1.0: nan,
        2.0: nan,
        3.0: 0.043478260869565216,
        4.0: 0.391304347826087,
        5.0: 0.13043478260869565,
        6.0: nan,
        7.0: 0.08695652173913043,
        8.0: 0.2608695652173913,
        9.0: nan,
        10.0: 0.08695652173913043,
    },
}

# Outer keys become the columns (prob_1..prob_4); inner keys (the float
# category labels 1.0..10.0) become the row index.
prob_all = pd.DataFrame.from_dict(prob_all_dict)
# Build the stacked table of one-, two- and three-job products.
#
# Layout of `out` (n = number of categories, 10 for the data above), with
# plain positional row/column labels:
#   row 0                  -> prob_1
#   row 1 + a              -> prob_1[a] * prob_2            (a = 0..n-1)
#   row 1 + n + a*n + b    -> prob_1[a] * prob_2[b] * prob_3
# Columns run over the category of the last job in the product.
#
# Broadcasting replaces the ten hand-written `.join(...)` calls: it yields
# the same values in the same row order, without relying on duplicate column
# labels or a hard-coded category count.
# NOTE(review): these are products of per-job marginals, i.e. joint
# probabilities under an independence assumption -- confirm this is the
# intended notion of "conditional probability".
first = prob_all['prob_1'].to_numpy()
second = prob_all['prob_2'].to_numpy()
third = prob_all['prob_3'].to_numpy()

# pair[a, b] = prob_1[a] * prob_2[b]
pair = first[:, None] * second[None, :]
# triple[a, b, c] = pair[a, b] * prob_3[c]; NaN entries in prob_3 propagate.
triple = pair[:, :, None] * third[None, None, :]

# Each array row becomes one DataFrame row; reshaping `triple` row-major
# puts the (a, b) combination at offset a*n + b, matching the layout above.
out = pd.DataFrame([first, *pair, *triple.reshape(-1, third.size)])
# Extend every three-job product to the fourth job in one vectorized step,
# instead of spelling out each `out.iloc[r, c] * prob_all['prob_4'][d]` by
# hand (the manual listing covered only 12 of the 1000 combinations and
# discarded every result).
n_categories = len(prob_all)
categories = prob_all.index

# Rows 0..n_categories of `out` hold the one- and two-job stages; each row
# after that is one (job 1, job 2) pair whose columns run over job 3.
three_job = out.iloc[1 + n_categories:].to_numpy()

# Flattening row-major keeps the order of the hand-written listing:
# out.iloc[11, 0], out.iloc[11, 1], ..., out.iloc[12, 0], out.iloc[12, 1], ...
# Each flattened value is multiplied by every prob_4 entry, so for example
# out_4.loc[(1.0, 1.0, 1.0), 2.0] == out.iloc[11, 0] * prob_all['prob_4'][2].
# NaN entries in prob_3 / prob_4 propagate into the result.
out_4 = pd.DataFrame(
    three_job.reshape(-1, 1) * prob_all['prob_4'].to_numpy(),
    index=pd.MultiIndex.from_product(
        [categories, categories, categories],
        names=['job_1', 'job_2', 'job_3'],
    ),
    columns=categories.rename('job_4'),
)

0 个答案:

没有答案