从pmml生成输出概率

时间:2016-11-28 12:34:43

标签: machine-learning logistic-regression pmml

我有一个pmml如下。我需要在通过pmml阅读器预测时获得输出概率值。但是这个pmml只生成BantStatus字段的值。

如何从中获取probability_1或probability_0的值作为输出?

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Classification model for the BantStatus target, expressed as a two-step model chain:
  segment 1 computes a linear score (decisionFunction_1) and segment 2 converts that
  score into the class probabilities probability_0 and probability_1.
  The Header below records JPMML-SkLearn 1.1.4 as the producing application.
-->
<PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
    <Header>
        <Application name="JPMML-SkLearn" version="1.1.4"/>
        <Timestamp>2016-11-28T12:04:02Z</Timestamp>
    </Header>
    <!-- Ten continuous double fields plus the categorical integer field BantStatus (values 0 and 1). -->
    <DataDictionary>
        <DataField name="TitleCat" optype="continuous" dataType="double"/>
        <DataField name="RLMaxTitle" optype="continuous" dataType="double"/>
        <DataField name="Act1_rate" optype="continuous" dataType="double"/>
        <DataField name="Act2_rate" optype="continuous" dataType="double"/>
        <DataField name="Act3_rate" optype="continuous" dataType="double"/>
        <DataField name="Act4_rate" optype="continuous" dataType="double"/>
        <DataField name="Act5_rate" optype="continuous" dataType="double"/>
        <DataField name="Act6_rate" optype="continuous" dataType="double"/>
        <DataField name="AccntAct_rate" optype="continuous" dataType="double"/>
        <DataField name="sqlState" optype="continuous" dataType="double"/>
        <DataField name="BantStatus" optype="categorical" dataType="integer">
            <Value value="0"/>
            <Value value="1"/>
        </DataField>
    </DataDictionary>
    <!--
      Defines a function named "logit" that evaluates 1 / (1 + exp(-1 * value)).
      Despite the name, that expression is the logistic (sigmoid) function.
      No Apply element in this document calls it; the probabilities come from the
      softmax normalization in segment 2 instead.
    -->
    <TransformationDictionary>
        <DefineFunction name="logit" optype="continuous" dataType="double">
            <ParameterField name="value" optype="continuous" dataType="double"/>
            <Apply function="/">
                <Constant dataType="double">1</Constant>
                <Apply function="+">
                    <Constant dataType="double">1</Constant>
                    <Apply function="exp">
                        <Apply function="*">
                            <Constant dataType="double">-1</Constant>
                            <FieldRef field="value"/>
                        </Apply>
                    </Apply>
                </Apply>
            </Apply>
        </DefineFunction>
    </TransformationDictionary>
    <!--
      Top-level model. Note that no Output element is declared at this level; the only
      Output elements are nested inside the two segments below. Whether a scoring engine
      surfaces those nested output fields depends on the engine (see the PMML
      specification for modelChain semantics).
    -->
    <MiningModel functionName="classification">
        <MiningSchema>
            <MiningField name="BantStatus" usageType="target"/>
            <MiningField name="TitleCat"/>
            <MiningField name="RLMaxTitle"/>
            <MiningField name="Act1_rate"/>
            <MiningField name="Act2_rate"/>
            <MiningField name="Act3_rate"/>
            <MiningField name="Act4_rate"/>
            <MiningField name="Act5_rate"/>
            <MiningField name="Act6_rate"/>
            <MiningField name="AccntAct_rate"/>
            <MiningField name="sqlState"/>
        </MiningSchema>
        <Segmentation multipleModelMethod="modelChain">
            <!--
              Segment 1: linear regression over the ten inputs. Its predicted value is
              exposed as decisionFunction_1, flagged isFinalResult="false" because it is an
              intermediate value consumed by segment 2.
            -->
            <Segment id="1">
                <True/>
                <RegressionModel functionName="regression">
                    <MiningSchema>
                        <MiningField name="TitleCat"/>
                        <MiningField name="RLMaxTitle"/>
                        <MiningField name="Act1_rate"/>
                        <MiningField name="Act2_rate"/>
                        <MiningField name="Act3_rate"/>
                        <MiningField name="Act4_rate"/>
                        <MiningField name="Act5_rate"/>
                        <MiningField name="Act6_rate"/>
                        <MiningField name="AccntAct_rate"/>
                        <MiningField name="sqlState"/>
                    </MiningSchema>
                    <Output>
                        <OutputField name="decisionFunction_1" optype="continuous" dataType="double" feature="predictedValue" isFinalResult="false"/>
                    </Output>
                    <RegressionTable intercept="-2.086708061828022">
                        <NumericPredictor name="TitleCat" coefficient="-0.08830152892846507"/>
                        <NumericPredictor name="RLMaxTitle" coefficient="0.1621566064638807"/>
                        <NumericPredictor name="Act1_rate" coefficient="-0.018238598197299193"/>
                        <NumericPredictor name="Act2_rate" coefficient="-0.016441453725557"/>
                        <NumericPredictor name="Act3_rate" coefficient="-0.045520608577430045"/>
                        <NumericPredictor name="Act4_rate" coefficient="0.33260315589120076"/>
                        <NumericPredictor name="Act5_rate" coefficient="-0.22925972334047728"/>
                        <NumericPredictor name="Act6_rate" coefficient="-0.03337501878673795"/>
                        <NumericPredictor name="AccntAct_rate" coefficient="0.013093373976464637"/>
                        <NumericPredictor name="sqlState" coefficient="1.0430128378571444"/>
                    </RegressionTable>
                </RegressionModel>
            </Segment>
            <!--
              Segment 2: classification over decisionFunction_1 with softmax normalization.
              Category 0 scores -1.0 * decisionFunction_1 and category 1 scores 0.0, so the
              softmax gives probability_1 = 1 / (1 + exp(-decisionFunction_1)) and
              probability_0 = 1 - probability_1. Both probabilities are declared as output
              fields of this segment.
            -->
            <Segment id="2">
                <True/>
                <RegressionModel functionName="classification" normalizationMethod="softmax">
                    <MiningSchema>
                        <MiningField name="BantStatus" usageType="target"/>
                        <MiningField name="decisionFunction_1"/>
                    </MiningSchema>
                    <Output>
                        <OutputField name="probability_0" optype="continuous" dataType="double" feature="probability" value="0"/>
                        <OutputField name="probability_1" optype="continuous" dataType="double" feature="probability" value="1"/>
                    </Output>
                    <RegressionTable intercept="0.0" targetCategory="0">
                        <NumericPredictor name="decisionFunction_1" coefficient="-1.0"/>
                    </RegressionTable>
                    <RegressionTable intercept="0.0" targetCategory="1"/>
                </RegressionModel>
            </Segment>
        </Segmentation>
    </MiningModel>
</PMML> 

1 个答案:

答案 0 :(得分:0)

此PMML是一个有效的标准模型链(modelChain)模型,最后一个段(Segment)的输出即为最终结果,这意味着兼容的PMML阅读器会输出probability_0和probability_1这两个输出字段的值。以PyPMML为例:

from pypmml import Model

# Build the model from an inline PMML document (the model-chain PMML shown in the question).
# The triple-quoted string is passed to the parser as-is, so it must start with the XML declaration.
model = Model.fromString('''<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
    <Header>
        <Application name="JPMML-SkLearn" version="1.1.4"/>
        <Timestamp>2016-11-28T12:04:02Z</Timestamp>
    </Header>
    <DataDictionary>
        <DataField name="TitleCat" optype="continuous" dataType="double"/>
        <DataField name="RLMaxTitle" optype="continuous" dataType="double"/>
        <DataField name="Act1_rate" optype="continuous" dataType="double"/>
        <DataField name="Act2_rate" optype="continuous" dataType="double"/>
        <DataField name="Act3_rate" optype="continuous" dataType="double"/>
        <DataField name="Act4_rate" optype="continuous" dataType="double"/>
        <DataField name="Act5_rate" optype="continuous" dataType="double"/>
        <DataField name="Act6_rate" optype="continuous" dataType="double"/>
        <DataField name="AccntAct_rate" optype="continuous" dataType="double"/>
        <DataField name="sqlState" optype="continuous" dataType="double"/>
        <DataField name="BantStatus" optype="categorical" dataType="integer">
            <Value value="0"/>
            <Value value="1"/>
        </DataField>
    </DataDictionary>
    <TransformationDictionary>
        <DefineFunction name="logit" optype="continuous" dataType="double">
            <ParameterField name="value" optype="continuous" dataType="double"/>
            <Apply function="/">
                <Constant dataType="double">1</Constant>
                <Apply function="+">
                    <Constant dataType="double">1</Constant>
                    <Apply function="exp">
                        <Apply function="*">
                            <Constant dataType="double">-1</Constant>
                            <FieldRef field="value"/>
                        </Apply>
                    </Apply>
                </Apply>
            </Apply>
        </DefineFunction>
    </TransformationDictionary>
    <MiningModel functionName="classification">
        <MiningSchema>
            <MiningField name="BantStatus" usageType="target"/>
            <MiningField name="TitleCat"/>
            <MiningField name="RLMaxTitle"/>
            <MiningField name="Act1_rate"/>
            <MiningField name="Act2_rate"/>
            <MiningField name="Act3_rate"/>
            <MiningField name="Act4_rate"/>
            <MiningField name="Act5_rate"/>
            <MiningField name="Act6_rate"/>
            <MiningField name="AccntAct_rate"/>
            <MiningField name="sqlState"/>
        </MiningSchema>
        <Segmentation multipleModelMethod="modelChain">
            <Segment id="1">
                <True/>
                <RegressionModel functionName="regression">
                    <MiningSchema>
                        <MiningField name="TitleCat"/>
                        <MiningField name="RLMaxTitle"/>
                        <MiningField name="Act1_rate"/>
                        <MiningField name="Act2_rate"/>
                        <MiningField name="Act3_rate"/>
                        <MiningField name="Act4_rate"/>
                        <MiningField name="Act5_rate"/>
                        <MiningField name="Act6_rate"/>
                        <MiningField name="AccntAct_rate"/>
                        <MiningField name="sqlState"/>
                    </MiningSchema>
                    <Output>
                        <OutputField name="decisionFunction_1" optype="continuous" dataType="double" feature="predictedValue" isFinalResult="false"/>
                    </Output>
                    <RegressionTable intercept="-2.086708061828022">
                        <NumericPredictor name="TitleCat" coefficient="-0.08830152892846507"/>
                        <NumericPredictor name="RLMaxTitle" coefficient="0.1621566064638807"/>
                        <NumericPredictor name="Act1_rate" coefficient="-0.018238598197299193"/>
                        <NumericPredictor name="Act2_rate" coefficient="-0.016441453725557"/>
                        <NumericPredictor name="Act3_rate" coefficient="-0.045520608577430045"/>
                        <NumericPredictor name="Act4_rate" coefficient="0.33260315589120076"/>
                        <NumericPredictor name="Act5_rate" coefficient="-0.22925972334047728"/>
                        <NumericPredictor name="Act6_rate" coefficient="-0.03337501878673795"/>
                        <NumericPredictor name="AccntAct_rate" coefficient="0.013093373976464637"/>
                        <NumericPredictor name="sqlState" coefficient="1.0430128378571444"/>
                    </RegressionTable>
                </RegressionModel>
            </Segment>
            <Segment id="2">
                <True/>
                <RegressionModel functionName="classification" normalizationMethod="softmax">
                    <MiningSchema>
                        <MiningField name="BantStatus" usageType="target"/>
                        <MiningField name="decisionFunction_1"/>
                    </MiningSchema>
                    <Output>
                        <OutputField name="probability_0" optype="continuous" dataType="double" feature="probability" value="0"/>
                        <OutputField name="probability_1" optype="continuous" dataType="double" feature="probability" value="1"/>
                    </Output>
                    <RegressionTable intercept="0.0" targetCategory="0">
                        <NumericPredictor name="decisionFunction_1" coefficient="-1.0"/>
                    </RegressionTable>
                    <RegressionTable intercept="0.0" targetCategory="1"/>
                </RegressionModel>
            </Segment>
        </Segmentation>
    </MiningModel>
</PMML>''')

# Score a single record in which every input field named by model.inputNames is set to 1.0.
# The value 1.0 is an arbitrary placeholder used for testing only, not meaningful data.
result = model.predict({x: 1.0 for x in model.inputNames})

结果是同时包含probability_0和probability_1的字典:

{'probability_0': 0.7245169429069838, 'probability_1': 0.2754830570930162}