基于现有列的新列

时间:2017-06-11 04:45:47

标签: rapidminer

我想基于现有列b

生成新列a

假设在现有列a中,我有以下值

3,3,5,5,7,7,9,9

在新列中,如果值为[0,25],(25,50],(50,75],(75,100]百分位数,我的值为1,2,3,4 所以对于新列b,我有值1,1,2,2,3,3,4,4

如何在rapidminer中生成这个新列?

1 个答案:

答案 0 :(得分:0)

NormalizeDiscretize by User Specification运算符的组合是一种方法。

Normalize运算符将属性的值缩放为0到1之间,Discretize运算符确定值的百分位数。

以下是您可以使用基础的示例流程。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example RapidMiner process: build a small sample column "a", rescale it
  with Normalize, then bin the rescaled values into the classes 1 to 4 with
  Discretize by User Specification.
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!--
        Sample data only. Replace this subprocess with your own data source
        when adapting the process.
      -->
      <operator activated="true" class="subprocess" compatibility="7.5.000" expanded="true" height="82" name="Subprocess" width="90" x="179" y="85">
        <process expanded="true">
          <!-- Four generators, each configured with a single value for attribute "a": 3, 5, 7 and 9. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="a" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="a" value="5"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="a" value="7"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="a" value="9"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Append stacks the four generated sets; Multiply plus Append (2) then stacks two copies of that result. -->
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="145" name="Append" width="90" x="313" y="34"/>
          <operator activated="true" class="multiply" compatibility="7.5.000" expanded="true" height="103" name="Multiply" width="90" x="447" y="34"/>
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="103" name="Append (2)" width="90" x="648" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Append (2)" to_port="example set 2"/>
          <connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Rescale "a" with the range transformation method (0 to 1 with the
        operator defaults). attribute_filter_type is set to "single" so that
        the "attribute" parameter is honoured; without it the filter stays at
        its default and every attribute of the input would be rescaled.
      -->
      <operator activated="true" class="normalize" compatibility="7.5.000" expanded="true" height="103" name="Normalize" width="90" x="380" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <parameter key="method" value="range transformation"/>
      </operator>
      <!--
        Bin the rescaled "a". Each entry in "classes" pairs a class name (key)
        with the upper limit of its bin (value), giving the bins
        [0, 0.25], (0.25, 0.5], (0.5, 0.75] and (0.75, 1.0].
        As above, attribute_filter_type="single" restricts the operator to "a".
        NOTE(review): nothing in this process creates a separate attribute, so
        the class labels appear to replace the values of "a"; copy the column
        first if the original values must be kept. Confirm before relying on it.
      -->
      <operator activated="true" class="discretize_by_user_specification" compatibility="7.5.000" expanded="true" height="103" name="Discretize" width="90" x="581" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <list key="classes">
          <parameter key="1" value="0.25"/>
          <parameter key="2" value="0.5"/>
          <parameter key="3" value="0.75"/>
          <parameter key="4" value="1.0"/>
        </list>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>