我想基于现有列b
a
假设在现有列a
中,我有以下值
3,3,5,5,7,7,9,9
在新列中,如果值为[0,25],(25,50],(50,75],(75,100]
百分位数,我的值为1,2,3,4
所以对于新列b
,我有值1,1,2,2,3,3,4,4
如何在rapidminer中生成这个新列?
答案 0 :(得分:0)
Normalize
和Discretize by User Specification
运算符的组合是一种方法。
Normalize
运算符将属性的值缩放为0到1之间,Discretize
运算符确定值的百分位数。
以下是您可以使用基础的示例流程。
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example RapidMiner process: map the numeric attribute "a" to four classes
  named 1, 2, 3 and 4.

  Stages:
    1. Subprocess : builds demo data in which "a" takes the values 3, 5, 7, 9,
                    each value occurring twice.
    2. Normalize  : rescales "a" with the "range transformation" method.
    3. Discretize : assigns the rescaled value to class 1, 2, 3 or 4 using the
                    limits 0.25, 0.5, 0.75 and 1.0.

  NOTE(review): Normalize and Discretize are both applied to "a" itself; no
  operator in this process creates a separate attribute "b". If the original
  "a" must be kept, copy it to "b" first and point both operators at "b".
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Demo data only: replace this subprocess with your own data source. -->
      <operator activated="true" class="subprocess" compatibility="7.5.000" expanded="true" height="82" name="Subprocess" width="90" x="179" y="85">
        <process expanded="true">
          <!-- Four generators, each supplying one value of attribute "a". -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="a" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="a" value="5"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="a" value="7"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="a" value="9"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Append stacks the four generated sets into one example set. -->
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="145" name="Append" width="90" x="313" y="34"/>
          <!-- Multiply + Append (2): duplicate the set and stack both copies, so every value appears twice. -->
          <operator activated="true" class="multiply" compatibility="7.5.000" expanded="true" height="103" name="Multiply" width="90" x="447" y="34"/>
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="103" name="Append (2)" width="90" x="648" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Append (2)" to_port="example set 2"/>
          <connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Rescale "a" by range transformation.
        attribute_filter_type="single" is required for the "attribute" parameter
        to take effect; without it the operator is not restricted to "a" and
        would also rescale other attributes of a wider data set.
      -->
      <operator activated="true" class="normalize" compatibility="7.5.000" expanded="true" height="103" name="Normalize" width="90" x="380" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <parameter key="method" value="range transformation"/>
      </operator>
      <!--
        Bin the rescaled "a" into classes 1 to 4. Each entry of "classes" is a
        class name (key) paired with the limit for that class (value).
        attribute_filter_type="single" restricts the operator to "a" for the
        same reason as in Normalize.
      -->
      <operator activated="true" class="discretize_by_user_specification" compatibility="7.5.000" expanded="true" height="103" name="Discretize" width="90" x="581" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <list key="classes">
          <parameter key="1" value="0.25"/>
          <parameter key="2" value="0.5"/>
          <parameter key="3" value="0.75"/>
          <parameter key="4" value="1.0"/>
        </list>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>