RapidMiner 行最大值

时间:2017-05-15 19:20:18

标签: max row rapidminer

对不起,我是RapidMiner的新手,只做了基础教程。

我有一个像这样的数据集:
MatchID   Value1   Value2   Value3
1            5        1        2
1           4.5      1.5       2
...

并且想知道是否有可能获得每列的最高值(例如Value1)并使用它进行进一步计算(生成属性)。

谢谢。

3 个答案:

答案 0 :(得分:3)

有很多方法可以实现。下面的方法先用 Aggregate 运算符求出最大值,再用 Join 将其与原始数据连接,最后用 Generate Attributes 进行计算。

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.2.003">
  <!-- Approach: compute the maximum of every attribute with Aggregate, join that
       result with the data from Aggregate's "original" port, then derive the
       per-row difference from each maximum with Generate Attributes. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator name="Process" class="process"
            activated="true" compatibility="7.2.003" expanded="true">
    <process expanded="true">
      <!-- Load the Iris sample data set from the repository. -->
      <operator name="Retrieve Iris" class="retrieve"
                activated="true" compatibility="7.2.003" expanded="true"
                height="68" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Use the default aggregation function "maximum"; no individual aggregation
           attributes are listed. The expressions in Generate Attributes below refer
           to the results as maximum(a1) ... maximum(a4). -->
      <operator name="Aggregate" class="aggregate"
                activated="true" compatibility="7.2.003" expanded="true"
                height="82" width="90" x="179" y="34">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="default_aggregation_function" value="maximum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!-- Outer join of the aggregated maxima (left) with the data coming from
           Aggregate's "original" port (right). The ID attribute is not used as key
           and the key attribute list is empty.
           NOTE(review): presumably this attaches the maxima to every original row;
           confirm the join behaviour with an empty key list in RapidMiner. -->
      <operator name="Join" class="join"
                activated="true" compatibility="7.2.003" expanded="true"
                height="82" width="90" x="313" y="34">
        <parameter key="join_type" value="outer"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes"/>
      </operator>
      <!-- For each of a1 ... a4, create deltaAx = maximum of the attribute minus the
           row's own value. -->
      <operator name="Generate Attributes" class="generate_attributes"
                activated="true" compatibility="7.2.003" expanded="true"
                height="82" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="deltaA1" value="[maximum(a1)]-a1"/>
          <parameter key="deltaA2" value="[maximum(a2)]-a2"/>
          <parameter key="deltaA3" value="[maximum(a3)]-a3"/>
          <parameter key="deltaA4" value="[maximum(a4)]-a4"/>
        </list>
      </operator>
      <!-- Wiring: Retrieve Iris to Aggregate; both Aggregate outputs to Join;
           Join to Generate Attributes; Generate Attributes to result 1. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output"
               to_op="Join" to_port="left"/>
      <connect from_op="Aggregate" from_port="original"
               to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join"
               to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

答案 1 :(得分:0)

另一种方法是使用 Extract Macro 运算符,将 macro_type 设为 statistics,并将 statistics 设为 max。这样会把给定属性的最大值存储为宏值,之后就可以在例如 Generate Attributes 运算符中使用该值。

优点是您不需要修改原始数据集,也不必使用 Join 或 Multiply 运算符。

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.000">
  <!-- Approach: store the maximum of attribute a1 in a macro, then use that macro
       inside a Generate Attributes expression. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator name="Process" class="process"
            activated="true" compatibility="7.5.000" expanded="true">
    <process expanded="true">
      <!-- Load the Iris sample data set from the repository. -->
      <operator name="Retrieve Iris" class="retrieve"
                activated="true" compatibility="7.5.000" expanded="true"
                height="68" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Extract the "max" statistic of attribute a1 into the macro maxA1. -->
      <operator name="Extract Macro" class="extract_macro"
                activated="true" compatibility="7.5.000" expanded="true"
                height="68" width="90" x="179" y="34">
        <parameter key="macro" value="maxA1"/>
        <parameter key="macro_type" value="statistics"/>
        <parameter key="statistics" value="max"/>
        <parameter key="attribute_name" value="a1"/>
        <list key="additional_macros"/>
        <description align="center" color="transparent" colored="false" width="126">extract maximum of attribute a1 and store it in a macro</description>
      </operator>
      <!-- Create DifferenceA1 = maximum of a1 minus the row's own a1 value. The macro
           reference %{maxA1} is wrapped in parse() before the subtraction. -->
      <operator name="Generate Attributes" class="generate_attributes"
                activated="true" compatibility="7.5.000" expanded="true"
                height="82" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="DifferenceA1" value="parse(%{maxA1})-a1"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">calculate the difference of a1 from the maximum using the macro value</description>
      </operator>
      <!-- Wiring: Retrieve Iris to Extract Macro; Extract Macro to Generate
           Attributes; Generate Attributes to result 1. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set"
               to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

提示:由于宏值是以文本形式存储的,必须先用 parse() 将其转换为数值,然后才能参与计算。

答案 2 :(得分:0)

第三种方法是先用 Sort 运算符对示例集排序,再用 Filter Example Range 运算符只保留取得最大值的那个示例。如果您想知道某个属性取最大值时其他属性的值,这种方法会派上用场。

<?xml version="1.0" encoding="UTF-8"?>
<process version="7.5.000">
  <!-- Approach: sort the example set by a1 in decreasing order and keep only the
       first example, i.e. the one holding the maximum of a1. -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator name="Process" class="process"
            activated="true" compatibility="7.5.000" expanded="true">
    <process expanded="true">
      <!-- Load the Iris sample data set from the repository. -->
      <operator name="Retrieve Iris" class="retrieve"
                activated="true" compatibility="7.5.000" expanded="true"
                height="68" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Sort on attribute a1, largest value first. -->
      <operator name="Sort" class="sort"
                activated="true" compatibility="7.5.000" expanded="true"
                height="82" width="90" x="179" y="34">
        <parameter key="attribute_name" value="a1"/>
        <parameter key="sorting_direction" value="decreasing"/>
        <description align="center" color="transparent" colored="false" width="126">sorting the example set on a1 decreasing</description>
      </operator>
      <!-- Keep the range from example 1 to example 1, i.e. only the first example
           of the sorted set. -->
      <operator name="Filter Example Range" class="filter_example_range"
                activated="true" compatibility="7.5.000" expanded="true"
                height="82" width="90" x="313" y="34">
        <parameter key="first_example" value="1"/>
        <parameter key="last_example" value="1"/>
        <description align="center" color="transparent" colored="false" width="126">only keeping the first example, which has the maximum for a1</description>
      </operator>
      <!-- Wiring: Retrieve Iris to Sort; Sort to Filter Example Range; Filter
           Example Range to result 1. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output"
               to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>