Rapidminer从分割属性生成新示例

时间:2016-06-02 06:42:42

标签: rapidminer

我有一些数据,其中一个属性包含信息,必须分成新的示例(行)。

该属性被称为:'Group',其值如下:'A + B + C'

我设法拆分该属性,现在我有3个属性:

'Group_1':'A','Group_2':'B','Group_3':'C'

这个组还有另一个属性,例如 'Value':'3'。现在我希望我的示例集由只含 Group 和 Value 两个属性的示例(每个组一行)组成,而不是由把多个组合并在一行里的示例组成。

我的表格如下:

Example Set

我可以手动把数据流复制(Multiply)成多份,为每个 Group_x 属性各选择一个子集,把该属性重命名为 Group,然后再把这些子集合并在一起;但这是一个静态的解决方案,而 Group_x 的数量并不固定。

有没有办法可以根据 Group_x 属性的数量自动执行该过程?(使用宏操作符,我只能统计所有属性的总数。)

ExampleSet应如下所示:

(原文此处为一张图片,展示期望的结果表。以上面的例子来说,'A + B + C' 且 Value 为 3 的那一行应变成三行:Group=A、Value=3;Group=B、Value=3;Group=C、Value=3。)

1 个答案:

答案 0 :(得分:0)

De-Pivot 操作符可以完成这项工作。下面是一个示例:其中的 Subprocess 仅用于生成与您的数据结构相同的示例数据。您真正需要的操作符只有两个:用 De-Pivot 完成转换,再用 Select Attributes 选出最终结果所需的属性。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process that turns the wide columns Group_1, Group_2, Group_3
  into one "Group" column with one example (row) per group value.
  Flow: Subprocess (demo data), then De-Pivot, then Select Attributes.
  Only De-Pivot and Select Attributes are needed on real data; the
  Subprocess exists solely to fabricate sample input.
-->
<process version="7.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Demo data only: five generated example sets, each with the attributes
           Value, Group_1, Group_2 and Group_3, appended into one example set. -->
      <operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="187">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="179" y="85">
            <list key="attribute_values">
              <parameter key="Value" value="3"/>
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="187">
            <list key="attribute_values">
              <parameter key="Value" value="13"/>
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="289">
            <list key="attribute_values">
              <parameter key="Value" value="11"/>
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- The next two generators use the literal nominal value "Missing" for
               absent groups; Declare Missing Value further down is configured
               with that same value. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="179" y="391">
            <list key="attribute_values">
              <parameter key="Value" value="32"/>
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;Missing&quot;"/>
              <parameter key="Group_3" value="&quot;Missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="179" y="493">
            <list key="attribute_values">
              <parameter key="Value" value="5"/>
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;Missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Combine the five generated example sets into one (see the connect
               elements below: each generator feeds one Append input port). -->
          <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="166" name="Append" width="90" x="514" y="85"/>
          <!-- Declare the nominal value "Missing" as a missing value. -->
          <operator activated="true" class="declare_missing_value" compatibility="7.0.000" expanded="true" height="82" name="Declare Missing Value" width="90" x="648" y="85">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="Missing"/>
          </operator>
          <!-- Add an id attribute; it is part of the final selection
               (Group|Value|id) made by Select Attributes. -->
          <operator activated="true" class="generate_id" compatibility="7.0.000" expanded="true" height="82" name="Generate ID" width="90" x="782" y="85"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- De-Pivot does the actual work: the attributes whose names match the
           regular expression "Group.*" (Group_1, Group_2, Group_3 in the demo
           data) are folded into a single new attribute named "Group". Because
           the sources are selected by pattern, the number of Group_x
           attributes does not need to be known in advance. -->
      <operator activated="true" class="de_pivot" compatibility="7.0.000" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
        <list key="attribute_name">
          <parameter key="Group" value="Group.*"/>
        </list>
        <!-- "Value1" is the name given to the index attribute created by
             De-Pivot; it is not listed in the Select Attributes subset below.
             NOTE(review): per the De-Pivot documentation this attribute records
             which source attribute each row came from; confirm if you need it. -->
        <parameter key="index_attribute" value="Value1"/>
        <parameter key="create_nominal_index" value="true"/>
      </operator>
      <!-- Restrict the result to the subset Group, Value and id. -->
      <operator activated="true" class="select_attributes" compatibility="7.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Group|Value|id"/>
      </operator>
      <!-- Wiring: Subprocess out 1 feeds De-Pivot, De-Pivot feeds Select
           Attributes, and Select Attributes feeds the process result port. -->
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="De-Pivot (2)" to_port="example set input"/>
      <connect from_op="De-Pivot (2)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

希望有所帮助。