在RapidMiner中连接所有属性对

时间:2016-06-11 08:37:48

标签: rapidminer

我有一个列表,其中属性的总量最初是未知的。

我想把所有属性两两组合(连接)起来,而事先并不知道一共有多少个属性。

我尝试了 Loop Attribute Subsets 运算符,但遗憾的是它没有输出。

目前我的流程如下:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process from the question.
  It builds a small example set with three attributes (Group_1..Group_3), renames the
  attributes to generic names, and then concatenates each attribute pair in its own
  hand-wired branch (att1/att2, att1/att3, att2/att3) before appending the three results.
  The pairs are hard-coded, one branch per pair, so the process only fits three attributes.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Sample data: five generated example sets are appended, then the nominal value "M" is declared missing. -->
      <operator activated="true" class="subprocess" compatibility="7.1.001" expanded="true" height="82" name="Generate Data" width="90" x="45" y="75">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="180" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="315" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="450" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="585" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;M&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="148" name="Append" width="90" x="720" y="30"/>
          <!-- "M" is used above as a placeholder value; here it is declared as a missing nominal value. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="855" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="M"/>
          </operator>
          <!-- Note: the first two generators are wired to Append ports 2 and 1 respectively (swapped relative to their order). -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stores the attribute count in the macro num_attr. No operator visible in this process reads that macro. -->
      <operator activated="true" class="extract_macro" compatibility="7.1.001" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="75">
        <parameter key="macro" value="num_attr"/>
        <parameter key="macro_type" value="number_of_attributes"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Generic renaming; the branches below address the attributes as att1, att2, att3 (presumably the default generic stem). -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (2)" width="90" x="313" y="75"/>
      <!-- Fans the renamed example set out to the three hard-wired pair branches. -->
      <operator activated="true" class="multiply" compatibility="7.1.001" expanded="true" height="124" name="Multiply (2)" width="90" x="179" y="300"/>
      <!-- Branch 1: subset filter listing att1 and att2. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|att1|att2"/>
      </operator>
      <!-- Generic renaming is applied again after each selection; every concatenation below references att1 and att2. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (5)" width="90" x="514" y="210"/>
      <!-- Branch 2: subset filter listing att1 and att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1||att3"/>
      </operator>
      <!-- Branch 3: subset filter listing att2 and att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (4)" width="90" x="380" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2||att3"/>
      </operator>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (3)" width="90" x="514" y="390"/>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (4)" width="90" x="514" y="300"/>
      <!-- One filter per branch with condition_class no_missing_attributes (presumably keeps only examples without missing values). -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="648" y="210">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="648" y="300">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (4)" width="90" x="648" y="390">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- One concatenation per branch, each combining that branch's att1 and att2. -->
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (2)" width="90" x="782" y="390">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (3)" width="90" x="782" y="300">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (4)" width="90" x="782" y="210">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Merges the three branch outputs; the merged set is delivered at result 1. -->
      <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="124" name="Append (3)" width="90" x="916" y="255"/>
      <!-- Wiring of the main process. -->
      <connect from_op="Generate Data" from_port="out 1" to_op="Extract Macro (2)" to_port="example set"/>
      <connect from_op="Extract Macro (2)" from_port="example set" to_op="Rename by Generic Names (2)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <!-- The "original" port of the first rename is exposed as result 2. -->
      <connect from_op="Rename by Generic Names (2)" from_port="original" to_port="result 2"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Generic Names (5)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (5)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename by Generic Names (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Rename by Generic Names (3)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (4)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generate Concatenation (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Generate Concatenation (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Generate Concatenation (2)" to_port="example set input"/>
      <connect from_op="Generate Concatenation (2)" from_port="example set output" to_op="Append (3)" to_port="example set 3"/>
      <connect from_op="Generate Concatenation (3)" from_port="example set output" to_op="Append (3)" to_port="example set 2"/>
      <connect from_op="Generate Concatenation (4)" from_port="example set output" to_op="Append (3)" to_port="example set 1"/>
      <connect from_op="Append (3)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

1 个答案:

答案 0 :(得分:1)

这是个难题。Loop Subsets 运算符不会返回单个示例集,因为它会创建多个不同的示例集,每个示例集都包含由输入属性的一种组合构成的属性。为了解决这个问题,可以使用 Recall 和 Remember 运算符来保存一个累计结果(running total)。但事情到这里还没有结束:通常最终只需要一个示例集,这意味着还需要做一番相当繁琐的重命名和连接(Join)操作。

简而言之,我附上了一个独立的示例流程来演示以上所有步骤。如果不根据您的数据进行调整,它将无法直接工作。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process from the answer.
  It retrieves the Iris sample data and loops over attribute subsets restricted to exactly
  two attributes. Inside the loop each pair is renamed to generic names, concatenated, and
  the result is renamed using the pair's original names, which are captured through the log.
  The per-iteration results are accumulated in the object store under the name
  "runningTotal"; after the loop the accumulated set is recalled and joined with the
  original data.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
    <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Copy 1 feeds the loop; copy 2 is joined with the accumulated result at the end. -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="136"/>
      <!-- exact, min and max number of attributes are all 2, so each iteration handles one attribute pair. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
    <parameter key="exact_number_of_attributes" value="2"/>
    <parameter key="min_number_of_attributes" value="2"/>
    <parameter key="limit_max_number" value="true"/>
    <parameter key="max_number_of_attributes" value="2"/>
    <process expanded="true">
      <!-- Logs the loop's feature_names value into a log column named "Attributes". -->
      <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="112" y="34">
        <list key="log">
          <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
        </list>
      </operator>
      <!-- Turns the log named "Log" into an example set; the loop's example set passes through port "through 1". -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="112" y="238">
        <parameter key="log_name" value="Log"/>
      </operator>
      <!-- in 1 (loop data) is passed straight to out 1; in 2 (log data) is used to fill the macro "remember" and the log is then cleared. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="Subprocess" width="90" x="246" y="238">
        <process expanded="true">
          <!-- Reads the "Attributes" value of example index 1 into the macro "remember". -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="136">
        <parameter key="macro" value="remember"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="attribute_name" value="Attributes"/>
        <parameter key="example_index" value="1"/>
        <list key="additional_macros"/>
          </operator>
          <!-- Clears the log (delete_table is true); presumably so the next iteration's entry is again at index 1. -->
          <operator activated="true" class="clear_log" compatibility="7.0.001" expanded="true" height="82" name="Clear Log" width="90" x="380" y="136">
        <parameter key="log_name" value="Log"/>
        <parameter key="delete_table" value="true"/>
          </operator>
          <connect from_port="in 1" to_port="out 1"/>
          <connect from_port="in 2" to_op="Extract Macro" to_port="example set"/>
          <connect from_op="Extract Macro" from_port="example set" to_op="Clear Log" to_port="through 1"/>
          <connect from_op="Clear Log" from_port="through 1" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="source_in 3" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
          <portSpacing port="sink_out 3" spacing="0"/>
        </process>
      </operator>
      <!-- The accompanying answer states that Materialize Data is needed in practice. -->
      <operator activated="true" class="materialize_data" compatibility="7.0.001" expanded="true" height="82" name="Materialize Data" width="90" x="246" y="34"/>
      <!-- Generic renaming lets the fixed names att1 and att2 be used below for whichever pair is current. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.0.001" expanded="true" height="82" name="Rename by Generic Names" width="90" x="380" y="34"/>
      <operator activated="true" class="generate_concatenation" compatibility="7.0.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="380" y="136">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Inverted subset selection: att1 and att2 are removed, leaving the remaining attributes. -->
      <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="238">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2|att1"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Renames the concatenated attribute att1_att2 to the value of the macro "remember". -->
      <operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
        <parameter key="old_name" value="att1_att2"/>
        <parameter key="new_name" value="%{remember}"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- First inner process: recall "runningTotal", join it with the current result, store it again.
           Second inner process: store the current result as "runningTotal" (presumably the fallback used
           when the recall fails on the first iteration). -->
      <operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="514" y="136">
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="45" y="187">
        <parameter key="name" value="runningTotal"/>
        <parameter key="remove_from_store" value="false"/>
          </operator>
          <!-- NOTE(review): the key attribute "Play" does not look like an Iris attribute; presumably a leftover
               that is ignored if the join uses the id attribute as key. Confirm before reuse. -->
          <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="179" y="34">
        <list key="key_attributes">
          <parameter key="Play" value="Play"/>
        </list>
          </operator>
          <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="246" y="187">
        <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_port="in 1" to_op="Join" to_port="left"/>
          <connect from_op="Recall (2)" from_port="result" to_op="Join" to_port="right"/>
          <connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
          <connect from_op="Remember" from_port="stored" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <process expanded="true">
          <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="179" y="34">
        <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
          <connect from_op="Remember (2)" from_port="stored" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Wiring inside the loop. The output of Handle Exception is not connected; results leave the loop via the object store. -->
      <connect from_port="example set" to_op="Log" to_port="through 1"/>
      <connect from_op="Log" from_port="through 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Subprocess" to_port="in 2"/>
      <connect from_op="Log to Data" from_port="through 1" to_op="Subprocess" to_port="in 1"/>
      <connect from_op="Subprocess" from_port="out 1" to_op="Materialize Data" to_port="example set input"/>
      <connect from_op="Materialize Data" from_port="example set output" to_op="Rename by Generic Names" to_port="example set input"/>
      <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
      <connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
      <portSpacing port="source_example set" spacing="0"/>
    </process>
      </operator>
      <!-- Recalls "runningTotal" after the loop. Its "in 1" input is wired from the loop but not used inside,
           which only forces this subprocess to run after the loop has finished. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="313" y="34">
    <process expanded="true">
      <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="246" y="85">
        <parameter key="name" value="runningTotal"/>
      </operator>
      <connect from_op="Recall" from_port="result" to_port="out 1"/>
      <portSpacing port="source_in 1" spacing="0"/>
      <portSpacing port="source_in 2" spacing="0"/>
      <portSpacing port="sink_out 1" spacing="0"/>
      <portSpacing port="sink_out 2" spacing="0"/>
    </process>
      </operator>
      <!-- Joins the accumulated pair columns (left) with the original Iris data (right); no explicit key attributes are listed. -->
      <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="581" y="136">
    <list key="key_attributes"/>
      </operator>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>

注意事项

  • Loop Subsets运算符设置为选择属性对
  • 在 Loop Subsets 内使用 Log 和 Log to Data,可以记录当前的属性对,将其转换为示例集,然后再提取到宏中。
  • 将属性重命名为通用名称,连接,然后将结果重命名为原始名称。
  • 使用 Join 将当前结果与上一次迭代的结果连接起来,从而得到一个累计(running total)示例集。第一次进入循环时还没有上一次迭代的结果,这种情况由 Handle Exception 运算符处理。
  • 在 Loop Subsets 运算符之外,于 Subprocess 内用 Recall 取回累计示例集,以确保执行顺序正确。
  • 运行总计与原始数据相结合,以便于查看其是否有效。

最后一点:Materialize Data 运算符在这里是必需的,尽管按理说本不应该需要它。