我有一个列表,其中属性的总量最初是未知的。
我想把所有属性两两连接(拼接)起来,但事先并不知道属性的数量。
有一个 Loop Attribute Subsets 运算符,但遗憾的是它没有输出。
目前我的流程如下:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: builds a small nominal example set with three
  attributes (Group_1..Group_3), then concatenates every pair of attributes
  by hand: one Select Attributes branch per pair, followed by an Append of
  the three branch results.
  The pair branches below are hard-coded for exactly three attributes.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Test data: five single-row example sets appended into one table. -->
      <operator activated="true" class="subprocess" compatibility="7.1.001" expanded="true" height="82" name="Generate Data" width="90" x="45" y="75">
        <process expanded="true">
          <!--
            The attribute values are quoted string literals, so the inner
            double quotes must be written as &quot; to keep the XML
            attribute well-formed.
          -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="180" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="315" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="450" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="585" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;M&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="148" name="Append" width="90" x="720" y="30"/>
          <!-- The placeholder value "M" is declared as a missing value. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="855" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="M"/>
          </operator>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stores the attribute count in macro num_attr; the macro is not referenced elsewhere in this process. -->
      <operator activated="true" class="extract_macro" compatibility="7.1.001" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="75">
        <parameter key="macro" value="num_attr"/>
        <parameter key="macro_type" value="number_of_attributes"/>
        <list key="additional_macros"/>
      </operator>
      <!-- The branches below select attributes by the generic names att1, att2, att3. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (2)" width="90" x="313" y="75"/>
      <operator activated="true" class="multiply" compatibility="7.1.001" expanded="true" height="124" name="Multiply (2)" width="90" x="179" y="300"/>
      <!-- Branch 1: pair (att1, att2). -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|att1|att2"/>
      </operator>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (5)" width="90" x="514" y="210"/>
      <!-- Branch 2: pair (att1, att3). -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1||att3"/>
      </operator>
      <!-- Branch 3: pair (att2, att3). -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (4)" width="90" x="380" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2||att3"/>
      </operator>
      <!--
        Each branch renames its two remaining attributes generically again,
        so every Generate Concatenation below can refer to att1 and att2.
      -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (3)" width="90" x="514" y="390"/>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (4)" width="90" x="514" y="300"/>
      <!-- Keep only the examples in which neither attribute of the pair is missing. -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="648" y="210">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="648" y="300">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (4)" width="90" x="648" y="390">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Concatenate the two attributes of each pair. -->
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (2)" width="90" x="782" y="390">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (3)" width="90" x="782" y="300">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (4)" width="90" x="782" y="210">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Stack the three pair results into the final example set. -->
      <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="124" name="Append (3)" width="90" x="916" y="255"/>
      <connect from_op="Generate Data" from_port="out 1" to_op="Extract Macro (2)" to_port="example set"/>
      <connect from_op="Extract Macro (2)" from_port="example set" to_op="Rename by Generic Names (2)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="original" to_port="result 2"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Generic Names (5)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (5)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename by Generic Names (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Rename by Generic Names (3)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (4)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generate Concatenation (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Generate Concatenation (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Generate Concatenation (2)" to_port="example set input"/>
      <connect from_op="Generate Concatenation (2)" from_port="example set output" to_op="Append (3)" to_port="example set 3"/>
      <connect from_op="Generate Concatenation (3)" from_port="example set output" to_op="Append (3)" to_port="example set 2"/>
      <connect from_op="Generate Concatenation (4)" from_port="example set output" to_op="Append (3)" to_port="example set 1"/>
      <connect from_op="Append (3)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
答案 0 :(得分:1)
这是个棘手的问题。`Loop Subsets` 运算符不会返回单个示例集,因为它会创建多个不同的示例集,每个示例集都包含由输入属性组合而成的属性。为了解决这个问题,可以使用 `Recall` 和 `Remember` 运算符来保存累计结果(running total)。事情到这里还没有结束:由于通常只需要一个示例集,因此还需要做一些相当繁琐的重命名和连接(Join)操作。
简而言之,我附上了一个独立的示例流程来演示以上所有内容。如果不针对您的数据进行调整,它可能无法正常工作。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: loops over all 2-attribute subsets of the Iris sample
  data, concatenates each pair into one attribute named after the pair, and
  accumulates the results in a stored example set ("runningTotal") that is
  finally joined back to the original data.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- One copy feeds the loop, the other is joined with the accumulated result at the end. -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="136"/>
      <!-- Minimum and maximum subset size are both 2, i.e. attribute pairs. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="true"/>
        <parameter key="max_number_of_attributes" value="2"/>
        <process expanded="true">
          <!-- Log the loop's feature_names value, then turn the log into an example set. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="112" y="34">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
            </list>
          </operator>
          <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="112" y="238">
            <parameter key="log_name" value="Log"/>
          </operator>
          <!--
            Copies the logged "Attributes" value into the macro "remember" and
            clears the log; the looped example set passes through on in 1 / out 1.
          -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="Subprocess" width="90" x="246" y="238">
            <process expanded="true">
              <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="136">
                <parameter key="macro" value="remember"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Attributes"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="clear_log" compatibility="7.0.001" expanded="true" height="82" name="Clear Log" width="90" x="380" y="136">
                <parameter key="log_name" value="Log"/>
                <parameter key="delete_table" value="true"/>
              </operator>
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Clear Log" to_port="through 1"/>
              <connect from_op="Clear Log" from_port="through 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="materialize_data" compatibility="7.0.001" expanded="true" height="82" name="Materialize Data" width="90" x="246" y="34"/>
          <!-- Generic names let the next operators address the pair as att1 and att2. -->
          <operator activated="true" class="rename_by_generic_names" compatibility="7.0.001" expanded="true" height="82" name="Rename by Generic Names" width="90" x="380" y="34"/>
          <operator activated="true" class="generate_concatenation" compatibility="7.0.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="380" y="136">
            <parameter key="first_attribute" value="att1"/>
            <parameter key="second_attribute" value="att2"/>
          </operator>
          <!-- Inverted subset selection: drops att1 and att2 themselves. -->
          <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="238">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="att2|att1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Name the concatenated attribute after the pair captured in the "remember" macro. -->
          <operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
            <parameter key="old_name" value="att1_att2"/>
            <parameter key="new_name" value="%{remember}"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!--
            First inner process: recall "runningTotal", join it with the current
            result and store it again. Second inner process: only store the
            current result; presumably the fallback for the first iteration,
            when nothing has been stored yet.
          -->
          <operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="514" y="136">
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="45" y="187">
                <parameter key="name" value="runningTotal"/>
                <parameter key="remove_from_store" value="false"/>
              </operator>
              <!--
                NOTE(review): no operator in this process produces an attribute
                called "Play"; this key entry looks like a leftover and is
                presumably ignored while the Join keys on the id attribute.
                Confirm before relying on it.
              -->
              <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="179" y="34">
                <list key="key_attributes">
                  <parameter key="Play" value="Play"/>
                </list>
              </operator>
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="246" y="187">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Join" to_port="left"/>
              <connect from_op="Recall (2)" from_port="result" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
              <connect from_op="Remember" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <process expanded="true">
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="179" y="34">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
              <connect from_op="Remember (2)" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_op="Subprocess" to_port="in 2"/>
          <connect from_op="Log to Data" from_port="through 1" to_op="Subprocess" to_port="in 1"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Rename by Generic Names" to_port="example set input"/>
          <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
          <connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!--
        Recalls the accumulated "runningTotal" example set. Its input is wired
        to the Loop Subsets output, so it is placed downstream of the loop.
      -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="246" y="85">
            <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_op="Recall" from_port="result" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Join the accumulated pair attributes (left) with the original Iris data (right). -->
      <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="581" y="136">
        <list key="key_attributes"/>
      </operator>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>
注意事项:
- `Loop Subsets` 运算符被设置为选取属性对。
- 在 `Loop Subsets` 内部使用 `Log` 和 `Log to Data`,可以记录当前的属性对,将其转换为示例集,然后复制到宏中。
- 通过 `Join` 与上一次迭代的结果连接,得到一个累计的示例集。第一次进入循环时还没有上一次迭代的结果,这种情况由 `Handle Exception` 运算符处理。
- 在 `Loop Subsets` 运算符之外,累计的示例集是在一个 `Subprocess` 内通过 `Recall` 取回的,以确保执行顺序正确。
- 最后一点:`Materialize Data` 运算符是必需的,尽管按理说不应该需要它。