我有一些数据,其中一个属性包含信息,必须分成新的示例(行)。
该属性被称为:'Group',其值如下:'A + B + C'
我设法拆分该属性,现在我有3个属性:
'Group_1':'A','Group_2':'B','Group_3':'C'
这个 Group 还有另一个属性,例如 'Value':'3'。现在我希望示例集中的每个示例只包含单个 Group 及其 Value,而不是把多个组合并在同一个示例里。
我的表格如下:
我可以手动把流程复制多份,为每个 Group_x 各选出一个子集,将该属性重命名为 Group,然后再把这些子集合并在一起;但这是一个静态的解决方案,而 Group_x 的数量并不固定。
有没有办法可以根据 Group_x 属性的数量自动执行这个过程?(使用宏运算符,我只能统计出全部属性的数量,而不是 Group_x 属性的数量。)
ExampleSet应如下所示:
答案 0 :(得分:0)
De-Pivot 运算符可以完成这项工作。下面是一个示例:其中的 Subprocess 只是用来创建与您的数据相匹配的样例数据。您真正需要的运算符只有两个:De-Pivot 负责完成拆分,Select Attributes 负责选出最终结果所需的属性。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: turns the wide attributes Group_1..Group_3 into one
  "Group" attribute with one example per group value (De-Pivot), then keeps
  only Group, Value and id (Select Attributes).
  The "Subprocess (2)" operator only builds sample data for the demonstration.
-->
<process version="7.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Sample data: five generated example sets, appended, with the nominal
           value "Missing" declared as missing and an id attribute generated. -->
      <operator activated="true" class="subprocess" compatibility="7.0.000" expanded="true" height="82" name="Subprocess (2)" width="90" x="179" y="187">
        <process expanded="true">
          <!-- Nominal values are string literals inside the value expression;
               their quotes must be written as &quot; so the XML attribute
               stays well-formed. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="179" y="85">
            <list key="attribute_values">
              <parameter key="Value" value="3"/>
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="179" y="187">
            <list key="attribute_values">
              <parameter key="Value" value="13"/>
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="179" y="289">
            <list key="attribute_values">
              <parameter key="Value" value="11"/>
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Examples (4) and (5) have fewer than three groups; the unused
               slots carry the placeholder "Missing", which the
               Declare Missing Value operator below turns into missing values. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="179" y="391">
            <list key="attribute_values">
              <parameter key="Value" value="32"/>
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;Missing&quot;"/>
              <parameter key="Group_3" value="&quot;Missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.0.000" expanded="true" height="68" name="Generate Data by User Specification (5)" width="90" x="179" y="493">
            <list key="attribute_values">
              <parameter key="Value" value="5"/>
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;Missing&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.0.000" expanded="true" height="166" name="Append" width="90" x="514" y="85"/>
          <operator activated="true" class="declare_missing_value" compatibility="7.0.000" expanded="true" height="82" name="Declare Missing Value" width="90" x="648" y="85">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="Missing"/>
          </operator>
          <operator activated="true" class="generate_id" compatibility="7.0.000" expanded="true" height="82" name="Generate ID" width="90" x="782" y="85"/>
          <!-- Wiring: 5 generators -> Append -> Declare Missing Value -> Generate ID -> out 1 -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- The actual solution: every attribute matching the regular expression
           "Group.*" is folded into the single new attribute "Group", so the
           number of Group_x attributes does not have to be known in advance.
           "Value1" is the name given to the index attribute. -->
      <operator activated="true" class="de_pivot" compatibility="7.0.000" expanded="true" height="82" name="De-Pivot (2)" width="90" x="313" y="187">
        <list key="attribute_name">
          <parameter key="Group" value="Group.*"/>
        </list>
        <parameter key="index_attribute" value="Value1"/>
        <parameter key="create_nominal_index" value="true"/>
      </operator>
      <!-- Keep only the attributes wanted in the result; this drops the
           "Value1" index attribute. -->
      <operator activated="true" class="select_attributes" compatibility="7.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="187">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Group|Value|id"/>
      </operator>
      <!-- Wiring: Subprocess (2) -> De-Pivot (2) -> Select Attributes -> result 1 -->
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="De-Pivot (2)" to_port="example set input"/>
      <connect from_op="De-Pivot (2)" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
希望有所帮助。