Rapidminer从字符串中提取数字

时间:2017-10-19 10:07:29

标签: rapidminer

如何从下面的名义型(Nominal)属性 'nameClust' 中正确地提取数字?

enter image description here

1 个答案:

答案 0 :(得分:0)

最简单的方法是先从属性值中去掉 “cluster_” 前缀:使用 “Replace”(替换)运算符配合正则表达式即可。然后使用 “Parse Numbers”(解析数字)运算符,把值类型从名义型(Nominal)改为数值型(Numerical)。

或者,您也可以在 “Generate Attributes”(生成属性)运算符的表达式构建器中用一个表达式实现相同的效果。

如果您只想提取单个簇编号,可以先使用 “Extract Macro”(提取宏)运算符,再使用 “Generate Macro”(生成宏)运算符,并在其中使用与第二种方法相同的表达式来得到数字。

有关这三种方法的示例,请参阅下面附带的流程 XML。您也可以在 RapidMiner 社区论坛(RapidMiner community forum)中进一步提问或重新发帖。
<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process demonstrating three ways to turn nominal cluster labels
  of the form "cluster_N" into the number N:
    1. Replace + ParseNumbers       (delivered to "result 1")
    2. GenerateAttributes           (single expression, delivered to "result 2")
    3. ExtractMacro + GenerateMacro (one value only, stored in a macro)
  Whitespace between tag names, attributes and inside port names has been
  restored; operator names are kept exactly as they appeared because the
  <connect> elements reference them consistently.
-->
<process version="7.6.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Data preparation: load Iris, split it 50/50, cluster one half and apply the model to the other. -->
      <operator activated="true" class="retrieve" compatibility="7.6.001" expanded="true" height="68" name="RetrieveIris" width="90" x="45" y="136">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="split_data" compatibility="7.6.001" expanded="true" height="103" name="SplitData" width="90" x="179" y="136">
        <enumeration key="partitions">
          <parameter key="ratio" value="0.5"/>
          <parameter key="ratio" value="0.5"/>
        </enumeration>
      </operator>
      <operator activated="true" class="k_means" compatibility="7.6.001" expanded="true" height="82" name="Clustering" width="90" x="313" y="85"/>
      <operator activated="true" class="apply_model" compatibility="7.6.001" expanded="true" height="82" name="ApplyModel" width="90" x="447" y="136">
        <list key="application_parameters"/>
      </operator>
      <!-- Fan the clustered example set out to the three alternative approaches. -->
      <operator activated="true" class="multiply" compatibility="7.6.001" expanded="true" height="124" name="Multiply" width="90" x="648" y="136"/>
      <!-- Approach 3: read one value of the "cluster" attribute into a macro, then strip the prefix from it. -->
      <operator activated="true" class="extract_macro" compatibility="7.6.001" expanded="true" height="68" name="ExtractMacro" width="90" x="849" y="697">
        <parameter key="macro" value="extracted_cluster"/>
        <parameter key="macro_type" value="data_value"/>
        <parameter key="attribute_name" value="cluster"/>
        <parameter key="example_index" value="1"/>
        <list key="additional_macros"/>
      </operator>
      <operator activated="true" class="generate_macro" compatibility="7.6.001" expanded="true" height="82" name="GenerateMacro" width="90" x="1050" y="697">
        <list key="function_descriptions">
          <parameter key="cluster_number" value="replace(%{extracted_cluster},&quot;cluster_&quot;,&quot;&quot;)"/>
        </list>
      </operator>
      <!-- Approach 2: strip the prefix and parse the remainder in one expression, written to a new attribute. -->
      <operator activated="true" class="generate_attributes" compatibility="7.6.001" expanded="true" height="82" name="GenerateAttributes" width="90" x="849" y="442">
        <list key="function_descriptions">
          <parameter key="parsed_cluster" value="parse(replace([cluster],&quot;cluster_&quot;,&quot;&quot;))"/>
        </list>
        <description align="center" color="purple" colored="true" width="126">Alternativ approach by using a single expression.</description>
      </operator>
      <!-- Approach 1: regex-replace the prefix in place, then convert the attribute from nominal to numerical. -->
      <operator activated="true" class="replace" compatibility="7.6.001" expanded="true" height="82" name="Replace" width="90" x="849" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="cluster"/>
        <!-- Per the annotation below, this must be enabled for the "cluster" attribute to be processed. -->
        <parameter key="include_special_attributes" value="true"/>
        <parameter key="replace_what" value="cluster\_"/>
        <description align="center" color="purple" colored="true" width="126">First remove the &amp;quot;cluster_&amp;quot; Substring from the cluster Attribute. It's important to check the &amp;quot;include special attributes&amp;quot; parameter</description>
      </operator>
      <operator activated="true" class="parse_numbers" compatibility="7.6.001" expanded="true" height="82" name="ParseNumbers" width="90" x="1050" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="cluster"/>
        <parameter key="include_special_attributes" value="true"/>
        <description align="center" color="purple" colored="true" width="126">Parse the values of the &amp;quot;cluster Attribute, to change the value type from nominal to numerical.</description>
      </operator>
      <!-- Wiring. Port names use RapidMiner's spelling with spaces (e.g. "example set input"). -->
      <connect from_op="RetrieveIris" from_port="output" to_op="SplitData" to_port="example set"/>
      <connect from_op="SplitData" from_port="partition 1" to_op="Clustering" to_port="example set"/>
      <connect from_op="SplitData" from_port="partition 2" to_op="ApplyModel" to_port="unlabelled data"/>
      <connect from_op="Clustering" from_port="cluster model" to_op="ApplyModel" to_port="model"/>
      <connect from_op="ApplyModel" from_port="labelled data" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Replace" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="GenerateAttributes" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 3" to_op="ExtractMacro" to_port="example set"/>
      <connect from_op="ExtractMacro" from_port="example set" to_op="GenerateMacro" to_port="through 1"/>
      <connect from_op="GenerateAttributes" from_port="example set output" to_port="result 2"/>
      <connect from_op="Replace" from_port="example set output" to_op="ParseNumbers" to_port="example set input"/>
      <connect from_op="ParseNumbers" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="399"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <description align="center" color="yellow" colored="false" height="347" resized="true" width="580" x="27" y="10">Split the data in two subsets&lt;br/&gt;build a clustering model on the training set&lt;br/&gt;and then apply it on the test set</description>
    </process>
  </operator>
</process>