构建关联矩阵以在Rapid miner

时间:2015-06-16 12:17:05

标签: csv correlation rapidminer

我想用两个不同csv文件的两个不同单元格建立相关矩阵。任何人都可以帮我告诉我如何从一个文件中指定一列并与其他文件相同?

1 个答案:

答案 0 :(得分:1)

您必须通过将两列与Join运算符连接在一起来创建新的示例集,然后您可以计算相关矩阵。确保在加入时两个示例集具有相同的ID属性。 下面的代码块显示了如何选择和连接两个属性的示例过程。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.000-SNAPSHOT" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.000-SNAPSHOT" expanded="true" height="60" name="Retrieve Iris" width="90" x="45" y="75">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.0.000-SNAPSHOT" expanded="true" height="76" name="Select Attributes" width="90" x="179" y="75">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a1"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.000-SNAPSHOT" expanded="true" height="60" name="Retrieve Iris (2)" width="90" x="45" y="255">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="7.0.000-SNAPSHOT" expanded="true" height="76" name="Select Attributes (2)" width="90" x="179" y="255">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a2"/>
      </operator>
      <operator activated="true" class="join" compatibility="7.0.000-SNAPSHOT" expanded="true" height="76" name="Join" width="90" x="380" y="165">
        <list key="key_attributes"/>
      </operator>
      <operator activated="true" class="correlation_matrix" compatibility="7.0.000-SNAPSHOT" expanded="true" height="94" name="Correlation Matrix" width="90" x="581" y="165"/>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Join" to_port="left"/>
      <connect from_op="Retrieve Iris (2)" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join" to_op="Correlation Matrix" to_port="example set"/>
      <connect from_op="Correlation Matrix" from_port="matrix" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>