使用RapidMiner根据分类器结果将文件分类到文件夹中

时间:2016-05-31 12:51:18

标签: rapidminer

我需要帮助:如何把RapidMiner已分类的文件按其标签放入对应的文件夹?这个任务能否在RapidMiner中完成,或者用Java代码读取结果示例集来完成?这是结果表

即:我只想把文件分别放入代表其标签的文件夹中

这是示例集

    Data: SimpleExampleSet: 15 examples, 31988 regular attributes, 
special attributes = { label = #0: label (polynominal/single_value)/values=[test1] 
metadata_file = #1: metadata_file (polynominal/single_value)/values=[0.txt, 1.txt, 10.txt, 11.txt, 12.txt, 13.txt, 14.txt, 2.txt, 3.txt, 4.txt, 5.txt, 6.txt, 7.txt, 8.txt, 9.txt] 
metadata_path = #2: metadata_path (polynominal/single_value)/values=[D:\Finaltests\test1\0.txt, D:\Finaltests\test1\1.txt, D:\Finaltests\test1\10.txt, D:\Finaltests\test1\11.txt, D:\Finaltests\test1\12.txt, D:\Finaltests\test1\13.txt, D:\Finaltests\test1\14.txt, D:\Finaltests\test1\2.txt, D:\Finaltests\test1\3.txt, D:\Finaltests\test1\4.txt, D:\Finaltests\test1\5.txt, D:\Finaltests\test1\6.txt, D:\Finaltests\test1\7.txt, D:\Finaltests\test1\8.txt, D:\Finaltests\test1\9.txt] 
metadata_date = #3: metadata_date (date_time/single_value) 
confidence_sport = #31993: confidence(sport) (real/single_value) 
confidence_places = #31994: confidence(places) (real/single_value) 
prediction = #31992: prediction(label) (binominal/single_value) }
谢谢。

1 个答案:

答案 0 :(得分:0)

这比我最初说的稍微复杂一点,所以我在下面给出了一个例子。它假定运行在Linux上,并将/tmp/old中的所有文件复制到/tmp/new/A或/tmp/new/B,其中A或B由标签决定。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example RapidMiner process: copy every file found in /tmp/old into
  /tmp/new/<label>/, where <label> is an attribute of the example set.

  Pipeline (top level):
    Loop Files, Log to Data, Generate Attributes (2), Generate Attributes, Loop Examples

  NOTE(review): the label produced here is a random "A"/"B" placeholder; swap
  "Generate Attributes (2)" for the real label or prediction attribute.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">

      <!-- Step 1: visit the files in /tmp/old and log two macros per file. -->
      <operator activated="true" class="loop_files" compatibility="7.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="34">
        <parameter key="directory" value="/tmp/old"/>
        <process expanded="true">
          <!-- Expose the parent_path macro as a loggable value. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="parent_path" width="90" x="179" y="34">
            <parameter key="macro_name" value="parent_path"/>
          </operator>
          <!-- Expose the file_name macro as a loggable value. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="file_name" width="90" x="179" y="136">
            <parameter key="macro_name" value="file_name"/>
          </operator>
          <!-- Record both values under the column names parent_path and file_name. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="380" y="34">
            <list key="log">
              <parameter key="parent_path" value="operator.parent_path.value.macro_value"/>
              <parameter key="file_name" value="operator.file_name.value.macro_value"/>
            </list>
          </operator>
          <connect from_op="parent_path" from_port="through 1" to_op="file_name" to_port="through 1"/>
          <connect from_op="file_name" from_port="through 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>

      <!-- Step 2: turn the collected log into an example set. -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="179" y="34"/>

      <!-- Step 3: placeholder labelling, "A" or "B" chosen at random per row. -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if(rand()&gt;0.5, &quot;A&quot;, &quot;B&quot;)"/>
        </list>
      </operator>

      <!-- Step 4: build the source path (old) and the label-specific target path (new). -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
        <list key="function_descriptions">
          <parameter key="old" value="parent_path + &quot;/&quot; + file_name"/>
          <parameter key="new" value="&quot;/tmp/new/&quot; + label+ &quot;/&quot; + file_name"/>
        </list>
      </operator>

      <!-- Step 5: for each example, pull old/new into macros and copy the file. -->
      <operator activated="true" class="loop_examples" compatibility="7.0.001" expanded="true" height="82" name="Loop Examples" width="90" x="514" y="187">
        <process expanded="true">
          <!-- Macro "old" takes the value of attribute old at row %{example}
               (presumably the loop's iteration macro; confirm in Loop Examples). -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="old" width="90" x="112" y="34">
            <parameter key="macro" value="old"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="old"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Macro "new" takes the value of attribute new at the same row. -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="new" width="90" x="112" y="136">
            <parameter key="macro" value="new"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="new"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Copy (not move) so the originals in /tmp/old stay untouched. -->
          <operator activated="true" class="copy_file" compatibility="7.0.001" expanded="true" height="82" name="Copy File" width="90" x="380" y="34">
            <parameter key="source_file" value="%{old}"/>
            <parameter key="new_file" value="%{new}"/>
          </operator>
          <connect from_port="example set" to_op="old" to_port="example set"/>
          <connect from_op="old" from_port="example set" to_op="new" to_port="example set"/>
          <connect from_op="new" from_port="example set" to_op="Copy File" to_port="through 1"/>
          <connect from_op="Copy File" from_port="through 1" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>

      <!-- Top-level wiring: the operators above run as one linear chain. -->
      <connect from_op="Loop Files" from_port="out 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

我使用Copy File而不是移动文件,以免造成损坏,希望你能看出它是如何工作的。

总之,如果用旧名称和新名称创建了属性,就必须使用Loop Examples来遍历每个示例。在这个循环运算符内部,您必须把这些值提取为宏,再传递给Copy File运算符。

希望这可以作为你的起点。