我需要帮助:如何把RapidMiner已经分类好的文件按其标签整理到对应的文件夹中?这个任务能否在RapidMiner中完成,或者通过Java代码读取结果示例集来完成?下面是结果表。
也就是说:我只想把这些文件拆分到代表各自标签的文件夹中。
这是示例集
Data: SimpleExampleSet: 15 examples, 31988 regular attributes,
special attributes = { label = #0: label (polynominal/single_value)/values=[test1]
metadata_file = #1: metadata_file (polynominal/single_value)/values=[0.txt, 1.txt, 10.txt, 11.txt, 12.txt, 13.txt, 14.txt, 2.txt, 3.txt, 4.txt, 5.txt, 6.txt, 7.txt, 8.txt, 9.txt]
metadata_path = #2: metadata_path (polynominal/single_value)/values=[D:\Finaltests\test1\0.txt, D:\Finaltests\test1\1.txt, D:\Finaltests\test1\10.txt, D:\Finaltests\test1\11.txt, D:\Finaltests\test1\12.txt, D:\Finaltests\test1\13.txt, D:\Finaltests\test1\14.txt, D:\Finaltests\test1\2.txt, D:\Finaltests\test1\3.txt, D:\Finaltests\test1\4.txt, D:\Finaltests\test1\5.txt, D:\Finaltests\test1\6.txt, D:\Finaltests\test1\7.txt, D:\Finaltests\test1\8.txt, D:\Finaltests\test1\9.txt]
metadata_date = #3: metadata_date (date_time/single_value)
confidence_sport = #31993: confidence(sport) (real/single_value)
confidence_places = #31994: confidence(places) (real/single_value)
prediction = #31992: prediction(label) (binominal/single_value) }
谢谢。
答案 0 :(得分:0)
这比我最初说的要稍微复杂一些,所以我在下面给出了一个示例流程。它假定运行环境为Linux,并把/tmp/old中的每个文件复制到/tmp/new/A或/tmp/new/B,具体是A还是B由标签决定(示例流程中的标签是随机生成的,仅作占位)。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: copy every file found in /tmp/old to /tmp/new/<label>/<file name>.

  Stages:
    1. Loop Files             logs parent_path and file_name for each file in /tmp/old.
    2. Log to Data            turns the logged rows into an example set.
    3. Generate Attributes (2) adds a "label" attribute; here it is a random "A"/"B"
                              placeholder, replace it with the real label/prediction.
    4. Generate Attributes    builds the "old" (source) and "new" (target) paths.
    5. Loop Examples          extracts old/new per example into macros and runs Copy File.

  Expressions inside attribute values use &quot; for string quotes so the
  document stays well-formed XML.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Stage 1: loop over the directory and log the two macros for each file. -->
      <operator activated="true" class="loop_files" compatibility="7.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="34">
        <parameter key="directory" value="/tmp/old"/>
        <process expanded="true">
          <!-- Expose the parent_path macro as a loggable value. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="parent_path" width="90" x="179" y="34">
            <parameter key="macro_name" value="parent_path"/>
          </operator>
          <!-- Expose the file_name macro as a loggable value. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="file_name" width="90" x="179" y="136">
            <parameter key="macro_name" value="file_name"/>
          </operator>
          <!-- Log both values under the column names parent_path and file_name. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="380" y="34">
            <list key="log">
              <parameter key="parent_path" value="operator.parent_path.value.macro_value"/>
              <parameter key="file_name" value="operator.file_name.value.macro_value"/>
            </list>
          </operator>
          <connect from_op="parent_path" from_port="through 1" to_op="file_name" to_port="through 1"/>
          <connect from_op="file_name" from_port="through 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stage 2: convert the log into an example set. -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="179" y="34"/>
      <!-- Stage 3: placeholder label, randomly "A" or "B". -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if(rand()&gt;0.5, &quot;A&quot;, &quot;B&quot;)"/>
        </list>
      </operator>
      <!-- Stage 4: source path (old) and label-dependent target path (new). -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
        <list key="function_descriptions">
          <parameter key="old" value="parent_path + &quot;/&quot; + file_name"/>
          <parameter key="new" value="&quot;/tmp/new/&quot; + label+ &quot;/&quot; + file_name"/>
        </list>
      </operator>
      <!-- Stage 5: for each example, read old/new into macros and copy the file. -->
      <operator activated="true" class="loop_examples" compatibility="7.0.001" expanded="true" height="82" name="Loop Examples" width="90" x="514" y="187">
        <process expanded="true">
          <!-- %{example} is used as the example index for the current iteration. -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="old" width="90" x="112" y="34">
            <parameter key="macro" value="old"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="old"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="new" width="90" x="112" y="136">
            <parameter key="macro" value="new"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="new"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Copy (not move) so the source files in /tmp/old are left in place. -->
          <operator activated="true" class="copy_file" compatibility="7.0.001" expanded="true" height="82" name="Copy File" width="90" x="380" y="34">
            <parameter key="source_file" value="%{old}"/>
            <parameter key="new_file" value="%{new}"/>
          </operator>
          <connect from_port="example set" to_op="old" to_port="example set"/>
          <connect from_op="old" from_port="example set" to_op="new" to_port="example set"/>
          <connect from_op="new" from_port="example set" to_op="Copy File" to_port="through 1"/>
          <connect from_op="Copy File" from_port="through 1" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Loop Files" from_port="out 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
我使用Copy File运算符来复制文件,而不是移动文件,以免造成损坏;希望你能由此看出它的工作原理。
总之,如果你创建了保存旧路径和新路径的属性,就必须使用Loop Examples运算符逐个遍历每个示例。在该循环运算符内部,需要把这些属性值提取为宏,再传递给Copy File运算符。
希望这可以作为你进一步实现的基础。