我是一名工商管理专业的学生,目前正在为一个研究项目学习社交媒体分析的基础知识。我目前的目标是跟踪推文中关键字的使用情况。我下载了RapidMiner,并弄清楚了如何搜索关键字。但是,有没有办法查明关键字在特定时间范围内的使用频率?例如,我能否过滤结果,只显示2017年12月包含我的关键字的推文?
非常感谢您考虑我的问题。
答案 0 :(得分:0)
如果您将数据提取为 RapidMiner 的 ExampleSet,则可以使用 Aggregate 操作符(Operator)来统计各个关键字的出现次数。或者,您也可以直接使用 Filter Examples 操作符,只显示包含关键字的推文。简单示例请参见下面的流程:只需将 XML 复制并粘贴到 RapidMiner 的流程(Process)视图中即可。
也欢迎在 RapidMiner community forum 中进一步提问或重新发帖。
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<!-- Process context: no input locations, output locations or macros are defined. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
<!-- Data source: the "generate_direct_mailing_data" generator supplies generic sample data;
     per its annotation, the "sports" attribute is used as the key word column. -->
<operator activated="true" class="generate_direct_mailing_data" compatibility="8.0.001" expanded="true" height="68" name="Generate Direct Mailing Data" width="90" x="45" y="34">
  <!-- The HTML line break in the annotation text is escaped: a literal, unclosed <br> tag
       would not match </description> and would make the document not well-formed. -->
  <description align="center" color="transparent" colored="false" width="126">Generic sample data.&lt;br&gt;We use the &quot;sports&quot; Attribute as key words</description>
</operator>
<!-- Fans the generated data out to two consumers:
     "output 1" feeds "Aggregate", "output 2" feeds "Filter Examples". -->
<operator
    class="multiply"
    name="Multiply"
    activated="true"
    compatibility="8.0.001"
    expanded="true"
    height="103" width="90"
    x="246" y="34"/>
<!-- Alternative branch, step 1: keep only the examples matching the single
     filter entry "sports.equals.athletics"; the result is counted by "Aggregate (2)". -->
<operator
    class="filter_examples"
    name="Filter Examples"
    activated="true"
    compatibility="8.0.001"
    expanded="true"
    height="103" width="90"
    x="447" y="340">
  <list key="filters_list">
    <parameter key="filters_entry_key" value="sports.equals.athletics"/>
  </list>
  <description align="center" color="yellow" colored="true" width="126">Alternatively we can filter for a specific sport and then count.</description>
</operator>
<!-- Alternative branch, step 2: applies the default aggregation function "count"
     to the single attribute "sports" of the filtered example set. No group-by is
     configured and the explicit aggregation list is empty. -->
<operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="340">
  <parameter key="use_default_aggregation" value="true"/>
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="sports"/>
  <parameter key="default_aggregation_function" value="count"/>
  <list key="aggregation_attributes"/>
  <!-- The annotation previously held the editor placeholder "Type your comment". -->
  <description align="center" color="yellow" colored="true" width="126">Counts the &quot;sports&quot; values of the filtered examples using the default aggregation.</description>
</operator>
<!-- Main branch: groups the examples by "sports" and counts "sports" within each
     group, giving the number of occurrences per key word.
     NOTE(review): "use_default_aggregation" is not set here, so the three
     default-aggregation parameters below presumably have no effect; confirm. -->
<operator
    class="aggregate"
    name="Aggregate"
    activated="true"
    compatibility="8.0.001"
    expanded="true"
    height="82" width="90"
    x="447" y="34">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="sports"/>
  <parameter key="default_aggregation_function" value="count"/>
  <list key="aggregation_attributes">
    <parameter key="sports" value="count"/>
  </list>
  <parameter key="group_by_attributes" value="sports"/>
  <description align="center" color="green" colored="true" width="126">The &quot;group by&quot; and the &quot;aggregation&quot; attributes are both set to &quot;sports&quot;</description>
</operator>
<!-- Wiring of the process. The generated data goes into Multiply. -->
<connect from_op="Generate Direct Mailing Data" from_port="output" to_op="Multiply" to_port="input"/>
<!-- Multiply output 1 feeds the grouped count ("Aggregate"). -->
<connect from_op="Multiply" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
<!-- Multiply output 2 feeds the filter-then-count branch ("Filter Examples", then "Aggregate (2)"). -->
<connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
<!-- Process results: "result 1" is the grouped count, "result 2" is the count after filtering. -->
<connect from_op="Aggregate (2)" from_port="example set output" to_port="result 2"/>
<connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
<!-- Layout only: spacing of the process-level ports on the canvas. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>