用于统计用户原创推文数量的 Storm Bolt

时间:2015-07-15 02:56:44

标签: java twitter apache-storm

在使用 Storm 把我下载的所有推文存入 MongoDB 数据库之后,我尝试统计每个用户的原创推文数量。然而,每当我用下面的代码统计某位作者的原创推文数量时,它都会不断重复读取(并重复计数)相同的推文。

Bolt:

public class CalculateTheMetrics  extends BaseBasicBolt {

Map<String,Double>OT1=new HashMap<String, Double>();

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("USERNAME","OT1"));
}

@Override
public void execute(Tuple input,BasicOutputCollector collector) {

    String author=input.getString(0);
    String tweet=input.getString(2);

    Double OT1=this.OT1.get(author);
    if(OT1==null){
        OT1=0.0;
    }
    if(author!=null && tweet!=null ){

        if(!tweet.startsWith("@") || !tweet.startsWith("RT")){
            OT1+=1;
        }
        this.OT1.put(author,OT1);

        System.out.println(author+" +OT1); 
        collector.emit(new Values(author,OT1))
       }
}

拓扑:

/**
 * Builds and launches the topology that joins tweet data with a list of
 * authors and feeds the result into {@code CalculateTheMetrics}.
 *
 * <p>With at least one command-line argument the topology is submitted
 * through {@code StormSubmitter} under the name {@code args[0]}; otherwise it
 * runs in a {@code LocalCluster} under the name "Test" for one hour and is
 * then killed.
 */
public class TheAuthorsAndTheirTweetData {

    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();

        // Tweet branch: spout plus its pre-processing bolt.
        builder.setSpout(
                "READ_TWEET_DATA_FROM_MONGODB",
                new ReadLinesFromTextFile("tweets.txt"));
        builder.setBolt(
                "TWEET_DATA_FROM_MONGODB_TO_FURTHER_PROCESSING",
                new FromMongoDBToProcessing())
                .shuffleGrouping("READ_TWEET_DATA_FROM_MONGODB");

        // Author branch: spout plus its pre-processing bolt.
        builder.setSpout(
                "READ_THE_AUTHORS_FROM_TEXT_FILE",
                new ReadLastLineFromTextFile("authors.txt"));
        builder.setBolt(
                "FROM_THE_AUTHORS_TEXT_FILE_TO_FURTHER_PROCESSING",
                new FromTheAuthorsTextFileToFurtherProcessing())
                .shuffleGrouping("READ_THE_AUTHORS_FROM_TEXT_FILE");

        // Both branches feed the search bolt, grouped on (USERNAME, ID).
        builder.setBolt(
                "SEARCH_FOR_THE_AUTHORS_TWEET_DATA",
                new SearchForTheAuthorsTweetData(),
                16)
                .fieldsGrouping(
                        "TWEET_DATA_FROM_MONGODB_TO_FURTHER_PROCESSING",
                        new Fields("USERNAME", "ID"))
                .fieldsGrouping(
                        "FROM_THE_AUTHORS_TEXT_FILE_TO_FURTHER_PROCESSING",
                        new Fields("USERNAME", "ID"));

        // Metrics bolt, grouped on USERNAME.
        builder.setBolt(
                "CALCULATE_THE_METRICS",
                new CalculateTheMetrics(),
                64)
                .fieldsGrouping(
                        "SEARCH_FOR_THE_AUTHORS_TWEET_DATA",
                        new Fields("USERNAME"));

        Config conf = new Config();

        boolean runLocally = args == null || args.length == 0;
        if (runLocally) {
            // No topology name given: run in-process for one hour, then stop.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Test", conf, builder.createTopology());
            Utils.sleep(1 * 60 * 60 * 1000);
            cluster.killTopology("Test");
            cluster.shutdown();
            return;
        }

        // Topology name given: configure and submit to a real cluster.
        conf.setNumWorkers(10);
        conf.setNumAckers(5);
        conf.setMaxSpoutPending(100);
        StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
    }

}

我希望它不再重复读取和重复统计相同的推文。请帮帮我。

1 个答案:

答案 0 :(得分:0)

这样的东西?

public class Calculate1Metric extends BaseRichBolt {
private OutputCollector collector;
Map<String ,Integer>OT1;

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
          declarer.declare(new Fields("username","OT1"));

}

@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    this.collector=collector;
    this.OT1=new HashMap<String, Integer>();

    }

@Override
public void execute(Tuple input) {
    final String sourceComponent = input.getSourceComponent();

        String author = input.getString(0);
        String tweet = input.getString(2);

        if (author != null && tweet != null) {

            Integer OT1 = this.OT1.get(author);
            if (OT1 == null) {
                OT1 = 0;
            }


            if (!tweet.startsWith("@") || !tweet.contains("RT ") || !tweet.startsWith("RT")) {
                OT1 += 1;
            }



            if(!this.OT1.containsKey(author)) {
                this.OT1.put(author, OT1);



            }else{
                collector.emit(new Values(author,OT1,OT2));
                System.out.println(author + " " + OT1+" "+OT2);


                this.OT1.remove(author);

            }
        }else{
            collector.fail(input);

        }
      collector.ack(input);
 }