Storm bolt 没有发出共同用户的输出

时间:2015-07-23 03:25:15

标签: java apache-storm

我正在尝试使用 Storm 找出两个文本文件中共同的用户。前两个 bolt 分别从两个不同的文本文件中"读取"所有用户,第三个 bolt 负责查找共同用户;但即使它看起来找到了共同用户,也不会把它们打印出来。这是我的代码:

Bolt#1

/**
 * Bolt that extracts an author name from each incoming line and emits it as
 * the single output field {@code USERNAME}.
 *
 * <p>The author is taken to be everything after the first {@code '['} in the
 * line, with every {@code ']'} removed.
 */
public class ProcessAuthors extends BaseRichBolt {
    private OutputCollector collector;

    /** Declares the single output field {@code USERNAME}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("USERNAME"));
    }

    /** Stores the collector so that {@link #execute(Tuple)} can emit and ack. */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits the author found in the first field of {@code input}, anchored to
     * {@code input}, and then acknowledges {@code input}.
     *
     * @param input tuple whose first field is the raw line read from the file
     */
    @Override
    public void execute(Tuple input) {
        String lineFromFile = input.getString(0);

        // indexOf returns -1 when there is no '[', so substring(0) keeps the
        // whole line in that case (same as the original behaviour).
        int initialIndex = lineFromFile.indexOf("[");
        String author = lineFromFile.substring(initialIndex + 1).replace("]", "");

        collector.emit(input, new Values(author));
        // BaseRichBolt does not ack automatically; without this the anchored
        // tuple is never marked as processed.
        collector.ack(input);
    }
}

Bolt#2

public class FromTweetsFileToFurtherProcessing extends BaseRichBolt {
      OutputCollector collector;

      String author;

      /** Declares the single output field {@code USERNAME} for this bolt. */
      @Override
      public void declareOutputFields(OutputFieldsDeclarer declarer) {
          final Fields outputFields = new Fields("USERNAME");
          declarer.declare(outputFields);
      }

    /**
     * Keeps a reference to the collector handed in by the framework so that
     * {@code execute} can emit tuples through it.
     */
    @Override
    public void prepare(
            Map stormConf,
            TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Splits the incoming line on commas, looks for the field that starts with
     * {@code "tweetAuthor"} and emits the author extracted from it.
     *
     * <p>The author is computed per tuple. Lines that contain no
     * {@code "tweetAuthor"} field emit nothing, instead of re-emitting the
     * author of a previous tuple (or {@code null}). The input tuple is always
     * acknowledged.
     *
     * @param input tuple whose first field is the raw line read from the file
     */
    @Override
    public void execute(Tuple input) {
        final String lineFromFile = input.getString(0);

        String tweetAuthor = null;
        for (String field : lineFromFile.split(",")) {
            if (field.startsWith("\"tweetAuthor\"")) {
                // removeTwoWordsFromString is not shown in this snippet; presumably it
                // strips the given prefix and the closing quote -- confirm.
                // No break: as before, the last matching field wins.
                tweetAuthor = removeTwoWordsFromString(field, "\"tweetAuthor\":\"", "\"");
            }
        }

        if (tweetAuthor != null) {
            // Keep the field updated in case other code in this class reads it.
            this.author = tweetAuthor;
            collector.emit(input, new Values(tweetAuthor));
        }

        // BaseRichBolt does not ack automatically.
        collector.ack(input);
    }

Bolt#3

public class SearchForTheAuthorsTweetData extends BaseRichBolt{
OutputCollector collector;
List<String>authors=new ArrayList<String>();
List<String>temp=new ArrayList<String>();

String author1;String author2;

/** Declares the single output field {@code USERNAME} for this bolt. */
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    final Fields outputFields = new Fields("USERNAME");
    declarer.declare(outputFields);
}


/**
 * Keeps a reference to the collector handed in by the framework so that
 * {@code execute} can emit tuples through it.
 */
@Override
public void prepare(
        Map stormConf,
        TopologyContext context,
        OutputCollector collector) {
    this.collector = collector;
}

@Override
public void execute(Tuple input) {
    final String sourceComponent = input.getSourceComponent();


    if ("PROCESS_AUTHORS".equals(sourceComponent)) {

        author1 = input.getString(0);
        authors.add(author1);
    }




    if ("PROCESS_TWEETS_FROM_MONGODB".equals(sourceComponent)) {
        author2 = input.getString(0);
        temp.add(author2);

    }

    if(author2!=null && author1!=null){
        System.out.println("USER: "+author2);
        System.out.println("AUTHOR: "+author1);

        if(author2.equals(author1)){
            System.out.println("COMMON AUTHOR: "+author2);
            collector.emit(input,new Values(author1));
        }
    }

拓扑

 public class TheAuthorsAndTheirTweetData {
 /**
  * Builds the topology and runs it.
  *
  * <p>Two spouts read {@code anAuthor.txt} and {@code tweets.txt}; each feeds
  * its own processing bolt via shuffle grouping, and both processing bolts
  * feed {@code SEARCH_1} via fields grouping on {@code USERNAME}.
  *
  * <p>With at least one argument the topology is submitted under the name
  * {@code args[0]}; otherwise it runs in a local cluster for one hour.
  *
  * @param args optional; {@code args[0]} is the topology name for submission
  * @throws Exception if building or submitting the topology fails
  */
 public static void main(String[] args) throws Exception {
     final TopologyBuilder builder = new TopologyBuilder();

     // Authors stream.
     builder.setSpout("READ_AUTHORS_MONGO", new ReadLinesFromTextFile("anAuthor.txt"), 1);
     builder.setBolt("PROCESS_AUTHORS", new ProcessAuthors(), 2)
             .shuffleGrouping("READ_AUTHORS_MONGO");

     // Tweets stream.
     builder.setSpout("READ_TWEETS_FROM_MONGODB", new ReadLinesFromTextFile("tweets.txt"), 1);
     builder.setBolt("PROCESS_TWEETS_FROM_MONGODB", new FromTweetsFileToFurtherProcessing(), 2)
             .shuffleGrouping("READ_TWEETS_FROM_MONGODB");

     // Join point: both streams are grouped on USERNAME.
     builder.setBolt("SEARCH_1", new SearchForTheAuthorsTweetData(), 8)
             .fieldsGrouping("PROCESS_AUTHORS", new Fields("USERNAME"))
             .fieldsGrouping("PROCESS_TWEETS_FROM_MONGODB", new Fields("USERNAME"));

     final Config conf = new Config();
     final boolean runLocally = (args == null) || (args.length == 0);

     if (runLocally) {
         final LocalCluster cluster = new LocalCluster();
         cluster.submitTopology("Test", conf, builder.createTopology());
         Utils.sleep(60 * 60 * 1000); // one hour
         cluster.killTopology("Test");
         cluster.shutdown();
         return;
     }

     conf.setNumWorkers(10);
     conf.setNumAckers(5);
     conf.setMaxSpoutPending(100);
     StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
 }

1 个答案:

答案 0 :(得分:0)

第三个 bolt 只有在同一位作者出现在两个连续的元组中时才能找到共同作者——一个元组来自第一个 bolt,另一个来自第二个 bolt。请将代码改成如下所示:

// Tuple from the tweets bolt: record its author, then look it up among the
// authors collected so far from the other stream.
if ("PROCESS_TWEETS_FROM_MONGODB".equals(sourceComponent)) {
    author2 = input.getString(0);
    temp.add(author2);
    // NOTE(review): this only matches authors that are already in `authors`
    // when this tuple arrives; the PROCESS_AUTHORS branch would need the
    // mirror-image check against `temp` to catch the opposite arrival order.
    if (authors.contains(author2)) {
         // common author - emit it
    }
}

请注意,这种做法可能无法扩展,因为必须把两个文件的内容都读入第 3 个 bolt 的内存中。