使用storm将用户名映射到推文

时间:2014-07-19 07:58:29

标签: java twitter apache-storm

我正在尝试创建一个拓扑,其中包含:1个发出推文的spout和两个bolt:  收集推文的TweetParserBolt  和UserParserBolt收集高频扬声器'用户名。

假设我已经创建了第三个用于锚定TweetParserBolt和UserParserBolt的螺栓,以便它可以将高音扬声器的用户名映射到她/他已经发布的推文列表。问题我&#39 ;遇到的是,bolt返回一个推文的空列表。

任何人都可以帮助我理解代码的错误

下面是我的拓扑代码和三个螺栓:

public class TwitterTopology {
private static String consumerKey = "*********************";
private static String consumerSecret = "*****************";
private static String accessToken = "********************";
private static String accessTokenSecret = "****************";

public static void main(String [] args) throws Exception{

    /*** SETUP ***/

    String remoteClusterTopologyName = null;
    if (args!=null) {
        if (args.length==1) {
            remoteClusterTopologyName = args[0];
        }
        // If credentials are provided as commandline arguments
        else if (args.length==4) {
            accessToken =args[0];
            accessTokenSecret =args[1];
            consumerKey =args[2];
            consumerSecret =args[3];
        }

    }
    /**************** ****************/

    TopologyBuilder builder = new TopologyBuilder();

    FilterQuery filterQuery = new FilterQuery();
    filterQuery.track(new String[]{"#cloudcomputing"});
    filterQuery.language(new String[]{"en"});

    TwitterSpout spout = new TwitterSpout( accessToken, accessTokenSecret,consumerKey, consumerSecret, filterQuery);
    builder.setSpout("TwitterSpout",spout,1);

    builder.setBolt("TweetParserBolt",new TweetParserBolt(),4).shuffleGrouping("TwitterSpout");
    builder.setBolt("UserMapperBolt",new UserParserBolt()).shuffleGrouping("TwitterSpout");

    UserAndTweetsMapperBolt()).fieldsGrouping(("TweetParserBolt"), new Fields("username","tweet","bolt"))
                                                                            .fieldsGrouping(("UserMapperBolt"),new Fields("username","tweet","bolt"));

    Config conf = new Config();
    conf.setDebug(true);


    if (remoteClusterTopologyName!=null) {
        conf.setNumWorkers(4);

        StormSubmitter.submitTopology(remoteClusterTopologyName, conf, builder.createTopology());
    }
    else {
        conf.setMaxTaskParallelism(3);

        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("test", conf, builder.createTopology());

        Thread.sleep(460000);

        cluster.shutdown();
    }


}

public class TweetParserBolt extends BaseRichBolt {
   private OutputCollector collector;

   @Override
   public void declareOutputFields(OutputFieldsDeclarer declarer){
      declarer.declare(new Fields("username","tweet","bolt"));
   }



   @Override
   public void prepare(Map map,TopologyContext context,OutputCollector collector){
      this.collector=collector;
   }

   @Override
   public void execute(Tuple tuple){
      Status tweet=(Status)tuple.getValue(0);
      String username=tweet.getUser().getScreenName();

      collector.emit(tuple,new Values(username,tweet,"tweet_parser_bolt"));

    }
  }

   public class UserParserBolt extends BaseRichBolt{
     private OutputCollector collector;

     @Override
     public void declareOutputFields(OutputFieldsDeclarer declarer){
        declarer.declare(new Fields("username","tweet"));
     }



     @Override
     public void prepare(Map map,TopologyContext context,OutputCollector collector){
        this.collector=collector;
     }

     @Override
     public void execute(Tuple tuple){
       Status tweet=(Status)tuple.getValue(0);
       String username=tweet.getUser().getScreenName();

       collector.emit(tuple,new Values(username,tweet,"user_parser_bolt"));

    }
 }

 public class UserAndTweetsMapperBolt extends  BaseRichBolt {
    private OutputCollector collector;

    List<Tuple>listOfTuples;


    Map<String,Status>tempTweetsMap;
    Map<String,List<Status>>UserAndTweetsMap;
    List<Status>tweets;
    List<String>tempUsers;


    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer){
       declarer.declare(new Fields("username","tweets"));
    }

    @Override
    public void prepare(Map map,TopologyContext context,OutputCollector collector){
      this.collector=collector;
      this.listOfTuples=new ArrayList<Tuple>();

      this.tempTweetsMap=new HashMap<String, Status>();
      this.UserAndTweetsMap=new HashMap<String, List<Status>>();

      this.tempUsers=new ArrayList<String>();
      this.tweets=new ArrayList<Status>();
    }



    @Override
    public void execute(Tuple tuple){
       //String username=tuple.getStringByField("username");
       //Status status=(Status)tuple.getValueByField("tweet");

       String username=tuple.getValue(0).toString();

       String sourceComponent=tuple.getSourceComponent();

       if(sourceComponent.equals("TwitterParserBolt")){
          String tempUser1=tuple.getValue(0).toString();
          Status tempStatus1=(Status)tuple.getValue(1);

          tempTweetsMap.put(tempUser1,tempStatus1);

       }else if(sourceComponent.equals("UserParserBolt")){
          String tempUser2=tuple.getValue(0).toString();
          Status tempStatus2=(Status)tuple.getValue(1);

          tempUsers.add(tempUser2);
      }


      for(int i=0;i<tempUsers.size();i++){
         for(int j=0;j<tempTweetsMap.size();j++){
             if(tempUsers.get(i).equals(tempTweetsMap.get(j).getUser().getScreenName())){
                 tweets.add(tempTweetsMap.get(j));
             }
         }
     }


        collector.emit(new Values(username,tweets));
   }
 }

1 个答案:

答案 0 :(得分:0)

您需要对组合它们的螺栓中的用户名进行分组。如果您按现在所做的所有字段进行分组,则可能会或可能不会在同一任务中获取同一用户的所有推文。此外,您的地图将仅捕获任何给定用户的最后状态。如果你想要它们,你需要将值设为一组状态。

UserAndTweetsMapperBolt().
    fieldsGrouping(("TweetParserBolt"), new Fields("username")).
    fieldsGrouping(("UserMapperBolt"),new Fields("username"));