我想用 Storm 找出两个文本文件中共同的用户。前两个 bolt 分别从两个不同的文本文件中读取所有用户,第三个 bolt 负责查找共同用户;但即使它看起来已经找到了共同用户,也不会把它们打印出来。这是我的代码:
Bolt#1
/**
 * Bolt that turns one line of the authors file into a single-field tuple.
 *
 * <p>The author name is taken as everything after the first {@code '['} in the
 * line, with every {@code ']'} removed, and is emitted in the {@code USERNAME}
 * field so that downstream bolts can group on it.
 */
public class ProcessAuthors extends BaseRichBolt {
    private OutputCollector collector;

    /** Declares the single output field, {@code USERNAME}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("USERNAME"));
    }

    /** Stores the collector used later by {@link #execute(Tuple)}. */
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Extracts the author from the first field of {@code input}, emits it
     * anchored to {@code input}, and acknowledges {@code input}.
     *
     * @param input tuple whose first field is one line of the authors file
     */
    @Override
    public void execute(Tuple input) {
        String author = extractAuthor(input.getString(0));
        collector.emit(input, new Values(author));
        // BaseRichBolt does not ack on its own; without this the tuple tree is
        // never completed and the spout tuple eventually times out.
        collector.ack(input);
    }

    /**
     * Returns the text after the first {@code '['} with every {@code ']'} removed.
     * When the line contains no {@code '['}, {@code indexOf} yields -1 and the
     * whole line (minus any {@code ']'}) is returned.
     */
    private static String extractAuthor(String lineFromFile) {
        int initialIndex = lineFromFile.indexOf("[");
        return lineFromFile.substring(initialIndex + 1).replace("]", "");
    }
}
Bolt#2
public class FromTweetsFileToFurtherProcessing extends BaseRichBolt {
OutputCollector collector;
String author;
/** Declares the single output field of this bolt, {@code USERNAME}. */
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    final Fields outputSchema = new Fields("USERNAME");
    declarer.declare(outputSchema);
}
/**
 * Keeps a reference to the output collector so that {@code execute} can emit
 * tuples through it. The configuration map and topology context are not used.
 */
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    this.collector = collector;
}
/**
 * Extracts the tweet author from one comma-separated line and emits it in the
 * {@code USERNAME} field.
 *
 * <p>The line is split on {@code ','}; the last piece that starts with
 * {@code "tweetAuthor"} (including the quotes) is passed to
 * {@code removeTwoWordsFromString} to obtain the author. If no piece matches,
 * nothing is emitted for this tuple. The input tuple is always acknowledged.
 *
 * @param input tuple whose first field is one line of the tweets file
 */
@Override
public void execute(Tuple input) {
    String lineFromFile = input.getString(0);

    // Use a local, not the instance field: a value kept on the bolt would leak
    // from one tuple into the next and be re-emitted for lines without an author.
    String tweetAuthor = null;
    for (String field : lineFromFile.split(",")) {
        if (field.startsWith("\"tweetAuthor\"")) {
            // NOTE(review): removeTwoWordsFromString is not visible here; presumably it
            // strips the key prefix and the closing quote - confirm against its definition.
            tweetAuthor = removeTwoWordsFromString(field, "\"tweetAuthor\":\"", "\"");
        }
    }

    if (tweetAuthor != null) {
        collector.emit(input, new Values(tweetAuthor));
    }
    // BaseRichBolt does not ack on its own; ack even when nothing was emitted
    // so the spout tuple is not left pending until it times out.
    collector.ack(input);
}
Bolt#3
public class SearchForTheAuthorsTweetData extends BaseRichBolt{
OutputCollector collector;
List<String>authors=new ArrayList<String>();
List<String>temp=new ArrayList<String>();
String author1;String author2;
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("USERNAME"));
}
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
this.collector=collector;
}
@Override
public void execute(Tuple input) {
final String sourceComponent = input.getSourceComponent();
if ("PROCESS_AUTHORS".equals(sourceComponent)) {
author1 = input.getString(0);
authors.add(author1);
}
if ("PROCESS_TWEETS_FROM_MONGODB".equals(sourceComponent)) {
author2 = input.getString(0);
temp.add(author2);
}
if(author2!=null && author1!=null){
System.out.println("USER: "+author2);
System.out.println("AUTHOR: "+author1);
if(author2.equals(author1)){
System.out.println("COMMON AUTHOR: "+author2);
collector.emit(input,new Values(author1));
}
}
拓扑
public class TheAuthorsAndTheirTweetData {
/**
 * Wires the two file-reading spouts, the two parsing bolts and the search bolt
 * into one topology and runs it.
 *
 * <p>With at least one argument the topology is submitted to a cluster under
 * the name {@code args[0]}; otherwise it runs in a local cluster under the name
 * {@code Test} for one hour and is then killed.
 *
 * @param args optional; {@code args[0]} is the topology name for cluster submission
 * @throws Exception if topology submission fails
 */
public static void main(String[] args) throws Exception {
    TopologyBuilder builder = new TopologyBuilder();

    // Authors pipeline: file -> author name.
    builder.setSpout("READ_AUTHORS_MONGO", new ReadLinesFromTextFile("anAuthor.txt"), 1);
    builder.setBolt("PROCESS_AUTHORS", new ProcessAuthors(), 2)
            .shuffleGrouping("READ_AUTHORS_MONGO");

    // Tweets pipeline: file -> tweet author name.
    builder.setSpout("READ_TWEETS_FROM_MONGODB", new ReadLinesFromTextFile("tweets.txt"), 1);
    builder.setBolt("PROCESS_TWEETS_FROM_MONGODB", new FromTweetsFileToFurtherProcessing(), 2)
            .shuffleGrouping("READ_TWEETS_FROM_MONGODB");

    // Both name streams are grouped on USERNAME before reaching the search bolt.
    builder.setBolt("SEARCH_1", new SearchForTheAuthorsTweetData(), 8)
            .fieldsGrouping("PROCESS_AUTHORS", new Fields("USERNAME"))
            .fieldsGrouping("PROCESS_TWEETS_FROM_MONGODB", new Fields("USERNAME"));

    Config conf = new Config();
    boolean submitToCluster = args != null && args.length > 0;

    if (!submitToCluster) {
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("Test", conf, builder.createTopology());
        Utils.sleep(1 * 60 * 60 * 1000);
        cluster.killTopology("Test");
        cluster.shutdown();
        return;
    }

    conf.setNumWorkers(10);
    conf.setNumAckers(5);
    conf.setMaxSpoutPending(100);
    StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
}
答案 0 :(得分:0)
只有当同一位作者恰好出现在两个相邻的元组中(一个来自第一个 bolt,一个来自第二个 bolt)时,第三个 bolt 才能找到共同作者。请将代码改成如下形式:
// Replacement for the PROCESS_TWEETS_FROM_MONGODB branch of the third bolt's execute():
// the incoming tweet author is checked against ALL authors collected so far in
// `authors`, not only against the most recently received one.
// NOTE(review): this catches a tweet author that arrives after the matching author;
// the PROCESS_AUTHORS branch presumably needs the mirror check (temp.contains(author1))
// to catch the opposite arrival order - confirm.
if ("PROCESS_TWEETS_FROM_MONGODB".equals(sourceComponent)) {
author2 = input.getString(0);
temp.add(author2);
if (authors.contains(author2)) {
// common author - emit it
}
}
请注意,这种做法可能无法扩展到大数据量,因为必须把两个文件的内容都读入第 3 个 bolt 的内存中。