在Apache Storm上添加bolt的重试机制

时间:2014-12-14 13:43:49

标签: java error-handling bigdata apache-storm

我的 Storm 拓扑中有一个 bolt(dispatcher,即调度器),它会发起 HTTP 请求连接。

我想在请求失败(连接超时、返回失败状态等)时添加重试机制。重试应只在 dispatcher 这个 bolt 内部进行,而不是让整个拓扑从头重新处理。

通常我的做法是添加一个负责重试和异常处理的队列(例如在重试 3 次之后自动将消息发送到错误队列……)。

在 bolt 内部这样做可以吗?有没有人有这方面的经验,可以建议我使用哪个库?

2 个答案:

答案 0 :(得分:1)

当然可以!这看起来是一种合理的错误处理方式。除了用于连接您所选队列系统的 API 之外,我不确定您还需要使用别的什么库。

在你的 bolt 中,代码可能类似这样:

/**
 * Processes one tuple and handles any failure locally instead of letting it
 * propagate out of the bolt.
 *
 * <p>Every exception thrown by the work inside the {@code try} block is caught
 * here, logged, and reported through the collector; nothing is rethrown.
 *
 * @param tuple     the incoming tuple to process
 * @param collector the collector used to report the error
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
   try {
      // do something which might fail here...
   } catch (Exception e) {
      // Log the failure. The exception is passed as the trailing argument so the
      // logger records it as the cause; no "{}" placeholder is used because a
      // trailing Throwable is not substituted into the message text.
      LOG.error("Bolt error", e);
      // do you want the error to show up in storm UI?
      collector.reportError(e);
      // or just put information on the queue for processing later
   }
}

只要您在 bolt 内部捕获了异常,拓扑就不会重新启动。

另一个选择是利用 Storm 内置的可靠消息处理(guaranteed message processing)能力,让元组失败(fail),并以这种方式对它们进行重试。

答案 1 :(得分:-1)

package banktransactions;

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.log4j.Logger;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

public class TransactionsSpouts extends BaseRichSpout{

private static final Integer MAX_FAILS = 2;
Map<Integer,String> messages;
Map<Integer,Integer> transactionFailureCount;
Map<Integer,String> toSend;
private SpoutOutputCollector collector;  

static Logger LOG = Logger.getLogger(TransactionsSpouts.class);


public void ack(Object msgId) {
    messages.remove(msgId);
    LOG.info("Message fully processed ["+msgId+"]");
}

public void close() {

}

public void fail(Object msgId) {
    if(!transactionFailureCount.containsKey(msgId))
        throw new RuntimeException("Error, transaction id not found ["+msgId+"]");
    Integer transactionId = (Integer) msgId;

    //Get the transactions fail
    Integer failures = transactionFailureCount.get(transactionId) + 1;
    if(failures >= MAX_FAILS){
        //If exceeds the max fails will go down the topology
        throw new RuntimeException("Error, transaction id ["+transactionId+"] has had many errors ["+failures+"]");
    }
    //If not exceeds the max fails we save the new fails quantity and re-send the message 
    transactionFailureCount.put(transactionId, failures);
    toSend.put(transactionId,messages.get(transactionId));
    LOG.info("Re-sending message ["+msgId+"]");
}

public void nextTuple() {
    if(!toSend.isEmpty()){
        for(Map.Entry<Integer, String> transactionEntry : toSend.entrySet()){
            Integer transactionId = transactionEntry.getKey();
            String transactionMessage = transactionEntry.getValue();
            collector.emit(new Values(transactionMessage),transactionId);
        }
        /*
         * The nextTuple, ack and fail methods run in the same loop, so
         * we can considerate the clear method atomic
         */
        toSend.clear();
    }
    try {
        Thread.sleep(1);
    } catch (InterruptedException e) {}
}

public void open(Map conf, TopologyContext context,
        SpoutOutputCollector collector) {
    Random random = new Random();
    messages = new HashMap<Integer, String>();
    toSend = new HashMap<Integer, String>();
    transactionFailureCount = new HashMap<Integer, Integer>();
    for(int i = 0; i< 100; i++){
        messages.put(i, "transaction_"+random.nextInt());
        transactionFailureCount.put(i, 0);
    }
    toSend.putAll(messages);
    this.collector = collector;
}

public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("transactionMessage"));
}

}