在 RedStorm DSL 中实现 Storm 的 tick 元组(tick tuple)

时间:2014-03-21 00:07:13

标签: ruby jruby apache-storm

### TOPOLOGY ###
# Word-count topology: RandomSentenceSpout feeds SplitSentenceBolt, which in
# turn feeds WordCountBolt. Each component is declared with a parallelism of 2.
class WordCountTopology < DSL::Topology
  spout(RandomSentenceSpout, parallelism: 2)

  # The splitter subscribes to the spout using the :shuffle option.
  bolt(SplitSentenceBolt, parallelism: 2) do
    source(RandomSentenceSpout, :shuffle)
  end

  # The counter subscribes to the splitter, keyed on the "word" field.
  bolt(WordCountBolt, parallelism: 2) do
    source(SplitSentenceBolt, fields: %w[word])
  end

  # Settings registered under :word_count. The worker count and the
  # max-spout-pending limit are only applied when env is :cluster.
  configure(:word_count) do |env|
    debug(true)
    max_task_parallelism(4)
    if env == :cluster
      num_workers(6)
      max_spout_pending(1000)
    end
  end

  # When env is :local, wait 60 seconds and then shut the cluster down.
  on_submit do |env|
    if env == :local
      sleep 60
      cluster.shutdown
    end
  end
end



### SPOUT ###
# Spout whose on_send block yields one sentence picked at random from a
# fixed in-memory list.
class RandomSentenceSpout < DSL::Spout
  output_fields :word

  # The pause lives inside the on_send block so that it applies to every
  # emission. Placed directly in the class body it would execute exactly once,
  # while the class is being defined, and would never throttle the spout.
  # NOTE(review): assumes the 10-second pause was meant to pace emissions —
  # confirm the intended interval.
  on_send do
    sleep(10)
    # Last expression is the block's value: a randomly chosen sentence.
    @sentences[rand(@sentences.length)]
  end

  # Builds the pool of sentences the on_send block draws from.
  on_init do
    @sentences = [
      "the cow jumped over the moon",
      "an apple a day keeps the doctor away",
      "four score and seven years ago",
      "snow white and the seven dwarfs",
      "i am at two with nature"
    ]
  end
end


####  SplitSentenceBolt ###
# Bolt that splits the first value of each incoming tuple into words.
class SplitSentenceBolt < DSL::Bolt
  output_fields :word

  # The block's value is an array of one-element arrays, one per
  # whitespace-separated word of tuple[0].
  on_receive do |tuple|
    sentence = tuple[0]
    sentence.split(' ').map { |token| [token] }
  end
end




### WORDCOUNTBOLT ###
# Bolt that keeps a running count per word and yields [word, count] for
# every tuple it receives.
class WordCountBolt < DSL::Bolt
  output_fields :word, :count

  # Counter table; words not seen yet start from 0.
  on_init do
    @counts = Hash.new(0)
  end

  # Bumps the counter for the stringified first tuple value and returns the
  # word together with its updated total.
  on_receive do |tuple|
    key = tuple[0].to_s
    total = @counts[key] + 1
    @counts[key] = total

    [key, total]
  end
end

我想实现一个tick元组。我想每60秒做一次单词计数,发出结果,将计数器重置为0。

// 大概是这样 // 在 SplitSentenceBolt(分句 bolt)中

一个每隔 60 秒发送一次 tick_tuple 的函数

// 在 WordCountBolt(单词计数 bolt)中 如果(tick_tuple){
   发出结果
   @counts = [] #re initialize
}

有人能帮我实现吗?我刚接触 Storm,还是个新手。

2 个答案:

答案 0 :(得分:1)

您可以使用 Storm 内置的 tick 元组功能,让某个 bolt 按指定的时间间隔接收一个 tick 元组。

在拓扑定义的bolt部分中,只需添加以下配置:

# Illustrative topology fragment: inside the bolt's definition block, set the
# TOPOLOGY_TICK_TUPLE_FREQ_SECS option to 60 (seconds, going by the constant's
# name). The `...` lines are placeholders for the rest of the bolt definition.
bolt MyBolt do
  ...
  set Backtype::Config::TOPOLOGY_TICK_TUPLE_FREQ_SECS, 60
  ...
end

在你的 bolt 中,可以这样检测 tick 元组:

# A tuple is treated as a tick tuple when its source stream id is "__tick".
# The `...` is a placeholder for the tick-handling code.
if tuple.source_stream_id == "__tick"
  ...
end

答案 1 :(得分:0)

我不熟悉 Ruby,但希望下面的 Python 代码能帮你解决问题。我们在自己的 bolt 中是这样使用它的:

def process(self, tup):                                                     
    """                                                                     
    If spout trigers tick-tuple, then perform upload                                 
    """                                                                     
    if tup.stream == '__tick':                                                       
        self.perform_upload()                                                  
        storm.ack(tup)                                                      
        return