在用大约 500 个 scollector 运行 bosun 之后,我在日志中看到了各种问题。下面这段来自 opentsdb 的标准输出(stdout)日志:
org.hbase.async.RemoteException: org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: Server localhost,39157,1475044082464 aborting
at org.apache.hadoop.hbase.regionserver.RSRpcServices.checkOpen(RSRpcServices.java:980)
at org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:1895)
at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32201)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2114)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:101)
at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)
at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)
at java.lang.Thread.run(Thread.java:745)
at org.hbase.async.RegionClient.makeException(RegionClient.java:1738) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decodeException(RegionClient.java:1756) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decode(RegionClient.java:1468) [asynchbase-1.7.1.jar:na]
at org.hbase.async.RegionClient.decode(RegionClient.java:88) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.handler.codec.replay.ReplayingDecoder.callDecode(ReplayingDecoder.java:500) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.codec.replay.ReplayingDecoder.messageReceived(ReplayingDecoder.java:435) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70) [netty-3.9.4.Final.jar:na]
at org.hbase.async.RegionClient.handleUpstream(RegionClient.java:1206) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelHandler.messageReceived(SimpleChannelHandler.java:142) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelHandler.handleUpstream(SimpleChannelHandler.java:88) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.timeout.IdleStateAwareChannelHandler.handleUpstream(IdleStateAwareChannelHandler.java:36) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline$DefaultChannelHandlerContext.sendUpstream(DefaultChannelPipeline.java:791) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.handler.timeout.IdleStateHandler.messageReceived(IdleStateHandler.java:294) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.SimpleChannelUpstreamHandler.handleUpstream(SimpleChannelUpstreamHandler.java:70) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:564) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.DefaultChannelPipeline.sendUpstream(DefaultChannelPipeline.java:559) [netty-3.9.4.Final.jar:na]
at org.hbase.async.HBaseClient$RegionClientPipeline.sendUpstream(HBaseClient.java:3108) [asynchbase-1.7.1.jar:na]
at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:268) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.Channels.fireMessageReceived(Channels.java:255) [netty-3.9.4.Final.jar:na]
at org.jboss.netty.channel.socket.nio.NioWorker.read
我有时也会看到 java.lang.OutOfMemoryError: Java heap space 错误。
我最近做的一处改动是改用我自己的 zk 集群,并在 opentsdb.conf 中设置了 tsd.storage.hbase.zk_quorum=ip1,ip2,ip3。
有没有什么配置可以在容器化的 bosun 上进行调整,使其能够支撑大约 1200 个 scollector 实例?这只是一个 PoC,我目前不打算对各个模块分别进行配置和扩展。