您好,我正在使用本地集群模式(local cluster mode)的 Apache Storm 进行开发。我运行了一个包含一个 spout 和两个 bolt 的简单拓扑,该示例代码用于统计日志文件中的单词数。示例代码网址: http://kaviddiss.com/2013/05/17/how-to-get-started-with-storm-framework-in-5-minutes/
代码在处理小型日志文件(7.3M)时运行正常,但是当我尝试处理大型日志文件(100M-1000M)时,就会抛出异常。
我已经把集群关闭之前的等待时间设置得很长。我是不是遗漏了某些配置选项?
异常信息:
11326 [Thread-6] INFO backtype.storm.daemon.supervisor - Launching worker with assignment #backtype.storm.daemon.supervisor.LocalAssignment{:storm-id "HelloStorm-1-1403522378", :executors ([3 3] [ 4 4] [2 2] [1 1])} for this supervisor 868aff95-7b63-44d1-ad55-2dd07d9c7ba2 on port 1024 with id df052251-45ec-4bc3-a486-c2bf11a8a0fa
11336 [Thread-6] INFO backtype.storm.daemon.worker - Launching worker for HelloStorm-1-1403522378 on 868aff95-7b63-44d1-ad55-2dd07d9c7ba2:1024 with id df052251-45ec-4bc3-a486-c2bf11a8a0fa and conf {"dev.zookeeper.path" "/tmp/dev-storm-zookeeper", "topology.tick.tuple.freq.secs" nil, "topology.builtin.metrics.bucket.size.secs" 60, "topology.fall.back.on.java.serialization" true, "topology.ma x.error.report.per.interval" 5, "zmq.linger.millis" 0, "topology.skip.missing.kryo.registrations" true, "storm.messaging.netty.client_worker_threads" 1, "ui.childopts" "-Xmx768m", "storm.zookeeper. session.timeout" 20000, "nimbus.reassign" true, "topology.trident.batch.emit.interval.millis" 50, "nimbus.monitor.freq.secs" 10, "logviewer.childopts" "-Xmx128m", "java.library.path" "/usr/local/li b:/opt/local/lib:/usr/lib", "topology.executor.send.buffer.size" 1024, "storm.local.dir" "/var/tmp//77d5cd63-9539-44a4-892a-9e91553987df", "storm.messaging.netty.buffer_size" 5242880, "supervisor.w orker.start.timeout.secs" 120, "topology.enable.message.timeouts" true, "nimbus.cleanup.inbox.freq.secs" 600, "nimbus.inbox.jar.expiration.secs" 3600, "drpc.worker.threads" 64, "topology.worker.sha red.thread.pool.size" 4, "nimbus.host" "localhost", "storm.messaging.netty.min_wait_ms" 100, "storm.zookeeper.port" 2000, "transactional.zookeeper.port" nil, "topology.executor.receive.buffer.size" 1024, "transactional.zookeeper.servers" nil, "storm.zookeeper.root" "/storm", "storm.zookeeper.retry.intervalceiling.millis" 30000, "supervisor.enable" true, "storm.messaging.netty.server_worker_t hreads" 1, "storm.zookeeper.servers" ["localhost"], "transactional.zookeeper.root" "/transactional", "topology.acker.executors" nil, "topology.transfer.buffer.size" 1024, "topology.worker.childopts " nil, "drpc.queue.size" 128, "worker.childopts" "-Xmx768m", "supervisor.heartbeat.frequency.secs" 5, "topology.error.throttle.interval.secs" 10, "zmq.hwm" 0, "drpc.port" 3772, "supervisor.monitor. 
frequency.secs" 3, "drpc.childopts" "-Xmx768m", "topology.receiver.buffer.size" 8, "task.heartbeat.frequency.secs" 3, "topology.tasks" nil, "storm.messaging.netty.max_retries" 30, "topology.spout.w ait.strategy" "backtype.storm.spout.SleepSpoutWaitStrategy", "nimbus.thrift.max_buffer_size" 1048576, "topology.max.spout.pending" nil, "storm.zookeeper.retry.interval" 1000, "topology.sleep.spout. wait.strategy.time.ms" 1, "nimbus.topology.validator" "backtype.storm.nimbus.DefaultTopologyValidator", "supervisor.slots.ports" (1024 1025 1026), "topology.debug" false, "nimbus.task.launch.secs" 120, "nimbus.supervisor.timeout.secs" 60, "topology.message.timeout.secs" 30, "task.refresh.poll.secs" 10, "topology.workers" 1, "supervisor.childopts" "-Xmx256m", "nimbus.thrift.port" 6627, "topol ogy.stats.sample.rate" 0.05, "worker.heartbeat.frequency.secs" 1, "topology.tuple.serializer" "backtype.storm.serialization.types.ListDelegateSerializer", "topology.disruptor.wait.strategy" "com.lm ax.disruptor.BlockingWaitStrategy", "nimbus.task.timeout.secs" 30, "storm.zookeeper.connection.timeout" 15000, "topology.kryo.factory" "backtype.storm.serialization.DefaultKryoFactory", "drpc.invoc ations.port" 3773, "logviewer.port" 8000, "zmq.threads" 1, "storm.zookeeper.retry.times" 5, "storm.thrift.transport" "backtype.storm.security.auth.SimpleTransportPlugin", "topology.state.synchroniz ation.timeout.secs" 60, "supervisor.worker.timeout.secs" 30, "nimbus.file.copy.expiration.secs" 600, "storm.messaging.transport" "backtype.storm.messaging.netty.Context", "logviewer.appender.name" "A1", "storm.messaging.netty.max_wait_ms" 1000, "drpc.request.timeout.secs" 600, "storm.local.mode.zmq" false, "ui.port" 8080, "nimbus.childopts" "-Xmx1024m", "storm.cluster.mode" "local", "topolog y.optimize" true, "topology.max.task.parallelism" nil}
11337 [Thread-6] INFO com.netflix.curator.framework.imps.CuratorFrameworkImpl - Starting
11344 [Thread-6-EventThread] INFO backtype.storm.zookeeper - Zookeeper state update: :connected:none
11358 [Thread-6] INFO com.netflix.curator.framework.imps.CuratorFrameworkImpl - Starting
11611 [Thread-6] INFO backtype.storm.daemon.executor - Loading executor line-reader-spout:[2 2]
11618 [Thread-6] INFO backtype.storm.daemon.executor - Loaded executor tasks line-reader-spout:[2 2]
11632 [Thread-16-line-reader-spout] INFO backtype.storm.daemon.executor - Opening spout line-reader-spout:(2)
Start Time: 18512885554479686
11634 [Thread-16-line-reader-spout] INFO backtype.storm.daemon.executor - Opened spout line-reader-spout:(2)
11636 [Thread-16-line-reader-spout] INFO backtype.storm.daemon.executor - Activating spout line-reader-spout:(2)
11638 [Thread-6] INFO backtype.storm.daemon.executor - Finished loading executor line-reader-spout:[2 2]
11677 [Thread-6] INFO backtype.storm.daemon.executor - Loading executor word-counter:[3 3]
11721 [Thread-6] INFO backtype.storm.daemon.executor - Loaded executor tasks word-counter:[3 3]
11725 [Thread-6] INFO backtype.storm.daemon.executor - Finished loading executor word-counter:[3 3]
11733 [Thread-6] INFO backtype.storm.daemon.executor - Loading executor word-spitter:[4 4]
11735 [Thread-6] INFO backtype.storm.daemon.executor - Loaded executor tasks word-spitter:[4 4]
11737 [Thread-6] INFO backtype.storm.daemon.executor - Finished loading executor word-spitter:[4 4]
11746 [Thread-6] INFO backtype.storm.daemon.executor - Loading executor __system:[-1 -1]
11747 [Thread-6] INFO backtype.storm.daemon.executor - Loaded executor tasks __system:[-1 -1]
11748 [Thread-6] INFO backtype.storm.daemon.executor - Finished loading executor __system:[-1 -1]
11761 [Thread-6] INFO backtype.storm.daemon.executor - Loading executor __acker:[1 1]
11765 [Thread-6] INFO backtype.storm.daemon.executor - Loaded executor tasks __acker:[1 1]
11767 [Thread-6] INFO backtype.storm.daemon.executor - Timeouts disabled for executor __acker:[1 1]
11768 [Thread-6] INFO backtype.storm.daemon.executor - Finished loading executor __acker:[1 1]
11768 [Thread-6] INFO backtype.storm.daemon.worker - Launching receive-thread for 868aff95-7b63-44d1-ad55-2dd07d9c7ba2:1024
11786 [Thread-6] INFO backtype.storm.daemon.worker - Worker has topology config {"storm.id" "HelloStorm-1-1403522378", "dev.zookeeper.path" "/tmp/dev-storm-zookeeper", "topology.tick.tuple.freq.se cs" nil, "topology.builtin.metrics.bucket.size.secs" 60, "topology.fall.back.on.java.serialization" true, "topology.max.error.report.per.interval" 5, "zmq.linger.millis" 0, "topology.skip.missing.k ryo.registrations" true, "storm.messaging.netty.client_worker_threads" 1, "ui.childopts" "-Xmx768m", "storm.zookeeper.session.timeout" 20000, "nimbus.reassign" true, "topology.trident.batch.emit.in terval.millis" 50, "nimbus.monitor.freq.secs" 10, "logviewer.childopts" "-Xmx128m", "java.library.path" "/usr/local/lib:/opt/local/lib:/usr/lib", "topology.executor.send.buffer.size" 1024, "storm.l ocal.dir" "/var/tmp//77d5cd63-9539-44a4-892a-9e91553987df", "storm.messaging.netty.buffer_size" 5242880, "supervisor.worker.start.timeout.secs" 120, "topology.enable.message.timeouts" true, "inputF ile" "test_log.log", "nimbus.cleanup.inbox.freq.secs" 600, "nimbus.inbox.jar.expiration.secs" 3600, "drpc.worker.threads" 64, "topology.worker.shared.thread.pool.size" 4, "nimbus.host" "localhost", "storm.messaging.netty.min_wait_ms" 100, "storm.zookeeper.port" 2000, "transactional.zookeeper.port" nil, "topology.executor.receive.buffer.size" 1024, "transactional.zookeeper.servers" nil, "stor m.zookeeper.root" "/storm", "storm.zookeeper.retry.intervalceiling.millis" 30000, "supervisor.enable" true, "storm.messaging.netty.server_worker_threads" 1, "storm.zookeeper.servers" ["localhost"], "transactional.zookeeper.root" "/transactional", "topology.acker.executors" nil, "topology.kryo.decorators" (), "topology.name" "HelloStorm", "topology.transfer.buffer.size" 1024, "topology.worker .childopts" nil, "drpc.queue.size" 128, "worker.childopts" "-Xmx768m", "supervisor.heartbeat.frequency.secs" 5, "topology.error.throttle.interval.secs" 10, "zmq.hwm" 0, "drpc.port" 3772, "superviso 
r.monitor.frequency.secs" 3, "drpc.childopts" "-Xmx768m", "topology.receiver.buffer.size" 8, "task.heartbeat.frequency.secs" 3, "topology.tasks" nil, "storm.messaging.netty.max_retries" 30, "topolo gy.spout.wait.strategy" "backtype.storm.spout.SleepSpoutWaitStrategy", "nimbus.thrift.max_buffer_size" 1048576, "topology.max.spout.pending" 1, "storm.zookeeper.retry.interval" 1000, "topology.slee p.spout.wait.strategy.time.ms" 1, "nimbus.topology.validator" "backtype.storm.nimbus.DefaultTopologyValidator", "supervisor.slots.ports" (1024 1025 1026), "topology.debug" false, "nimbus.task.launc h.secs" 120, "nimbus.supervisor.timeout.secs" 60, "topology.kryo.register" nil, "topology.message.timeout.secs" 30, "task.refresh.poll.secs" 10, "topology.workers" 1, "supervisor.childopts" "-Xmx25 6m", "nimbus.thrift.port" 6627, "topology.stats.sample.rate" 0.05, "worker.heartbeat.frequency.secs" 1, "topology.tuple.serializer" "backtype.storm.serialization.types.ListDelegateSerializer", "top ology.disruptor.wait.strategy" "com.lmax.disruptor.BlockingWaitStrategy", "nimbus.task.timeout.secs" 30, "storm.zookeeper.connection.timeout" 15000, "topology.kryo.factory" "backtype.storm.serializ ation.DefaultKryoFactory", "drpc.invocations.port" 3773, "logviewer.port" 8000, "zmq.threads" 1, "storm.zookeeper.retry.times" 5, "storm.thrift.transport" "backtype.storm.security.auth.SimpleTransp ortPlugin", "topology.state.synchronization.timeout.secs" 60, "supervisor.worker.timeout.secs" 30, "nimbus.file.copy.expiration.secs" 600, "storm.messaging.transport" "backtype.storm.messaging.nett y.Context", "logviewer.appender.name" "A1", "storm.messaging.netty.max_wait_ms" 1000, "drpc.request.timeout.secs" 600, "storm.local.mode.zmq" false, "ui.port" 8080, "nimbus.childopts" "-Xmx1024m", "storm.cluster.mode" "local", "topology.optimize" true, "topology.max.task.parallelism" nil}
11786 [Thread-6] INFO backtype.storm.daemon.worker - Worker df052251-45ec-4bc3-a486-c2bf11a8a0fa for storm HelloStorm-1-1403522378 on 868aff95-7b63-44d1-ad55-2dd07d9c7ba2:1024 has finished loading
11801 [Thread-18-word-counter] INFO backtype.storm.daemon.executor - Preparing bolt word-counter:(3)
11821 [Thread-18-word-counter] INFO backtype.storm.daemon.executor - Prepared bolt word-counter:(3)
11823 [Thread-20-word-spitter] INFO backtype.storm.daemon.executor - Preparing bolt word-spitter:(4)
11825 [Thread-20-word-spitter] INFO backtype.storm.daemon.executor - Prepared bolt word-spitter:(4)
11838 [Thread-24-__acker] INFO backtype.storm.daemon.executor - Preparing bolt __acker:(1)
11840 [Thread-22-__system] INFO backtype.storm.daemon.executor - Preparing bolt __system:(-1)
11854 [Thread-24-__acker] INFO backtype.storm.daemon.executor - Prepared bolt __acker:(1)
12173 [Thread-22-__system] INFO backtype.storm.daemon.executor - Prepared bolt __system:(-1)
112055 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
112058 [main-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
112058 [Thread-6-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
112058 [Thread-6-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
121441 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
121442 [main-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
121442 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
121442 [main-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
121443 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
121443 [main-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
121443 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
121444 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
134654 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: SUSPENDED
134655 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
134655 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
134656 [main-EventThread] WARN com.netflix.curator.ConnectionState - Session expired event received
134656 [main-EventThread] WARN backtype.storm.cluster - Received event :disconnected::none: with disconnected Zookeeper.
134656 [main-EventThread] WARN com.netflix.curator.ConnectionState - Session expired event received
134657 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: LOST
134657 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
134657 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: LOST
139931 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
149745 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
149745 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
149746 [main-EventThread] WARN com.netflix.curator.ConnectionState - Session expired event received
149746 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: LOST
149747 [main-EventThread] WARN backtype.storm.cluster - Received event :expired::none: with disconnected Zookeeper.
149747 [main-EventThread] WARN com.netflix.curator.ConnectionState - Session expired event received
149747 [main-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: LOST
149747 [main-EventThread] WARN backtype.storm.cluster - Received event :expired::none: with disconnected Zookeeper.
158929 [main-EventThread] WARN backtype.storm.cluster - Received event :expired::none: with disconnected Zookeeper.
158931 [main-EventThread] WARN backtype.storm.cluster - Received event :expired::none: with disconnected Zookeeper.
158931 [Thread-6-EventThread] WARN com.netflix.curator.ConnectionState - Session expired event received
158931 [Thread-6-EventThread] INFO com.netflix.curator.framework.state.ConnectionStateManager - State change: LOST
158931 [Thread-6-EventThread] WARN backtype.storm.cluster - Received event :expired::none: with disconnected Zookeeper.
158932 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
158933 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
176934 [ConnectionStateManager-0] WARN com.netflix.curator.framework.state.ConnectionStateManager - There are no ConnectionStateListeners registered.
357333 [CuratorFramework-5] ERROR com.netflix.curator.ConnectionState - Connection timed out
org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss
at com.netflix.curator.ConnectionState.getZooKeeper(ConnectionState.java:72) ~[curator-client-1.0.1.jar:na]
at com.netflix.curator.CuratorZookeeperClient.getZooKeeper(CuratorZookeeperClient.java:74) [curator-client-1.0.1.jar:na]
at com.netflix.curator.framework.imps.CuratorFrameworkImpl.getZooKeeper(CuratorFrameworkImpl.java:353) [curator-framework-1.0.1.jar:na]
at com.netflix.curator.framework.imps.BackgroundSyncImpl.performBackgroundOperation(BackgroundSyncImpl.java:39) [curator-framework-1.0.1.jar:na]
at com.netflix.curator.framework.imps.OperationAndData.callPerformBackgroundOperation(OperationAndData.java:40) [curator-framework-1.0.1.jar:na]
at com.netflix.curator.framework.imps.CuratorFrameworkImpl.backgroundOperationsLoop(CuratorFrameworkImpl.java:547) [curator-framework-1.0.1.jar:na]
at com.netflix.curator.framework.imps.CuratorFrameworkImpl.access$200(CuratorFrameworkImpl.java:50) [curator-framework-1.0.1.jar:na]
at com.netflix.curator.framework.imps.CuratorFrameworkImpl$2.call(CuratorFrameworkImpl.java:177) [curator-framework-1.0.1.jar:na]
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) [na:1.6.0_65]
at java.util.concurrent.FutureTask.run(FutureTask.java:138) [na:1.6.0_65]
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895) [na:1.6.0_65]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918) [na:1.6.0_65]
at java.lang.Thread.run(Thread.java:680) [na:1.6.0_65]
[更新]
在运行 70M 的文件时,我遇到了新的异常:
622366 [CuratorFramework-9] ERROR com.netflix.curator.framework.imps.CuratorFrameworkImpl - Background exception was not retry-able or retry gave up
java.lang.OutOfMemoryError: GC overhead limit exceeded
答案 0 :(得分:2)
问题似乎正如异常信息所描述的那样:您加载到内存中的数据量超出了 JVM 所能支持的范围。我推测这发生在 spout 中。对于非常大的文件,您需要把处理过程拆分开来:要么提前将文件拆分成多个小文件,要么以流式方式读取文件,而不是尝试把整个文件一次性加载到内存中。