使用tez作为执行引擎运行查询时出现Hive OOM错误

时间:2016-07-24 18:14:43

标签: java hadoop hive out-of-memory tez

当我运行以下查询时,我收到以下错误。

insert overwrite table mybug select row_number() over (order by clickstream_key) as key, clickstream_key as data from mytest;

它正在启动大约2个映射器任务和240个减速器任务,并且工作平稳​​且快速达到239个任务,需要3个小时才能完成4个任务尝试然后工作失败。 表 mytest 包含20亿条记录,其中一列“clickstream_key”。

2016-07-24 09:54:37,918 [ERROR] [TezChild] |tez.TezProcessor|: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$GenericUDAFRowNumberEvaluator.iterate(GenericUDAFRowNumber.java:102)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.aggregate(GenericUDAFEvaluator.java:185)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.evaluateWindowFunction(WindowingTableFunction.java:164)
at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.iterator(WindowingTableFunction.java:580)
at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:340)
at org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:617)
at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:631)
at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:290)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:164)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2016-07-24 09:54:37,920 [INFO] [TezChild] |task.TezTaskRunner|: Encounted an error while executing task: attempt_1469365079244_0001_1_01_000000_0
java.lang.RuntimeException: java.lang.OutOfMemoryError: Java heap space
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:344)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:181)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:172)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1709)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:172)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:168)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:3181)
at java.util.ArrayList.grow(ArrayList.java:261)
at java.util.ArrayList.ensureExplicitCapacity(ArrayList.java:235)
at java.util.ArrayList.ensureCapacityInternal(ArrayList.java:227)
at java.util.ArrayList.add(ArrayList.java:458)
at org.apache.hadoop.hive.ql.udf.generic.GenericUDAFRowNumber$RowNumberBuffer.incr(GenericUDAFRowNumber.java:73)

我尝试增加数据节点堆,reducer堆,映射器堆,hive.tez.java.opts,但它们都没有工作。任何关于此的线索都将受到高度赞赏。错误说java堆空间,java堆空间是什么意思?

0 个答案:

没有答案