下面这个查询在 Hive 服务器上运行时会启动 4 个 map-reduce 作业,并在运行第二个 map-reduce 作业后失败。下面先给出查询,随后是出现的错误信息。如果您有任何想法,请告诉我。
-- Final projection over the merged set (alias kpl), restricted to market_id 1.
select
    kpl.normalized_keyword,
    kpl.ptitle_id as feature_id,
    kpl.tagcount as tag_count,
    kpl.market_id,
    kpl.rpd_metric,
    kpl.rpi_metric,
    kpl.imps_day,
    kpl.clicks_day,
    kpl.clicks,
    kpl.lang
from (
    -- Full outer join of the two derived tables below. The key columns are
    -- taken from whichever side of the join is non-null.
    select
        coalesce(kpl_sprf.normalized_keyword, kpl_sr.normalized_keyword) as normalized_keyword,
        coalesce(kpl_sprf.ptitle_id, kpl_sr.ptitle_id) as ptitle_id,
        coalesce(kpl_sprf.tagcount, kpl_sr.tagcount) as tagcount,
        coalesce(kpl_sprf.market_id, kpl_sr.market_id) as market_id,
        kpl_sprf.rpd_metric,
        kpl_sprf.rpi_metric,
        kpl_sprf.lang,
        kpl_sprf.imps_day,
        kpl_sprf.clicks_day,
        kpl_sr.clicks
    from (
        -- Left side: kpl_sprf_wk rows whose (normalized_keyword, lang) appear in
        -- the 2015-02-26 slice of scfg_kpl_sprf_wk, joined to the stats rows of
        -- the same date.
        select
            kpl_sprf.normalized_keyword,
            kpl_sprf.lang,
            kpl_sprf.ptitle_id,
            kpl_sprf.tagcount,
            kpl_sprf.market_id,
            kpl_sprf.rpd_metric,
            kpl_sprf.rpi_metric,
            -- Per-day impressions: use the short-interval average when present,
            -- otherwise the current count divided by the elapsed days
            -- (divided by 1.0 when that day span is not positive).
            case
                when kp.avg_imps_short is not null
                    then kp.avg_imps_short / kp.interval_days_short
                when datediff(kp.last_modified, kp.interval_start_short) <= 0
                    then kp.cur_imps_short / 1.0
                else kp.cur_imps_short / datediff(kp.last_modified, kp.interval_start_short)
            end as imps_day,
            -- Per-day clicks, same rule as imps_day. cur_clicks_short is left
            -- unqualified exactly as in the original query.
            case
                when kp.avg_clicks_short is not null
                    then kp.avg_clicks_short / kp.interval_days_short
                when datediff(kp.last_modified, kp.interval_start_short) <= 0
                    then cur_clicks_short / 1.0
                else cur_clicks_short / datediff(kp.last_modified, kp.interval_start_short)
            end as clicks_day
        from kpl_sprf_wk kpl_sprf
        left semi join (
            select *
            from scfg_kpl_sprf_wk
            where date_ = '2015-02-26'
        ) kw
            on kw.normalized_keyword = kpl_sprf.normalized_keyword
            and kw.lang = kpl_sprf.lang
        join (
            select *
            from sprf_keyword_ptitle_ng_stats_wk
            where date_ = '2015-02-26'
        ) kp
            on kp.normalized_keyword = kpl_sprf.normalized_keyword
            and kp.ptitle_id = kpl_sprf.ptitle_id
            and kp.market_id = kpl_sprf.market_id
        where
            (kp.cur_clicks_long >= 3 or kp.avg_clicks_long >= 3)
            and (
                datediff(kp.last_modified, kp.interval_start_long) >= 7
                or kp.avg_imps_long is not null
            )
            and kp.ptitle_id not between 1100000000 and 1100100000
    ) kpl_sprf
    full outer join (
        -- Right side: kpl_sr_wk rows filtered by the same 2015-02-26 slice of
        -- scfg_kpl_sprf_wk on (normalized_keyword, lang).
        select
            kpl_sr.normalized_keyword,
            kpl_sr.ptitle_id,
            kpl_sr.tagcount,
            kpl_sr.market_id,
            kpl_sr.clicks
        from kpl_sr_wk kpl_sr
        left semi join (
            select *
            from scfg_kpl_sprf_wk
            where date_ = '2015-02-26'
        ) kw
            on kw.normalized_keyword = kpl_sr.normalized_keyword
            and kw.lang = kpl_sr.lang
    ) kpl_sr
        on kpl_sr.normalized_keyword = kpl_sprf.normalized_keyword
        and kpl_sr.ptitle_id = kpl_sprf.ptitle_id
        and kpl_sr.market_id = kpl_sprf.market_id
) kpl
where kpl.market_id in (1)
运行时抛出以下错误:
Error: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"normalized_keyword":"","lang":"es","date_":"2015-02-26"}
at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:159)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:428)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:340)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:157)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1408)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:152)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"normalized_keyword":"","lang":"es","date_":"2015-02-26"}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:675)
at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
... 8 more
Caused by: java.lang.RuntimeException: cannot find field normalized_keyword from [0:_col0, 1:_col1]
at org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.getStandardStructFieldRef(ObjectInspectorUtils.java:346)
at org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector.getStructFieldRef(StandardStructObjectInspector.java:143)
at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.initialize(ExprNodeColumnEvaluator.java:57)
at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator.initialize(ExprNodeGenericFuncEvaluator.java:128)
at org.apache.hadoop.hive.ql.exec.FilterOperator.processOp(FilterOperator.java:85)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:474)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:800)
at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:474)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:800)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:474)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:800)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:658)
这个错误似乎在作业开始运行后很快就会出现。我认为这是查询中使用了连接(join)导致的。有什么办法可以解决吗?
答案 0 :(得分:0)
在该查询的某个位置,您引用了字段 `normalized_keyword`,但在所选择的字段中找不到该字段。我的第一个建议是把 `SELECT *` 替换为实际的字段列表,并逐一检查 `normalized_keyword` 的每一处引用,确保它在对应的子查询中可供选择。