我的查询
-- Append the qualifying subs2 rows to hist, stamping every row with the
-- DATE_1D hivevar value as its TIME_KEY.
INSERT INTO TABLE hist
SELECT
    subs2.SUBS_KEY,
    subs2.FACT_DATE,
    subs2.TYPE,
    subs2.BALANCE,
    '${DATE_1D}' AS TIME_KEY
FROM subs2
WHERE subs2.CNT < 15
  AND subs2.AMT > 0;
其中 '${DATE_1D}' 是一个 hivevar 变量,
其值为 DATE_1D=2017-02-28
执行后,行数从最初的
db.hist stats: [numFiles=22168, numRows=254582570, totalSize=19188669803, rawDataSize=19613070967]
变为
db.hist stats: [numFiles=22268, numRows=257376901, totalSize=19386949977, rawDataSize=19808556810]
差异 = 2794331 行
但是当我执行以下语句时
-- Copy the rows for a single time_key from db.hist into db.new_hist.
-- Columns are listed explicitly, in db.hist's declared order, rather than
-- SELECT *: INSERT ... SELECT maps columns by position, so an explicit list
-- keeps the mapping visible and stable if either table's schema changes.
INSERT INTO TABLE db.new_hist
SELECT
    subs_key,
    fact_date,
    type,
    balance,
    time_key
FROM db.hist
WHERE time_key = '2017-02-28';
我得到了
db.new_hist stats: [numFiles=201, numRows=178798, totalSize=2227208, rawDataSize=12510497]
new_hist 是我用 create table like 创建的。
db.hist 的 show create table 结果:
CREATE TABLE `db.hist`(
`subs_key` string,
`fact_date` string,
`type` int,
`balance` double,
`time_key` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
为什么行数不匹配?
我执行了
-- Total number of rows currently in hist.
SELECT COUNT(*)
FROM hist
分别在启用和未启用以下设置的情况下:
-- NOTE(review): per the Hive configuration docs, enabling this lets simple
-- aggregates such as count(*) be answered from metastore statistics instead
-- of scanning the data — confirm for the Hive version in use.
set hive.compute.query.using.stats=true;
两种情况下的结果都是 246210052
由于某种原因,插入日志显示错误的行数。