我正在 Hive 中运行下面的查询,数据量大约 200 万行。有没有办法改善性能?源 Hive 表按 created_date 列分区。
select t.id,
-- Keep amt_N only when its rank is below 30% of amt_N_count (the number of
-- rows whose amt_N is not the '.' string); every other row gets NULL.
case when t.amt_1_rank < 0.3*f.amt_1_count then t.amt_1 else null end as amt_1,
case when t.amt_2_rank < 0.3*f.amt_2_count then t.amt_2 else null end as amt_2,
..
..
.. -- Likewise for the remaining columns (about 30 in total), e.g. amt_3, ...
-- t: the rows filtered to created_date = '2017-10-15', each with one
-- row_number per amt column. The windows have no PARTITION BY, so each rank is
-- computed across all filtered rows, ordered by the value cast to
-- DECIMAL(8,7), largest first.
from (
select a.id,
a.amt_1,
row_number() over (ORDER BY cast(a.amt_1 AS DECIMAL(8,7)) DESC) AS amt_1_rank,
a.amt_2,
row_number() over (ORDER BY cast(a.amt_2 AS DECIMAL(8,7)) DESC) AS amt_2_rank
from source_table a WHERE created_date='2017-10-15' )t
-- No ON clause: every row of t is paired with every row of f. f is an
-- aggregate without GROUP BY, so it is expected to yield a single row.
join
(
-- f: per-column counts for the same filter. The CASE turns '.' into NULL and
-- count() skips NULLs, so each count is the number of rows whose value is not '.'.
-- NOTE(review): this reads source_table a second time with the same predicate.
SELECT count(case when amt_1='.' then null else 1 end) AS amt_1_count,
count(case when amt_2='.' then null else 1 end) AS amt_2_count,
..
..
FROM source_table
WHERE created_date='2017-10-15'
) f
答案 0 :(得分:0)
你可以不使用 JOIN(连接)来做到这一点,改用窗口函数在同一次扫描中计算计数:
-- Join-free variant: the per-column counts are computed with COUNT(...) OVER ()
-- in the same query that computes the ranks, so source_table is referenced once.
WITH flagged AS (
    -- Filtered rows plus one flag per amt column: NULL when the value is the
    -- '.' string, 1 otherwise (COUNT skips the NULLs).
    SELECT
        src.*,
        CASE WHEN src.amt_1 = '.' THEN NULL ELSE 1 END AS amt_1_flag,
        CASE WHEN src.amt_2 = '.' THEN NULL ELSE 1 END AS amt_2_flag
    FROM source_table src
    WHERE src.created_date = '2017-10-15'
),
ranked AS (
    -- Rank each amt column (largest first, no PARTITION BY) and attach the
    -- count of flagged rows to every row.
    SELECT
        fl.id,
        fl.amt_1,
        ROW_NUMBER() OVER (ORDER BY CAST(fl.amt_1 AS DECIMAL(8,7)) DESC) AS amt_1_rank,
        fl.amt_2,
        ROW_NUMBER() OVER (ORDER BY CAST(fl.amt_2 AS DECIMAL(8,7)) DESC) AS amt_2_rank,
        COUNT(fl.amt_1_flag) OVER () AS amt_1_count,
        COUNT(fl.amt_2_flag) OVER () AS amt_2_count
    FROM flagged fl
)
-- Keep a value only when its rank is below 30% of that column's count.
SELECT
    r.id,
    CASE WHEN r.amt_1_rank < 0.3 * r.amt_1_count THEN r.amt_1 ELSE NULL END AS amt_1,
    CASE WHEN r.amt_2_rank < 0.3 * r.amt_2_count THEN r.amt_2 ELSE NULL END AS amt_2,
..
..
.. -- Likewise for the remaining columns (about 30 in total), e.g. amt_3, ...
FROM ranked r