当前方案=>我们有一个在产品集群上运行的查询。 此查询仅从1个表和另一个嵌套表(嵌套的联接)之间的联接中选择3个字段,然后在最后执行groupby,但在生产中运行了2个小时,并在该联接中命中了一个巨大表
查询:
INSERT OVERWRITE TABLE mstr_wrk.final_acct_data
SELECT
a0,
a1,
a2,
a3
FROM
(
SELECT
t1.a0 as a0
FROM
(
SELECT
t1.a0 as a0
FROM
(
SELECT
CAST(t1.acct_id AS STRING) as a0
FROM
mstr_wrk.cust_xref t1
)
t1
GROUP BY
t1.a0
)
t1
)
tab1
RIGHT OUTER JOIN
(
SELECT
a0,
a1,
a2
FROM
(
SELECT
(
CASE
WHEN
1 = t1.a1
THEN
t1.a0
ELSE
CAST(NULL AS TIMESTAMP)
END
) as a0, UDFcalldate('TRUNC', UDFcalldate('ADD_TO_DATE',
(
CASE
WHEN
1 = t1.a1
THEN
t1.a0
ELSE
CAST(NULL AS TIMESTAMP)
END
)
, 'D', - 1), 'DD') as a1
FROM
(
SELECT
MAX(t1.a0) as a0,
MAX(t1.a1) as a1
FROM
(
SELECT
load_audit.run_ts as a0,
1 as a1
FROM
mstr_wrk.load_audit
WHERE
val_name = 'card_stg'
)
t1
)
t1
)
tab4
JOIN
(
SELECT
CAST(t1.acct_cd AS STRING) as a0,
CAST(t1.h_acct_cd AS STRING) as a1,
CAST(t1.acct_num AS STRING) as a2,
CAST(t1.load_dt AS TIMESTAMP) as a3,
t1.ts as a4
FROM
mstr_work.acct_crd t1
)
tab3
WHERE
(
tab4.a0 < tab3.a4
)
AND
(
tab4.a1 <= tab3.a3
)
)
tab2
ON (tab1.a0 = tab2.a1)
WHERE
1 =
(
CASE
WHEN
tab1.a0 IS NULL
THEN
1
ELSE
0
END
)
GROUP BY
tab2.a0, tab2.a1, tab2.a2
我尝试过的- 我试图启用CBO和矢量化以及ppd,但是没有运气 我在这里看不到任何小桌子,因此无法尝试地图侧连接 看起来像交叉联接的联接之一可以转换为内部联接 但是无论如何,我可以在这里尝试CTE
请求- 请指导我如何解决它 有没有更好的方法来重写此查询。