我有一个查询,可以找出两张表中对应行之间的差异,但我还想把重复行也作为差异显示出来。我知道表 actual_orders 中有重复项,而表 expected_orders 中没有重复项。我该如何修改我的查询,使重复行也显示为差异,而不仅仅是数据内容上的差异?
这是我的查询:
select
expected_orders.mk_file_id,actual_orders.mk_file_id,
expected_orders.ind_id, actual_orders.ind_id,
expected_orders.mk_cust_id,actual_orders.mk_cust_id,
expected_orders.order_sk,actual_orders.order_sk,
expected_orders.progen_order_id,actual_orders.progen_order_id,
expected_orders.order_chanel_id,actual_orders.order_chanel_id,
expected_orders.order_date_str,actual_orders.order_date_str,
expected_orders.order_total_usd,actual_orders.order_total_usd,
expected_orders.order_ship_usd,actual_orders.order_ship_usd,
expected_orders.order_discount_usd,actual_orders.order_discount_usd,
expected_orders.order_tax_usd,actual_orders.order_tax_usd,
expected_orders.empty_source_code,actual_orders.empty_source_code,
expected_orders.method_of_payment_code,actual_orders.method_of_payment_code,
expected_orders.feed_id,actual_orders.feed_id,
expected_orders.creation_date_str,actual_orders.creation_date_str,
expected_orders.update_ts_str,actual_orders.update_ts_str,
expected_orders.empty_match_type,actual_orders.empty_match_type,
expected_orders.mp_id,actual_orders.mp_id
from default.expected_orders
FULL OUTER JOIN default.actual_orders
ON (
COALESCE(expected_orders.mk_file_id,-1)=COALESCE(actual_orders.mk_file_id,-1) AND
COALESCE(expected_orders.ind_id,-1)=COALESCE(actual_orders.ind_id,-1)AND
COALESCE(expected_orders.mk_cust_id,'-1')=COALESCE(actual_orders.mk_cust_id,'-1') AND
COALESCE(expected_orders.order_sk,-1)=COALESCE(actual_orders.order_sk,-1)
)where (
COALESCE(expected_orders.mk_file_id,-1)<>COALESCE(actual_orders.mk_file_id,-1) OR
COALESCE(expected_orders.ind_id,-1)<>COALESCE(actual_orders.ind_id,-1) OR
COALESCE(expected_orders.mk_cust_id,'-1')<>COALESCE(actual_orders.mk_cust_id,'-1') OR
COALESCE(expected_orders.order_sk,-1)<>COALESCE(actual_orders.order_sk,-1) OR
COALESCE(expected_orders.progen_order_id,'-1')<>COALESCE(actual_orders.progen_order_id,'-1') OR
COALESCE(expected_orders.order_chanel_id,-1)<>COALESCE(actual_orders.order_chanel_id,-1) OR
COALESCE(expected_orders.order_date_str,'-1')<>COALESCE(actual_orders.order_date_str,'-1') OR
COALESCE(expected_orders.order_total_usd,0.0)<>COALESCE(actual_orders.order_total_usd,0.0) OR
COALESCE(expected_orders.order_ship_usd,0.0)<>COALESCE(actual_orders.order_ship_usd,0.0) OR
COALESCE(expected_orders.order_discount_usd,0.0)<>COALESCE(actual_orders.order_discount_usd,0.0) OR
COALESCE(expected_orders.order_tax_usd,0.0)<>COALESCE(actual_orders.order_tax_usd,0.0) OR
COALESCE(expected_orders.empty_source_code,'-1')<>COALESCE(actual_orders.empty_source_code,'-1') OR
COALESCE(expected_orders.method_of_payment_code,'-1')<>COALESCE(actual_orders.method_of_payment_code,'-1') OR
COALESCE(expected_orders.feed_id,-1)<>COALESCE(actual_orders.feed_id,-1) OR
COALESCE(expected_orders.creation_date_str,'-1')<>COALESCE(actual_orders.creation_date_str,'-1') OR
COALESCE(expected_orders.update_ts_str,'-1')<>COALESCE(actual_orders.update_ts_str,'-1') OR
COALESCE(expected_orders.empty_match_type,'-1')<>COALESCE(actual_orders.empty_match_type,'-1') OR
COALESCE(expected_orders.mp_id,-1)<>COALESCE(actual_orders.mp_id,-1) )
我使用的是 Hive,但我也添加了 SQL 和 Progress 等其他标签。非常感谢任何帮助。
答案 0 :(得分:0)
先从高层次的汇总开始:
select total_rows
,expected_rows
,actual_rows
,record_variations
,count (*) as number_of_keys
from (select count (*) as total_rows
,count (case when tab = 'E' then 1 end) as expected_rows
,count (case when tab = 'A' then 1 end) as actual_rows
,count (distinct rec) as record_variations
from ( select 'E' as tab,struct(*) as rec,mk_file_id,ind_id,mk_cust_id,order_sk from expected_orders
union all select 'A' as tab,struct(*) as rec,mk_file_id,ind_id,mk_cust_id,order_sk from actual_orders
) t
group by mk_file_id
,ind_id
,mk_cust_id
,order_sk
) t
group by total_rows
,expected_rows
,actual_rows
,record_variations
;
然后向下钻取
select mk_file_id
,ind_id
,mk_cust_id
,order_sk
,count (*) as total_rows
,count (case when tab = 'E' then 1 end) as expected_rows
,count (case when tab = 'A' then 1 end) as actual_rows
,count (distinct rec) as record_variations
from ( select 'E' as tab,struct(*) as rec,mk_file_id,ind_id,mk_cust_id,order_sk from expected_orders
union all select 'A' as tab,struct(*) as rec,mk_file_id,ind_id,mk_cust_id,order_sk from actual_orders
) t
group by mk_file_id
,ind_id
,mk_cust_id
,order_sk
-- having ...
;