我正在创建两个表,第一个表具有序列号,第二个表是第一个表的过滤版本,包括原始序列号以及其他列。然后,我在序列号的第一个表和第二个表之间进行左连接,以创建第三个表。当我尝试对CTE执行相同的操作以便创建1个表而不是3个表时,联接不起作用。
我成功地将这两个表作为CTE,但是当我对它们进行左连接以获得第三张表时,结果是不正确的。
create table purchases_base_auto(serial int, ts timestamp, query_string string, package string, tags string);
insert into purchases_base_auto partition(dt)
select row_number() over() serial, regexp_replace(regexp_replace(get_json_object(regexp_replace(w.air_json, '@timestamp', 'ts'),'$.ts'),'T',' '),'Z',' ') tstamp, get_json_object(regexp_replace(w.air_json, 'query-string', 'query_string'), '$.query_string') query_string, get_json_object(w.air_json, '$.package') package,
get_json_object(w.air_json, '$.tags') tags
,w.dt from wifi.wifi_airlogs w
where
((get_json_object(w.air_json, '$.event') = 'purchase') and w.dt = date_add(current_date,-7) ;
create table purchases_join_auto(serial int, ts timestamp, query_string string, package string, tags string, depart timestamp, arrive timestamp);
insert into purchases_join_auto partition(dt) select * from (select w.serial, w.ts tstamp, w.query_string,w.package,w.tags,
f.ACT_DPRT_DTMZ depart, f.ACT_ARRV_DTMZ arrive
,w.dt from purchases_base_auto w inner join sb_wifi.ft_data f on w.tail = f.registration_id) as q where
(q.tstamp between CAST(from_unixtime(unix_timestamp(q.depart) - 840) as timestamp) and CAST(from_unixtime(unix_timestamp(q.arrive) + 840) as timestamp));
create table purchases_final_auto(serial int, ts timestamp, query_string string, package string, tags string, depart timestamp, arrive timestamp);
insert into purchases_final_auto partition(dt) select w.serial, w.ts, w.query_string, w.package, w.tags, f.depart, f.arrive, w.dt from purchases_base_auto w left outer join purchases_join_auto f on w.serial = f.serial;
与CTE相同的事物在创建第三张表的最终联接中出现问题
select row_number() over() serial, regexp_replace(regexp_replace(get_json_object(regexp_replace(w.air_json, '@timestamp', 'ts'),'$.ts'),'T',' '),'Z',' ') tstamp,
get_json_object(regexp_replace(w.air_json, 'query-string', 'query_string'), '$.query_string') query_string, get_json_object(w.air_json, '$.package') package,
get_json_object(w.air_json, '$.tags') tags
from wifi_airlogs w
where
((get_json_object(w.air_json, '$.event') = 'purchase') and w.dt = '2019-04-15')),
jtable as (
select btable.serial serialt, btable.tstamp, btable.query_string, btable.package, btable.tags, f.ACT_DPRT_DTMZ depart, f.ACT_ARRV_DTMZ arrive
from btable inner join 041519_flifo_data f on btable.tail = f.registration_id where
(btable.tstamp between CAST(from_unixtime(unix_timestamp(f.ACT_DPRT_DTMZ) - 840) as timestamp) and CAST(from_unixtime(unix_timestamp(f.ACT_ARRV_DTMZ) + 840) as timestamp)))
create table final_cte as select btable.serial serialt, btable.tstamp, btable.tail, btable.query_string, btable.package, btable.tags, jtable.depart, jtable.arrive
from btable left outer join jtable on btable.serial = jtable.serialt;
我希望CTE方法产生与上述相同的结果,但是返回的结果不同(联接未正确发生)