使用Hive中的子查询创建表

时间:2019-06-13 03:39:17

标签: hive

我想使用Hive中的子查询创建一个表

-- Returns the distinct (meme_ck, region_code) pairs for member coverage rows
-- that pass the coverage filters in the final WHERE clause.
--
-- subquery: person / member identifiers from the data-lake person master,
-- member-person master and member fact tables, keeping only rows whose
-- as-of end dates (parsed as 'MM/dd/yyyy') are later than the current time
-- and whose dmeme_ck is not NULL.
WITH subquery AS (
    SELECT
        dpspm.dpspm_epi_id       AS person_identifier,
        hatmf.dmeme_ck           AS meme_ck,
        hatmf.hmeme_contract_num AS contract_number,
        hatmf.dgrgr_group_num    AS group_identifier
    FROM wcmdr_data_lake.wcm_dpspm_person_master dpspm
    INNER JOIN wcmdr_data_lake.wcm_hmepm_member_person_master hmepm
        ON dpspm.dpspm_sk = hmepm.dpspm_sk
    INNER JOIN wcmdr_data_lake.wcm_hatmf_member_fact hatmf
        ON hatmf.dmeme_sk = hmepm.dmeme_sk
    WHERE unix_timestamp(hatmf.hatmf_as_of_to_dt, 'MM/dd/yyyy') > unix_timestamp()
      AND unix_timestamp(hmepm.hmepm_as_of_to_dt, 'MM/dd/yyyy') > unix_timestamp()
      AND hatmf.dmeme_ck IS NOT NULL
)
SELECT DISTINCT
    sq.meme_ck AS meme_ck,
    CASE
        WHEN gref.program_type = 'Premium' THEN 'Premium'
        ELSE rgn.region_code
    END AS region_code
FROM standard_data.member_coverage cvg
-- The WHERE clause below requires sq.meme_ck and rgn.region_code to be
-- non-NULL, so coverage rows without a match in sq or rgn are dropped even
-- though both are written as outer joins.
LEFT OUTER JOIN subquery sq
    ON CAST(sq.person_identifier AS BIGINT) = cvg.enterprise_person_identifier
   AND sq.contract_number = cvg.contract_number
   AND sq.group_identifier = cvg.group_identifier
LEFT OUTER JOIN (
    SELECT
        rgn.region_code,
        rgn.zip_code,
        rgn.state_code,
        rgn.country
    FROM informatics_data_lake.ref_geographical_region rgn
    -- The original "(x = 'MI' OR x <> 'MI')" style predicates are true for
    -- every non-NULL value, i.e. they are exactly IS NOT NULL checks.
    WHERE rgn.state_code IS NOT NULL
      AND rgn.country IS NOT NULL
      AND (rgn.region_code LIKE 'HPB%' OR rgn.region_code LIKE 'SHS%')
) rgn
    ON rgn.zip_code = cvg.address_1_zip_code
   AND rgn.state_code = cvg.address_1_state_code
-- Group reference row matching the coverage row's exact group / package keys
-- and in effect today (start_date <= today < end_date, as yyyyMMdd integers).
LEFT OUTER JOIN care_management_it.ui_cm_group_reference gref
    ON gref.customer_identifier = cvg.customer_identifier
   AND gref.group_identifier = cvg.group_identifier
   AND gref.group_sub_segment_identifier = cvg.group_sub_segment_identifier
   AND gref.group_class_identifier = cvg.group_class_identifier
   AND gref.package_identifier = cvg.nasco_package_code
   AND gref.mos_benefit_package_identifier = cvg.mos_plan_package_identifier
   AND gref.end_date > CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS INT)
   AND gref.start_date <= CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS INT)
-- Customer-level 'ALL' group reference row. The predicates must constrain
-- gref1 itself; the original referenced gref here, which left gref1
-- unconstrained and attached every reference row whenever gref matched.
-- NOTE(review): gref1 is not referenced in the select list. If the 'ALL' row
-- is meant as a fallback for program_type, the CASE above must also consult
-- gref1.program_type -- confirm the intent.
LEFT OUTER JOIN care_management_it.ui_cm_group_reference gref1
    ON gref1.customer_identifier = cvg.customer_identifier
   AND gref1.group_identifier = 'ALL'
   AND gref1.group_sub_segment_identifier = 'ALL'
   AND gref1.group_class_identifier = 'ALL'
   AND gref1.package_identifier = 'ALL'
   AND gref1.mos_benefit_package_identifier = 'ALL'
   AND gref1.end_date > CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS INT)
   AND gref1.start_date <= CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS INT)
WHERE cvg.member_medical_coverage_indicator = 'Y'
  AND cvg.member_coverage_effective_date <= CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS BIGINT)
  AND cvg.member_coverage_termination_date > CAST(from_unixtime(unix_timestamp(), 'yyyyMMdd') AS BIGINT)
  AND cvg.logically_deleted_indicator = 'N'
  AND cvg.member_medicare_advantage_indicator IN ('N', 'Y')
  -- Equivalent to the original "= 'USA' OR <> 'USA'": any non-NULL value.
  AND cvg.address_1_country_code IS NOT NULL
  AND rgn.region_code IS NOT NULL
  AND sq.meme_ck IS NOT NULL

1 个答案:

答案 0 :(得分:0)

在SELECT DISTINCT之前添加CREATE TABLE AS:

-- In Hive the WITH clause is part of the query that follows AS, so
-- CREATE TABLE ... AS comes first and the CTE definition goes after it.
CREATE TABLE table_name AS

WITH subquery AS ( ...
...
)

SELECT DISTINCT
...