我正在从 Impala 表创建 DataFrame,如下所示。其 schema 输出(printSchema)见下文。
import org.apache.spark.sql.hive._;
// Create a HiveContext from `sc`, which is defined outside this snippet
// (presumably the SparkContext provided by spark-shell — confirm).
val hctx = new HiveContext(sc)

// Read the table via a SQL query; a single row is fetched, which is enough
// to inspect the resulting DataFrame's schema.
val dfSql = hctx.sql("select * from df2_rs.rs_trans_2 limit 1")
当我直接使用该表底层的 Parquet 文件创建 DataFrame 时,得到的 schema 却不同(同一列在前者中是 string 类型,在后者中是 binary 类型)。schema 如下。两者最初都来自同一批 Parquet 文件,为什么会存在 schema 差异?
// Load the files under the month_id=201606 directory directly as Parquet,
// instead of going through a SQL query on the table.
val df201606 = hctx.parquetFile("/user/hive/warehouse/df2_rs.db/trans_2/month_id=201606")

// NOTE(review): Spark SQL has a `spark.sql.parquet.binaryAsString` setting for
// Parquet files whose writers do not distinguish strings from binary data;
// if these files were written by Impala, that may be relevant to the
// string-vs-binary columns printed below — confirm.
df201606.printSchema()
dfSql.printSchema 的 schema 输出如下:
root
|-- supplier_id: short (nullable = true)
|-- msa_seq_nbr: decimal(20,0) (nullable = true)
|-- prod_surro_id: long (nullable = true)
|-- mp_surro_id: long (nullable = true)
|-- otlt_surro_id: integer (nullable = true)
|-- payer_pln_surro_id: long (nullable = true)
|-- btch_nbr: long (nullable = true)
|-- store_nbr: string (nullable = true)
|-- dspnsd_dt: timestamp (nullable = true)
|-- btch_supplier_seq_nbr: long (nullable = true)
|-- store_chnl_typ_cd: string (nullable = true)
|-- typ_cd: string (nullable = true)
|-- typ_cd_actn_cd: byte (nullable = true)
|-- cmf_prod_nbr: string (nullable = true)
|-- cmf_pack_nbr: string (nullable = true)
|-- otlt_ctry_cd: string (nullable = true)
|-- otlt_pstl_cd: string (nullable = true)
|-- otlt_seq_nbr: string (nullable = true)
|-- otlt_actn_cd: byte (nullable = true)
|-- qty: decimal(18,3) (nullable = true)
|-- qty_actn_cd: byte (nullable = true)
|-- authd_rfll_nbr: byte (nullable = true)
|-- authd_rfll_cd_actn_cd: byte (nullable = true)
|-- pay_typ_cd: short (nullable = true)
|-- pay_typ_cd_actn_cd: byte (nullable = true)
|-- ddsupp_cnt: decimal(18,3) (nullable = true)
|-- ddsupp_cnt_actn_cd: byte (nullable = true)
|-- dspnsd_ndc: string (nullable = true)
|-- dspnsd_ndc_actn_cd: byte (nullable = true)
|-- mp_id: string (nullable = true)
|-- mtch_cd: long (nullable = true)
|-- usc_cd: string (nullable = true)
|-- std_payer_id: string (nullable = true)
|-- std_pln_id: string (nullable = true)
|-- msk_payer_id: string (nullable = true)
|-- msk_pln_id: string (nullable = true)
|-- corrn_id: long (nullable = true)
|-- enc_id: string (nullable = true)
|-- pat_nbr: long (nullable = true)
|-- dacon_qty: decimal(18,3) (nullable = true)
|-- dacon_qty_actn_cd: byte (nullable = true)
|-- dosg_qty: decimal(18,3) (nullable = true)
|-- dosg_qty_actn_cd: byte (nullable = true)
|-- cre_dt: timestamp (nullable = true)
|-- std_pln_cd_ind: short (nullable = true)
|-- supplier_pln_cd: string (nullable = true)
|-- supplier_pln_id_actn_cd: short (nullable = true)
|-- pcn: string (nullable = true)
|-- bin_id: string (nullable = true)
|-- grp_nbr: string (nullable = true)
|-- pln_id_brdg_cd: short (nullable = true)
|-- cust_prc_amt: decimal(18,3) (nullable = true)
|-- strg: string (nullable = true)
|-- store_cost_amt: decimal(18,3) (nullable = true)
|-- store_cost_amt_actn_cd: string (nullable = true)
|-- co_pay_amt: decimal(18,3) (nullable = true)
|-- co_pay_cd: string (nullable = true)
|-- pat_pharmy_owe_amt: decimal(18,3) (nullable = true)
|-- pat_pay_ind: string (nullable = true)
|-- pharmy_npi_id: string (nullable = true)
|-- prscr_npi: string (nullable = true)
|-- diag_icd9_cd: string (nullable = true)
|-- orig_cd: string (nullable = true)
|-- pat_zip3: string (nullable = true)
|-- claim_id: long (nullable = true)
|-- fill_nbr: integer (nullable = true)
|-- data_use_qlfr_cd: string (nullable = true)
|-- patient_id: long (nullable = true)
|-- otlt_ncpdp_id: string (nullable = true)
|-- mp_dea: string (nullable = true)
|-- provider_id: long (nullable = true)
|-- revsl_updt_dt: timestamp (nullable = true)
|-- dup_updt_dt: timestamp (nullable = true)
|-- corrn_updt_dt: timestamp (nullable = true)
|-- supplier_pat_id: string (nullable = true)
|-- diag_cd: string (nullable = true)
|-- diag_vers_typ_id: byte (nullable = true)
|-- month_id: integer (nullable = true)
df201606.printSchema 的 schema 输出如下:
root
|-- supplier_id: integer (nullable = true)
|-- msa_seq_nbr: decimal(20,0) (nullable = true)
|-- prod_surro_id: long (nullable = true)
|-- mp_surro_id: long (nullable = true)
|-- otlt_surro_id: integer (nullable = true)
|-- payer_pln_surro_id: long (nullable = true)
|-- btch_nbr: long (nullable = true)
|-- store_nbr: binary (nullable = true)
|-- dspnsd_dt: timestamp (nullable = true)
|-- btch_supplier_seq_nbr: long (nullable = true)
|-- store_chnl_typ_cd: binary (nullable = true)
|-- typ_cd: binary (nullable = true)
|-- typ_cd_actn_cd: integer (nullable = true)
|-- cmf_prod_nbr: binary (nullable = true)
|-- cmf_pack_nbr: binary (nullable = true)
|-- otlt_ctry_cd: binary (nullable = true)
|-- otlt_pstl_cd: binary (nullable = true)
|-- otlt_seq_nbr: binary (nullable = true)
|-- otlt_actn_cd: integer (nullable = true)
|-- qty: decimal(18,3) (nullable = true)
|-- qty_actn_cd: integer (nullable = true)
|-- authd_rfll_nbr: integer (nullable = true)
|-- authd_rfll_cd_actn_cd: integer (nullable = true)
|-- pay_typ_cd: integer (nullable = true)
|-- pay_typ_cd_actn_cd: integer (nullable = true)
|-- ddsupp_cnt: decimal(18,3) (nullable = true)
|-- ddsupp_cnt_actn_cd: integer (nullable = true)
|-- dspnsd_ndc: binary (nullable = true)
|-- dspnsd_ndc_actn_cd: integer (nullable = true)
|-- mp_id: binary (nullable = true)
|-- mtch_cd: long (nullable = true)
|-- usc_cd: binary (nullable = true)
|-- std_payer_id: binary (nullable = true)
|-- std_pln_id: binary (nullable = true)
|-- msk_payer_id: binary (nullable = true)
|-- msk_pln_id: binary (nullable = true)
|-- corrn_id: long (nullable = true)
|-- enc_id: binary (nullable = true)
|-- pat_nbr: long (nullable = true)
|-- dacon_qty: decimal(18,3) (nullable = true)
|-- dacon_qty_actn_cd: integer (nullable = true)
|-- dosg_qty: decimal(18,3) (nullable = true)
|-- dosg_qty_actn_cd: integer (nullable = true)
|-- cre_dt: timestamp (nullable = true)
|-- std_pln_cd_ind: integer (nullable = true)
|-- supplier_pln_cd: binary (nullable = true)
|-- supplier_pln_id_actn_cd: integer (nullable = true)
|-- pcn: binary (nullable = true)
|-- bin_id: binary (nullable = true)
|-- grp_nbr: binary (nullable = true)
|-- pln_id_brdg_cd: integer (nullable = true)
|-- cust_prc_amt: decimal(18,3) (nullable = true)
|-- strg: binary (nullable = true)
|-- store_cost_amt: decimal(18,3) (nullable = true)
|-- store_cost_amt_actn_cd: binary (nullable = true)
|-- co_pay_amt: decimal(18,3) (nullable = true)
|-- co_pay_cd: binary (nullable = true)
|-- pat_pharmy_owe_amt: decimal(18,3) (nullable = true)
|-- pat_pay_ind: binary (nullable = true)
|-- pharmy_npi_id: binary (nullable = true)
|-- prscr_npi: binary (nullable = true)
|-- diag_icd9_cd: binary (nullable = true)
|-- orig_cd: binary (nullable = true)
|-- pat_zip3: binary (nullable = true)
|-- claim_id: long (nullable = true)
|-- fill_nbr: integer (nullable = true)
|-- data_use_qlfr_cd: binary (nullable = true)
|-- patient_id: long (nullable = true)
|-- otlt_ncpdp_id: binary (nullable = true)
|-- mp_dea: binary (nullable = true)
|-- provider_id: long (nullable = true)
|-- revsl_updt_dt: timestamp (nullable = true)
|-- dup_updt_dt: timestamp (nullable = true)
|-- corrn_updt_dt: timestamp (nullable = true)
|-- supplier_pat_id: binary (nullable = true)
|-- diag_cd: binary (nullable = true)
|-- diag_vers_typ_id: integer (nullable = true)