Hive 中的多层嵌套 JSON

时间:2018-02-07 19:12:06

标签: json hive nested

我有如下嵌套 JSON 输入,想把这些数据导入 Hive 并展开成多行

"taxes": [{ "line_id": 1, "commodity_code": "997159", "fee": { "amt": { "curr_code": "USD", "value": "71.4" }, "type": "receiver" }, "ship_addr": { "admin_area_1": "MAHARASHTRA", "country_code": "IN" }, "total_tax": { "curr_code": "USD", "value": "12.8520000000" }, "tax-details": [{ "exempt_option": false, "auth_name": "India Maharashtra Central GST", "doc_amt": { "currency_code": "USD", "value": "6.43" }, "unrnd_doc_amt": { "currency_code": "USD", "value": "6.4260000000" }, "rate": "0.09", "rate_code": "SR", "non_basis_doc_amt": "0.00", "exempt_doc_amt": "0.00", "jdx_memo": "INSS2: Tax payable in Seller location.", "seller_reg_no": "27AAGCP4442G1ZF", "admin_zone_level": "Country", "auth_type": "CGST", "erp_code": "MHCGST", "inv_desc": "Standard Rate - CGST", "basis_doc_amt": "71.40" }, { "exempt_option": false, "auth_name": "India Maharashtra State GST", "doc_amt": { "currency_code": "USD", "value": "6.43" }, "unrnd_doc_amt": { "currency_code": "USD", "value": "6.4260000000" }, "rate": "0.09", "rate_code": "SR", "non_basis_doc_amt": "0.00", "exempt_doc_amt": "0.00", "jdx_memo": "INSS2: Tax payable in Seller location.", "seller_reg_no": "27AAGCP4442G1ZF", "admin_zone_level": "Province", "auth_type": "SGST", "erp_code": "MHSGST", "inv_desc": "Standard Rate - SGST", "basis_doc_amt": "71.40" }], "transaction_type": "DS" }]

我使用以下查询

-- Flatten the nested "taxes" JSON held in json_2002.json into one row per
-- (tax line, tax detail) pair.
--
-- e.i is the zero-based position of a tax line inside $.taxes[].
-- f.g is the zero-based position of a detail inside that SAME tax line's
--     "tax-details" array; the second lateral view is correlated on e.i so a
--     detail is never paired with a tax line it does not belong to.
--
-- NOTE(review): the sample document spells the key "tax-details" (hyphen);
-- confirm that the deployed Hive version accepts '-' in get_json_object paths.
SELECT
    get_json_object(t.json, concat('$.taxes[', e.i, '].line_id')) AS line_id
  , get_json_object(t.json, concat('$.taxes[', e.i, '].commodity_code')) AS commodity_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].fee.amt.curr_code')) AS curr_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].fee.amt.value')) AS value
  , get_json_object(t.json, concat('$.taxes[', e.i, '].fee.type')) AS type
  , get_json_object(t.json, concat('$.taxes[', e.i, '].ship_addr.admin_area_1')) AS admin_area
  , get_json_object(t.json, concat('$.taxes[', e.i, '].ship_addr.country_code')) AS country_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].total_tax.curr_code')) AS total_tax_curr_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].total_tax.value')) AS total_tax_value
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].exempt_option')) AS exempt_option
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].auth_name')) AS auth_name
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].doc_amt.currency_code')) AS doc_amt_currency_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].doc_amt.value')) AS doc_amt_value
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].unrnd_doc_amt.currency_code')) AS unrnd_doc_amt_currency_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].unrnd_doc_amt.value')) AS unrnd_doc_amt_value
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].rate')) AS rate
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].rate_code')) AS rate_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].non_basis_doc_amt')) AS non_basis_doc_amt
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].exempt_doc_amt')) AS exempt_doc_amount
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].jdx_memo')) AS jdx_memo
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].seller_reg_no')) AS seller_reg_no
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].admin_zone_level')) AS admin_zone_level
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].auth_type')) AS auth_type
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].erp_code')) AS erp_code
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].inv_desc')) AS inv_desc
  , get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[', f.g, '].basis_doc_amt')) AS basis_doc_amt
FROM json_2002 t
    -- line_id values are unquoted numbers, so the extracted text looks like
    -- [1,2] (or a bare 1). Strip the brackets and split on ',' to get exactly
    -- one element per tax line; only the position (i) is used afterwards.
    LATERAL VIEW POSEXPLODE(
        split(
            regexp_replace(get_json_object(t.json, '$.taxes[*].line_id'), '\\[|\\]', ''),
            ','
        )
    ) e AS i, x
    -- exempt_option values are unquoted booleans ([false,false]); the same
    -- strip-and-split yields one element per detail of tax line e.i.
    -- A tax line without any tax-details produces no rows here; switch to
    -- LATERAL VIEW OUTER if such lines must be kept.
    LATERAL VIEW POSEXPLODE(
        split(
            regexp_replace(
                get_json_object(t.json, concat('$.taxes[', e.i, '].tax-details[*].exempt_option')),
                '\\[|\\]',
                ''
            ),
            ','
        )
    ) f AS g, h

1 个答案:

答案 0 :(得分:0)

我可以使用 regexp_replace

解决问题

LATERAL VIEW POSEXPLODE(split(regexp_replace(get_json_object(json,'$.taxes.tax_details[*].exempt_option'),'\\[|\\]',''),',')) f as g,h

解决了我的问题