drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / _impala_insert_staging drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI [mgupta @ sjc-dev-binn01~] $ hadoop fs -ls / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI 找到27项 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201602 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201603 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201604 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201605 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201606 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201607 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201608 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201609 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201610 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI 
/ month_id = 201611 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201612 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201701 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201702 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201703 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201704 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201705 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201706 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:17 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201707 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201708 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201709 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201710 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201711 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201712 drwxr-xr-x - mgupta supergroup 0 
2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201801 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201802 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:18 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201803 [mgupta @ sjc-dev-binn01~] $ hadoop fs -ls / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 找到3项 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 / company_sid = 0 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 / company_sid = 38527 drwxr-xr-x - mgupta supergroup 0 2018-03-26 22:16 / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 / company_sid = HIVE_DEFAULT_PARTITION [mgupta @ sjc-dev-binn01~] $ hadoop fs -ls / kylin / retailer / qi_basket_brand_bucket_fact / product_hierarchy_type = CI / month_id = 201601 / company_sid = 0 找到1项 -rw-r - r-- 3 mgupta supergroup 2069014 2018-03-26 22:16 /kylin/retailer/qi_basket_brand_bucket_fact/product_hierarchy_type=CI/month_id=201601/company_sid=0/f9466a0068b906cf-6ace7f8500000049_294515768_data.0.parq [mgupta @ sjc-dev-binn01~] $
答案 0 :(得分:1)
您可以尝试以下步骤。
方法1
将数据加载到分区表中。
-- Template: delimited-text table partitioned by part_col.
-- Replace every <...> placeholder and data_typeN with real names/types before running.
create table <table_name> (col1 data_type1, col2 data_type2..)
partitioned by (part_col data_type3)
row format delimited
fields terminated by '<field_delimiter_in_your_data>';
-- One LOAD per target partition; every statement is terminated so the
-- script can be run as a whole.
-- In Hive, LOAD DATA INPATH (without LOCAL) moves the source file into the
-- partition directory, so each statement needs its own source file.
load data inpath '/hdfs/loc/file1' into table <table_name>
partition (<part_col>='201601');

load data inpath '/hdfs/loc/file2' into table <table_name>
partition (<part_col>='201602');

load data inpath '/hdfs/loc/file3' into table <table_name>
partition (<part_col>='201603');
依此类推。
方法2
使用动态分区插入从登台表加载数据到主表。
-- Template: non-partitioned staging table that receives the raw delimited files.
-- The column list is elided ("..") in the template; it has to include the
-- column that the later insert selects as part_col.
create table <staging_table> (col1 data_type1, col2 data_type2..)
row format delimited
fields terminated by '<field_delimiter_in_your_data>';
-- Template: final table, partitioned by part_col.
CREATE TABLE <main_table> (
    col1 data_type1,
    col2 data_type2..
)
PARTITIONED BY (part_col data_type3);
-- Session settings used by the dynamic-partition insert below.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Stage the raw files from the HDFS directory.
LOAD DATA INPATH '/hdfs/loc/directory/' INTO TABLE <staging_table>;

-- Copy staged rows into the partitioned table; part_col is named in the
-- PARTITION clause without a value and is supplied by the SELECT.
INSERT INTO TABLE <main_table>
PARTITION (part_col)
SELECT
    col1,
    col2,
    ....part_col
FROM <staging_table>;
方法2的主要方面是:登台表不分区,主表按 part_col 分区;开启动态分区后,一条 insert ... select 语句即可根据 select 中 part_col 的值把每行数据写入对应的分区。
答案 1 :(得分:0)
让我们创建一个表,在年和月上有一个分区,表中有一个时间戳:
-- Table partitioned by (YEAR, MONTH); open_ts holds the timestamp as a string.
CREATE TABLE `mypart_p`(
`id` bigint,
`open_ts` string
)
PARTITIONED BY (YEAR INT, MONTH INT);
接下来需要用 ALTER TABLE 为该表添加分区。
-- Register the (2020, 1) partition. IF NOT EXISTS keeps the statement
-- re-runnable when it is issued repeatedly (e.g. from a loop over years/months).
ALTER TABLE mypart_p ADD IF NOT EXISTS PARTITION (YEAR=2020, MONTH=1);
每个年份和月份都需要执行一次这样的语句,可以在 Python 中用循环来完成。现在让我们向表中写入数据,并指定这些数据属于哪个分区:
-- Fill the (2020, 1) partition with the matching rows of some_other_table.
-- substring() positions are 1-based: characters 1-4 are compared with the
-- year and characters 6-7 with the month.
-- NOTE(review): assumes open_ts starts with 'YYYY-MM' (one separator
-- character at position 5) - confirm against the source data.
INSERT INTO mypart_p PARTITION (YEAR=2020, MONTH=1)
SELECT
    id,
    open_ts
FROM some_other_table
WHERE substring(open_ts, 1, 4) = '2020'
  AND substring(open_ts, 6, 2) = '01';