Error converting CSV to Parquet using an EMR cluster

Asked: 2018-03-20 07:26:29

Tags: hive hiveql emr parquet

I have created an EMR cluster with a Hive script as one of the added execution steps. Here is what my Hive script looks like:

-- Create a Hive external table for the existing data
CREATE EXTERNAL TABLE calls_csv (
  id int, campaign_id int, campaign_name string, offer_id int, offer_name string,
  is_offer_not_found int, ivr_key string, call_uuid string, a_leg_uuid string, a_leg_request_uuid string,
  to_number string, promo_id int, description string, call_type string, answer_type string,
  agent_id int, from_number string, from_caller_name string, from_line_type string, from_state string,
  from_city string, from_country string, from_zip string, from_latitude string, from_longitude string,
  b_leg_uuid string, b_leg_number string, b_leg_duration int, b_leg_bill_rate double, b_leg_bill_duration int,
  b_leg_total_cost double, b_leg_hangup_cause string, b_leg_start_time string, b_leg_answer_time string, b_leg_end_time string,
  b_leg_active tinyint, bill_rate double, bill_duration int, hangup_cause string, start_time string,
  answer_time string, end_time string, status string, selected_ivr_keys string, processed_ivr_keys string,
  filter_id int, filter_name string, ivr_action string, selected_zip_code string, processed_zip_code string,
  duration int, payout double, min_duration int, connected_duration int, provider_cost double,
  caller_id_cost double, total_revenue double, total_cost double, total_profit double, publisher_id int,
  publisher_name string, publisher_revenue double, publisher_cost double, publisher_profit double, advertiser_id int,
  advertiser_name string, advertiser_cost double, is_test tinyint, is_sale tinyint, is_repeat tinyint,
  is_machine_detection tinyint, no_of_call_transfer int, offer_ivr_status tinyint, file_url string, algo string,
  callback_service_status tinyint, hangup_service_status tinyint, sms_uuid string, number_name string, keyword string,
  keywordmatchtype string, created_at string, updated_at string, ymdhm bigint
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
  'separatorChar' = ',',
  'quoteChar' = '\"'
)
LOCATION 's3://calls-csv/'
TBLPROPERTIES ('has_encrypted_data' = 'false',
               'serialization.null.format' = '');

MSCK REPAIR TABLE calls_csv;
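A quick probe query such as the following is one way to confirm that the OpenCSVSerde table actually parses the data in S3 before running the conversion; this is only an optional sanity check, and the column choice and LIMIT here are arbitrary:

-- Sanity check (optional): confirm the CSV-backed table returns parsed rows
SELECT id, campaign_name, start_time
FROM calls_csv
LIMIT 10;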

-- Now let's create an external table in Parquet format
CREATE EXTERNAL TABLE calls_parquet (
  id int, campaign_id int, campaign_name string, offer_id int, offer_name string,
  is_offer_not_found int, ivr_key string, call_uuid string, a_leg_uuid string, a_leg_request_uuid string,
  to_number string, promo_id int, description string, call_type string, answer_type string,
  agent_id int, from_number string, from_caller_name string, from_line_type string, from_state string,
  from_city string, from_country string, from_zip string, from_latitude string, from_longitude string,
  b_leg_uuid string, b_leg_number string, b_leg_duration int, b_leg_bill_rate double, b_leg_bill_duration int,
  b_leg_total_cost double, b_leg_hangup_cause string, b_leg_start_time string, b_leg_answer_time string, b_leg_end_time string,
  b_leg_active tinyint, bill_rate double, bill_duration int, hangup_cause string, start_time string,
  answer_time string, end_time string, status string, selected_ivr_keys string, processed_ivr_keys string,
  filter_id int, filter_name string, ivr_action string, selected_zip_code string, processed_zip_code string,
  duration int, payout double, min_duration int, connected_duration int, provider_cost double,
  caller_id_cost double, total_revenue double, total_cost double, total_profit double, publisher_id int,
  publisher_name string, publisher_revenue double, publisher_cost double, publisher_profit double, advertiser_id int,
  advertiser_name string, advertiser_cost double, is_test tinyint, is_sale tinyint, is_repeat tinyint,
  is_machine_detection tinyint, no_of_call_transfer int, offer_ivr_status tinyint, file_url string, algo string,
  callback_service_status tinyint, hangup_service_status tinyint, sms_uuid string, number_name string, keyword string,
  keywordmatchtype string, created_at string, updated_at string, ymdhm bigint
)
STORED AS PARQUET
LOCATION 's3://calls-parquet/';
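Because the conversion step below uses SELECT *, the two tables must declare the same columns in the same order; a simple way to double-check that is to compare the two schemas (again only a sanity check, not a fix for the error that follows):

-- Optional schema comparison; column order must match for INSERT ... SELECT *
DESCRIBE calls_csv;
DESCRIBE calls_parquet;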

-- Time to convert and export. This step will run for a long time, depending on your data size and cluster size.
INSERT OVERWRITE TABLE calls_parquet SELECT * FROM calls_csv;

Below is the error I get when this step runs on the EMR cluster:

Status: FAILED

Details: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. Error moving: s3://calls-parquet/.hive-staging_hive_2018-03-20_07-09-28_592_6773618098932115163-1/-ext-10000 into: s3://calls-parquet/
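Since the failure happens while moving files out of the .hive-staging directory that Hive creates under the S3 output location, one workaround I am considering (unverified) is to point the staging directory at HDFS via the standard hive.exec.stagingdir property, so the final step becomes a copy from HDFS into S3 rather than an S3-to-S3 move; the /tmp path below is arbitrary:

-- Unverified workaround sketch: stage intermediate output on HDFS instead of S3,
-- then let the final move copy from HDFS into s3://calls-parquet/
SET hive.exec.stagingdir=/tmp/hive-staging;

INSERT OVERWRITE TABLE calls_parquet SELECT * FROM calls_csv;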

JAR location: command-runner.jar
Main class: None

Arguments: hive-script --run-hive-script --args -f s3://calls-scripts/converToParquetHive.sql -d INPUT=s3://calls-csv -d OUTPUT=s3://calls-parquet
Action on failure: Continue

0 Answers
