在 Hive 中只有定义为 STRING 的列能正常加载数据,定义为 INT 和 DOUBLE 的列全部为 NULL
-- Staging table for the LendingClub LoanStats3a export (one row per loan).
--
-- NOTE(review): the plain DELIMITED row format below splits on ',' only; it
-- does not strip the double quotes that surround each field in the file.
-- The quotes therefore stay inside STRING values and make the INT / DOUBLE
-- columns unparseable, so those columns read back as NULL. A CSV-aware
-- SerDe (e.g. org.apache.hadoop.hive.serde2.OpenCSVSerde) is needed to
-- load quoted data correctly.
CREATE TABLE A (
    `id`                          STRING,
    `member_id`                   STRING,
    `loan_amnt`                   DOUBLE,
    `funded_amnt`                 DOUBLE,
    `funded_amnt_inv`             DOUBLE,
    `term`                        STRING,
    `int_rate`                    STRING,
    `installment`                 DOUBLE,
    `grade`                       STRING,
    `sub_grade`                   STRING,
    `emp_title`                   STRING,
    `emp_length`                  STRING,
    `home_ownership`              STRING,
    -- was misspelled `nnual_inc`
    `annual_inc`                  INT,
    `verification_status`         STRING,
    `issue_d`                     STRING,
    `loan_status`                 STRING,
    `pymnt_plan`                  STRING,
    `url`                         STRING,
    -- `desc` is a reserved word, so the backticks are required here
    `desc`                        STRING,
    `purpose`                     STRING,
    `title`                       STRING,
    `zip_code`                    STRING,
    `addr_state`                  STRING,
    `dti`                         DOUBLE,
    `delinq_2yrs`                 INT,
    `earliest_cr_line`            STRING,
    `inq_last_6mths`              STRING,
    `mths_since_last_delinq`      STRING,
    `mths_since_last_record`      STRING,
    `open_acc`                    INT,
    `pub_rec`                     INT,
    `revol_bal`                   INT,
    `revol_util`                  STRING,
    `total_acc`                   INT,
    `initial_list_status`         STRING,
    `out_prncp`                   DOUBLE,
    `out_prncp_inv`               DOUBLE,
    `total_pymnt`                 DOUBLE,
    `total_pymnt_inv`             DOUBLE,
    `total_rec_prncp`             DOUBLE,
    `total_rec_int`               DOUBLE,
    `total_rec_late_fee`          DOUBLE,
    `recoveries`                  DOUBLE,
    `collection_recovery_fee`     DOUBLE,
    `last_pymnt_d`                STRING,
    `last_pymnt_amnt`             DOUBLE,
    `next_pymnt_d`                STRING,
    `last_credit_pull_d`          STRING,
    `collections_12_mths_ex_med`  INT,
    `mths_since_last_major_derog` STRING,
    `policy_code`                 STRING,
    `application_type`            STRING,
    `annual_inc_joint`            STRING,
    `dti_joint`                   STRING,
    `verification_status_joint`   STRING,
    `acc_now_delinq`              STRING,
    `tot_coll_amt`                STRING,
    `tot_cur_bal`                 STRING,
    `open_acc_6m`                 STRING,
    `open_il_6m`                  STRING,
    `open_il_12m`                 STRING,
    `open_il_24m`                 STRING,
    `mths_since_rcnt_il`          STRING,
    `total_bal_il`                STRING,
    `il_util`                     STRING,
    -- trailing space inside the backticks removed
    `open_rv_12m`                 STRING,
    `open_rv_24m`                 STRING,
    `max_bal_bc`                  STRING,
    `all_util`                    STRING,
    `total_credit_rv`             STRING,
    `inq_fi`                      STRING,
    `total_fi_tl`                 STRING,
    `inq_last_12m`                STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
-- Copy the LoanStats3a export from the local filesystem into table A
-- (INTO without OVERWRITE keeps any rows already in the table).
LOAD DATA LOCAL INPATH '/home/cloudera/Desktop/Project-3/1/LoanStats3a.txt'
INTO TABLE A;
hive> SELECT * FROM A LIMIT 1;
"1077501" "1296599" NULL NULL NULL " 36 months" "10.65%" NULL "B" "B2" "" "10+ years" "RENT" NULL "Verified" "Dec-2011" "Fully Paid" "n" "https://www.lendingclub.com/browse/loanDetail.action?loan_id=1077501" "Borrower added on 12/22/11 > I need to upgrade my business technologies."
"credit_card" "Computer" "860xx" "AZ" NULL NULL "Jan-1985" "1" "" "" NULL NULL NULL "83.7%" NULL "f" NULL NULL NULL NULL NULL NULL NULL NULL NULL "Jan-2015" NULL "" "Dec-2015" NULL "" "1" "INDIVIDUAL" "" "" "" "0" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""
答案 0 :(得分:0)
您的 CSV 文件中各个字段似乎都带有引号。Hive 默认的分隔文本格式(ROW FORMAT DELIMITED)不会去除字段两侧的引号,因此引号会成为字段值的一部分。对于字符串字段,引号会成为字符串的一部分;对于数字字段,引号会使该值成为无效数字,从而得到 NULL。
如需能够处理 CSV 文件中带引号字段的 SerDe,请参阅 csv-serde(Hive 0.14 起已内置为 org.apache.hadoop.hive.serde2.OpenCSVSerde)。
答案 1 :(得分:0)
我找到了解决方案:使用 OpenCSVSerde 建表,如下所示:
-- LendingClub loan statistics table that reads quoted CSV correctly.
--
-- OpenCSVSerde splits on separatorChar and strips quoteChar from each field,
-- so quoted values no longer corrupt the columns.
--
-- NOTE(review): per the Hive CSV SerDe documentation, OpenCSVSerde exposes
-- every column as STRING regardless of the type declared here; the declared
-- types are kept as in the original answer. CAST in queries (or a view on
-- top of this table) if typed values are required -- confirm against the
-- Hive version in use.
CREATE TABLE stat2 (
    id                          STRING,
    member_id                   INT,
    loan_amnt                   FLOAT,
    funding_amnt                FLOAT,
    funding_amnt_inv            FLOAT,
    term                        STRING,
    int_rate                    STRING,
    installment                 FLOAT,
    grade                       STRING,
    sub_grade                   STRING,
    emp_title                   STRING,
    emp_length                  STRING,
    home_ownership              STRING,
    annual_inc                  FLOAT,
    verification_status         STRING,
    issue_d                     DATE,
    loan_status                 STRING,
    pymnt_plan                  STRING,
    url                         STRING,
    descp                       STRING,
    purpose                     STRING,
    title                       STRING,
    zip_code                    STRING,
    addr_state                  STRING,
    dti                         FLOAT,
    delinq_2yrs                 FLOAT,
    earliest_cr_line            STRING,
    inq_last_6mths              FLOAT,
    mths_since_last_delinq      FLOAT,
    mths_since_last_record      FLOAT,
    open_acc                    FLOAT,
    pub_rec                     FLOAT,
    revol_bal                   FLOAT,
    revol_util                  STRING,
    total_acc                   FLOAT,
    initial_list_status         STRING,
    out_prncp                   FLOAT,
    out_prncp_inv               FLOAT,
    total_pymnt                 FLOAT,
    total_pymnt_inv             FLOAT,
    total_rec_prncp             FLOAT,
    total_rec_int               FLOAT,
    total_rec_late_fee          FLOAT,
    recoveries                  FLOAT,
    collection_recovery_fee     FLOAT,
    last_pymnt_d                STRING,
    last_pymnt_amnt             FLOAT,
    next_pymnt_d                STRING,
    last_credit_pull_d          STRING,
    collections_12_mths_ex_med  FLOAT,
    mths_since_last_major_derog FLOAT,
    policy_code                 FLOAT,
    application_type            STRING,
    annual_inc_joint            FLOAT,
    dti_joint                   FLOAT,
    verification_status_joint   STRING,
    acc_now_delinq              FLOAT,
    tot_coll_amt                FLOAT,
    tot_cur_bal                 FLOAT,
    open_acc_6m                 FLOAT,
    open_il_6m                  FLOAT,
    open_il_12m                 FLOAT,
    open_il_24m                 FLOAT,
    mths_since_rcnt_il          FLOAT,
    total_bal_il                FLOAT,
    il_util                     FLOAT,
    open_rv_12m                 FLOAT,
    open_rv_24m                 FLOAT,
    max_bal_bc                  FLOAT,
    all_util                    FLOAT,
    total_rev_hi_lim            FLOAT,
    inq_fi                      FLOAT,
    total_cu_tl                 FLOAT,
    inq_last_12m                FLOAT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = ",",
    "quoteChar"     = "\""
)
STORED AS TEXTFILE
-- Ignore the leading 2 and trailing 4 lines of each data file
-- (non-data header/footer lines in the export).
TBLPROPERTIES (
    "skip.header.line.count" = "2",
    "skip.footer.line.count" = "4"
);