我对 Hive 中的数据使用了以下建表语句。
-- External table reading delimited text files under the LOCATION directory.
-- NOTE(review): the field delimiter literal below is empty as written; the
-- separator actually used by the data files still needs to be confirmed.
-- NOTE(review): sample IDs such as 00001111911128052627 do not fit in INT;
-- confirm the intended column type.
CREATE EXTERNAL TABLE IF NOT EXISTS aircel1 (
    subscriberID   INT,
    towerID        STRING,
    dataDownloaded STRING
)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ''
    STORED AS TEXTFILE
    LOCATION '/user/username/name';
数据如下所示，这种数据应该使用什么分隔符？
subId=00001111911128052627towerid=11232w34532543456345623453456984756894756bytes=122112212212212218.4621702216543667E17
subId=00001111911128052639towerid=11232w34532543456345623453456984756894756bytes=122112212212212219.6726312167218586E17
subId=00001111911128052615towerid=11232w34532543456345623453456984756894756bytes=122112212212212216.9431647633139046E17
subId=00001111911128052615towerid=11232w34532543456345623453456984756894756bytes=122112212212212214.7836041833447418E17
subId=00001111911128052639towerid=11232w34532543456345623453456984756894756bytes=122112212212212219.0366596827240525E17
subId=00001111911128052619towerid=11232w34532543456345623453456984756894756bytes=122112212212212218.0686280014540467E17
subId=00001111911128052658towerid=11232w34532543456345623453456984756894756bytes=122112212212212216.9860890496178944E17
subId=00001111911128052652towerid=11232w34532543456345623453456984756894756bytes=122112212212212218.303981333116041E17
答案 0 :(得分:0)
您可以尝试使用正则表达式(我还没有对此进行测试)
-- Parse each line with a regular expression instead of a field delimiter.
-- The pattern has three capture groups, one per declared column, in order.
CREATE EXTERNAL TABLE IF NOT EXISTS aircel1 (
    subscriberID   STRING,
    towerID        STRING,
    dataDownloaded STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = 'subId=(.*)towerid=(.*)bytes=(.*)'
)
LOCATION '/user/username/dirname';
答案 1 :(得分:0)
使用等号作为分隔符,您可以分两步构建表格。
首先,创建一个包含所有字符串列的临时表。
例如，第一列是字符串 00001111911128052627towerid。
然后使用实际数据类型创建“真实表”，并在写入数据时用子串函数去掉多余的字符串 "towerid"（例如从第一列中去掉）。
答案 2 :(得分:0)
我们可以使用以下代码完成任务：
-- Step 1: staging table over the raw files, splitting every line on '='.
-- A line of the form  subId=<id>towerid=<tower>bytes=<n>  yields four fields:
--   del     -> 'subId'          (leading label only; not used afterwards)
--   subid   -> '<id>towerid'    (value with the next label still attached)
--   towerid -> '<tower>bytes'   (value with the next label still attached)
--   bytes   -> '<n>'
create external table table1 (del string, subid string, towerid string, bytes double)
row format delimited
fields terminated by '='
location '/user/murali/';

-- Step 2: target table holding the cleaned values.
create table table2 (subid string, towerid string, bytes double);

-- Step 3: copy the rows, keeping only the value part of each field.
-- The sample rows have 20-character subscriber ids and 41-character tower
-- ids, so the trailing 'towerid' / 'bytes' labels fall outside the substring.
-- NOTE(review): assumes these widths are fixed for the whole data set; confirm.
-- Hive requires INSERT INTO TABLE (or INSERT OVERWRITE TABLE); a bare
-- "insert table" is rejected by the parser.
insert into table table2
select
    substring(subid, 1, 20),
    substring(towerid, 1, 41),
    bytes
from table1;

-- Step 4: inspect the result.
select subid, towerid, bytes from table2;