我正在尝试在 SQL Server 中重现一个在 Hive 中用 posexplode 函数完成的处理过程。
我有一个大型数据集,如下所示:
userid sku qty
1 abc 2427022 3
2 abc 1883575|2427022 1|3
3 def 2427022|1562336|153842 1|1|1
4 ghi 2427022 3
我希望数据看起来像:
userid sku qty
1 abc 2427022 3
2 abc 1883575 1
3 abc 2427022 3
4 def 2427022 1
5 def 1562336 1
6 def 153842 1
7 ghi 2427022 3
谢谢!
答案 0 :(得分:1)
我明白了。如果有人在拆分两个带分隔符的列时遇到类似的问题,可以参考下面的代码来获得所需的输出。我使用 SQL Server 中的 CROSS APPLY 运算符和 XML,得到了与在 Hive 中使用 posexplode 函数相同的输出。
-- Create test table.
-- Sku and Qty each hold a pipe-delimited list; the Nth Qty entry belongs to the
-- Nth Sku entry of the same row. userid is not unique ('abc' appears twice).
CREATE TABLE Table1 (userid VARCHAR(MAX), Sku VARCHAR(MAX), Qty VARCHAR(MAX));

-- Explicit column list so the fixture does not depend on column order.
-- Values carry no padding: the split query returns each list element verbatim,
-- so stray whitespace inside a literal would show up in the result.
INSERT INTO Table1 (userid, Sku, Qty)
VALUES
    ('abc', '2427022',                '3'),
    ('abc', '1883575|2427022',        '1|3'),
    ('def', '2427022|1562336|153842', '1|1|1'),
    ('ghi', '2427022',                '3');
-- Explode the pipe-delimited Sku and Qty columns of Table1 into one output row
-- per list element, pairing the Nth sku with the Nth qty of the SAME source row
-- (the SQL Server counterpart of Hive's posexplode).
--
-- Why the lists are not numbered globally and joined on that number:
-- ROW_NUMBER() ordered only by userid has ties (every element of a row, and
-- every row sharing a userid), so two independent numberings may disagree and
-- pair a sku with the wrong qty. Here each element's ordinal is computed inside
-- its own XML list and both lists are shredded per source row, so no row key
-- is required.
--
-- Output columns: userid, Sku, qty.
-- If a row's two lists differ in length, the unmatched trailing elements are
-- dropped (inner pairing on position).
;WITH Shredded AS
(
    SELECT
        t.userid,
        -- FOR XML PATH('') entity-encodes &, < and > so the CAST cannot fail on
        -- values that contain XML markup characters. The <R> wrapper gives every
        -- <M> a common parent element for the position expression below.
        CAST('<R><M>'
             + REPLACE((SELECT t.Sku AS [*] FOR XML PATH('')), '|', '</M><M>')
             + '</M></R>' AS XML) AS SkuData,
        CAST('<R><M>'
             + REPLACE((SELECT t.Qty AS [*] FOR XML PATH('')), '|', '</M><M>')
             + '</M></R>' AS XML) AS QtyData
    FROM Table1 AS t
)
SELECT
    s.userid,
    sku_item.Sku,
    qty_item.qty
FROM Shredded AS s
CROSS APPLY
(
    SELECT
        m.x.value('.', 'VARCHAR(100)') AS Sku,
        -- 1-based ordinal: count the siblings that precede this node in
        -- document order (<< is the XQuery node-order comparison).
        m.x.value('for $i in . return count(../*[. << $i]) + 1', 'INT') AS Pos
    FROM s.SkuData.nodes('/R/M') AS m(x)
) AS sku_item
CROSS APPLY
(
    SELECT
        m.x.value('.', 'VARCHAR(100)') AS qty,
        m.x.value('for $i in . return count(../*[. << $i]) + 1', 'INT') AS Pos
    FROM s.QtyData.nodes('/R/M') AS m(x)
) AS qty_item
-- Keep only the pairs that sit at the same position in their lists.
WHERE sku_item.Pos = qty_item.Pos;