将CSV字段拆分为SQL中的不同行

时间:2016-09-07 11:30:21

标签: sql csv

我的一位同事在处理COBOL程序时遇到了这个问题,并最终在应用程序层面解决了它。我仍然很好奇是否可以用SQL在数据访问层面解决它。这与另一个问题(this other question)有一定关联,但我只想使用ANSI SQL。

我正在寻找单个SQL select查询,该查询作用于包含可变长度CSV行的VARCHAR字段。查询的目的是将每个CSV字段拆分为自己的结果集行。

以下是架构和数据的示例(这里是fiddle):

-- Sample schema and data: every row holds one comma-separated list of
-- variable length in a single VARCHAR column.
-- The column name is written unquoted: "field" is an ordinary identifier, and
-- MySQL-style backticks are rejected by engines that follow ANSI quoting rules.
CREATE TABLE table1 (
    field varchar(100)
);

INSERT INTO table1 (field)
VALUES
    ('Hello,world,!'),
    ('Haloa,!'),
    ('Have,a,nice,day,!');

以下是我想从查询中得到的输出:

Hello
world
!
Haloa
!
Have
a
nice
day
!

使用的CSV分隔符是逗号,现在我不担心转义。

3 个答案:

答案 0 :(得分:3)

据我所知,这是ANSI SQL:

-- Split every comma-separated value of table1.field into its own result row.
--
-- word_list columns:
--   word     - the value extracted at this step
--   rest     - the text still to be split; NULL once the last word was taken
--   field_id - a number identifying the source row
--   level    - 1-based position of the word inside its source row
with recursive word_list (word, rest, field_id, level) as (
  -- Anchor member: take the first word of every source row.
  select case
            -- No comma at all: the whole value is the one and only word.
            -- (Without this guard the substring length would be -1.)
            when position(',' in field) = 0 then field
            else substring(field from 1 for position(',' in field) - 1)
         end as word,
         case
            when position(',' in field) = 0 then null
            else substring(field from position(',' in field) + 1)
         end as rest,
         -- The window has no ORDER BY, so the numbering only groups the words
         -- of one row together; it does not promise a particular row order.
         row_number() over () as field_id,
         1 as level
  from table1
  union all
  -- Recursive member: peel the next word off the remaining text.
  -- It reads only from word_list (no join back to table1), so duplicate
  -- values in table1.field are not multiplied.
  select case
            when position(',' in p.rest) = 0 then p.rest
            else substring(p.rest from 1 for position(',' in p.rest) - 1)
         end as word,
         case
            when position(',' in p.rest) = 0 then null
            else substring(p.rest from position(',' in p.rest) + 1)
         end as rest,
         p.field_id,
         p.level + 1
  from word_list as p
  -- Stop once the previous step consumed the last word of the row.
  where p.rest is not null
)
select word
from word_list
order by field_id, level;

这假设field中的值是唯一的。

以下是一个正在运行的示例:http://rextester.com/NARS7464

答案 1 :(得分:0)

在Oracle中你可以使用类似下面的写法(也许不是最优雅的,但能给出你想要的结果)——只需将 tab 替换为 your_table_name:

WITH
-- Number the source rows so that all values taken from one field stay
-- together in the final ordering.
numbered AS (
    SELECT t.field,
           ROWNUM AS r
    FROM tab t
),
-- Recursive subquery factoring: each step removes the leading value from the
-- remaining text, so any number of rows and any number of values per row is
-- handled (no fixed list of positions or row numbers).
--   r    - source row number
--   n    - 1-based position of the value inside its row
--   col  - the value extracted at this step
--   rest - text still to be split; NULL once nothing is left
pieces (r, n, col, rest) AS (
    -- Anchor member: first value of every row.
    SELECT s.r,
           1,
           CASE
               -- No comma: the whole field is the only value.
               WHEN INSTR(s.field, ',') = 0 THEN s.field
               ELSE SUBSTR(s.field, 1, INSTR(s.field, ',') - 1)
           END,
           CASE
               WHEN INSTR(s.field, ',') = 0 THEN NULL
               ELSE SUBSTR(s.field, INSTR(s.field, ',') + 1)
           END
    FROM numbered s
    UNION ALL
    -- Recursive member: next value of the same row.
    SELECT p.r,
           p.n + 1,
           CASE
               WHEN INSTR(p.rest, ',') = 0 THEN p.rest
               ELSE SUBSTR(p.rest, 1, INSTR(p.rest, ',') - 1)
           END,
           CASE
               WHEN INSTR(p.rest, ',') = 0 THEN NULL
               ELSE SUBSTR(p.rest, INSTR(p.rest, ',') + 1)
           END
    FROM pieces p
    WHERE p.rest IS NOT NULL
)
SELECT col
FROM pieces
-- Empty values (e.g. from ",," or a leading comma) come out as NULL,
-- because Oracle treats a zero-length string as NULL; they are skipped.
WHERE col IS NOT NULL
-- Explicit ordering: rows in source-row order, values in field order.
ORDER BY r, n

运行后得到如下结果:

1   Hello
2   world
3   !
4   Haloa
5   !
6   Have
7   a
8   nice
9   day
10  !

答案 2 :(得分:0)

FWIW,这是另一种Oracle特定方法。也许至少会给出一个想法或帮助未来的搜索者。

SQL> WITH tbl (rownbr, col1) AS (
         -- Inline sample data standing in for a real table.
         SELECT 1, 'Hello,world,!'     FROM dual UNION
         SELECT 2, 'Haloa,!'           FROM dual UNION
         SELECT 3, 'Have,a,nice,day,!' FROM dual
     )
     -- One output row per (source row, element index). regexp_substr returns
     -- capture group 1 (the text before the delimiter) of the
     -- column_value-th match of the pattern.
     SELECT rownbr,
            column_value AS substring_nbr,
            regexp_substr(col1, '(.*?)(,|$)', 1, column_value, NULL, 1)
     FROM tbl,
          -- Row generator correlated to tbl: produces the numbers
          -- 1 .. (number of commas in col1) + 1 as a collection of numbers.
          TABLE(
              CAST(
                  MULTISET(
                      SELECT LEVEL
                      FROM dual
                      CONNECT BY LEVEL <= REGEXP_COUNT(col1, ',') + 1
                  ) AS sys.OdciNumberList
              )
          )
     ORDER BY rownbr, substring_nbr;

    ROWNBR SUBSTRING_NBR REGEXP_SUBSTR(COL
---------- ------------- -----------------
         1             1 Hello
         1             2 world
         1             3 !
         2             1 Haloa
         2             2 !
         3             1 Have
         3             2 a
         3             3 nice
         3             4 day
         3             5 !

10 rows selected.

SQL>