如何将逗号分隔值转换为oracle中的行?

时间:2016-07-14 10:37:22

标签: sql oracle

这是DDL -

-- Demo table: each row stores a comma-separated list of codes in one column.
create table tbl1 (
   id number,
   value varchar2(50)
);

-- Explicit column lists keep the inserts valid if columns are added or reordered later.
insert into tbl1 (id, value) values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 (id, value) values (2, 'AA, UT, SX');
insert into tbl1 (id, value) values (3, 'UT, SK, SX, ZF');

注意,此处 value 列的值是以逗号分隔的字符串。

但是,我们需要结果如下 -

ID VALUE
-------------
1  AA
1  UT
1  BT
1  SK
1  SX
2  AA
2  UT
2  SX
3  UT
3  SK
3  SX
3  ZF

我们如何为此编写SQL?

5 个答案:

答案 0 :(得分:12)

我同意这是一个非常糟糕的设计。 如果您无法更改该设计,请尝试此操作:

-- Split every comma-separated list into one row per token.
-- The CONNECT BY condition has no PRIOR term, so a row at level N can be
-- attached to any row at level N-1; the same (id, token, level) combination
-- is therefore produced many times and DISTINCT collapses the duplicates.
SELECT DISTINCT
       id,
       TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL)) AS value,
       LEVEL
FROM   tbl1
CONNECT BY REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY id,
         LEVEL;

**输出**

id value level
1   AA  1
1   UT  2
1   BT  3
1   SK  4
1   SX  5
2   AA  1
2   UT  2
2   SX  3
3   UT  1
3   SK  2
3   SX  3
3   ZF  4

(此解法的出处:见原帖中的“this”链接。)

以更优雅、更高效的方式避免产生重复行(感谢 @mathguy):

-- Same split without DISTINCT.
-- PRIOR id = id restricts each hierarchy to rows sharing the same id, so no
-- cross-row duplicates are generated.
-- PRIOR SYS_GUID() IS NOT NULL is always true; it is the usual trick to stop
-- Oracle from treating the self-join on id as a CONNECT BY loop.
SELECT id,
       TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL)) AS value,
       LEVEL
FROM   tbl1
CONNECT BY PRIOR id = id
       AND PRIOR SYS_GUID() IS NOT NULL
       AND REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY id,
         LEVEL;

如果您想要更接近 ANSI 标准的写法,请使用递归 CTE:

-- Recursive subquery factoring.
-- Anchor member: token 1 of every row that has at least one token.
-- Recursive member: token lev + 1 of the same source string, until
-- REGEXP_SUBSTR finds no further token.
WITH split_rows (id, res, src, lev) AS (
    SELECT id,
           TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, 1)),
           value,
           1
    FROM   tbl1
    WHERE  REGEXP_SUBSTR(value, '[^,]+', 1, 1) IS NOT NULL
    UNION ALL
    SELECT id,
           TRIM(REGEXP_SUBSTR(src, '[^,]+', 1, lev + 1)),
           src,
           lev + 1
    FROM   split_rows
    WHERE  REGEXP_SUBSTR(src, '[^,]+', 1, lev + 1) IS NOT NULL
)
SELECT id,
       res,
       lev
FROM   split_rows
ORDER BY id,
         lev;

**输出**

id  res lev
1   AA  1
1   UT  2
1   BT  3
1   SK  4
1   SX  5
2   AA  1
2   UT  2
2   SX  3
3   UT  1
3   SK  2
3   SX  3
3   ZF  4

MT0 提出的另一种递归方法,不使用正则表达式:

-- Recursive split that uses only INSTR/SUBSTR (no regular expressions).
--   start_pos: position of the first character of the current token
--   end_pos:   position of the comma that ends the token, 0 for the last token
WITH t ( id, value, start_pos, end_pos ) AS
  ( SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
  UNION ALL
  SELECT id,
    value,
    end_pos                    + 1,
    INSTR( value, ',', end_pos + 1 )
  FROM t
  -- stop once the last token (end_pos = 0) has been emitted
  WHERE end_pos > 0
  )
SELECT id,
  -- For the last token there is no closing comma, so cut up to LENGTH + 1.
  -- TRIM drops the blank that follows each comma in the sample data, so the
  -- result is 'UT' rather than ' UT', matching the regex-based solutions.
  TRIM(
    SUBSTR(
      value,
      start_pos,
      CASE end_pos WHEN 0 THEN LENGTH( value ) + 1 ELSE end_pos END - start_pos
    )
  ) AS value
FROM t
ORDER BY id,
  start_pos;

我用一个包含 30000 行的数据集测试了这些方法(共返回 118104 行),平均耗时如下:

  • 我的递归方法:5秒
  • MT0 的方法:4秒
  • Mathguy 的方法:16秒
  • MT0 的无正则表达式递归方法:3.45秒

@Mathguy还测试了更大的数据集:

  

在所有情况下,递归查询(我只测试了使用普通 substr 和 instr 的那一个)都表现更好,快 2 到 5 倍。下面列出“字符串数量 x 每个字符串的标记数”的各种组合,以及层次查询与递归查询的 CTAS 执行时间(层次查询在前)。所有时间的单位均为秒。

  • 30,000 x 4:5/1。
  • 30,000 x 10:15/3。
  • 30,000 x 25:56/37。
  • 5,000 x 50:33/14。
  • 5,000 x 100:160/81。
  • 10,000 x 200:1,924 / 772

答案 1 :(得分:4)

这样可以直接得到各个值,既不需要去除重复项,也不必使用在 CONNECT BY 中加入 SYS_GUID() 或 DBMS_RANDOM.VALUE() 这类取巧手段:

-- For each row of TBL1 the correlated MULTISET subquery builds a collection
-- with one element per token (LEVEL runs from 1 to the token count), and
-- TABLE() expands that collection back into rows.
-- The comma join is deliberate: the TABLE() expression is correlated to t.
SELECT t.id,
       v.COLUMN_VALUE AS value
FROM   TBL1 t,
       TABLE(
         CAST(
           MULTISET(
             SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
             FROM   DUAL
             CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
           )
           AS SYS.ODCIVARCHAR2LIST
         )
       ) v;

**更新**

返回列表中元素的索引:

选项1 - 返回UDT:

-- Object type holding one (lvl, value) pair.
CREATE TYPE string_pair AS OBJECT (
  lvl   INT,
  value VARCHAR2(4000)
);
/

-- Nested-table collection of string_pair objects.
CREATE TYPE string_pair_table AS TABLE OF string_pair;
/

-- Returns id plus the attributes of string_pair (lvl, value) for every token:
-- each collection element carries LEVEL together with the trimmed token.
SELECT src.id,
       pair.*
FROM   TBL1 src,
       TABLE(
         CAST(
           MULTISET(
             SELECT string_pair(
                      LEVEL,
                      TRIM( REGEXP_SUBSTR( src.value, '[^,]+', 1, LEVEL ) )
                    )
             FROM   DUAL
             CONNECT BY LEVEL <= REGEXP_COUNT( src.value, '[^,]+' )
           ) AS string_pair_table
         )
       ) pair;

选项2 - 使用ROW_NUMBER()

-- Same split as the SYS.ODCIVARCHAR2LIST version, with the element index
-- derived afterwards: ROW_NUMBER() numbers the rows of each id in ROWNUM order.
-- NOTE(review): this presumes rows come back in collection order - confirm.
SELECT src.id,
       tok.COLUMN_VALUE AS value,
       ROW_NUMBER() OVER ( PARTITION BY id ORDER BY ROWNUM ) AS lvl
FROM   TBL1 src,
       TABLE(
         CAST(
           MULTISET(
             SELECT TRIM( REGEXP_SUBSTR( src.value, '[^,]+', 1, LEVEL ) )
             FROM   DUAL
             CONNECT BY LEVEL <= REGEXP_COUNT( src.value, '[^,]+' )
           ) AS SYS.ODCIVARCHAR2LIST
         )
       ) tok;

答案 2 :(得分:1)

Vercelli 发布了正确答案。但是,如果要拆分的字符串不止一个,connect by 会生成数量呈指数级增长的行,其中包含大量重复行。(去掉 distinct 再运行该查询即可看到。)对于规模稍大的数据,这会严重拖垮性能。

克服此问题的一种常见方法是使用prior条件和附加检查以避免层次结构中的循环。像这样:

-- Split each list into one row per token without generating duplicates.
-- PRIOR id = id ties a child row to a parent with the same id.
-- PRIOR SYS_GUID() IS NOT NULL is always true and is the extra check that
-- prevents Oracle from treating the hierarchy as a loop.
SELECT id,
       TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL)) AS value,
       LEVEL
FROM   tbl1
CONNECT BY PRIOR id = id
       AND PRIOR SYS_GUID() IS NOT NULL
       AND REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY id,
         LEVEL;

例如,请参阅有关OTN的讨论:https://community.oracle.com/thread/2526535

答案 3 :(得分:1)

另一种方法是定义一个简单的 PL/SQL 函数:

-- Splits i_str on every occurrence of i_delim and returns the pieces, in
-- order, as a SYS.ODCIVARCHAR2LIST.
--   i_str   : string to split; NULL/empty input yields an empty list
--   i_delim : delimiter, defaults to ','
-- Pieces are returned as-is (no trimming). The text after the last delimiter
-- is always appended, so a trailing delimiter produces a final NULL element.
CREATE OR REPLACE FUNCTION split_String(
  i_str    IN  VARCHAR2,
  i_delim  IN  VARCHAR2 DEFAULT ','
) RETURN SYS.ODCIVARCHAR2LIST DETERMINISTIC
AS
  l_str_len    CONSTANT NUMBER(5) := LENGTH( i_str );
  l_delim_len  CONSTANT NUMBER(5) := LENGTH( i_delim );
  l_tokens     SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST();
  l_from       NUMBER(5) := 1;   -- first character of the current piece
  l_hit        NUMBER(5);        -- position of the next delimiter, 0 if none
BEGIN
  -- Nothing to split: LENGTH is NULL for a NULL/empty string.
  IF l_str_len IS NULL OR l_str_len <= 0 THEN
    RETURN l_tokens;
  END IF;

  -- Emit one piece per delimiter found.
  LOOP
    l_hit := INSTR( i_str, i_delim, l_from );
    EXIT WHEN l_hit IS NULL OR l_hit <= 0;
    l_tokens.EXTEND;
    l_tokens( l_tokens.COUNT ) := SUBSTR( i_str, l_from, l_hit - l_from );
    l_from := l_hit + l_delim_len;
  END LOOP;

  -- Remainder after the last delimiter (or the whole string if none was found).
  IF l_from <= l_str_len + 1 THEN
    l_tokens.EXTEND;
    l_tokens( l_tokens.COUNT ) := SUBSTR( i_str, l_from, l_str_len - l_from + 1 );
  END IF;

  RETURN l_tokens;
END;
/

然后SQL变得非常简单:

-- One row per token of t.value, produced by split_String.
-- split_String returns the pieces untrimmed; TRIM removes the blank that
-- follows each comma in the sample data so the result is 'UT', not ' UT'.
SELECT t.id,
       TRIM( v.column_value ) AS value
FROM   TBL1 t,
       TABLE( split_String( t.value ) ) v;

答案 4 :(得分:0)

-- Rows to a comma-separated string: one row per department, with the first
-- names of its employees concatenated in alphabetical order.
-- The output alias spelling is kept unchanged because it is the column name.
SELECT department_id,
       LISTAGG(first_name, ',') WITHIN GROUP (ORDER BY first_name) comma_separted_data
FROM   hr.employees
GROUP BY department_id;

-- Comma-separated string to rows: demo table with one name list per department.

CREATE TABLE t (
    deptno          NUMBER,
    employee_name   VARCHAR2(255)
);

-- Explicit column lists keep the inserts valid if columns are added or reordered later.
INSERT INTO t (deptno, employee_name) VALUES (
    10,
    'mohan,sam,john'
);

INSERT INTO t (deptno, employee_name) VALUES (
    20,
    'manideeep,ashok,uma'
);

INSERT INTO t (deptno, employee_name) VALUES (
    30,
    'gopal,gopi,manoj'
);

-- One output row per name in employee_name.
-- The LATERAL row generator yields l = 1 .. (comma count + 1) for each row of
-- t, and REGEXP_SUBSTR picks the l-th comma-delimited name.
SELECT
    deptno,
    employee_name,
    regexp_count(employee_name, ',') + 1 AS name_count,
    regexp_substr(employee_name, '[^,]+', 1, l) AS single_name
FROM
    t,
    LATERAL (
        SELECT
            level l
        FROM
            dual
        CONNECT BY
            -- "<=" so that the last name is generated as well
            level <= regexp_count(employee_name, ',') + 1
    );

-- Remove the demo table.
DROP TABLE t;