在 Oracle 中计算两个字符串里按顺序匹配的单词数

时间:2013-10-09 11:05:32

标签: sql oracle stored-procedures

我想要一个查询,返回两个字符串中按顺序匹配的单词数,例如:

Id  column1               column2     result   
1   'foo bar live'        'foo bar'       2  
2   'foo live tele'       'foo tele'      1  
3   'bar foo live'        'foo bar live'  0 

为了获取匹配单词的总出现次数,我目前使用的是:

-- Counts the words that column1 and column2 have in common, ignoring case
-- and word position: each string is rewritten as a collection constructor
-- ('a b' -> sys.dbms_debug_vc2coll('a','b')), the two collections are
-- intersected, and the generated statement is executed through dbms_xmlgen
-- so that its single value can be read back out of the XML.
-- Single quotes in the data are doubled before being spliced in; without
-- that, a word such as "don't" would end the generated string literal early
-- and break (or alter) the dynamic statement.
-- NOTE(review): "table" looks like a placeholder for the real table name;
-- it is left exactly as written.
select id, column1,column2,
extractvalue(dbms_xmlgen.getxmltype('select cardinality (
  sys.dbms_debug_vc2coll(''' || replace(replace(lower(column1), '''', ''''''), ' ', ''',''' ) || ''') multiset intersect
  sys.dbms_debug_vc2coll('''||replace(replace(lower(column2), '''', ''''''), ' ', ''',''' )||'''))  x from dual'), '//text()') cnt
from table.

有谁能建议一个思路类似的查询来计算顺序匹配吗?我希望把顺序匹配的数量和总出现次数一起显示出来。

3 个答案:

答案 0 :(得分:3)

就个人而言,在这种情况下,我会选择 PL/SQL 代码而不是纯 SQL。类似这样:

包规范:

-- Public interface of PKG.
create or replace package PKG
as
  -- NumOfSeqWords
  --   p_str1, p_str2 : the two space-separated word lists to compare.
  --   returns        : how many words, counted from the start of both
  --                    strings, are identical and in the same position.
  function NumOfSeqWords (p_str1 in varchar2,
                          p_str2 in varchar2)
    return number;
end PKG;

包体:

create or replace package body PKG is
  -- Returns the number of words that p_str1 and p_str2 share in sequence,
  -- counted from the first word of each string; counting stops at the first
  -- pair of words that differ or as soon as either string runs out of words.
  --
  --   p_str1, p_str2 : space-separated word lists (comparison is case-sensitive).
  --   returns        : the count of matching leading words; 0 when the first
  --                    words differ or when either argument is null/empty.
  --
  -- Leading, trailing and repeated spaces are ignored, so 'foo  bar' and
  -- 'foo bar' are treated as the same two words.
  function NumOfSeqWords(
    p_str1 in varchar2,
    p_str2 in varchar2
  ) return number
  is
    -- Locals are sized to the PL/SQL varchar2 maximum so that neither a long
    -- argument nor a single long word can raise ORA-06502.
    -- trim + regexp_replace leave exactly one space between words, which
    -- guarantees that every delimiter found below is preceded by a word.
    l_str1     varchar2(32767) := regexp_replace(trim(p_str1), ' +', ' ');
    l_str2     varchar2(32767) := regexp_replace(trim(p_str2), ' +', ' ');
    l_res      number  default 0;
    l_del_pos1 number;
    l_del_pos2 number;
    l_word1    varchar2(32767);
    l_word2    varchar2(32767);
  begin
    loop
      l_del_pos1 := instr(l_str1, ' ');
      l_del_pos2 := instr(l_str2, ' ');

      -- Take the next word off the front of each string. When no delimiter
      -- is left (position 0) the remainder is the last word; when the string
      -- is already exhausted instr returns null and the word becomes null.
      if l_del_pos1 > 0 then
        l_word1 := substr(l_str1, 1, l_del_pos1 - 1);
        l_str1  := substr(l_str1, l_del_pos1 + 1);
      else
        l_word1 := l_str1;
        l_str1  := null;
      end if;

      if l_del_pos2 > 0 then
        l_word2 := substr(l_str2, 1, l_del_pos2 - 1);
        l_str2  := substr(l_str2, l_del_pos2 + 1);
      else
        l_word2 := l_str2;
        l_str2  := null;
      end if;

      -- A null word means one of the strings has no words left.
      exit when (l_word1 is null) or (l_word2 is null) or
                (l_word1 <> l_word2);

      l_res := l_res + 1;
    end loop;
    return l_res;
  end;
end;

测试用例:

 -- Runs pkg.NumOfSeqWords over the three sample rows from the question.
 with sample_rows (id1, col1, col2) as (
   select 1, 'foo bar live',  'foo bar'      from dual
   union all
   select 2, 'foo live tele', 'foo tele'     from dual
   union all
   select 3, 'bar foo live',  'foo bar live' from dual
 )
 select s.id1,
        s.col1,
        s.col2,
        pkg.NumOfSeqWords(s.col1, s.col2) as res
 from sample_rows s;

结果:

       ID1 COL1          COL2                RES
---------- ------------- ------------ ----------
         1 foo bar live  foo bar               2
         2 foo live tele foo tele              1
         3 bar foo live  foo bar live          0

答案 1 :(得分:3)

为什么要放弃纯查询的方法呢?我知道它有点复杂,也希望有人能在此基础上加以改进;不过我是利用空闲时间做的这件事,倒也靠它熬过了一个下午的电话……

此处是 SQLFiddle 示例

-- For every row of Table1, RESULT is the number of leading words that
-- column1 and column2 share in the same order (0 when the first words
-- differ). Rows are listed from the longest common word prefix to the
-- shortest.
SELECT Table1.id,
       Table1.column1,
       Table1.column2,
       max(nvl(t.l,0)) RESULT
FROM (
  -- One candidate per (source row, LEVEL). A candidate survives the WHERE
  -- only when the text up to the LEVEL-th space is identical in both
  -- columns, so the highest surviving LEVEL is the length of the common
  -- word prefix.
  SELECT  id,
          column1,
          column2,
          LEVEL l,
          decode(LEVEL,
                     1,
                 substr(column1, 1, instr(column1,' ', 1, LEVEL) -1),
                 substr(column1, 1, (instr(column1,' ', 1, LEVEL )))
                 )  sub1,
          decode(LEVEL,
                     1,
                 substr(column2, 1, instr(column2,' ', 1, LEVEL) -1),
                 substr(column2, 1, (instr(column2,' ', 1, LEVEL )))
                 )  sub2
     -- A trailing space is appended so that the last word is delimited too.
     FROM (SELECT id,
                  column1 || ' ' column1,
                  column2 || ' ' column2
             FROM Table1)
    WHERE  decode(LEVEL,
                      1,
                  substr(column1, 1, instr(column1,' ', 1, LEVEL) -1),
                  substr(column1, 1, (instr(column1,' ', 1, LEVEL )))
                 )  =
           decode(LEVEL,
                      1,
                  substr(column2, 1, instr(column2,' ', 1, LEVEL) -1),
                  substr(column2, 1, (instr(column2,' ', 1, LEVEL )))
                 )
  START WITH column1 IS NOT NULL
  CONNECT BY instr(column1,' ', 1, LEVEL) > 0
         -- Keep each hierarchy on its own source row. Without a PRIOR
         -- condition every row connects to every other row at each level and
         -- the intermediate row count grows exponentially with the table size.
         AND PRIOR id = id
         -- A non-deterministic PRIOR expression stops Oracle from reporting
         -- the row-to-itself step as a CONNECT BY loop (ORA-01436).
         AND PRIOR sys_guid() IS NOT NULL
  ) t
-- The outer join keeps rows without any matching prefix; nvl turns their
-- missing level into 0.
RIGHT OUTER JOIN Table1 ON trim(t.column1) = Table1.column1
                       AND trim(t.column2) = Table1.column2
                       AND t.id = Table1.id
GROUP BY  Table1.id,
          Table1.column1,
          Table1.column2
ORDER BY  max(nvl(t.l,0)) DESC

答案 2 :(得分:0)

我知道这个问题很老,但我找到了一个很好的解决方案:

您可以从这里进行测试 https://rextester.com/l/oracle_online_compiler

-- For each sample row, TEST is the number of (col1 word, col2 word) pairs
-- whose upper-cased text is equal. Word position is not taken into account:
-- every word of col1 is compared with every word of col2.
SELECT id1,
       col1,
       col2,
       (
         SELECT COUNT(*)
         FROM
           -- Words of col1: spaces become commas and every comma becomes ","
           -- so that the quoted result "w1","w2",... can be handed to
           -- XMLTABLE, which is read here as one row per word.
           (SELECT UPPER(TO_CHAR(TRIM(SUBSTR(column_value, 0, LENGTH(column_value))))) left_word
            FROM XMLTABLE(('"' || REPLACE(REPLACE(col1, ' ', ','), ',', '","') || '"'))
            WHERE UPPER(TO_CHAR(TRIM(SUBSTR(column_value, 0, LENGTH(column_value))))) IS NOT NULL
           ) left_words,
           -- Words of col2, produced the same way.
           (SELECT UPPER(TO_CHAR(TRIM(SUBSTR(column_value, 0, LENGTH(column_value))))) right_word
            FROM XMLTABLE(('"' || REPLACE(REPLACE(col2, ' ', ','), ',', '","') || '"'))
            WHERE UPPER(TO_CHAR(TRIM(SUBSTR(column_value, 0, LENGTH(column_value))))) IS NOT NULL
           ) right_words
         WHERE left_words.left_word = right_words.right_word
       ) test
FROM (
       SELECT 1 id1, 'foo bar live' col1, 'foo bar' col2 FROM dual
       UNION ALL
       SELECT 2, 'foo live tele pepe gato coche', 'bar foo live tele perro gato' FROM dual
       UNION ALL
       SELECT 3, 'bar foo live tele perro gato', 'foo bar live' FROM dual
     ) sample_rows