如何在Oracle中查找并替换句子中的完整单词(精确匹配)?

时间:2019-09-10 19:11:55

标签: sql oracle pattern-matching

如果句子中的某个单词存在于“word”列中,我想把它替换掉。下面的代码可以完成替换并把结果更新回表中,但它也会替换嵌在其他单词内部的子串。如何才能只匹配并替换完整的单词?

例如:它把 automtestingation 中间的 testing 也替换成了 ID。而我期望的结果是 automtestingation 2,即只替换独立出现的单词。

-- Demo table: each row carries a lookup word (with its id) and a sentence.
CREATE TABLE temp (
  id       NUMBER,
  word     VARCHAR2(1000),
  sentence VARCHAR2(2000)
);

-- Sample rows for the question. Each INSERT names its target columns and
-- uses a VALUES clause (the original omitted VALUES, which does not parse).
insert into temp (id, word, sentence) values (1, 'automation testing', 'automtestingation testing is popular kind of testing');
insert into temp (id, word, sentence) values (2, 'testing', 'manual testing');
insert into temp (id, word, sentence) values (3, 'manual testing', 'this is an old method of testing');

-- Question's original attempt: rewrite each sentence by replacing every
-- occurrence of a value from the WORD column with that row's id.
-- Known problem (the subject of the question): instr()/replace() operate on
-- plain substrings, so a word embedded inside a longer word is replaced too.
merge into temp o
using (
  select s_rid, sentence, is_last from (
    -- One row per (sentence row, word) pair where the word occurs somewhere in
    -- the sentence; "sentence" already has that single word replaced by its id.
    select s.rowid s_rid, w.id word_id, w.word,
      cast(replace(s.sentence, w.word, w.id) as varchar2(4000)) sentence,
      length(w.word) word_length
    from temp w join temp s
    on instr(s.sentence, w.word) > 0
  )
  model
    -- Every target sentence row forms its own partition.
    partition by (s_rid)
    dimension by (
      -- Number the matching words of a sentence, longest word first.
      row_number() over(partition by s_rid order by word_length desc, word) rn
    )
    measures(word_id, word, sentence, 0 is_last)
  rules (
    -- Chain the replacements: cell rn takes the sentence produced by cell rn-1
    -- and replaces the current cell's word with the current cell's id.
    sentence[rn > 1] = replace(sentence[cv()-1], word[cv()], word_id[cv()]),
    -- is_last becomes 1 only for the cell that has no successor (rn+1).
    is_last[any] = presentv(is_last[cv()+1], 0, 1)
  )
) n
-- Only the final cell per sentence (all matching words applied) is written back.
on (o.rowid = n.s_rid and n.is_last = 1)
when matched then update set o.sentence = n.sentence;

在这里一位同事的帮助下获得了这段代码。谢谢。

id word                   sentence
1  automation testing     automtestingation 2 is popular kind of 2
2  testing                3
3  manual testing         this is an old method of 2

2 个答案:

答案 0 :(得分:1)

Oracle设置

-- Demo table: one lookup word (with its id) and one sentence per row.
CREATE TABLE temp (
  id NUMBER,               -- value substituted for the word
  word VARCHAR2(1000),     -- word or phrase to look for
  sentence VARCHAR2(2000)  -- text in which the words are replaced
);

-- Sample rows, including row 4 which exercises punctuation-delimited words.
-- The target columns are listed explicitly so the load does not depend on the
-- table's column order.
INSERT INTO temp ( id, word, sentence )
SELECT 1, 'automation testing', 'automtestingation testing is popular kind of testing' FROM DUAL UNION ALL
SELECT 2, 'testing', 'manual testing' FROM DUAL UNION ALL
SELECT 3, 'manual testing', 'this is an old method of testing' FROM DUAL UNION ALL
SELECT 4, 'punctuation', 'automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL;

合并

-- Replace every whole-word occurrence (case-insensitive) of any value in
-- temp.word inside temp.sentence with that word's id, longest words first,
-- and write the rewritten sentences back to the table.
MERGE INTO temp dst
USING (
  -- ordered_words: one search pattern per word. Words are numbered so that the
  -- longest word gets the highest rn; the recursion below counts rn downwards
  -- and therefore applies the longest word first.
  -- Regex metacharacters inside the word are backslash-escaped so the word is
  -- matched literally; the pattern requires a non-letter (or the string
  -- start/end) on both sides of the word.
  WITH ordered_words ( rn, id, pattern ) AS (
    SELECT ROW_NUMBER() OVER ( ORDER BY LENGTH( word ) ASC, word DESC ),
           id,
           '(^|[^a-z])'
             || REGEXP_REPLACE( word, '([][)(}{.$*+?|^\\])', '\\\1' )
             || '($|[^a-z])'
    FROM   temp
  ),
  -- sentences: recursive CTE. The anchor emits every sentence with
  -- rn = (number of rows) + 1; each recursive step applies the word whose
  -- rn is one lower, until rn = 1 holds the fully rewritten sentence.
  sentences ( rid, sentence, rn ) AS (
    SELECT ROWID,
           sentence,
           COUNT(*) OVER () + 1
    FROM temp
  UNION ALL
    SELECT s.rid,
           -- The replacement runs twice because a match consumes its
           -- delimiter characters, so of two occurrences separated by a
           -- single delimiter only one can match per pass.
           -- '\1' || id is read as backreference 1 followed by the id digits
           -- (Oracle backreferences are single-digit, \1 to \9).
           REGEXP_REPLACE(
             REGEXP_REPLACE(
               s.sentence,
               w.pattern,
               '\1' || w.id || '\2',
               1,
               0,
               'i'
             ),
             w.pattern,
             '\1' || w.id || '\2',
             1,
             0,
             'i'
           ),
           s.rn - 1
    FROM   sentences s
           INNER JOIN ordered_words w
           ON ( s.rn - 1 = w.rn )
  )
  SELECT rid, sentence
  FROM   sentences
  WHERE  rn = 1
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
  UPDATE
  SET    sentence = src.sentence;

输出

ID | WORD               | SENTENCE                                
-: | :----------------- | :---------------------------------------
 1 | automation testing | automtestingation 2 is popular kind of 2
 2 | testing            | 3                                       
 3 | manual testing     | this is an old method of 2              
 4 | punctuation        | 1,3,4,automanual 2-2                    

db<>fiddle 在此


或者,您也可以修改带有 MODEL 子句的 MERGE 语句,使用相同的技术:

-- MODEL-clause variant of the whole-word replacement: same structure as the
-- question's MERGE, but instr()/replace() are swapped for REGEXP_LIKE /
-- REGEXP_REPLACE with a pattern that requires a non-word character (\W) or the
-- string start/end on both sides of the word.
MERGE INTO temp o
USING (
  SELECT s_rid,
         sentence,
         is_last
  FROM   (
    -- One row per (sentence row, word) pair in which the word occurs as a
    -- delimited word; "sentence" already has that single word replaced.
    SELECT s.rowid AS s_rid,
           w.id    AS word_id,
           w.word,
           CAST(
             -- NOTE(review): presumably applied twice because a match consumes
             -- its delimiters, so adjacent occurrences sharing one delimiter
             -- cannot both match in a single pass; confirm.
             REGEXP_REPLACE(
               REGEXP_REPLACE(
                 s.sentence,
                 '(^|\W)' || w.word || '($|\W)',
                 '\1' || w.id || '\2'
               ),
               '(^|\W)' || w.word || '($|\W)',
               '\1' || w.id || '\2'
             )
             as varchar2(4000)
           ) sentence,
           length(w.word) word_length
    FROM   temp w
           JOIN temp s
           ON REGEXP_LIKE(
             s.sentence,
             '(^|\W)' || w.word || '(\W|$)'
           )
  )
  model
    -- Every target sentence row forms its own partition.
    partition by (s_rid)
    dimension by (
      -- Number the matching words of a sentence, longest word first.
      row_number() over(partition by s_rid order by word_length desc, word) rn
    )
    measures(word_id, word, sentence, 0 is_last)
  rules (
    -- Chain the replacements: cell rn takes the sentence produced by cell rn-1
    -- and replaces the current cell's word with the current cell's id.
    sentence[rn > 1] = REGEXP_REPLACE(
                         REGEXP_REPLACE(
                           sentence[cv()-1],
                           '(^|\W)' || word[cv()] || '($|\W)',
                           '\1' || word_id[cv()] || '\2'
                         ),
                         '(^|\W)' || word[cv()] || '($|\W)',
                         '\1' || word_id[cv()] || '\2'
                       ),
    -- is_last becomes 1 only for the cell that has no successor (rn+1).
    is_last[any] = presentv(is_last[cv()+1], 0, 1)
  )
) n
-- Only the final cell per sentence (all matching words applied) is written back.
on (o.rowid = n.s_rid and n.is_last = 1)
when matched then update set o.sentence = n.sentence;

db<>fiddle 在此

答案 1 :(得分:0)

怎么样?

起点:

SQL> select * from temp;

 ID WORD                 SENTENCE
--- -------------------- ------------------------------------------------------------
  1 automation testing   automtestingation testing is popular kind of testing
  2 testing              manual testing
  3 manual testing       this is an old method of testing

SQL>

现在,

  • 进行自连接
  • 使用 instr 检查句子中是否存在该单词
  • 如果存在,则将其替换为 ID

以下是查询:

SQL> merge into temp t  -- replace other rows' words found in each sentence with their ids
  2    using (with
  3           test as
  4             (select a.id aid, a.sentence, b.word, b.id bid,
  5                     instr(a.sentence, b.word) ins  -- position of the word in the sentence, 0 if absent
  6              from temp a join temp b on a.id <> b.id  -- pair each sentence with every other row's word
  7             )
  8           select aid, sentence, word,
  9             regexp_replace(sentence,
 10                            '(^|\s|\W)' || word ||'($|\s|\W)',  -- word bounded by string start/end, whitespace or a non-word character
 11                            case when ins = 1 then to_char(bid)  -- first occurrence at position 1: id without padding
 12                                 else ' '||bid||' '  -- otherwise the matched delimiters are replaced by single spaces
 13                            end) result
 14           from test
 15           where ins > 0  -- keep only pairs where the word occurs (plain substring test)
 16          ) x
 17  on (x.aid = t.id)  -- NOTE(review): x can hold several rows per aid when a sentence contains several words; confirm MERGE tolerates that
 18  when matched then update set t.sentence = x.result;

3 rows merged.

结果:

SQL> select * from temp;

 ID WORD                 SENTENCE
--- -------------------- ------------------------------------------------------------
  1 automation testing   automtestingation 2 is popular kind of 2
  2 testing              3
  3 manual testing       this is an old method of 2

SQL>