SQL:去除最长公共前缀

时间:2019-10-13 09:49:28

标签: mysql sql string prefix


col1    | col2
bar     | foo
foo     | foobar
bar1foo | bar2foo


  -- Sample table for the question: two NOT NULL latin1 string columns of at
  -- most 20 characters, compared case-insensitively (latin1_general_ci).
  CREATE TABLE `tbl1` (
    `col1` varchar(20) COLLATE latin1_general_ci NOT NULL,
    `col2` varchar(20) COLLATE latin1_general_ci NOT NULL
  ) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;

-- Sample pairs: no common prefix, col1 fully contained as a prefix of col2,
-- and a shared three-character prefix ('bar').
INSERT INTO tbl1
  (col1, col2)
VALUES
  ('bar',     'foo'),
  ('foo',     'foobar'),
  ('bar1foo', 'bar2foo');


bar  | foo
     | bar
1foo | 2foo


-- Question's original approach: strip the longest common prefix of col1/col2
-- with integer arithmetic.
--   1. REVERSE both strings so the common prefix becomes the low-order bytes.
--   2. HEX + CONV(..., 16, 10) turn each reversed string into an integer.
--   3. ABS(a - b) has zero low-order bytes wherever the prefixes agree.
--   4. HEX + TRIM(TRAILING '0') drops those zero bytes; CEIL(LENGTH/2) is the
--      number of bytes that remain, i.e. the non-common tail length.
--   5. GREATEST(LENGTH, LENGTH) minus that tail length is the prefix length,
--      and SUBSTR starts one character after it.
-- Limitations: CONV works with 64-bit precision, so this only holds for values
-- of at most 8 bytes, and it assumes single-byte characters.
SELECT
  SUBSTR(`col1`, 1+GREATEST(LENGTH(`col1`), LENGTH(`col2`)) - CEIL(LENGTH(TRIM(TRAILING '0' FROM HEX(ABS(CONV(HEX(REVERSE(`col1`)),16,10) - CONV(HEX(REVERSE(`col2`)),16,10)))))/2)),
  SUBSTR(`col2`, 1+GREATEST(LENGTH(`col1`), LENGTH(`col2`)) - CEIL(LENGTH(TRIM(TRAILING '0' FROM HEX(ABS(CONV(HEX(REVERSE(`col1`)),16,10) - CONV(HEX(REVERSE(`col2`)),16,10)))))/2))
FROM tbl1;

简短说明:将字符串反转(REVERSE),将其转换为整数(HEX 和 CONV),相减并取绝对值(- 和 ABS),转换为十六进制表示形式(HEX),从结尾开始修剪 0(TRIM),再从最长字符串的长度中减去此结果的长度(-、LENGTH 和 GREATEST),最后由 SUBSTR 用于获取结果。


  • 不适用于长度超过64位的字符串。
  • 不适用于包含多字节字符的字符串
  • 很长很丑
  • 性能不好。

2 个答案:

答案 0 :(得分:1)


  -- Strip the longest common prefix of col1 and col2 without CTEs.
  -- g.maxlen is the largest n for which the first n characters of both
  -- columns are equal; each column is then cut after that many characters.
  select
    substring(t.col1, g.maxlen + 1) col1,
    substring(t.col2, g.maxlen + 1) col2
  from tbl1 t inner join (
    select t.col1, t.col2,
      max(case when left(col1, tt.n) = left(col2, tt.n) then tt.n else 0 end) maxlen
    from tbl1 t inner join (
      -- Candidate prefix lengths 0..20 (the columns are varchar(20)).
      -- n = 0 always matches, so a pair containing an empty string still
      -- produces a row instead of being dropped by the inner join.
      select 0 n union all
      select 1 union all select 2 union all select 3 union all select 4 union all
      select 5 union all select 6 union all select 7 union all select 8 union all
      select 9 union all select 10 union all select 11 union all select 12 union all
      select 13 union all select 14 union all select 15 union all select 16 union all
      select 17 union all select 18 union all select 19 union all select 20
    ) tt on least(length(t.col1), length(t.col2)) >= tt.n
    group by t.col1, t.col2
  ) g on g.col1 = t.col1 and g.col2 = t.col2;

对于 MySQL 8.0+,可以使用递归 CTE(recursive CTE);在这种情况下,无需事先知道列的长度:

  -- Strip the longest common prefix of col1 and col2 (MySQL 8.0+).
  -- `lengths` generates the candidate prefix lengths 0..N, where N is the
  -- largest "shorter side" length found in tbl1, so the column width does not
  -- have to be known in advance.
  with recursive lengths as (
    -- Start at 0: a zero-length prefix always matches, which keeps pairs
    -- containing an empty string in the result.
    select 0 n
    union all
    select n + 1
    from lengths
    where n < (select max(least(length(col1), length(col2))) from tbl1)
  ),
  -- For every distinct pair, the longest n whose leading n characters agree.
  cte as (
    select t.col1, t.col2,
      max(case when left(col1, l.n) = left(col2, l.n) then l.n else 0 end) maxlen
    from tbl1 t inner join lengths l
    on least(length(t.col1), length(t.col2)) >= l.n
    group by t.col1, t.col2
  )
  select
    substring(t.col1, c.maxlen + 1) col1,
    substring(t.col2, c.maxlen + 1) col2
  from tbl1 t inner join cte c
  on c.col1 = t.col1 and c.col2 = t.col2;


| col1 | col2 |
| ---- | ---- |
|      | bar  |
| bar  | foo  |
| 1foo | 2foo |

答案 1 :(得分:1)


-- Strip the longest common prefix using a fixed CASE ladder.
-- The ladder is checked from the longest candidate down, so the first match is
-- the longest common prefix. It covers lengths 20..1 because the columns are
-- varchar(20); extend it if the columns are widened.
select substr(col1, prefix_length + 1),
       substr(col2, prefix_length + 1)
from (select tbl1.*,
             (case when left(col1, 20) = left(col2, 20) then 20
                   when left(col1, 19) = left(col2, 19) then 19
                   when left(col1, 18) = left(col2, 18) then 18
                   when left(col1, 17) = left(col2, 17) then 17
                   when left(col1, 16) = left(col2, 16) then 16
                   when left(col1, 15) = left(col2, 15) then 15
                   when left(col1, 14) = left(col2, 14) then 14
                   when left(col1, 13) = left(col2, 13) then 13
                   when left(col1, 12) = left(col2, 12) then 12
                   when left(col1, 11) = left(col2, 11) then 11
                   when left(col1, 10) = left(col2, 10) then 10
                   when left(col1, 9) = left(col2, 9) then 9
                   when left(col1, 8) = left(col2, 8) then 8
                   when left(col1, 7) = left(col2, 7) then 7
                   when left(col1, 6) = left(col2, 6) then 6
                   when left(col1, 5) = left(col2, 5) then 5
                   when left(col1, 4) = left(col2, 4) then 4
                   when left(col1, 3) = left(col2, 3) then 3
                   when left(col1, 2) = left(col2, 2) then 2
                   when left(col1, 1) = left(col2, 1) then 1
                   else 0
              end) as prefix_length
      from tbl1
     ) t;


-- Strip the longest common prefix by peeling one character at a time (MySQL 8.0+).
-- Each recursion level removes the first character from both values while
-- those characters are equal; the deepest level per original pair holds the
-- fully stripped values.
with recursive cte as (
      select col1, col2, 1 as lev, col1 as orig_col1, col2 as orig_col2
      from tbl1
      union all
      select substr(col1, 2), substr(col2, 2), lev + 1, orig_col1, orig_col2
      from cte
      where left(col1, 1) = left(col2, 1)
        -- Stop once either side is exhausted: left('', 1) = left('', 1) is
        -- always true, so identical values would otherwise recurse forever.
        and length(col1) > 0
        and length(col2) > 0
     )
select col1, col2
from (select cte.*,
             -- Rank levels per original pair, deepest (most stripped) first.
             dense_rank() over (partition by orig_col1, orig_col2 order by lev desc) as seqnum
      from cte
     ) x
where seqnum = 1;


这里是包含两种解决方案的 db<>fiddle 在线演示。