我正在寻找一种方法,找出字符串中重复次数最多的字符出现了多少次。
例如:
String NMCR
-----------------------
akhsdjjjaajjj 6
AABBDDDDDDD 7
答案 0 :(得分:7)
我的尝试,用 CTE 把各个步骤分开展示:
-- For every input string, report how many times its most frequent
-- character occurs (Oracle SQL).
with data as (
  -- Sample input: one row per string to analyse.
  select 'akhsdjjjaajjj' txt from dual
  union all
  select 'AABBDDDDDDD' txt from dual
),
-- Character positions 1..N, where N is the length of the longest input.
-- Deriving N from the data (instead of a fixed upper bound) means long
-- strings are never silently truncated and no surplus rows are generated.
positions as (
  select level lvl
  from (select max(length(txt)) max_len from data)
  connect by level <= max_len
),
-- One row per (string, position) holding the character at that position.
chars as (
  select data.txt, substr(data.txt, positions.lvl, 1) c, positions.lvl
  from data
  join positions
    on positions.lvl <= length(data.txt)
),
-- Occurrences of each distinct character within each string.
counts as (
  select txt, c, count(*) cnt
  from chars
  group by txt, c
)
-- Highest per-character count for each string.
select txt, max(cnt)
from counts
group by txt;
结果:
TXT MAX(CNT)
akhsdjjjaajjj 6
AABBDDDDDDD 7
答案 1 :(得分:6)
奇怪的要求,但这是一种方式:
create or replace
function max_repetetive_letter_count (string varchar2) return integer
is
  -- Returns the number of occurrences of the most frequent character in STRING.
  --
  -- Parameters:
  --   string  the text to analyse; comparison is case-sensitive because the
  --           characters are grouped by their exact value.
  -- Returns:
  --   the highest per-character occurrence count, or 0 when STRING is NULL
  --   (Oracle treats the empty string as NULL).

  -- SYS.ODCIVARCHAR2LIST is a documented collection type that can be queried
  -- with TABLE(); it replaces the internal KU$_ type used previously and
  -- matches the SYS.ODCI* types used elsewhere in this file.
  letter_col  SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST();
  l_length    pls_integer := nvl(length(string), 0);
  l_max_count integer;
begin
  -- Guard: with a NULL length the FOR loop bound below would be NULL and fail.
  if l_length = 0 then
    return 0;
  end if;

  -- Split the string into one collection element per character.
  letter_col.extend(l_length);
  for i in 1..l_length loop
    letter_col(i) := substr(string, i, 1);
  end loop;

  -- Count each distinct character and keep the largest count.
  select max(letter_count)
    into l_max_count
    from ( select column_value, count(*) letter_count
             from table(letter_col)
            group by column_value
         );

  return l_max_count;
end;
/
使用示例:
SQL> with samples as (
  2    select 'ajkhsdjjjaajjj' as string from dual
  3    union all
  4    select 'AABBDDDDDDD' as string from dual
  5  )
  6  select string, max_repetetive_letter_count(string)
  7  from samples;
STRING MAX_REPETETIVE_LETTER_COUNT(STRING)
-------------- -----------------------------------
ajkhsdjjjaajjj 7
AABBDDDDDDD 7
(注意你的例子中的6是不正确的!)
答案 2 :(得分:5)
您也可以使用 CONNECT BY 子句:
-- For each string, return the occurrence count of its most frequent character.
-- The correlated CONNECT BY walks every character position of cad.
SELECT cad,
       ( -- Count occurrences literally: original length minus the length left
         -- after removing the character. A regular-expression count would
         -- interpret characters such as . * ( [ \ as pattern syntax and
         -- miscount or raise an error.
         SELECT MAX (LENGTH (cad)
                     - NVL (LENGTH (REPLACE (cad, SUBSTR (cad, LEVEL, 1))), 0))
           FROM DUAL
        -- <= so that the last character position is visited as well.
        CONNECT BY LEVEL <= LENGTH (cad)) AS max_char_count
  FROM (SELECT 'akhsdjjjaajjj' AS cad FROM DUAL
        UNION ALL
        SELECT 'AABBDDDDDDD' FROM DUAL);
我希望这也有帮助。
答案 3 :(得分:3)
基于问题的原始版本(仅计算重复的字符):
Oracle 环境准备(建表):
-- Sample table: one row per string to analyse, keyed by id.
-- Column names are taken from the aliases in the first SELECT branch.
CREATE TABLE test AS
SELECT 1 AS id, 'ajkhsdjjjaajjj' AS string FROM DUAL
UNION ALL
SELECT 2, 'AABBDDDDDDD' FROM DUAL;
<strong>查询</strong>:
-- For each row of test, find the character(s) whose runs of two or more
-- identical consecutive characters add up to the greatest total length.
WITH run_matches AS (
  -- One row per match of '(.)\1+' in the string: the repeated character
  -- (capture group 1) and the length of the whole matched run.
  SELECT t.id,
         REGEXP_SUBSTR( t.string, '(.)\1+', 1, occ.COLUMN_VALUE, NULL, 1 ) AS matched_character,
         LENGTH( REGEXP_SUBSTR( t.string, '(.)\1+', 1, occ.COLUMN_VALUE ) ) AS number_of_repeats
  FROM   test t,
         -- Correlated row generator: occurrence numbers 1..number of matches.
         TABLE(
           CAST(
             MULTISET(
               SELECT LEVEL
               FROM   DUAL
               CONNECT BY LEVEL <= REGEXP_COUNT( t.string, '(.)\1+' )
             )
             AS SYS.ODCINUMBERLIST
           )
         ) occ
),
run_totals AS (
  -- Total run length per character, ranked within each id (largest first).
  SELECT id,
         matched_character,
         SUM( number_of_repeats ) AS frequency,
         RANK() OVER ( PARTITION BY id ORDER BY SUM( number_of_repeats ) DESC ) AS freq_rank
  FROM   run_matches
  GROUP BY id, matched_character
)
-- RANK keeps every character tied for first place.
SELECT id,
       matched_character,
       frequency
FROM   run_totals
WHERE  freq_rank = 1;
<strong>结果</strong>:
ID MATCHED_CHARACTER FREQUENCY
--- ------------------ ----------
1 j 6
2 D 7
已更新 - 针对已修改的问题(统计所有字符):
<strong>查询</strong>:
-- For each row of test, find the character(s) that occur most often
-- anywhere in the string (not only in consecutive runs).
WITH char_frequencies AS (
  -- One group per (id, character): its occurrence count and its rank
  -- within the id, most frequent first.
  SELECT t.id,
         SUBSTR( t.string, pos.COLUMN_VALUE, 1 ) AS matched_character,
         COUNT(*) AS frequency,
         RANK() OVER ( PARTITION BY t.id ORDER BY COUNT(*) DESC ) AS freq_rank
  FROM   test t,
         -- Correlated row generator: positions 1..LENGTH(string).
         TABLE(
           CAST(
             MULTISET(
               SELECT LEVEL
               FROM   DUAL
               CONNECT BY LEVEL <= LENGTH( t.string )
             )
             AS SYS.ODCINUMBERLIST
           )
         ) pos
  GROUP BY t.id, SUBSTR( t.string, pos.COLUMN_VALUE, 1 )
)
-- RANK keeps every character tied for first place.
SELECT id,
       matched_character,
       frequency
FROM   char_frequencies
WHERE  freq_rank = 1;
<strong>结果</strong>:
ID MATCHED_CHARACTER FREQUENCY
---------- ----------------- ----------
1 j 7
2 D 7