如何在字符串中查找最长的字母序列

时间:2012-09-22 10:18:27

标签: sql-server-2008 tsql set-based

我想找出字符串中最长的字母序列

例如,在单词 Honorificabcdwert 中,输出应为 abcd。

我该怎么办?

我的想法是先获取每个字符的 ASCII 值,然后统计连续序列的长度,直到序列在某处中断为止。但我目前只能做到下面这一步:

-- Purpose: split every string stored in @t into one row per character,
-- exposing each character's 1-based position (Row_ID) and its ASCII code.
DECLARE @t TABLE(ID INT IDENTITY,String VARCHAR(100))
INSERT INTO @t (String) SELECT 'Honorificabcdwert'

;WITH Get_Individual_Chars_Cte AS
(
    SELECT
         t.ID
        -- Number the characters by their position in the string. Ordering by
        -- the partition key (ID) would leave the numbering order undefined,
        -- so Row_ID would not be guaranteed to follow the character order.
        ,Row_ID = ROW_NUMBER() OVER (PARTITION BY t.ID ORDER BY v.number)
        ,SUBSTRING(t.String, v.number, 1) AS [Char]
        ,ASCII(SUBSTRING(t.String, v.number, 1)) AS [Ascii Value]
    FROM @t AS t
    -- master.dbo.spt_values filtered on type = 'P' is used here as a numbers
    -- table; NOTE(review): assumed to contain every integer from 1 up to
    -- LEN(String) -- confirm on the target server.
    INNER JOIN master.dbo.spt_values AS v
        ON  v.number BETWEEN 1 AND LEN(t.String)
        AND v.type = 'P'
)
SELECT
     ID
    ,Row_ID
    ,[Char]
    ,[Ascii Value]
FROM Get_Individual_Chars_Cte
ORDER BY ID, Row_ID

在此之后我就不知道该怎么做了。无论是沿用这种思路还是换用其他方法,都希望能得到帮助。

2 个答案:

答案 0 :(得分:2)

这会有帮助吗

-- Purpose: for every string in @t, find the longest run of consecutive
-- alphabet letters it contains by generating every candidate run as a LIKE
-- pattern and keeping the longest pattern that matches.
-- Whether the match is case-sensitive depends on the collation in effect.
DECLARE @t TABLE(ID INT IDENTITY,String VARCHAR(100))
INSERT INTO @t (String)
SELECT 'Honorificabcdwert' UNION ALL
SELECT 'AbCdEfxy' UNION ALL
SELECT 'abc1234defg' UNION ALL
SELECT 'XYZABCPPCKLMIDBABC' UNION ALL
SELECT 'MNOP$%^&~()MNOPQRS;:'

;WITH Numbers AS
(
    -- The integers 1..26, one per alphabet position.
    SELECT v.N
    FROM (VALUES (1),(2),(3),(4),(5),(6),(7),(8),
                 (9),(10),(11),(12),(13),(14),(15),
                 (16),(17),(18),(19),(20),(21),(22),
                 (23),(24),(25),(26)) AS v(N)
)
, Alphabet_Runs AS
(
    -- Every contiguous slice of the alphabet, with its starting position.
    SELECT SUBSTRING('ABCDEFGHIJKLMNOPQRSTUVWXYZ', s.N, e.N - s.N + 1) AS Run
          ,s.N AS Start_Pos
    FROM Numbers AS s
    CROSS JOIN Numbers AS e
    WHERE s.N <= e.N
)
, Wrap_Prefixes AS
(
    -- Tails of the alphabet ('Z', 'YZ', 'XYZ', ...) plus the empty string.
    -- They are prepended to runs starting at 'A' so that sequences wrapping
    -- from Z back to A (e.g. XYZABC) are also candidates.
    SELECT REVERSE(SUBSTRING('ZYXWVUTSRQPONMLKJIHGFEDCBA', 1, n.N)) AS Prefix
    FROM Numbers AS n
    UNION ALL
    SELECT ''
)
, Patterns AS
(
    -- One LIKE pattern per candidate sequence. Only runs starting at
    -- position 1 ('A...') are combined with the wrap-around prefixes; every
    -- other run gets no prefix (the outer join yields NULL -> '').
    SELECT '%' + COALESCE(p.Prefix, '') + r.Run + '%' AS Pattern
    FROM Alphabet_Runs AS r
    LEFT JOIN Wrap_Prefixes AS p
        ON r.Start_Pos = 1
)
, Matches AS
(
    -- Test every input string against every pattern. Non-matching pairs are
    -- kept with a NULL pattern so that a string matching nothing still
    -- produces an output row (with a NULL Sequence).
    SELECT t.ID
          ,t.String AS OriginalString
          ,CASE WHEN t.String LIKE p.Pattern THEN p.Pattern END AS Pattern
    FROM Patterns AS p
    CROSS JOIN @t AS t
)
, Ranked AS
(
    -- Longest matching pattern first; ties are broken alphabetically.
    SELECT ID
          ,OriginalString
          ,REPLACE(Pattern, '%', '') AS Sequence
          ,ROW_NUMBER() OVER (PARTITION BY ID
                              ORDER BY LEN(Pattern) DESC, Pattern) AS rn
    FROM Matches
)
SELECT ID
      ,OriginalString
      ,Sequence
FROM Ranked
WHERE rn = 1
ORDER BY ID

**结果**

ID  OriginalString          Sequence
1   Honorificabcdwert       ABCD
2   AbCdEfxy                ABCDEF
3   abc1234defg             DEFG
4   XYZABCPPCKLMIDBABC      XYZABC
5   MNOP$%^&~()MNOPQRS;:    MNOPQRS

该查询是根据您在与 @Martin Smith 讨论过程中提供的输入编写的。请测试一下,并告诉我它是否符合您的要求。

答案 1 :(得分:0)

对于字符按升序连续排列的相邻行(此处用 ASCII 顺序代替字母顺序),ROW_NUMBER() OVER (ORDER BY Row_ID) - [Ascii Value] 的值是相同的。

但仅凭这一点还不够:例如对于字符串 ABCZE,它会把 E 与 ABC 归入同一组,因此还需要第二步处理,在这些分组中找出没有间隔的连续序列。

以下内容应该这样做。

-- Purpose: for every string in @t, return the character rows that make up
-- its longest run of consecutively ascending ASCII codes (all tied runs are
-- returned). The comparison uses raw ASCII codes, so upper- and lower-case
-- letters never belong to the same run.
DECLARE @t TABLE(ID INT IDENTITY,String VARCHAR(100))
INSERT INTO @t (String) SELECT 'Honorificabcdwfrt'

;WITH Get_Individual_Chars_Cte AS
(
    -- One row per character, numbered by its position within the string.
    SELECT
         t.ID
        -- Order by the character position; ordering by the partition key
        -- (ID) would leave the numbering order undefined.
        ,Row_ID = ROW_NUMBER() OVER (PARTITION BY t.ID ORDER BY v.number)
        ,SUBSTRING(t.String, v.number, 1) AS [Char]
        ,ASCII(SUBSTRING(t.String, v.number, 1)) AS [Ascii Value]
    FROM @t AS t
    -- master.dbo.spt_values filtered on type = 'P' is used here as a numbers
    -- table; NOTE(review): assumed to contain every integer from 1 up to
    -- LEN(String) -- confirm on the target server.
    INNER JOIN master.dbo.spt_values AS v
        ON  v.number BETWEEN 1 AND LEN(t.String)
        AND v.type = 'P'
)
, T1 AS
(
    -- Position minus ASCII code is constant while the code rises by exactly
    -- one per character. Row_ID already is the per-string position, so it
    -- replaces the extra ROW_NUMBER() over the whole set.
    SELECT ID
          ,Row_ID
          ,[Char]
          ,[Ascii Value]
          ,Row_ID - [Ascii Value] AS RN
    FROM Get_Individual_Chars_Cte
)
, T2 AS
(
    -- Characters far apart can share the same RN (in 'ABCZE', E gets the
    -- same RN as A, B and C), so each RN group is split again into islands
    -- of adjacent positions.
    SELECT ID
          ,Row_ID
          ,[Char]
          ,[Ascii Value]
          ,RN
          ,Row_ID - ROW_NUMBER() OVER (PARTITION BY ID, RN
                                       ORDER BY Row_ID) AS Grp
    FROM T1
)
, Run_Lengths AS
(
    -- Length of the island each character belongs to.
    SELECT ID
          ,Row_ID
          ,[Char]
          ,[Ascii Value]
          ,RN
          ,Grp
          ,COUNT(*) OVER (PARTITION BY ID, RN, Grp) AS Run_Length
    FROM T2
)
, Ranked AS
(
    -- Rank islands per string so that every input row gets its own longest
    -- run, instead of only the single longest run across the whole table.
    SELECT ID
          ,Row_ID
          ,[Char]
          ,[Ascii Value]
          ,RN
          ,Grp
          ,RANK() OVER (PARTITION BY ID ORDER BY Run_Length DESC) AS Run_Rank
    FROM Run_Lengths
)
SELECT ID
      ,Row_ID
      ,[Char]
      ,[Ascii Value]
      ,RN
      ,Grp
FROM Ranked
WHERE Run_Rank = 1
ORDER BY ID, Row_ID