SQL Server:在某些字符后检查大写或小写

时间:2017-06-04 10:40:26

标签: sql sql-server tsql split sql-server-2012

我有像'John is my name; Ram is my name; Adam is my name'这样的数据。

我的规则是;之后的每个首字母都应该是大写字母。

如何选择满足规则的所有值?

6 个答案:

答案 0 :(得分:2)

您可以使用像这样的XML技巧将其拆分

-- Sample input: the parts are separated by '; '.
DECLARE @YourString VARCHAR(100) = 'John is my name; Ram is my name; Adam is my name';

WITH Escaped AS
(
    -- Swap the delimiter for a placeholder, then serialise through FOR XML PATH('')
    -- so that XML-reserved characters in the text are escaped by the engine.
    SELECT (SELECT REPLACE(@YourString, '; ', '$$SplitHere$$') AS [*] FOR XML PATH('')) AS SafeText
),
Splitted AS
(
    -- Turn every placeholder into an element boundary and cast the result to XML.
    SELECT CAST('<x>' + REPLACE(Escaped.SafeText, '$$SplitHere$$', '</x><x>') + '</x>' AS XML) AS Casted
    FROM Escaped
),
DerivedTable AS
(
    -- One row per <x> element; PartNr is a running number over the shredded nodes.
    SELECT
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS PartNr,
        Node.Item.value(N'text()[1]', N'nvarchar(max)') AS Part
    FROM Splitted
    CROSS APPLY Splitted.Casted.nodes(N'/x') AS Node(Item)
)
SELECT
    DerivedTable.PartNr,
    DerivedTable.Part,
    -- 1 when the first character of the part lies in the ASCII range 'A'..'Z', else 0.
    CASE
        WHEN ASCII(LEFT(DerivedTable.Part, 1)) BETWEEN ASCII('A') AND ASCII('Z') THEN 1
        ELSE 0
    END AS FirstIsCapital
FROM DerivedTable;

结果

PartNr  Part            FirstIsCapital
---------------------------------------- 
1   John is my name     1
2   Ram is my name      1
3   Adam is my name     1

我不知道你的最终目标是什么......是要找出首字母不是大写的部分?还是要确保您的规则得到满足?

然而:
最好的做法是:借此机会修正您的设计,把这些部分存放到一个 1:n 关联的子表中。

答案 1 :(得分:1)

有点丑陋的解决方案,但你可以尝试一下......

-- Sample input: the parts are separated by ';'.
DECLARE @str NVARCHAR(MAX) = N'John is my name; Ram is my name; Adam is my name';

-- Placeholder that stands in for the delimiter while the text is XML-escaped.
-- The delimiter must be swapped out BEFORE escaping because the entities produced
-- by escaping (e.g. &amp;) themselves contain ';'.
DECLARE @marker NVARCHAR(20) = N'$$SplitHere$$';

DECLARE @xml XML;

-- Escape XML-reserved characters via FOR XML PATH('') so that input containing
-- '&' or '<' no longer breaks the CAST, then turn each placeholder into an
-- element boundary.
SET @xml = CAST(
    N'<X>'
    + REPLACE(
          (SELECT REPLACE(@str, N';', @marker) AS [*] FOR XML PATH('')),
          @marker,
          N'</X><X>')
    + N'</X>' AS XML);

-- Shred the XML into one trimmed row per part (on SQL Server 2016+ STRING_SPLIT
-- could do the splitting instead) and keep only the parts whose first character
-- equals its own upper-case form.
SELECT
    a.RowN,
    a.[value]
FROM (
    SELECT
        RowN = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
        LTRIM(RTRIM(N.value('.', 'nvarchar(MAX)'))) AS [value]
    FROM @xml.nodes('X') AS T(N)
) AS a
WHERE UNICODE(SUBSTRING(a.[value], 1, 1)) = UNICODE(UPPER(SUBSTRING(a.[value], 1, 1)));

思路是先拆分字符串,再比较首字符与其大写形式的 Unicode 值,以判断它是否为大写。

答案 2 :(得分:1)

注意:标准方法是使用C#/ VB [.Net]

在app中执行此操作

[1]如果你想找出第一个字母(而不是第一个字符)没有大写的所有句子,那么我会尝试以下解决方案:

-- Sample input: the first and third sentences start with a lower-case letter.
DECLARE @Source NVARCHAR(100) = N'john is my name; Ram is my name; adam is my name'

SELECT s.Sentence
FROM (
    VALUES (
        -- Each ';' closes the current <i> element and is kept as the first
        -- character of the next one.
        CONVERT(XML, N'<root><i>' + REPLACE(@Source, N';', N'</i><i>;') + N'</i></root>')
    )
) AS src(XmlCol)
-- One row per <i> element.
CROSS APPLY src.XmlCol.nodes(N'/root/i') AS item(XmlCol)
-- Text of the element as a plain string.
CROSS APPLY (
    VALUES ( item.XmlCol.value('(text())[1]', 'NVARCHAR(100)') )
) AS s(Sentence)
-- PATINDEX locates the first character matching [a-z] (NOTE(review): this relies on
-- the default collation being case-insensitive so that it finds the first letter of
-- either case - confirm). The binary collation on LIKE then keeps the row only when
-- that character is a lower-case a-z.
WHERE SUBSTRING(s.Sentence, NULLIF(PATINDEX('%[a-z]%', s.Sentence), 0), 1) LIKE '%[a-z]%' COLLATE Latin1_General_BIN
-- Order by the position of the node in the shredded XML.
ORDER BY ROW_NUMBER() OVER (ORDER BY item.XmlCol)

在这种情况下,结果将是

john is my name
; adam is my name

[2]如果你试图将每个句子中的第一个字母大写那么我会使用以下解决方案(见行末尾的评论):

-- Sample input: every sentence starts with a lower-case letter.
DECLARE @Source NVARCHAR(100) = N'john is my name; ram is my name; adam is my name'

SELECT (
    SELECT fixed.NewSentence AS '*'
    FROM (
        VALUES (
            -- Convert the source string into XML; every ';' acts as a sentence
            -- delimiter and stays at the end of its sentence:
            -- <root><i>john...;</i><i> ram ....</i>...</root>
            CONVERT(XML, N'<root><i>' + REPLACE(@Source, N';', N';</i><i>') + N'</i></root>')
        )
    ) AS src(XmlCol)
    -- Decompose the XML into one row per sentence element.
    CROSS APPLY src.XmlCol.nodes(N'/root/i') AS item(XmlCol)
    -- Sentence text as NVARCHAR(100).
    CROSS APPLY (
        VALUES ( item.XmlCol.value('(text())[1]', 'NVARCHAR(100)') )
    ) AS s(Sentence)
    -- Index of the first character matching [a-z]; 0 when there is none.
    CROSS APPLY (
        VALUES ( PATINDEX('%[a-z]%', s.Sentence) )
    ) AS pos(FirstLetterIndex)
    -- Replace that character with its UPPER(...) version; sentences without a
    -- match are passed through unchanged.
    CROSS APPLY (
        VALUES (
            CASE
                WHEN pos.FirstLetterIndex > 0
                    THEN STUFF(s.Sentence, pos.FirstLetterIndex, 1, UPPER(SUBSTRING(s.Sentence, pos.FirstLetterIndex, 1)))
                ELSE s.Sentence
            END
        )
    ) AS fixed(NewSentence)
    -- Keep the sentences in their original position within the source string.
    ORDER BY ROW_NUMBER() OVER (ORDER BY item.XmlCol)
    -- Concatenate all sentences back into one string.
    FOR XML PATH('')
)

例如,如果源字符串为N'john is my name; ram is my name; adam is my name',那么结果将为N'John is my name; Ram is my name; Adam is my name'

Demo

注意:只有当源字符串不包含 XML 保留字符(例如 <)时,此解决方案(以及所有其他基于 XML 拆分的解决方案)才有效。如果你的数据中包含这类字符,请告诉我。

答案 3 :(得分:1)

你可以创建这样的函数。

-- SPLITTER: splits @textData on @Delimeter and returns the trimmed parts whose
-- first character equals its own upper-case form.
--
-- Parameters:
--   @textData  - the delimited text to examine.
--   @Delimeter - the separator between parts (may be longer than one character).
-- Returns:
--   A table with one row (column Data) per part that passes the check, with
--   leading and trailing spaces removed.
-- Note:
--   The check is UNICODE(c) = UNICODE(UPPER(c)), so characters that have no
--   distinct upper-case form (digits, punctuation) pass as well.
CREATE FUNCTION SPLITTER (
    @textData  NVARCHAR(MAX),
    @Delimeter NVARCHAR(MAX)
)
RETURNS @RtnValue TABLE (
    Data NVARCHAR(MAX)
)
AS
BEGIN
    DECLARE @index INT;
    -- NVARCHAR(MAX) so that long parts of the NVARCHAR(MAX) input are not truncated.
    DECLARE @data NVARCHAR(MAX);
    -- NCHAR so that a non-ASCII first character is not lost in a conversion to CHAR.
    DECLARE @firstCharacter NCHAR(1);
    -- DATALENGTH / 2 instead of LEN so that trailing spaces in the delimiter count.
    DECLARE @delimiterLength INT = DATALENGTH(@Delimeter) / 2;

    SET @index = CHARINDEX(@Delimeter, @textData);

    WHILE (@index > 0)
    BEGIN
        -- Part in front of the next delimiter.
        SET @data = LTRIM(RTRIM(SUBSTRING(@textData, 1, @index - 1)));
        SET @firstCharacter = SUBSTRING(@data, 1, 1);

        IF UNICODE(@firstCharacter) = UNICODE(UPPER(@firstCharacter))
        BEGIN
            INSERT INTO @RtnValue (Data) SELECT @data;
        END;

        -- Drop the processed part together with its delimiter.
        SET @textData = SUBSTRING(@textData, @index + @delimiterLength, LEN(@textData));
        SET @index = CHARINDEX(@Delimeter, @textData);
    END;

    -- Remainder after the last delimiter. It has to be trimmed like every other
    -- part; otherwise the space that follows the delimiter would be taken as the
    -- first character and a lower-case part would slip through.
    SET @data = LTRIM(RTRIM(@textData));
    SET @firstCharacter = SUBSTRING(@data, 1, 1);

    IF UNICODE(@firstCharacter) = UNICODE(UPPER(@firstCharacter))
    BEGIN
        INSERT INTO @RtnValue (Data) SELECT @data;
    END;

    RETURN;
END

像这样使用

  

SELECT * FROM SPLITTER('John is my name; Ram is my name; Adam is my name', ';')

答案 4 :(得分:1)

其他答案显示如何将行转换为与您的模式匹配的内容。

如果您只想选出(select)符合所述模式的行,可以使用 patindex() 或 like,并配合区分大小写的排序规则(或者用 collate 显式指定一个)。

这里假定:除了“分号之后的首字母必须大写”这条规则之外,整个字符串的第一个字母也必须大写。如果不是这种情况,只需删除 where 中的第一个条件。

-- Variant 1: PATINDEX with a case-sensitive collation.
-- The first condition requires the value to start with a capital letter; the
-- second rejects any "; " that is followed by something other than a capital.
SELECT
    t.id,
    t.val
FROM t
WHERE PATINDEX('[ABCDEFGHIJKLMNOPQRSTUVWXYZ]%', t.val COLLATE latin1_general_cs_as) = 1
  AND PATINDEX('%; [^ABCDEFGHIJKLMNOPQRSTUVWXYZ]%', t.val COLLATE latin1_general_cs_as) = 0;

-- Variant 2: the same two conditions expressed with LIKE / NOT LIKE.
SELECT
    t.id,
    t.val
FROM t
WHERE t.val COLLATE latin1_general_cs_as LIKE '[ABCDEFGHIJKLMNOPQRSTUVWXYZ]%'
  AND t.val COLLATE latin1_general_cs_as NOT LIKE '%; [^ABCDEFGHIJKLMNOPQRSTUVWXYZ]%';

测试设置:

-- Test table: one delimited string per row.
CREATE TABLE t
(
    id  INT NOT NULL IDENTITY(1,1),
    val VARCHAR(256)
);

-- Row 1 satisfies the capitalisation rule, row 2 violates it.
INSERT INTO t (val)
VALUES
    ('John is my name; Ram is my name; Adam is my name'),
    ('john is my name; ram is my name; adam is my name');

rextester演示:http://rextester.com/DBGIS10645

以上两个查询都返回:

+----+--------------------------------------------------+
| id |                       val                        |
+----+--------------------------------------------------+
|  1 | John is my name; Ram is my name; Adam is my name |
+----+--------------------------------------------------+

答案 5 :(得分:1)

您可以抓取NGrams8K的副本并执行此操作:

-- note that I made the 3rd item start with lower-case
-- note that I made the 3rd item start with lower-case
DECLARE @YourString VARCHAR(100) = 'John is my name; Ram is my name; adam is my name';

-- Identifier casing (@YourString, N) is kept consistent throughout so the script
-- does not depend on a case-insensitive collation.
WITH D(N) AS
(
    -- Start offsets of the parts: 0 for the first part, then the position of
    -- every ';' reported by dbo.NGrams8k.
    SELECT 0
    UNION ALL
    SELECT position
    FROM dbo.NGrams8k(@YourString, 1)
    WHERE token = ';'
),
TOKEN(token) AS
(
    -- Text between offset N and the next ';'. When there is no further ';' the
    -- part runs to the end of the string (LEN + 1 instead of a hard-coded 101,
    -- so the script keeps working if the variable's declared length changes).
    SELECT LTRIM(SUBSTRING(@YourString, N + 1,
             ISNULL(NULLIF(CHARINDEX(';', @YourString, N + 1), 0), LEN(@YourString) + 1) - (N + 1)))
    FROM D
)
SELECT token,
       -- 1 when the first character lies in the ASCII range 'A'..'Z', else 0.
       FirstLetterIsCaptial = CASE
                                  WHEN ASCII(SUBSTRING(token, 1, 1)) BETWEEN ASCII('A') AND ASCII('Z') THEN 1
                                  ELSE 0
                              END
FROM TOKEN;

结果

token              FirstLetterIsCaptial
------------------ --------------------
John is my name    1
Ram is my name     1
adam is my name    0