SQL Split 函数:如何处理文本限定符之间出现分隔符的字符串?

时间:2010-05-19 14:01:39

标签: sql tsql user-defined-functions

SQL 分割函数有很多种实现方式:基于循环的、使用 XML 命令的,甚至还有使用数字表的。但我还没有找到一个支持文本限定符的实现。

以下面的示例字符串为例,我想按“,”进行拆分,但位于双引号或单引号之间的逗号不应被当作分隔符。

示例数据:

jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>

应该返回一个表:

jsmith@anywhere.com
"Sally \"Heat\" Jones" <sally@anywhere.com>
"Mark Jones" <mjones@anywhere.com>
"Stone, Ron" <rstone@anywhere.com>

我知道这是一个复杂的查询/函数,但任何建议或指导我都会非常感激。

3 个答案:

答案 0 :(得分:1)

-- Splits @nvcString into tokens on @nvcDelimiter, treating @nvcTQ as a text
-- qualifier: a delimiter that appears between an opening and a closing
-- qualifier does not end the token.
--
-- Parameters:
--   @nvcString    - text to split
--   @nvcDelimiter - single-character delimiter
--   @nvcTQ        - single-character text qualifier
-- Returns:
--   one row per non-empty token; every token is cleaned by
--   dbo.udfRemoveTQFromToken (trimmed, enclosing qualifiers removed).
--
-- Notes:
--   * Every qualifier character (opening or closing) also ends the token
--     collected so far, so text sitting directly next to a qualified span
--     is returned as a separate row.
--   * The working buffer is nvarchar(4000), so a single token is limited
--     to 4000 characters.
CREATE FUNCTION [dbo].[udfSplit]
(
    @nvcString nvarchar(max),
    @nvcDelimiter nvarchar(1),
    @nvcTQ nvarchar(1)
)
RETURNS @tblTokens TABLE (
                            Token nvarchar(max)
                            )
AS
BEGIN

    DECLARE @intPos int             -- 1-based index of the character being examined
    DECLARE @intLength int          -- length of the input, computed once
    DECLARE @intOpenTQ int          -- position of the currently open qualifier, 0 when outside
    DECLARE @bitFlush bit           -- 1 when the current character ends the token
    DECLARE @nvcChar nvarchar(1)
    DECLARE @nvcBuffer nvarchar(4000)

    -- A NULL or empty input produces no rows
    IF @nvcString IS NULL OR @nvcString = ''
        RETURN

    SET @intLength = LEN(@nvcString)
    SET @intPos = 1
    SET @intOpenTQ = 0
    SET @nvcBuffer = ''

    WHILE @intPos <= @intLength
        BEGIN
            SET @nvcChar = SUBSTRING(@nvcString, @intPos, 1)
            SET @bitFlush = 0

            IF @nvcChar = @nvcTQ
                BEGIN
                    -- Keep the qualifier in the buffer so the cleanup helper
                    -- can tell whether the token was properly enclosed
                    SET @nvcBuffer = @nvcBuffer + @nvcChar
                    SET @bitFlush = 1

                    -- Toggle the qualifier state: close it if one is open,
                    -- otherwise remember where this one opened
                    SET @intOpenTQ = CASE WHEN @intOpenTQ <> 0 THEN 0 ELSE @intPos END
                END
            ELSE IF @intOpenTQ = 0 AND @nvcChar = @nvcDelimiter
                BEGIN
                    -- Delimiter outside any qualified span ends the token
                    SET @bitFlush = 1
                END
            ELSE
                BEGIN
                    -- Ordinary character (or a delimiter inside a qualified span)
                    SET @nvcBuffer = @nvcBuffer + @nvcChar
                END

            IF @bitFlush = 1
                BEGIN
                    SET @nvcBuffer = dbo.udfRemoveTQFromToken(@nvcBuffer, @nvcTQ)

                    IF @nvcBuffer <> ''
                        BEGIN
                            INSERT INTO @tblTokens (Token) VALUES (@nvcBuffer)
                            SET @nvcBuffer = ''
                        END
                END

            SET @intPos = @intPos + 1
        END

    -- Emit whatever is left after the last delimiter / qualifier
    SET @nvcBuffer = dbo.udfRemoveTQFromToken(@nvcBuffer, @nvcTQ)

    IF @nvcBuffer <> ''
        BEGIN
            INSERT INTO @tblTokens (Token) VALUES (@nvcBuffer)
        END

    RETURN
END

GO


-- Cleans a raw token: trims surrounding spaces and strips the text
-- qualifier from either end.
--
-- Parameters:
--   @nvcToken - raw token text
--   @nvcTQ    - single-character text qualifier
-- Returns:
--   the cleaned token. Spaces inside a properly enclosed token are kept;
--   when an opening qualifier has no matching closing one, the spaces that
--   followed the opening qualifier are trimmed as well.
CREATE FUNCTION [dbo].[udfRemoveTQFromToken]
(
    @nvcToken nvarchar(4000),
    @nvcTQ nvarchar(1)
)
RETURNS nvarchar(4000) AS
BEGIN

    DECLARE @nvcClean nvarchar(4000)
    DECLARE @bitUnclosed bit

    -- Outer trim comes first so that spaces inside the qualifiers survive
    SET @nvcClean = LTRIM(RTRIM(@nvcToken))

    IF LEFT(@nvcClean, 1) = @nvcTQ
        BEGIN
            -- Decide whether the token is malformed (no closing qualifier)
            -- before the opening qualifier is dropped
            SET @bitUnclosed = CASE WHEN RIGHT(@nvcClean, 1) <> @nvcTQ THEN 1 ELSE 0 END

            -- Drop the opening qualifier
            SET @nvcClean = RIGHT(@nvcClean, LEN(@nvcClean) - 1)

            -- Only a malformed token gets its now-leading spaces trimmed
            IF @bitUnclosed = 1
                SET @nvcClean = LTRIM(@nvcClean)
        END

    -- Drop the closing qualifier, if there is one
    IF RIGHT(@nvcClean, 1) = @nvcTQ
        SET @nvcClean = LEFT(@nvcClean, LEN(@nvcClean) - 1)

    RETURN @nvcClean
END

答案 1 :(得分:1)

这是我的解决方案:

-- Splits a comma-separated list into rows, ignoring commas that sit inside
-- a span quoted with double quotes (") or single quotes (').
--
-- Parameters:
--   @input - the comma-separated text
-- Returns:
--   one row per item, trimmed of leading and trailing spaces. Quote
--   characters are kept in the output. Empty items between two commas are
--   returned as empty strings; a trailing comma does not add a row.
--
-- Quoting rules:
--   * A quoted span is closed only by the same quote character that opened
--     it, so an apostrophe inside "..." (or a " inside '...') is literal.
--   * Inside a quoted span a backslash escapes the next character, so \"
--     does not close the span.
CREATE FUNCTION fnSplitString
(
    @input nvarchar(MAX) 
)
RETURNS @emails TABLE
(
    email nvarchar(MAX) 
)
AS
BEGIN

DECLARE @len int = LEN(@input);
DECLARE @pos int = 1;       -- character currently examined
DECLARE @start int = 1;     -- first character of the item being collected
DECLARE @quote nchar(1);    -- quote character that opened the current span; NULL when outside
DECLARE @ch nchar(1);

WHILE (@pos <= @len)
BEGIN

    SET @ch = SUBSTRING(@input, @pos, 1);

    IF (@quote IS NOT NULL)
    BEGIN
        IF (@ch = N'\' AND @pos < @len)
        BEGIN
            -- Escaped character: step over it so it cannot close the span
            SET @pos = @pos + 1;
        END
        ELSE IF (@ch = @quote)
        BEGIN
            SET @quote = NULL;
        END
    END
    ELSE IF (@ch = N'"' OR @ch = N'''')
    BEGIN
        SET @quote = @ch;
    END
    ELSE IF (@ch = N',')
    BEGIN
        INSERT INTO @emails (email)
        VALUES (LTRIM(RTRIM(SUBSTRING(@input, @start, @pos - @start))));
        SET @start = @pos + 1;
    END

    SET @pos = @pos + 1;
END

-- Remaining text after the last separator (also covers an unterminated quote)
IF (@start <= @len)
BEGIN
    INSERT INTO @emails (email)
    VALUES (LTRIM(RTRIM(SUBSTRING(@input, @start, @len - @start + 1))));
END

RETURN
END
GO

-- Demo: run the sample address list from the question through fnSplitString
DECLARE @input nvarchar(max);
SET @input = 'jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>';

SELECT email
FROM fnSplitString(@input)

答案 2 :(得分:-1)

这是一个快速的解决方案,但并不完美:它没有用栈来跟踪引号的状态,因此会把引号内的逗号也当作分隔符。

-- Splits @List on @Delim and returns one trimmed item per row (column "value").
--
-- Parameters:
--   @Delim - single-character delimiter
--   @List  - text to split (up to 4000 characters)
--
-- How it works: a recursive CTE generates the character positions
-- 1..len(@List). A position starts an item when the character just before
-- it is a delimiter (the list is prefixed with one delimiter so that
-- position 1 qualifies too). The item runs up to the next delimiter, which
-- is found with charindex on the list suffixed with one delimiter.
--
-- Limitations:
--   * Not quote-aware: a delimiter inside quotes still splits.
--   * The recursion depth is len(@List) - 1, so lists longer than 101
--     characters exceed the default recursion limit of 100 and need
--     "option (maxrecursion ...)" on the calling statement.
alter function fnSplit
(
    @Delim char(1),
    @List nvarchar(4000)
)
returns table as
return
    with 
    Strings(PosIdx) as 
    (
        select 1 
        union all 
        -- stop at the end of the list instead of always recursing 4000 levels
        select PosIdx + 1 from Strings where PosIdx < len(@List)
    )
    select
        ltrim(rtrim(substring(@List, PosIdx, charindex(@Delim, @List + @Delim, PosIdx) - PosIdx))) as value
    from   
        Strings
    where  
        PosIdx <= len(@List)
    and substring(@Delim + @List, PosIdx, 1) = @Delim 
go
-- Demo: split the sample address list with fnSplit. The maxrecursion hint
-- lifts the recursion cap so the function's recursive position generator
-- can run past the default limit.
select value
from fnSplit(',', 'jsmith@anywhere.com, "Sally \"Heat\" Jones" <sally@anywhere.com>, "Mark Jones" <mjones@anywhere.com>, "Stone, Ron" <rstone@anywhere.com>')
option (maxrecursion 0)