SQL字符替换功能扩展

时间:2014-10-23 07:08:16

标签: sql sql-server replace sql-function

我正在开发一个程序,我想编写一个接受以下格式值的函数

"AAAA BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH"

我想用"_"(下划线)替换上面的空格,并得到如下格式的输出(请注意,这只适用于双引号内的字符串)

"AAAA_BBBB"  CCCC DDDD EEEE "FFFF_GGGG_HHHH"

我在上一个问题中已经得到了这个问题的正确答案:

-- Walk the string one character at a time and turn spaces into underscores
-- while the scan is "inside" double quotes. Every double quote flips the
-- inside/outside state, so a quote that is never closed leaves the state
-- "inside" until the end of the string.
DECLARE @In VARCHAR(50) = '"AAAA BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH"';

DECLARE @Pos INT = 1,
        @InsideQuotes BIT = 0,   -- 1 after an odd number of quotes, 0 otherwise
        @Current CHAR(1);

WHILE @Pos <= LEN(@In)
BEGIN
    SET @Current = SUBSTRING(@In, @Pos, 1);

    IF @Current = '"'
        SET @InsideQuotes = CASE @InsideQuotes WHEN 1 THEN 0 ELSE 1 END;
    ELSE IF @Current = ' ' AND @InsideQuotes = 1
        SET @In = STUFF(@In, @Pos, 1, '_');   -- overwrite the space in place

    SET @Pos += 1;
END;

PRINT @In;

现在我想用以下要求修改它

按照所需的格式,下划线只应替换成对(已闭合)双引号内的空格;但现在如果用户输入如下内容,输出就是错误的:

"AAAA BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc

它输出为

"AAAA_BBBB"  CCCC DDDD EEEE "FFFF_GGGG_HHHH_cccc

但它应当只处理第一个已闭合的双引号字符串,因为另一个没有闭合;预期输出应如下所示:

"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc

3 个答案:

答案 0 :(得分:2)

请参阅此代码。 我认为这段代码会给出一些提示。 此代码不是经过测试的代码。我发帖回答因为至少你得到一些提示来找到你的解决方案。

-- Sketch: replace a space with an underscore only when the scan is inside
-- quotes AND at least one more double quote still appears later in the string.
DECLARE @In VARCHAR(50) = '"AAAA BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH IIIII';

DECLARE @Pos INT = 1,
        @Toggle SMALLINT = -1,     -- sign flips on every double quote; positive means "inside"
        @Current CHAR(1),
        @Remainder CHAR(100);      -- text to the right of the current position

WHILE @Pos <= LEN(@In)
BEGIN
    SET @Current = SUBSTRING(@In, @Pos, 1);

    IF @Current = '"'
        SET @Toggle = -@Toggle;

    -- Refreshed on every iteration; it is not governed by the IF above.
    SET @Remainder = SUBSTRING(@In, @Pos + 1, 213);

    IF @Toggle > 0 AND @Current = ' ' AND @Remainder LIKE '%"%'
        SET @In = STUFF(@In, @Pos, 1, '_');

    SET @Pos += 1;
END;

PRINT @In;

答案 1 :(得分:2)

以下是我要做的修改

  • 完成所有替换后即退出现有循环(在WHILE循环条件中加入引号计数,并调整引号识别逻辑以便提前退出)
  • 删除任意长度的依赖项(CHAR(100),SUBSTRING(...,213))

代码:

-- Count double quotes instead of toggling a flag. Spaces are replaced only
-- while exactly one quote has been seen, and the loop ends as soon as the
-- second quote is consumed, so later quoted runs are left untouched.
-- If no second quote exists, the loop runs to the end of the string.
DECLARE @In VARCHAR(50) = '"AAAA BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH IIIII"';

DECLARE @Pos INT = 1,
        @QuotesSeen SMALLINT = 0,
        @Current CHAR(1);

WHILE @Pos <= LEN(@In) AND @QuotesSeen < 2
BEGIN
    SET @Current = SUBSTRING(@In, @Pos, 1);

    IF @Current = '"'
        SET @QuotesSeen += 1;
    ELSE IF @Current = ' ' AND @QuotesSeen = 1
        SET @In = STUFF(@In, @Pos, 1, '_');

    SET @Pos += 1;
END;

PRINT @In;

输出:

"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH IIIII"

答案 2 :(得分:1)

我会采用与您上一个答案不同的方法,并避免使用循环。第一步是生成一个包含50个连续数字的列表(此数字应与varchar的长度匹配)。这样就可以把varchar拆分成更容易处理的行:

DECLARE @s VARCHAR(50) = '"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc';

-- Split @s into one row per character.
WITH Numbers (Number) AS
(
    -- The 10 x 5 cross join offers 50 rows; TOP keeps LEN(@s) of them.
    SELECT TOP (LEN(@s))
           ROW_NUMBER() OVER (ORDER BY Tens.N)
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS Tens (N)
    CROSS JOIN (VALUES (1), (1), (1), (1), (1)) AS Fives (N)
)
SELECT
    Number,
    SUBSTRING(@s, Number, 1) AS Letter
FROM Numbers;

这就是:

Number | Letter
-------+--------
   1   |   "
   2   |   A
   3   |   A

以类似的方式,您可以用它来获得引号的"分组":

DECLARE @s VARCHAR(50) = '"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc';

-- List the positions of the double quotes and number them in pairs:
-- the 1st and 2nd quote get GroupID 1, the 3rd and 4th get GroupID 2, ...
WITH Numbers (Number) AS
(
    -- The 10 x 5 cross join offers 50 rows; TOP keeps LEN(@s) of them.
    SELECT TOP (LEN(@s))
           ROW_NUMBER() OVER (ORDER BY Tens.N)
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS Tens (N)
    CROSS JOIN (VALUES (1), (1), (1), (1), (1)) AS Fives (N)
)
SELECT
    Number,
    CEILING(ROW_NUMBER() OVER (ORDER BY Number) / 2.0) AS GroupID
FROM Numbers
WHERE SUBSTRING(@s, Number, 1) = '"';

给出了:

Number | GroupID
-------+--------
   1   |    1
  11   |    1
  29   |    2

您可以按GroupID分组来获取引号组的开头和结尾位置:

DECLARE @s VARCHAR(50) = '"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc';

WITH Numbers (Number) AS
(
    -- The 10 x 5 cross join offers 50 rows; TOP keeps LEN(@s) of them.
    SELECT TOP (LEN(@s))
           ROW_NUMBER() OVER (ORDER BY Tens.N)
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS Tens (N)
    CROSS JOIN (VALUES (1), (1), (1), (1), (1)) AS Fives (N)
),
QuoteMarks AS
(
    -- Positions of the double quotes, paired off two at a time
    -- (integer division: quotes 1 and 2 -> group 1, quotes 3 and 4 -> group 2, ...).
    SELECT
        Number,
        (ROW_NUMBER() OVER (ORDER BY Number) + 1) / 2 AS GroupID
    FROM Numbers
    WHERE SUBSTRING(@s, Number, 1) = '"'
)
SELECT
    MIN(Number) AS StartPosition,
    MAX(Number) AS EndPosition
FROM QuoteMarks
GROUP BY GroupID
HAVING COUNT(*) = 2; -- keep only groups that have both an opening and a closing quote

现在您可以将其恢复为原始分割,并使用下划线替换此范围内的任何空格:

DECLARE @s VARCHAR(50) = '"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc';

WITH Numbers (Number) AS
(
    -- The 10 x 5 cross join offers 50 rows; TOP keeps LEN(@s) of them.
    SELECT TOP (LEN(@s))
           ROW_NUMBER() OVER (ORDER BY Tens.N)
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS Tens (N)
    CROSS JOIN (VALUES (1), (1), (1), (1), (1)) AS Fives (N)
),
Letters AS
(
    -- One row per character of @s.
    SELECT
        Number,
        SUBSTRING(@s, Number, 1) AS Letter
    FROM Numbers
),
QuoteMarks AS
(
    -- Positions of the double quotes, paired off two at a time.
    SELECT
        Number,
        (ROW_NUMBER() OVER (ORDER BY Number) + 1) / 2 AS GroupID
    FROM Letters
    WHERE Letter = '"'
),
Groups AS
(
    -- Start/end position of every quote pair that is actually closed.
    SELECT
        MIN(Number) AS StartPosition,
        MAX(Number) AS EndPosition
    FROM QuoteMarks
    GROUP BY GroupID
    HAVING COUNT(*) = 2
)
SELECT
    l.Number,
    CASE
        WHEN l.Letter = ' ' AND g.StartPosition IS NOT NULL THEN '_' -- space inside a closed pair
        ELSE l.Letter
    END AS Letter
FROM Letters AS l
LEFT JOIN Groups AS g
    ON l.Number BETWEEN g.StartPosition AND g.EndPosition;

给出了:

Number | Letter
-------+--------
   1   |   "
   2   |   A
   3   |   A
   4   |   A
   5   |   A
   6   |   _
   7   |   B
   8   |   B
   9   |   B
  10   |   B
  11   |   "
  12   |   

然后,您可以使用FOR XML concatenation重建原始字符串:

DECLARE @s VARCHAR(50) = '"AAAA_BBBB"  CCCC DDDD EEEE "FFFF GGGG HHHH cccc';

WITH Numbers (Number) AS
(
    -- The 10 x 5 cross join offers 50 rows; TOP keeps LEN(@s) of them.
    SELECT TOP (LEN(@s))
           ROW_NUMBER() OVER (ORDER BY Tens.N)
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS Tens (N)
    CROSS JOIN (VALUES (1), (1), (1), (1), (1)) AS Fives (N)
),
Letters AS
(
    -- One row per character of @s.
    SELECT
        Number,
        SUBSTRING(@s, Number, 1) AS Letter
    FROM Numbers
),
QuoteMarks AS
(
    -- Positions of the double quotes, paired off two at a time.
    SELECT
        Number,
        (ROW_NUMBER() OVER (ORDER BY Number) + 1) / 2 AS GroupID
    FROM Letters
    WHERE Letter = '"'
),
Groups AS
(
    -- Start/end position of every quote pair that is actually closed.
    SELECT
        MIN(Number) AS StartPosition,
        MAX(Number) AS EndPosition
    FROM QuoteMarks
    GROUP BY GroupID
    HAVING COUNT(*) = 2
)
SELECT
    (
        -- The CASE column is deliberately left unnamed so that FOR XML PATH('')
        -- emits bare text, which .value() then reads back as a single string.
        SELECT
            CASE
                WHEN l.Letter = ' ' AND g.StartPosition IS NOT NULL THEN '_'
                ELSE l.Letter
            END
        FROM Letters AS l
        LEFT JOIN Groups AS g
            ON l.Number BETWEEN g.StartPosition AND g.EndPosition
        ORDER BY l.Number
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(50)') AS String;

最后,我将逻辑包含在内联表值函数中:

-- dbo.YourFunctionName
-- Purpose : Replace every space that lies between a matched (opened AND closed)
--           pair of double quotes in @s with an underscore. Text outside quotes,
--           and text after a quote that is never closed, is returned unchanged.
-- Param   : @s  VARCHAR(50) - the text to process.
-- Returns : a one-row table with a single column, String (VARCHAR(50)).
--           An empty or NULL @s yields no Letters rows, so String is NULL.
-- Note    : LEN() does not count trailing spaces, so trailing blanks of @s are
--           not part of the rebuilt string.
CREATE FUNCTION dbo.YourFunctionName (@s VARCHAR(50))
RETURNS TABLE
AS
RETURN
(   WITH Candidates (Number) AS -- 10 x 5 = 50 rows: one per possible position in a VARCHAR(50)
    (   SELECT  ROW_NUMBER() OVER(ORDER BY N1.N)
        FROM    (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) N1 (N)
                CROSS JOIN (VALUES (1), (1), (1), (1), (1)) N2 (N)
    ), Numbers AS
    (   -- Keep positions 1..LEN(@s) with a filter rather than TOP: TOP without
        -- ORDER BY returns rows in an undefined order, so it does not guarantee
        -- that exactly the numbers 1..LEN(@s) come back.
        SELECT  Number
        FROM    Candidates
        WHERE   Number <= LEN(@s)
    ), Letters AS
    (   -- One row per character of @s.
        SELECT  Number,
                Letter = SUBSTRING(@s, Number, 1)
        FROM    Numbers
    ), Grouping AS
    (   -- Positions of the double quotes, paired off two at a time:
        -- quotes 1 and 2 share GroupID 1, quotes 3 and 4 share GroupID 2, ...
        SELECT  Number,
                GroupID = CEILING(ROW_NUMBER() OVER(ORDER BY Number) / 2.0)
        FROM    Letters
        WHERE   Letter = '"'
    ), Groups AS
    (   -- Start/end position of each pair; a trailing unmatched quote forms a
        -- group of one and is dropped by the HAVING clause.
        SELECT  StartPosition = MIN(Number),
                EndPosition = MAX(Number)
        FROM    Grouping
        GROUP BY GroupID
        HAVING COUNT(*) = 2
    )
    -- Rebuild the string in position order. The CASE column is left unnamed so
    -- FOR XML PATH('') emits bare text, which .value() reads back as one string.
    SELECT  String = (  SELECT  CASE WHEN g.StartPosition IS NOT NULL AND l.Letter = ' ' THEN '_' ELSE l.Letter END
                        FROM    Letters AS l
                                LEFT JOIN Groups AS g
                                    ON g.StartPosition <= l.Number
                                    AND g.EndPosition >= l.Number
                        ORDER BY l.Number
                        FOR XML PATH(''), TYPE
                    ).value('.', 'VARCHAR(50)')
);

可以称之为:

-- Run the function against four sample strings: with and without a closing
-- quote on the last run, and with one or two closed runs before it.
SELECT
    samples.s,
    f.String AS Replaced
FROM (VALUES
        ('"AAAA_BBBB" CCCC DDDD EEEE "FFFF GGGG HHHH cccc'),
        ('"AAAA_BBBB" CCCC DDDD EEEE "FFFF GGGG HHHH cccc"'),
        ('"AAAA_BBBB" CCCC "DDDD EEEE" "FFFF GGGG HHHH cccc'),
        ('"AAAA_BBBB" CCCC "DDDD EEEE" "FFFF GGGG HHHH cccc"')
     ) AS samples (s)
OUTER APPLY dbo.YourFunctionName(samples.s) AS f;

这看起来可能复杂得多,但希望我已经把逻辑解释清楚了。使用内联表值函数的最大优点是其定义会被展开到主查询中,而不像标量UDF那样以类似游标的方式逐行执行。