SQL Server:根据输入字符串中的HTML名称进行HTML解码

时间:2014-08-21 17:51:43

标签: sql sql-server sql-server-2012

我正在尝试使用下面的SQL将 &amp;、&quot; 等HTML名称(HTML实体)转换为等效的CHAR值。我在SQL Server 2012中对此进行了测试。

测试1(这很好):

GO
-- Test 1: decode every '&amp;' entity in @inputString back to a literal '&'.
-- The loop repeats the replacement so that multiply-encoded input
-- (for example '&amp;amp;') is reduced all the way down to a single '&'.
DECLARE @inputString VARCHAR(MAX) = '&amp;testString&amp;';
DECLARE @startIndex INT,
        @resultString VARCHAR(MAX);

SET @resultString = LTRIM(RTRIM(@inputString));
SET @startIndex = PATINDEX('%&amp;%', @resultString);

WHILE @startIndex > 0
BEGIN
    -- REPLACE rewrites all current occurrences in one call.
    SET @resultString = REPLACE(@resultString, '&amp;', '&');
    -- Re-check in a separate statement so the search always sees the
    -- already-updated string; a new '&amp;' may have been exposed.
    SET @startIndex = PATINDEX('%&amp;%', @resultString);
END

PRINT @resultString;
Go

输出:

&testString&

测试2(这不起作用): 由于上述工作,我试图扩展这个以处理更多字符如下:

-- Test 2: table-driven variant of Test 1. Each row maps an HTML entity name
-- to the character code it should be decoded to.
DECLARE @htmlNames TABLE (ID INT IDENTITY(1,1), asciiDecimal INT, htmlName varchar(50))
INSERT INTO @htmlNames
VALUES (34,'&quot;'),(38,'&amp;'),(60,'&lt;'),(62,'&gt;'),(160,'&nbsp;'),(161,'&iexcl;'),(162,'&cent;')
-- I would load the full list of HTML names into this TABLE variable, but removed for testing purposes
DECLARE @inputString VARCHAR(MAX)= '&amp;testString&amp;'
DECLARE @count INT = 0
DECLARE @id INT = 1
DECLARE @charCode INT, @htmlName VARCHAR(30)
DECLARE @codePos INT, @codeEncoded VARCHAR(7), @startIndex INT
        , @resultString varchar(max)
SELECT @count=COUNT(*) FROM @htmlNames

-- Walk the lookup table row by row, using the IDENTITY value as the cursor.
WHILE @id <=@count
BEGIN
    SELECT @charCode = asciiDecimal, @htmlname = htmlName
    FROM @htmlNames
    WHERE ID = @id

        -- NOTE: this assignment is the defect the question is about. Because it
        -- sits inside the outer loop, @resultString is reset to the untouched
        -- input on every row, discarding the replacements made for earlier rows.
        -- It is kept here on purpose so the snippet still reproduces the problem.
        SET @resultString = LTRIM(RTRIM(@inputString))
        SELECT @startIndex = PATINDEX('%' + @htmlName + '%', @resultString)
        -- Repeat until the current entity name no longer occurs in the string.
        While @startIndex > 0 
        BEGIN
            --PRINT @resultString + '|'  + @htmlName + '|' + NCHAR(@charCode)
            SELECT @resultString = REPLACE(@resultString, @htmlName, NCHAR(@charCode))
            SET @startIndex=PATINDEX('%' + @htmlName + '%', @resultString)
        END
        SET @id=@id + 1
END

PRINT @resultString

GO

输出:

&amp;testString&amp;

我无法弄清楚我哪里出错了?任何帮助将非常感激。

我不想将字符串值加载到应用程序层,然后应用HTMLDecode并保存回数据库。

编辑: 这一行SET @resultString = LTRIM(RTRIM(@inputString))位于WHILE内,因此我用@inputString覆盖了结果。谢谢你,YanireRomero。

我也喜欢@ RichardDeeming的解决方案,但在这种情况下它并不适合我的需要。

4 个答案:

答案 0 :(得分:16)

这是一个不需要循环的简单解决方案:

-- Lookup table: one row per HTML entity name and the character code it decodes to.
DECLARE @htmlNames TABLE 
(
    ID INT IDENTITY(1,1), 
    asciiDecimal INT, 
    htmlName varchar(50)
);

-- Explicit column list: ID is generated by IDENTITY and must not be supplied.
INSERT INTO @htmlNames (asciiDecimal, htmlName)
VALUES 
    (34,'&quot;'),
    (38,'&amp;'),
    (60,'&lt;'),
    (62,'&gt;'),
    (160,'&nbsp;'),
    (161,'&iexcl;'),
    (162,'&cent;')
;

-- The working strings are nvarchar because the replacement values come from
-- NCHAR(); keeping them Unicode avoids converting decoded characters back to
-- the database code page.
DECLARE @inputString nvarchar(max) = N'&amp;test&amp;quot;&lt;String&gt;&quot;&amp;';
DECLARE @resultString nvarchar(max) = @inputString;

-- Simple HTML-decode:
-- A single pass in which every row of @htmlNames applies one REPLACE to the
-- running value. The case-sensitive collation keeps entity names that differ
-- only by case (such as &Agrave; / &agrave;) apart.
-- NOTE(review): this relies on the assignment being evaluated once per row;
-- SQL Server's documentation advises caution with multi-row variable
-- assignment, so confirm the behaviour on your version.
-- NOTE(review): the result also depends on the order in which rows are applied.
SELECT
    @resultString = Replace(@resultString COLLATE Latin1_General_CS_AS, htmlName, NCHAR(asciiDecimal))
FROM
    @htmlNames
;

SELECT @resultString;
-- Output: &test&quot;<String>"&


-- Multiple HTML-decode:
-- Repeat the single pass until a pass changes nothing, so that input which
-- was encoded more than once (e.g. '&amp;quot;') is fully decoded.
SET @resultString = @inputString;

DECLARE @temp nvarchar(max) = N'';
WHILE @resultString != @temp
BEGIN
    -- Remember the value before this pass to detect when nothing changed.
    SET @temp = @resultString;

    SELECT
        @resultString = Replace(@resultString COLLATE Latin1_General_CS_AS, htmlName, NCHAR(asciiDecimal))
    FROM
        @htmlNames
    ;
END;

SELECT @resultString;
-- Output: &test"<String>"&

编辑:根据@tomasofen的建议改用NCHAR,并根据@TechyGypo的建议,为Replace函数添加了区分大小写的排序规则。

答案 1 :(得分:5)

为了提高性能,您不应该将其写成T-SQL语句或SQL标量值函数。.NET库提供了出色、快速,而且最重要的是可靠的HTML解码。在我看来,您应该将其实现为SQL CLR函数,如下所示:

using Microsoft.SqlServer.Server;
using System.Data.SqlTypes;
using System.Net;

/// <summary>
/// Container for SQL CLR scalar functions.
/// </summary>
public partial class UserDefinedFunctions
{
    /// <summary>
    /// HTML-decodes a string by delegating to <c>WebUtility.HtmlDecode</c>.
    /// </summary>
    /// <param name="input">The HTML-encoded text; may be SQL NULL.</param>
    /// <returns>The decoded text, or NULL when <paramref name="input"/> is NULL.</returns>
    [SqlFunction(
        DataAccess = DataAccessKind.None,
        SystemDataAccess = SystemDataAccessKind.None,
        IsDeterministic = true,
        IsPrecise = true)]
    [return: SqlFacet(MaxSize = 4000)]
    public static SqlString cfnHtmlDecode([SqlFacet(MaxSize = 4000)] SqlString input)
    {
        // Only touch input.Value when a value is actually present.
        if (!input.IsNull)
        {
            string decoded = WebUtility.HtmlDecode(input.Value);
            return decoded;
        }

        // NULL in, NULL out.
        return null;
    }
}

然后在你的T-SQL中,像这样调用它:

-- Example call of the CLR function from T-SQL; the schema, table and column
-- names are placeholders.
SELECT
    clr_schema.cfnHtmlDecode(column_name)
FROM
    table_schema.table_name

答案 2 :(得分:2)

嘿,这是一个赋值错误:

-- Lookup table mapping each HTML entity name to the character code it decodes to.
DECLARE @htmlNames TABLE (ID INT IDENTITY(1,1), asciiDecimal INT, htmlName varchar(50));
-- Explicit column list: ID is generated by IDENTITY and must not be supplied.
INSERT INTO @htmlNames (asciiDecimal, htmlName)
VALUES (34,'&quot;'),(38,'&amp;'),(60,'&lt;'),(62,'&gt;'),(160,'&nbsp;'),(161,'&iexcl;'),(162,'&cent;');
-- I would load the full list of HTML names into this TABLE variable, but removed for testing purposes
DECLARE @inputString VARCHAR(MAX) = '&amp;testString&amp;';
DECLARE @count INT = 0;
DECLARE @id INT = 1;
-- @htmlName is as wide as the htmlName column so no entity name is truncated.
DECLARE @charCode INT, @htmlName VARCHAR(50);
-- The result is nvarchar because the replacement characters come from NCHAR().
DECLARE @startIndex INT,
        @resultString NVARCHAR(MAX);
SELECT @count = COUNT(*) FROM @htmlNames;

-- Initialise the result once, before the loop, so replacements made for one
-- entity are still there when the next entity is processed.
SET @resultString = LTRIM(RTRIM(@inputString));

-- Walk the lookup table row by row, using the IDENTITY value as the cursor.
WHILE @id <= @count
BEGIN
    SELECT @charCode = asciiDecimal, @htmlName = htmlName
    FROM @htmlNames
    WHERE ID = @id;

    -- Case-sensitive collation: entity names such as &Agrave; and &agrave;
    -- differ only by case and must not match each other.
    SET @startIndex = PATINDEX('%' + @htmlName + '%', @resultString COLLATE Latin1_General_CS_AS);

    -- Repeat until the current entity name no longer occurs in the string.
    WHILE @startIndex > 0
    BEGIN
        SET @resultString = REPLACE(@resultString COLLATE Latin1_General_CS_AS, @htmlName, NCHAR(@charCode));
        SET @startIndex = PATINDEX('%' + @htmlName + '%', @resultString COLLATE Latin1_General_CS_AS);
    END

    SET @id = @id + 1;
END

PRINT @resultString;

GO

这一行SET @resultString = LTRIM(RTRIM(@inputString))原本位于WHILE循环内部,所以每次迭代都会用@inputString覆盖之前的结果。

希望它有所帮助。

答案 3 :(得分:2)

对“Richard Deeming”回答的一点补充:下面列出了更多的HTML名称,方便以后想用更多代码扩展该功能的访问者直接复制,省去手动输入:

-- Extended entity list for the @htmlNames lookup table (entity name -> character code).
-- Assumes @htmlNames is declared with an IDENTITY ID column plus asciiDecimal
-- and htmlName, as in the answer this snippet extends; the explicit column
-- list lets the IDENTITY column fill itself in.
-- Several names below differ only by case (e.g. &Agrave; / &agrave;), so the
-- decoding REPLACE must use a case-sensitive collation.
INSERT INTO @htmlNames (asciiDecimal, htmlName)
    VALUES 
        -- Characters that are significant in markup
        (34,'&quot;'),
        (38,'&amp;'),
        (60,'&lt;'),
        (62,'&gt;'),

        -- Code points 160-175
        (160, '&nbsp;'),
        (161, '&iexcl;'),
        (162, '&cent;'),
        (163, '&pound;'),
        (164, '&curren;'),
        (165, '&yen;'),
        (166, '&brvbar;'),
        (167, '&sect;'),
        (168, '&uml;'),
        (169, '&copy;'),
        (170, '&ordf;'),
        (171, '&laquo;'),
        (172, '&not;'),
        (173, '&shy;'),
        (174, '&reg;'),
        (175, '&macr;'),

        -- Code points 176-191
        (176, '&deg;'),
        (177, '&plusmn;'),
        (178, '&sup2;'),
        (179, '&sup3;'),
        (180, '&acute;'),
        (181, '&micro;'),
        (182, '&para;'),
        (183, '&middot;'),
        (184, '&cedil;'),
        (185, '&sup1;'),
        (186, '&ordm;'),
        (187, '&raquo;'),
        (188, '&frac14;'),
        (189, '&frac12;'),
        (190, '&frac34;'),
        (191, '&iquest;'),

        -- Code points 192-207
        (192, '&Agrave;'),
        (193, '&Aacute;'),
        (194, '&Acirc;'),
        (195, '&Atilde;'),
        (196, '&Auml;'),
        (197, '&Aring;'),
        (198, '&AElig;'),
        (199, '&Ccedil;'),
        (200, '&Egrave;'),
        (201, '&Eacute;'),
        (202, '&Ecirc;'),
        (203, '&Euml;'),
        (204, '&Igrave;'),
        (205, '&Iacute;'),
        (206, '&Icirc;'),
        (207, '&Iuml;'),

        -- Code points 208-223
        (208, '&ETH;'),
        (209, '&Ntilde;'),
        (210, '&Ograve;'),
        (211, '&Oacute;'),
        (212, '&Ocirc;'),
        (213, '&Otilde;'),
        (214, '&Ouml;'),
        (215, '&times;'),
        (216, '&Oslash;'),
        (217, '&Ugrave;'),
        (218, '&Uacute;'),
        (219, '&Ucirc;'),
        (220, '&Uuml;'),
        (221, '&Yacute;'),
        (222, '&THORN;'),
        (223, '&szlig;'),

        -- Code points 224-239
        (224, '&agrave;'),
        (225, '&aacute;'),
        (226, '&acirc;'),
        (227, '&atilde;'),
        (228, '&auml;'),
        (229, '&aring;'),
        (230, '&aelig;'),
        (231, '&ccedil;'),
        (232, '&egrave;'),
        (233, '&eacute;'),
        (234, '&ecirc;'),
        (235, '&euml;'),
        (236, '&igrave;'),
        (237, '&iacute;'),
        (238, '&icirc;'),
        (239, '&iuml;'),

        -- Code points 240-255
        (240, '&eth;'),
        (241, '&ntilde;'),
        (242, '&ograve;'),
        (243, '&oacute;'),
        (244, '&ocirc;'),
        (245, '&otilde;'),
        (246, '&ouml;'),
        (247, '&divide;'),
        (248, '&oslash;'),
        (249, '&ugrave;'),
        (250, '&uacute;'),
        (251, '&ucirc;'),
        (252, '&uuml;'),
        (253, '&yacute;'),
        (254, '&thorn;'),
        (255, '&yuml;'),

        -- Code above 255: decode with NCHAR() rather than CHAR()
        (8364, '&euro;');

编辑:

如果您希望欧元符号有效(并且通常ASCII代码超过255),则需要在Richard Deeming代码中使用NCHAR而不是CHAR。