用于生成slugs的T-SQL函数?

时间:2010-06-21 06:44:33

标签: tsql slug

想快速问一下,是否有人知道能够从给定的nvarchar输入生成slug的T-SQL函数。例如:

  

“Hello World” > “hello-world”
  “This is a test” > “this-is-a-test”

我有一个C#函数,通常用它来完成这类工作,但这次有大量数据需要解析并转换成slug,所以直接在SQL Server上执行比通过网络来回传输数据更合理。

顺便说一下,我没有该服务器的远程桌面访问权限,因此无法在上面运行代码(.NET、PowerShell等)。

提前致谢。

编辑: 根据要求,这里是我通常用来生成slugs的函数:

/// <summary>
/// Converts free text into a URL slug: lower-case ASCII letters and digits,
/// with words joined by single hyphens.
/// </summary>
/// <param name="n">Text to convert. Must not be null.</param>
/// <param name="maxLength">
/// Maximum number of characters kept before spaces are turned into hyphens.
/// </param>
/// <returns>The slug, e.g. "Hello World" becomes "hello-world".</returns>
public static string GenerateSlug(string n, int maxLength)
{
    // Invariant casing keeps the result independent of the current culture
    // (e.g. Turkish 'I' would otherwise lower-case to a non-ASCII letter and be dropped).
    string s = n.ToLowerInvariant();

    // Drop everything that is not a letter, digit, whitespace or hyphen.
    s = Regex.Replace(s, @"[^a-z0-9\s-]", "");

    // Collapse each run of whitespace/hyphens into one space, then trim the ends.
    s = Regex.Replace(s, @"[\s-]+", " ").Trim();

    // Cut to the requested length; trim again in case the cut landed on a space.
    s = s.Substring(0, s.Length <= maxLength ? s.Length : maxLength).Trim();

    // The remaining single spaces become the hyphen separators.
    s = Regex.Replace(s, @"\s", "-");
    return s;
}

7 个答案:

答案 0 :(得分:13)

您可以使用LOWER和REPLACE执行此操作:

-- Preview the slug of every row: lower-case the text, then turn each space into a hyphen.
SELECT
    REPLACE(LOWER(src.origString), ' ', '-')
FROM myTable AS src

对于整列的批量更新(下面的代码根据origString列的值设置slug列):

-- Bulk update: intentionally has no WHERE clause, every row's slug is recomputed
-- from origString (lower-cased, spaces replaced by hyphens).
UPDATE tgt
SET slug = REPLACE(LOWER(tgt.origString), ' ', '-')
FROM myTable AS tgt

答案 1 :(得分:9)

这就是我提出的解决方案。随意修复/修改需要的地方。

我应该提一下,我所使用的数据库是不区分大小写的,所以才用了LOWER(@str)。

-- Builds a URL slug from @str: lower-cases it, removes every character that is
-- not 0-9, a-z or a space, collapses runs of spaces, trims the ends and joins
-- the remaining words with hyphens.
--   @str    text to convert (at most 100 characters)
--   returns the slug, e.g. 'Hello World' -> 'hello-world'; NULL input gives NULL
CREATE FUNCTION [dbo].[UDF_GenerateSlug]
(
    @str VARCHAR(100)
)
RETURNS VARCHAR(100)
AS
BEGIN
    -- One consistent spelling of the variable name: mixed casing fails to
    -- compile on servers with a case-sensitive collation.
    DECLARE @IncorrectCharLoc SMALLINT

    SET @str = LOWER(@str)

    -- A binary collation makes the a-z range mean exactly the 26 ASCII letters;
    -- under dictionary-order collations accented letters sort inside a-z and
    -- would be kept.
    SET @IncorrectCharLoc = PATINDEX('%[^0-9a-z ]%', @str COLLATE Latin1_General_BIN)
    WHILE @IncorrectCharLoc > 0
    BEGIN
        SET @str = STUFF(@str, @IncorrectCharLoc, 1, '')
        SET @IncorrectCharLoc = PATINDEX('%[^0-9a-z ]%', @str COLLATE Latin1_General_BIN)
    END

    -- Collapse runs of spaces and trim the ends so the slug never contains
    -- doubled, leading or trailing hyphens.
    WHILE CHARINDEX('  ', @str) > 0
    BEGIN
        SET @str = REPLACE(@str, '  ', ' ')
    END
    SET @str = LTRIM(RTRIM(@str))

    SET @str = REPLACE(@str, ' ', '-')
    RETURN @str
END

参考:原始代码见 http://blog.sqlauthority.com/2007/05/13/sql-server-udf-function-to-parse-alphanumeric-characters-from-string/ 。

答案 2 :(得分:4)

以下是Jeremy回答的一个变体。从技术上讲,这可能算不上严格的slug化,因为我做了一些自定义处理,比如把“.”替换为“-dot-”,并删除撇号。主要的改进是它还会压缩所有连续的空格,并且不会去掉原本就存在的破折号。

-- Slug variant with two custom rules: '.' is spelled out as the word "dot"
-- and apostrophes are removed. Every other character outside 0-9, a-z, space
-- and hyphen becomes a space; runs of spaces are collapsed, the ends are
-- trimmed and the remaining spaces become hyphens. Hyphens already present in
-- the input are kept as they are.
--   @str    text to convert
--   returns the slug, e.g. N'Jeremy''s site.com' -> N'jeremys-site-dot-com'
create function dbo.Slugify(@str nvarchar(max)) returns nvarchar(max)
as
begin
    -- one consistent spelling of the variable name: mixed casing fails to
    -- compile on servers with a case-sensitive collation
    declare @IncorrectCharLoc int
    set @str = replace(replace(lower(@str),'.',' dot '),'''','')

    -- replace non alphanumerics with a space; the binary collation makes a-z
    -- mean exactly the ASCII letters (dictionary collations also let accented
    -- letters through)
    set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str collate Latin1_General_BIN)
    while @IncorrectCharLoc > 0
    begin
        set @str = stuff(@str,@IncorrectCharLoc,1,' ')
        set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str collate Latin1_General_BIN)
    end

    -- remove consecutive spaces:
    while charindex('  ',@str) > 0
    begin
        set @str = replace(@str, '  ', ' ')
    end

    -- trim so that edge spaces do not turn into leading/trailing hyphens
    set @str = ltrim(rtrim(@str))

    set @str = replace(@str,' ','-')
    return @str
end

答案 3 :(得分:3)

我在Jeremy回答的基础上更进一步:在替换空格之后删除所有连续的破折号,并去掉开头和结尾的破折号。

-- Slug generator: lower-cases @str, turns '.' into a hyphen, removes
-- apostrophes, replaces every other character outside 0-9, a-z, space and
-- hyphen with a space, converts spaces to hyphens, collapses runs of hyphens
-- and strips a leading/trailing hyphen.
--   @str    text to convert
--   returns the slug, e.g. N'  Hello -- World.  ' -> N'hello-world'
create function dbo.Slugify(@str nvarchar(max)) returns nvarchar(max) as
begin
    -- one consistent spelling of the variable name: mixed casing fails to
    -- compile on servers with a case-sensitive collation
    declare @IncorrectCharLoc int
    set @str = replace(replace(lower(@str),'.','-'),'''','')

    -- replace non alphanumerics with a space; the binary collation makes a-z
    -- mean exactly the ASCII letters (dictionary collations also let accented
    -- letters through)
    set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str collate Latin1_General_BIN)
    while @IncorrectCharLoc > 0
    begin
        set @str = stuff(@str,@IncorrectCharLoc,1,' ')
        set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str collate Latin1_General_BIN)
    end

    -- replace all spaces with dashes
    set @str = replace(@str,' ','-')

    -- remove consecutive dashes:
    while charindex('--',@str) > 0
    begin
        set @str = replace(@str, '--', '-')
    end

    -- after collapsing, at most one dash can remain at each end
    if left(@str, 1) = '-'
    begin
        set @str = stuff(@str, 1, 1, '')
    end

    if right(@str, 1) = '-'
    begin
        set @str = left(@str, len(@str) - 1)
    end

    return @str
end

答案 4 :(得分:3)

我知道这是一个旧线程,但对于下一代,我发现一个功能甚至可以处理重音here

-- Builds a URL slug from @string: lower-cases it, removes every character that
-- the pattern [^a-z0-9 -] flags, trims the ends, turns spaces into hyphens and
-- collapses doubled hyphens.
--   @string  text to convert (at most 4000 characters)
--   returns  the slug, e.g. 'Hello,  World!' -> 'hello-world'
CREATE function [dbo].[slugify](@string varchar(4000)) 
    RETURNS varchar(4000) AS BEGIN 
    declare @out varchar(4000)
    declare @pi int 

    -- The original author describes this collation cast as "convert to ASCII";
    -- NOTE(review): presumably it relies on the code-page conversion mapping
    -- accented letters to their base letters - confirm on the target server.
    set @out = lower(@string COLLATE SQL_Latin1_General_CP1251_CS_AS)

    -- T-SQL has no regex, so locate offending characters one at a time.
    -- Remove by position with STUFF: a collation-aware REPLACE of the character
    -- may leave the string unchanged when the collation ignores that character,
    -- and the loop would then never finish.
    set @pi = patindex('%[^a-z0-9 -]%',@out)
    while @pi>0 begin
        set @out = stuff(@out, @pi, 1, '')
        set @pi = patindex('%[^a-z0-9 -]%',@out)
    end

    set @out = ltrim(rtrim(@out))

    -- replace space to hyphen   
    set @out = replace(@out, ' ', '-')

    -- remove double hyphen
    while CHARINDEX('--', @out) > 0 set @out = replace(@out, '--', '-')

    return (@out)
END

答案 5 :(得分:0)

-- Converts a title such as "This is a Test" to an all lower case string such
-- as "this-is-a-test" for use as the slug in a URL.  All runs of separators
-- (whitespace, underscore, or hyphen) are converted to a single hyphen.
-- This is implemented as a state machine having the following four states:
--
--     0 - initial state
--     1 - in a sequence consisting of valid characters (a-z, A-Z, or 0-9)
--     2 - in a sequence of separators (whitespace, underscore, or hyphen)
--     3 - encountered a character that is neither valid nor a separator
--
-- Once the next state has been determined, the return value string is
-- built based on the transitions from the current state to the next state.
--
-- State 0 skips everything before the first valid character.  State 1
-- includes all valid slug characters.  State 2 converts multiple separators
-- into a single hyphen and skips trailing whitespace.  State 3 skips any
-- punctuation between characters and, if no additional whitespace is
-- encountered, then the punctuation is not treated as a word separator.
--
-- Parameters:
--     @title - the text to convert
-- Returns:
--     the slug; an empty string when @title has no valid characters or is NULL
--
CREATE FUNCTION ToSlug(@title AS NVARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @retval AS VARCHAR(MAX) = ''; -- return value
    DECLARE @i AS INT = 1;                -- title index
    DECLARE @c AS CHAR(1);                -- current character
    DECLARE @state AS INT = 0;            -- current state
    DECLARE @nextState AS INT;            -- next state
    DECLARE @tab AS CHAR(1) = CHAR(9);    -- tab
    DECLARE @lf AS CHAR(1) = CHAR(10);    -- line feed
    DECLARE @cr AS CHAR(1) = CHAR(13);    -- carriage return
    DECLARE @separators AS CHAR(8) = '[' + @tab + @lf + @cr + ' _-]';
    DECLARE @validchars AS CHAR(11) = '[a-zA-Z0-9]';
    DECLARE @titleLen AS INT = LEN(@title); -- loop bound, computed once

    WHILE (@i <= @titleLen)
    BEGIN
        -- Each step depends on the previous one, so use separate SET
        -- statements: the evaluation order of several assignments inside a
        -- single SELECT is not guaranteed by SQL Server.
        SET @c = SUBSTRING(@title, @i, 1);

        -- Decide the next state from the current character and state.
        SET @nextState = CASE
            WHEN @c LIKE @validchars THEN 1
            WHEN @state = 0 THEN 0
            WHEN @state = 1 THEN CASE
                WHEN @c LIKE @separators THEN 2
                ELSE 3 -- unknown character
                END
            WHEN @state = 2 THEN 2
            WHEN @state = 3 THEN CASE
                WHEN @c LIKE @separators THEN 2
                ELSE 3 -- stay in state 3
                END
            END;

        -- Only valid characters are emitted; a hyphen is inserted when a valid
        -- character follows a run of separators.
        SET @retval = @retval + CASE
            WHEN @nextState != 1 THEN ''
            WHEN @state = 2 THEN '-' + LOWER(@c)
            ELSE LOWER(@c)
            END;

        SET @state = @nextState;
        SET @i = @i + 1;
    END
    RETURN @retval;
END

答案 6 :(得分:0)

To slug with Vietnamese unicode    

-- Slug generator for Vietnamese text: lower-cases @string, maps each accented
-- Vietnamese letter to its unaccented base letter and a few punctuation marks
-- to hyphens (position i of the source list maps to position i of the target
-- list), trims the ends, turns spaces into hyphens and collapses doubled hyphens.
--   @string  text to convert (at most 4000 characters)
--   returns  the slug converted to varchar(4000)
CREATE function [dbo].[toslug](@string nvarchar(4000)) 
    RETURNS varchar(4000) AS BEGIN 
    declare @slug nvarchar(4000)
    declare @source_chars nvarchar(255)
    declare @target_chars varchar(255)
    declare @pos int
    declare @source_len int

    set @source_chars = N'ýỳỷỹỵáàảãạâấầẩẫậăắằẳẵặéèẻẽẹêếềểễệúùủũụưứừửữựíìỉĩịóòỏõọơớờởỡợôốồổỗộđ·/_,:;'
    set @target_chars = 'yyyyyaaaaaaaaaaaaaaaaaeeeeeeeeeeeuuuuuuuuuuuiiiiioooooooooooooooood------'
    set @source_len = len(@source_chars)

    set @slug = lower(@string)

    -- apply the character-by-character translation table
    set @pos = 1
    while @pos <= @source_len
    begin
        set @slug = replace(
            @slug,
            substring(@source_chars, @pos, 1),
            substring(@target_chars, @pos, 1)
        )
        set @pos = @pos + 1
    end

    -- trim, then use hyphens as the word separator
    set @slug = replace(ltrim(rtrim(@slug)), ' ', '-')

    -- squeeze runs of hyphens down to one
    while CHARINDEX('--', @slug) > 0
    begin
        set @slug = replace(@slug, '--', '-')
    end

    return (@slug)
END