想快速问一下,是否有人有或者知道能够从给定的 nvarchar 输入生成 slug 的 T-SQL 函数。例如:
“Hello World” > “hello-world”
“This is a test” > “this-is-a-test”
我有一个C#函数,通常用于这类用途,但这次我有大量数据需要解析并转换成slug,所以直接在SQL Server上执行比通过网络传输数据更合理。
顺便说一下,我没有该服务器的远程桌面访问权限,因此无法在其上运行代码(.NET、PowerShell等)。
提前致谢。
编辑: 根据要求,这里是我通常用来生成slugs的函数:
/// <summary>
/// Builds a URL slug from <paramref name="n"/>: lower-cases it, drops every character
/// that is not a-z, 0-9, whitespace or a hyphen, collapses runs of whitespace/hyphens
/// into one space, truncates to <paramref name="maxLength"/> characters and finally
/// turns the remaining whitespace into hyphens.
/// </summary>
/// <param name="n">Text to convert; must not be null.</param>
/// <param name="maxLength">Maximum slug length, applied before spaces become hyphens.</param>
/// <returns>The slug, e.g. "Hello World" becomes "hello-world".</returns>
public static string GenerateSlug(string n, int maxLength)
{
    // Invariant lower-casing keeps the a-z filter below independent of the current culture.
    string s = n.ToLowerInvariant();
    // Strip everything except letters, digits, whitespace and hyphens.
    s = Regex.Replace(s, @"[^a-z0-9\s-]", "");
    // Collapse each run of whitespace/hyphens into a single space and trim the ends.
    s = Regex.Replace(s, @"[\s-]+", " ").Trim();
    // Truncate, then trim again in case the cut landed right after a space.
    s = s.Substring(0, s.Length <= maxLength ? s.Length : maxLength).Trim();
    // Word separators become hyphens.
    s = Regex.Replace(s, @"\s", "-");
    return s;
}
答案 0 :(得分:13)
-- Preview the slug for every row of myTable: the text of origString is
-- lower-cased and each space is turned into a hyphen.
SELECT
    LOWER(REPLACE(origString, ' ', '-'))
FROM myTable
对于列的批量更新(代码根据 origString 列的值设置 slug 列):
-- Bulk-populate the slug column from origString for every row of myTable
-- (no WHERE clause on purpose): lower-case the text and turn each space
-- into a hyphen.
UPDATE myTable
SET
    slug = LOWER(REPLACE(origString, ' ', '-'))
答案 1 :(得分:9)
这就是我提出的解决方案。随意修复/修改需要的地方。
我应该提到我正在开发的数据库是不区分大小写的,因此是LOWER(@str)。
-- Builds a slug from @str: lower-cases the input, deletes every character that
-- PATINDEX reports as outside [0-9a-z ], then turns each remaining space into
-- a hyphen.
-- Parameters: @str - text to convert (VARCHAR(100)).
-- Returns:    the slug as VARCHAR(100). A NULL input returns NULL because the
--             cleaning loop is never entered.
-- Note:       runs of spaces are not collapsed, so 'a  b' yields 'a--b'.
CREATE FUNCTION [dbo].[UDF_GenerateSlug]
(
    @str VARCHAR(100)
)
RETURNS VARCHAR(100)
AS
BEGIN
    -- Position of the next character that must be removed (0 when none is left).
    -- The name is spelled identically everywhere so the function also compiles
    -- on servers whose collation makes variable names case-sensitive.
    DECLARE @IncorrectCharLoc SMALLINT

    SET @str = LOWER(@str)
    SET @IncorrectCharLoc = PATINDEX('%[^0-9a-z ]%', @str)

    WHILE @IncorrectCharLoc > 0
    BEGIN
        -- Delete exactly the one offending character, then look for the next.
        SET @str = STUFF(@str, @IncorrectCharLoc, 1, '')
        SET @IncorrectCharLoc = PATINDEX('%[^0-9a-z ]%', @str)
    END

    SET @str = REPLACE(@str, ' ', '-')
    RETURN @str
END
答案 2 :(得分:4)
以下是Jeremy回答的一个变体。严格来说,这可能不算是纯粹的slug化,因为我做了一些自定义处理,比如把“.”替换为“-dot-”,并删除撇号。主要的改进是:它还会合并所有连续的空格,并且不会去除原本就存在的破折号。
-- Builds a slug from @str:
--   1. lower-cases the text, rewrites '.' as ' dot ' and deletes apostrophes;
--   2. replaces every character outside [0-9a-z -] with a space;
--   3. collapses runs of spaces into a single space;
--   4. turns each remaining space into a hyphen.
-- Hyphens already present in the input are kept.
-- Parameters: @str - text to convert.
-- Returns:    the slug; a NULL input returns NULL (no loop is entered).
create function dbo.Slugify(@str nvarchar(max)) returns nvarchar(max)
as
begin
    -- Spelled identically everywhere so the function also compiles on servers
    -- whose collation makes variable names case-sensitive.
    declare @IncorrectCharLoc int
    set @str = replace(replace(lower(@str),'.',' dot '),'''','')
    -- replace non alphanumerics with a space:
    set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str)
    while @IncorrectCharLoc > 0
    begin
        set @str = stuff(@str,@IncorrectCharLoc,1,' ')
        set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str)
    end
    -- remove consecutive spaces: search for TWO spaces and replace them with
    -- ONE. Searching for a single space would never terminate.
    while charindex('  ',@str) > 0
    begin
        set @str = replace(@str, '  ', ' ')
    end
    set @str = replace(@str,' ','-')
    return @str
end
答案 3 :(得分:3)
我甚至在替换空格后删除了所有连续破折号,并删除了前导和尾随破折号,从而使杰里米的反应更进一步。
-- Builds a slug from @str:
--   1. lower-cases the text, rewrites '.' as '-' and deletes apostrophes;
--   2. replaces every character outside [0-9a-z -] with a space;
--   3. turns every space into a hyphen;
--   4. collapses runs of hyphens into one;
--   5. strips leading and trailing hyphens.
-- Parameters: @str - text to convert.
-- Returns:    the slug; a NULL input returns NULL (no loop is entered).
create function dbo.Slugify(@str nvarchar(max)) returns nvarchar(max) as
begin
    -- Spelled identically everywhere so the function also compiles on servers
    -- whose collation makes variable names case-sensitive.
    declare @IncorrectCharLoc int
    set @str = replace(replace(lower(@str),'.','-'),'''','')
    -- replace non alphanumerics with a space:
    set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str)
    while @IncorrectCharLoc > 0
    begin
        set @str = stuff(@str,@IncorrectCharLoc,1,' ')
        set @IncorrectCharLoc = patindex('%[^0-9a-z -]%',@str)
    end
    -- replace all spaces with dashes
    set @str = replace(@str,' ','-')
    -- remove consecutive dashes:
    while charindex('--',@str) > 0
    begin
        set @str = replace(@str, '--', '-')
    end
    -- remove leading dashes
    while charindex('-', @str) = 1
    begin
        set @str = right(@str, len(@str) - 1)
    end
    -- remove trailing dashes (the len() guard stops once the string is empty)
    while len(@str) > 0 and substring(@str, len(@str), 1) = '-'
    begin
        set @str = left(@str, len(@str) - 1)
    end
    return @str
end
答案 4 :(得分:3)
我知道这是一个旧帖子,但为了方便后来的读者,我找到了一个甚至可以处理重音字符的函数:
-- Builds a slug from @string:
--   1. re-collates the text to SQL_Latin1_General_CP1251_CS_AS and lower-cases it;
--   2. deletes every character outside [a-z0-9 -];
--   3. trims leading/trailing spaces and turns the remaining spaces into hyphens;
--   4. collapses runs of hyphens into one.
-- NOTE(review): the re-collation is presumably what folds accented letters to
-- plain ones (the original author labelled it "convert to ASCII") - confirm on
-- the target server.
CREATE function [dbo].[slugify](@string varchar(4000))
RETURNS varchar(4000) AS BEGIN
    declare @slug varchar(4000)
    declare @badPos int

    set @slug = lower(@string COLLATE SQL_Latin1_General_CP1251_CS_AS)

    -- T-SQL has no regex replace, so locate one disallowed character at a time.
    while 1 = 1 begin
        set @badPos = patindex('%[^a-z0-9 -]%', @slug)
        if @badPos > 0
            -- REPLACE removes every occurrence of that character, not only the one found.
            set @slug = replace(@slug, substring(@slug, @badPos, 1), '')
        else
            break
    end

    -- trim the ends, then spaces become hyphens
    set @slug = replace(ltrim(rtrim(@slug)), ' ', '-')

    -- squeeze repeated hyphens
    while CHARINDEX('--', @slug) > 0
        set @slug = replace(@slug, '--', '-')

    return (@slug)
END
答案 5 :(得分:0)
-- Converts a title such as "This is a Test" to an all lower case string such
-- as "this-is-a-test" for use as the slug in a URL. All runs of separators
-- (whitespace, underscore, or hyphen) are converted to a single hyphen.
-- This is implemented as a state machine having the following four states:
--
-- 0 - initial state
-- 1 - in a sequence consisting of valid characters (a-z, A-Z, or 0-9)
-- 2 - in a sequence of separators (whitespace, underscore, or hyphen)
-- 3 - encountered a character that is neither valid nor a separator
--
-- Once the next state has been determined, the return value string is
-- built based on the transition from the current state to the next state.
--
-- State 0 skips everything before the first valid character. State 1
-- includes all valid slug characters. State 2 converts a run of separators
-- (and any other non-valid characters that follow them) into a single hyphen,
-- emitted only when another valid character arrives, so trailing separators
-- produce nothing. State 3 skips any punctuation between characters and, if
-- no separator is encountered, the punctuation is not treated as a word
-- separator.
--
-- Parameters: @title - text to convert.
-- Returns:    the slug as VARCHAR(MAX); an empty string when @title is NULL
--             or contains no valid characters.
--
CREATE FUNCTION ToSlug(@title AS NVARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @retval AS VARCHAR(MAX) = '';    -- return value
    DECLARE @i AS INT = 1;                   -- title index
    DECLARE @c AS CHAR(1);                   -- current character
    DECLARE @state AS INT = 0;               -- current state
    DECLARE @nextState AS INT;               -- next state
    DECLARE @tab AS CHAR(1) = CHAR(9);       -- tab
    DECLARE @lf AS CHAR(1) = CHAR(10);       -- line feed
    DECLARE @cr AS CHAR(1) = CHAR(13);       -- carriage return
    DECLARE @separators AS CHAR(8) = '[' + @tab + @lf + @cr + ' _-]';
    DECLARE @validchars AS CHAR(11) = '[a-zA-Z0-9]';

    WHILE (@i <= LEN(@title))
    BEGIN
        -- Each step depends on the previous one, so they are separate SET
        -- statements: a single multi-assignment SELECT does not guarantee
        -- the order in which its expressions are evaluated.
        SET @c = SUBSTRING(@title, @i, 1);

        SET @nextState = CASE
                WHEN @c LIKE @validchars THEN 1
                WHEN @state = 0 THEN 0
                WHEN @state = 1 THEN CASE
                        WHEN @c LIKE @separators THEN 2
                        ELSE 3 -- unknown character
                    END
                WHEN @state = 2 THEN 2
                WHEN @state = 3 THEN CASE
                        WHEN @c LIKE @separators THEN 2
                        ELSE 3 -- stay in state 3
                    END
            END;

        -- Output is produced only when entering/staying in state 1; coming
        -- from a separator run (state 2) first emits the single hyphen.
        IF @nextState = 1
            SET @retval = @retval
                + CASE WHEN @state = 2 THEN '-' ELSE '' END
                + LOWER(@c);

        SET @state = @nextState;
        SET @i = @i + 1;
    END

    RETURN @retval;
END
答案 6 :(得分:0)
To slug with Vietnamese unicode
-- Builds a slug from @string using a character translation table:
--   1. lower-cases the text;
--   2. replaces each character listed in @accented with the character at the
--      same position in @plain (accented letters to their base letter,
--      the listed punctuation to '-');
--   3. trims leading/trailing spaces and turns the remaining spaces into hyphens;
--   4. collapses runs of hyphens into one.
-- Characters that are not in the table are left as they are.
CREATE function [dbo].[toslug](@string nvarchar(4000))
RETURNS varchar(4000) AS BEGIN
    declare @accented nvarchar(255)   -- characters to translate
    declare @plain varchar(255)       -- replacement for the character at the same index
    declare @slug nvarchar(4000)
    declare @idx int

    set @accented = N'ýỳỷỹỵáàảãạâấầẩẫậăắằẳẵặéèẻẽẹêếềểễệúùủũụưứừửữựíìỉĩịóòỏõọơớờởỡợôốồổỗộđ·/_,:;'
    set @plain = 'yyyyyaaaaaaaaaaaaaaaaaeeeeeeeeeeeuuuuuuuuuuuiiiiioooooooooooooooood------'

    set @slug = lower(@string)

    -- walk the translation table one position at a time
    set @idx = 0
    while @idx < len(@accented) begin
        set @idx = @idx + 1
        set @slug = replace(@slug, substring(@accented, @idx, 1), substring(@plain, @idx, 1))
    end

    -- trim the ends, then spaces become hyphens
    set @slug = replace(ltrim(rtrim(@slug)), ' ', '-')

    -- squeeze repeated hyphens
    while CHARINDEX('--', @slug) > 0
        set @slug = replace(@slug, '--', '-')

    return (@slug)
END