我想在T-SQL中把多个非ASCII字符替换为一个空格,或者直接把这些字符删除。
The string 'øsmeøø' should be replaced with 'sme'.
我尝试使用以下内容
-- Removes only the FIRST character in the CHAR(127)-CHAR(255) range:
-- PATINDEX reports a single position, so STUFF deletes one character per call.
-- The pattern literal is closed on the same line it is opened; a line break
-- inside the quotes would become part of the pattern itself.
SELECT STUFF('smeøø string',
             PATINDEX('%[' + CHAR(127) + '-' + CHAR(255) + ']%' COLLATE Latin1_General_100_BIN2,
                      'smeøø string'),
             1,
             '');
这将返回以下内容,但我希望它继续直到Patindex为0
smeø string
答案 0 :(得分:0)
于20190419更新,以演示不需要您创建NGrams8K函数的解决方案
首先获取NGrams8K的副本
删除“不良”字符:
-- Sample input containing characters outside the 0-126 range.
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2;

-- Remove: split @string into single characters with dbo.ngrams8k, keep only
-- those whose ASCII code is below 127, and glue them back together in their
-- original order.
SELECT
    (
        SELECT gram.token + ''          -- unnamed expression => bare text in the XML
        FROM dbo.ngrams8k(@string, 1) AS gram
        WHERE ASCII(gram.token) < 127
        ORDER BY gram.position
        FOR XML PATH(''), TYPE
    ).value('(text())[1]', 'VARCHAR(8000)');   -- read the concatenated text back as VARCHAR
用空格替换“坏”字符:
-- Replace: same split/concatenate approach, but every character whose ASCII
-- code is 127 or above is swapped for a single space instead of being dropped.
SELECT
    (
        SELECT
            CASE
                WHEN ASCII(gram.token) < 127 THEN gram.token
                ELSE ' '
            END + ''                    -- unnamed expression => bare text in the XML
        FROM dbo.ngrams8k(@string, 1) AS gram
        ORDER BY gram.position
        FOR XML PATH(''), TYPE
    ).value('(text())[1]', 'VARCHAR(8000)');
……如果您运行的是SQL Server 2017或更高版本,并且希望代码更简洁:
-- Remove bad characters: aggregate only the single-character tokens whose
-- ASCII code is below 127, ordered by their position in @string.
SELECT
    STRING_AGG(gram.token, '') WITHIN GROUP (ORDER BY gram.position)
FROM dbo.ngrams8k(@string, 1) AS gram
WHERE ASCII(gram.token) < 127;
-- Replace bad characters: aggregate every token in position order, emitting a
-- space for any token whose ASCII code is 127 or above.
SELECT
    STRING_AGG(
        CASE
            WHEN ASCII(gram.token) < 127 THEN gram.token
            ELSE ' '
        END,
        ''
    ) WITHIN GROUP (ORDER BY gram.position)
FROM dbo.ngrams8k(@string, 1) AS gram;
使用NGrams8K逻辑转换为子查询的更新解决方案
请留意查询中我的注释……
-- Sample input, and the token length @N (1 = split into single characters).
DECLARE @string VARCHAR(1000) = 'øsmøeøø' COLLATE Latin1_General_100_BIN2, @N INT = 1;

-- Remove bad characters
-- Inline stand-in for dbo.NGrams8K, so no function has to be created.
-- Column names are written in one consistent case so the query also resolves
-- on case-sensitive database collations.
WITH ninety(x) AS
(
    -- 90 placeholder rows; joined to itself below this gives 90*90 = 8100 rows,
    -- enough to number every byte of a VARCHAR(8000).
    SELECT 1
    FROM (VALUES
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
    ) AS v(n)
),
tally(n) AS
(
    -- One row per token start position: DATALENGTH(@string) - (@N - 1) rows.
    SELECT TOP (ABS(CONVERT(BIGINT, (DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1)), 0)))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- order by a constant to avoid a sort
    FROM ninety AS a
    CROSS JOIN ninety AS b
),
gram(position, token) AS
(
    -- The @N-character token starting at each position.
    SELECT
        t.n,
        SUBSTRING(@string, CAST(t.n AS int), @N)
    FROM tally AS t
    WHERE @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
    (
        SELECT g.token + ''             -- unnamed expression => bare text in the XML
        FROM gram AS g
        WHERE ASCII(g.token) < 127      -- keep only characters with code 0-126
        ORDER BY g.position
        FOR XML PATH(''), TYPE
    ).value('(text())[1]', 'VARCHAR(8000)');
-- Replace bad characters
-- Inline stand-in for dbo.NGrams8K, so no function has to be created.
-- Column names are written in one consistent case so the query also resolves
-- on case-sensitive database collations.
WITH ninety(x) AS
(
    -- 90 placeholder rows; joined to itself below this gives 90*90 = 8100 rows,
    -- enough to number every byte of a VARCHAR(8000).
    SELECT 1
    FROM (VALUES
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
    ) AS v(n)
),
tally(n) AS
(
    -- One row per token start position: DATALENGTH(@string) - (@N - 1) rows.
    SELECT TOP (ABS(CONVERT(BIGINT, (DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1)), 0)))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- order by a constant to avoid a sort
    FROM ninety AS a
    CROSS JOIN ninety AS b
),
gram(position, token) AS
(
    -- The @N-character token starting at each position.
    SELECT
        t.n,
        SUBSTRING(@string, CAST(t.n AS int), @N)
    FROM tally AS t
    WHERE @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
    (
        SELECT
            CASE
                WHEN ASCII(g.token) < 127 THEN g.token
                ELSE ' '                -- code 127 or above becomes a single space
            END + ''                    -- unnamed expression => bare text in the XML
        FROM gram AS g
        ORDER BY g.position
        FOR XML PATH(''), TYPE
    ).value('(text())[1]', 'VARCHAR(8000)');
-- Remove bad characters using STRING_AGG (SQL 2017++)
-- Inline stand-in for dbo.NGrams8K, so no function has to be created.
-- Column names are written in one consistent case so the query also resolves
-- on case-sensitive database collations.
WITH ninety(x) AS
(
    -- 90 placeholder rows; joined to itself below this gives 90*90 = 8100 rows,
    -- enough to number every byte of a VARCHAR(8000).
    SELECT 1
    FROM (VALUES
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
    ) AS v(n)
),
tally(n) AS
(
    -- One row per token start position: DATALENGTH(@string) - (@N - 1) rows.
    SELECT TOP (ABS(CONVERT(BIGINT, (DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1)), 0)))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- order by a constant to avoid a sort
    FROM ninety AS a
    CROSS JOIN ninety AS b
),
gram(position, token) AS
(
    -- The @N-character token starting at each position.
    SELECT
        t.n,
        SUBSTRING(@string, CAST(t.n AS int), @N)
    FROM tally AS t
    WHERE @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
    STRING_AGG(g.token, '') WITHIN GROUP (ORDER BY g.position)
FROM gram AS g
WHERE ASCII(g.token) < 127;             -- keep only characters with code 0-126
-- Replace bad characters using STRING_AGG (SQL 2017++)
-- Inline stand-in for dbo.NGrams8K, so no function has to be created.
-- Column names are written in one consistent case so the query also resolves
-- on case-sensitive database collations.
WITH ninety(x) AS
(
    -- 90 placeholder rows; joined to itself below this gives 90*90 = 8100 rows,
    -- enough to number every byte of a VARCHAR(8000).
    SELECT 1
    FROM (VALUES
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1),
        (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
    ) AS v(n)
),
tally(n) AS
(
    -- One row per token start position: DATALENGTH(@string) - (@N - 1) rows.
    SELECT TOP (ABS(CONVERT(BIGINT, (DATALENGTH(ISNULL(@string, '')) - (ISNULL(@N, 1) - 1)), 0)))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -- order by a constant to avoid a sort
    FROM ninety AS a
    CROSS JOIN ninety AS b
),
gram(position, token) AS
(
    -- The @N-character token starting at each position.
    SELECT
        t.n,
        SUBSTRING(@string, CAST(t.n AS int), @N)
    FROM tally AS t
    WHERE @N > 0 AND @N <= DATALENGTH(@string)
)
SELECT
    STRING_AGG(
        CASE
            WHEN ASCII(g.token) < 127 THEN g.token
            ELSE ' '                    -- code 127 or above becomes a single space
        END,
        ''
    ) WITHIN GROUP (ORDER BY g.position)
FROM gram AS g;