如何在SQL Server上选择包含非英语字符的记录?
数据列可以包含以下内容的组合:
过滤器应查找包含至少一个非英语字符以及其他字符的记录。
如果记录仅包含英语和/或特殊字符,则它与过滤器不匹配。
数据示例:
过滤器必须获取前3条记录。
答案 0 :(得分:0)
您需要先定义一个英语字符白名单,然后逐个遍历字符串中的字符,用 UNICODE() 函数取得每个字符的码点,判断字符串是否包含白名单之外的字符。我倾向于为这项验证创建一个用户自定义函数(UDF)。
-- Session options switched on before the function definition that follows:
-- ANSI_NULLS and QUOTED_IDENTIFIER. Each SET is sent as its own batch (GO).
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Reports whether a string holds at least one character outside the
-- 7-bit ASCII range.
--
-- Parameter:
--   @str_to_Validate  text to inspect.
-- Returns:
--   1 when any inspected character has a code point above 127, otherwise 0.
--   A NULL or empty argument yields 0 because the scan never starts.
CREATE FUNCTION dbo.fn_Contains_Non_English_Char (
@str_to_Validate NVARCHAR(MAX))
RETURNS BIT
AS
BEGIN
    DECLARE @position   INT
    DECLARE @last       INT
    DECLARE @is_foreign BIT

    -- LEN() yields NULL for NULL input, which makes the loop condition
    -- below evaluate to UNKNOWN and skips the scan entirely.
    SELECT @position   = 1,
           @last       = LEN(@str_to_Validate),
           @is_foreign = 0

    -- Walk the string left to right and stop at the first offending character.
    WHILE @is_foreign = 0 AND @position <= @last
    BEGIN
        -- The whitelist is "code point <= 127" (plain ASCII). Compare against
        -- 255 instead to also accept code points 128-255, or substitute
        -- your own whitelist test here.
        IF UNICODE(SUBSTRING(@str_to_Validate, @position, 1)) > 127
            SET @is_foreign = 1

        SET @position = @position + 1
    END

    RETURN @is_foreign
END
GO
-- Demo: filter five sample notes down to those containing at least one
-- non-ASCII character. The first three rows (Arabic, Greek and Tamil text)
-- are returned; the last two consist solely of ASCII characters and are
-- filtered out.
WITH sample_notes AS (
    SELECT N'I liked كيكة التفاح بالمقلاة' AS NOTE
    UNION ALL
    SELECT N'τράπεζα τους φόρους και τον ΕΝΦΙΑ'
    UNION ALL
    SELECT N'I like தமிழ்'
    UNION ALL
    SELECT N'"Thank you" @stackoverflow.'
    UNION ALL
    SELECT N'https://stackoverflow.com need to fetch first 3 records. please help on this.'
)
SELECT
    sample_notes.NOTE
FROM sample_notes
-- The scalar function returns 1 for any note with a code point above 127.
WHERE dbo.fn_Contains_Non_English_Char(sample_notes.NOTE) = 1;