我有一个很大的nvarchar,我希望传递给HashBytes函数。 我收到错误:
“字符串或二进制数据将被截断。无法将值 NULL 插入列 'colname'(表 'table');该列不允许空值。UPDATE 失败。语句已终止。”
经过一番查找,我发现这是因为HashBytes函数的输入上限为8000字节。进一步搜索后我找到了一个“解决方案”:用下面这个用户定义函数把大型varchar拆分成若干段,分别计算散列,然后再把各段的结果组合起来:
CREATE FUNCTION [dbo].[udfLargeHashTable] (@algorithm nvarchar(4), @InputDataString varchar(MAX))
RETURNS varbinary(MAX)
AS
BEGIN
    -- Purpose: hash a string of any length by cutting its bytes into pieces of
    --          at most 8000 bytes, hashing every piece with HASHBYTES and
    --          concatenating the per-piece digests in input order.
    -- Params : @algorithm       - algorithm name handed to HASHBYTES (e.g. 'SHA1').
    --          @InputDataString - text to hash.
    -- Returns: the concatenated digests. For an input of 1 to 8000 bytes this
    --          is exactly HASHBYTES(@algorithm, @InputDataString). A NULL or
    --          empty input never enters the loop and yields an empty value (0x).
    DECLARE
        @Index int,
        @InputDataLength int,
        @ReturnSum varbinary(max),
        @InputData varbinary(max)

    -- Seed with an EMPTY binary value. Seeding with the integer 0 would be
    -- converted to 0x00000000 and prefix four zero bytes to every result.
    SET @ReturnSum = 0x
    SET @Index = 1

    -- The target length must be spelled out: a bare CONVERT(binary, ...) uses
    -- the default length of 30 bytes, which truncates long input and
    -- zero-pads short input, so the digest would not match HASHBYTES.
    SET @InputData = CONVERT(varbinary(max), @InputDataString)
    SET @InputDataLength = DATALENGTH(@InputData)

    WHILE @Index <= @InputDataLength
    BEGIN
        -- SUBSTRING on varbinary is byte-based, so every piece is <= 8000 bytes.
        SET @ReturnSum = @ReturnSum + HASHBYTES(@algorithm, SUBSTRING(@InputData, @Index, 8000))
        SET @Index = @Index + 8000
    END

    RETURN @ReturnSum
END
我这样调用它:
-- Convert the varbinary result of the UDF to the int lookup value.
SET @ReportDefinitionHash = CAST(dbo.[udfLargeHashTable]('SHA1', @ReportDefinitionForLookup) AS int)
其中@ReportDefinitionHash是int,而@ReportDefinitionForLookup是varchar
传入像'test'这样的简单字符串时,我的UDF得到的int与直接调用HashBytes得到的int不同。
有关此问题的任何建议吗?
答案 0 :(得分:14)
如果您无法创建功能并且必须使用数据库中已存在的内容:
sys.fn_repl_hash_binary
可以使用以下语法调用:
sys.fn_repl_hash_binary(cast('some really long string' as varbinary(max)))
取自:http://www.sqlnotes.info/2012/01/16/generate-md5-value-from-big-data/
答案 1 :(得分:9)
只需使用此功能(取自Hashing large data strings with a User Defined Function):
create function dbo.fn_hashbytesMAX
    ( @string  nvarchar(max)
    , @Algo    varchar(10)
    )
    returns varbinary(20)
as
/************************************************************
*
*    Author:        Brandon Galderisi
*    Last modified: 15-SEP-2009 (by Denis)
*    Purpose:       uses the system function hashbytes as well
*                   as sys.fn_varbintohexstr to split an
*                   nvarchar(max) string into 4000 character
*                   (8000 byte) chunks, hash each chunk, keep
*                   the last 40 hex characters of each digest,
*                   stream those into one string in chunk order
*                   and then hash that string (recursively, if
*                   it is itself longer than 4000 characters).
*
*    Parameters:    @string - text to hash
*                   @Algo   - algorithm name passed to hashbytes
*
*    Returns:       varbinary(20). Input of at most 4000
*                   characters is hashed directly, so the result
*                   equals hashbytes(@Algo, @string) converted
*                   to varbinary(20).
*
*    Note:          the 40 hex character slice and the
*                   varbinary(20) return type both correspond to
*                   a 20 byte digest such as SHA1; longer
*                   digests are cut down to that size.
*
*************************************************************/
begin
    declare @concat  nvarchar(max)
           ,@NumHash int
           ,@HASH    varbinary(20)

    -- number of 4000 character chunks (datalength is in bytes, 2 per character)
    set @NumHash = ceiling((datalength(@string)/2)/(4000.0))

    /* HashBytes only supports 8000 bytes so split the string if it is larger */
    if @NumHash>1
    begin
        -- row generator: one row per chunk number, rn = 1, 2, 3, ...
        ;with a as (select 1 as n union all select 1)     -- 2 rows
        ,b as (select 1 as n from a cross join a a1)      -- 4 rows
        ,c as (select 1 as n from b cross join b b1)      -- 16 rows
        ,d as (select 1 as n from c cross join c c1)      -- 256 rows
        ,e as (select 1 as n from d cross join d d1)      -- 65,536 rows
        ,f as (select 1 as n from e cross join e e1)      -- 4,294,967,296 rows (x 4000 = 17+ trillion characters)
        ,factored as (select row_number() over (order by n) rn from f)
        -- factor is the 1-based position just past the end of chunk rn
        ,factors as (select rn,(rn*4000)+1 factor from factored)

        select @concat = cast((
            select right(sys.fn_varbintohexstr
                         (
                         hashbytes(@Algo, substring(@string, factor - 4000, 4000))
                         )
                      , 40) + ''
            from factors
            where rn <= @NumHash
            -- without an explicit order the chunk digests could be
            -- concatenated in any sequence, changing the final hash
            order by rn
            for xml path('')
        ) as nvarchar(max))

        -- hash the string of chunk digests (recurses while it is still too long)
        set @HASH = dbo.fn_hashbytesMAX(@concat ,@Algo)
    end
    else
    begin
        set @HASH = convert(varbinary(20), hashbytes(@Algo, @string))
    end

    return @HASH
end
结果如下:
-- One row comparing the four ways of hashing the literal 'test'.
SELECT
    HASHBYTES('sha1', N'test'),              -- native function with nvarchar input
    HASHBYTES('sha1', 'test'),               -- native function with varchar input
    dbo.fn_hashbytesMAX('test', 'sha1'),     -- Galderisi's function which casts to nvarchar input
    dbo.fnGetHash('sha1', 'test')            -- your function
输出:
0x87F8ED9157125FFC4DA9E06A7B8011AD80A53FE1
0xA94A8FE5CCB19BA61C4C0873D391E987982FBBD3
0x87F8ED9157125FFC4DA9E06A7B8011AD80A53FE1
0x00000000AE6DBA4E0F767D06A97038B0C24ED720662ED9F1
答案 2 :(得分:6)
我在被接受的答案的基础上做了一些修改和改进:
通过这些更改,现在可以在持久计算列中使用这些函数,因为它们在创建时现在标记为确定性。
-- Hashes an NVARCHAR(MAX) value of any length.
--   @Algorithm : algorithm name passed to HASHBYTES.
--   @Text      : text to hash.
-- Returns the HASHBYTES digest. Text that fits in 8000 bytes is hashed
-- directly; longer text is repeatedly replaced by the concatenated
-- '0x...' hex digests of its 4000-character chunks until it fits.
CREATE FUNCTION dbo.fnHashBytesNVARCHARMAX
(
    @Algorithm VARCHAR(10),
    @Text NVARCHAR(MAX)
)
RETURNS VARBINARY(8000)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @NumHash INT;
    DECLARE @HASH VARBINARY(8000);

    -- Number of 8000-byte (4000-character) chunks in the current text.
    SET @NumHash = CEILING(DATALENGTH(@Text) / (8000.0));

    /* HashBytes only supports 8000 bytes so split the string if it is larger */
    WHILE @NumHash > 1
    BEGIN
        -- Row generator: one row per chunk number, rn = 1, 2, 3, ...
        WITH a AS
            (SELECT 1 AS n UNION ALL SELECT 1),         -- 2 rows
        b AS
            (SELECT 1 AS n FROM a CROSS JOIN a a1),     -- 4 rows
        c AS
            (SELECT 1 AS n FROM b CROSS JOIN b b1),     -- 16 rows
        d AS
            (SELECT 1 AS n FROM c CROSS JOIN c c1),     -- 256 rows
        e AS
            (SELECT 1 AS n FROM d CROSS JOIN d d1),     -- 65,536 rows
        f AS
            (SELECT 1 AS n FROM e CROSS JOIN e e1),     -- 4,294,967,296 rows (x 4000 = 17+ trillion characters)
        factored AS
            (SELECT ROW_NUMBER() OVER (ORDER BY n) rn FROM f),
        -- factor is the 1-based position just past the end of chunk rn.
        factors AS
            (SELECT rn, (rn * 4000) + 1 factor FROM factored)
        SELECT @Text = CAST
        (
            (
                SELECT CONVERT(VARCHAR(MAX), HASHBYTES(@Algorithm, SUBSTRING(@Text, factor - 4000, 4000)), 1)
                FROM factors
                WHERE rn <= @NumHash
                -- Without an explicit order the chunk digests could be
                -- concatenated in any sequence, changing the final hash.
                ORDER BY rn
                FOR XML PATH('')
            ) AS NVARCHAR(MAX)
        );

        -- The digest string may itself exceed 8000 bytes; go round again if so.
        SET @NumHash = CEILING(DATALENGTH(@Text) / (8000.0));
    END;

    SET @HASH = CONVERT(VARBINARY(8000), HASHBYTES(@Algorithm, @Text));
    RETURN @HASH;
END;
-- Hashes a VARCHAR(MAX) value of any length.
--   @Algorithm : algorithm name passed to HASHBYTES.
--   @Text      : text to hash.
-- Returns the HASHBYTES digest. Text that fits in 8000 bytes is hashed
-- directly; longer text is repeatedly replaced by the concatenated
-- '0x...' hex digests of its 8000-character chunks until it fits.
CREATE FUNCTION dbo.fnHashBytesVARCHARMAX
(
    @Algorithm VARCHAR(10),
    @Text VARCHAR(MAX)
)
RETURNS VARBINARY(8000)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @NumHash INT;
    DECLARE @HASH VARBINARY(8000);

    -- Number of 8000-byte chunks in the current text.
    SET @NumHash = CEILING(DATALENGTH(@Text) / (8000.0));

    /* HashBytes only supports 8000 bytes so split the string if it is larger */
    WHILE @NumHash > 1
    BEGIN
        -- Row generator: one row per chunk number, rn = 1, 2, 3, ...
        WITH a AS
            (SELECT 1 AS n UNION ALL SELECT 1),         -- 2 rows
        b AS
            (SELECT 1 AS n FROM a CROSS JOIN a a1),     -- 4 rows
        c AS
            (SELECT 1 AS n FROM b CROSS JOIN b b1),     -- 16 rows
        d AS
            (SELECT 1 AS n FROM c CROSS JOIN c c1),     -- 256 rows
        e AS
            (SELECT 1 AS n FROM d CROSS JOIN d d1),     -- 65,536 rows
        f AS
            (SELECT 1 AS n FROM e CROSS JOIN e e1),     -- 4,294,967,296 rows (x 8000 = 34+ trillion characters)
        factored AS
            (SELECT ROW_NUMBER() OVER (ORDER BY n) rn FROM f),
        -- factor is the 1-based position just past the end of chunk rn.
        factors AS
            (SELECT rn, (rn * 8000) + 1 factor FROM factored)
        SELECT @Text = CAST
        (
            (
                SELECT CONVERT(VARCHAR(MAX), HASHBYTES(@Algorithm, SUBSTRING(@Text, factor - 8000, 8000)), 1)
                FROM factors
                WHERE rn <= @NumHash
                -- Without an explicit order the chunk digests could be
                -- concatenated in any sequence, changing the final hash.
                ORDER BY rn
                FOR XML PATH('')
            ) AS VARCHAR(MAX)   -- @Text is VARCHAR(MAX); the digests are plain hex characters
        );

        -- The digest string may itself exceed 8000 bytes; go round again if so.
        SET @NumHash = CEILING(DATALENGTH(@Text) / (8000.0));
    END;

    SET @HASH = CONVERT(VARBINARY(8000), HASHBYTES(@Algorithm, @Text));
    RETURN @HASH;
END;
答案 3 :(得分:1)
您可以编写SQL CLR函数:
/// <summary>
/// Hashes a string of any length with the named algorithm.
/// </summary>
/// <param name="algorithm">Algorithm name understood by <c>HashAlgorithm.Create</c>, e.g. "md5".</param>
/// <param name="data">Text to hash; it is encoded as UTF-8 before hashing.</param>
/// <returns>
/// The digest bytes, or SQL NULL when either argument is SQL NULL.
/// </returns>
/// <exception cref="System.ArgumentException">The algorithm name is not recognised.</exception>
[Microsoft.SqlServer.Server.SqlFunction]
public static SqlBinary BigHashBytes(SqlString algorithm, SqlString data)
{
    // Reading .Value on a NULL SqlString throws, so answer NULL for NULL input.
    if (algorithm.IsNull || data.IsNull)
    {
        return SqlBinary.Null;
    }

    // HashAlgorithm is IDisposable; release it as soon as the digest is computed.
    using (var algo = HashAlgorithm.Create(algorithm.Value))
    {
        // Guard against a null result so an unknown name gives a clear error
        // instead of a NullReferenceException on ComputeHash.
        if (algo == null)
        {
            throw new System.ArgumentException("Unknown hash algorithm: " + algorithm.Value, "algorithm");
        }

        var bytes = Encoding.UTF8.GetBytes(data.Value);
        return new SqlBinary(algo.ComputeHash(bytes));
    }
}
然后可以在SQL中调用它:
-- Both statements return the same value.
SELECT HASHBYTES('md5', 'test stuff');
SELECT dbo.BigHashBytes('md5', 'test stuff');
仅当输入长度超过8k时才需要使用BigHashBytes。
答案 4 :(得分:1)
已测试,可以正常工作:select master.sys.fn_repl_hash_binary(someVarbinaryMaxValue),而且并不复杂 :)
答案 5 :(得分:0)
这也可以用作函数体:
-- Hashes @A in 4000-character chunks, concatenates the chunk digests in
-- order into @res and finally hashes @res.
DECLARE @A NVARCHAR(MAX) = N'test'
DECLARE @res VARBINARY(MAX) = 0x
DECLARE @position INT = 1
       -- Length in CHARACTERS: DATALENGTH is in bytes (2 per NVARCHAR
       -- character) while @position and SUBSTRING count characters. Comparing
       -- against the byte length made the loop hash extra empty chunks.
       -- COALESCE lets a NULL @A leave the loop after one pass (the result is
       -- then NULL) instead of never satisfying the BREAK condition.
       ,@len INT = COALESCE(DATALENGTH(@A) / 2, 0)

WHILE 1 = 1
BEGIN
    -- The body runs at least once, so an empty string still yields one chunk digest.
    SET @res = @res + HASHBYTES('SHA2_256', SUBSTRING(@A, @position, 4000))
    SET @position = @position + 4000
    IF @position > @len
        BREAK
END

SELECT HASHBYTES('SHA2_256', @res)
其思路是:对NVARCHAR(MAX)字符串中每4000个字符的部分分别计算HASH,并将结果连接起来,然后再对连接后的结果计算一次HASH。
答案 6 :(得分:0)
似乎最简单的解决方案是编写一个递归哈希算法,将输入文本值拆分为若干个子varchar(8000)段。
我随意选择将输入字符串切分为每段7500个字符。
散列算法返回varbinary(20),可以轻松转换为varchar(20)。
ALTER FUNCTION [dbo].[BigHash]
(
    @TextValue nvarchar(max)
)
RETURNS varbinary(20)
AS
BEGIN
    -- Purpose: SHA1-hash text of any length. Text longer than 7500 characters
    --          is hashed as its first 7500 characters followed by the hex form
    --          of the hash of the rest, which is computed recursively.
    -- Params : @TextValue - text to hash; NULL is hashed as the literal 'null'.
    -- Returns: SHA1 digest as varbinary(20).
    -- NOTE(review): the working variables are varchar, so the nvarchar input
    --   is converted to the database code page before hashing - confirm this
    --   is acceptable for non-ASCII data.
    -- NOTE(review): len() does not count trailing spaces, so they are not part
    --   of the hashed text - confirm.
    Declare @FirstPart as varchar(7500)
    Declare @Remainder as varchar(max)
    Declare @RemainderHash as varbinary(20)
    Declare @BinaryValue as varbinary(20)
    Declare @TextLength as integer

    Set @TextLength = len(@TextValue)

    -- "= null" is never true; IS NULL is required for this branch to be taken.
    if @TextValue is null
    Begin
        Set @BinaryValue = hashbytes('SHA1', 'null')
    End
    else if @TextLength > 7500
    Begin
        Set @FirstPart = substring(@TextValue, 1, 7500)
        Set @Remainder = substring(@TextValue, 7501, @TextLength - 7500)
        -- One nesting level per 7500 characters of input.
        Set @RemainderHash = dbo.BigHash(@Remainder)
        -- NOTE(review): varchar(20) with style 2 appears to keep only the first
        --   20 hex characters of the 40-character digest - confirm intended.
        Set @BinaryValue = hashbytes('SHA1', @FirstPart + convert( varchar(20), @RemainderHash, 2 ))
    End
    else
    Begin
        Set @FirstPart = substring(@TextValue, 1, @TextLength)
        Set @BinaryValue = hashbytes('SHA1', @FirstPart)
    End

    -- Single exit point.
    return @BinaryValue
END