我有许多样本处理原语(primitive),例如:
// Add8: returns the sum of two Byte operands as a Byte.
// The result type is deliberately the same size as the operands, and no
// clamping is performed: a sum above High(Byte) does not fit in Result.
// Per the accompanying text the most significant bits are then truncated,
// so e.g. Add8(240, 22) does not yield 255.
// NOTE(review): behaviour under range/overflow checking ({$R+}/{$Q+}) is not
// shown here - confirm the project's compiler settings.
// Inlined only when the CODEINLINING conditional symbol is defined.
function Add8(A, B: Byte): Byte; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
Result := A + B;
end;
// Sub16: returns A - B for two Word operands as a Word.
// The result type is deliberately the same size as the operands, and no
// clamping is performed: when B > A the true difference is below Low(Word)
// and does not fit in Result, so e.g. Sub16(32000, 33000) does not yield 0.
// NOTE(review): behaviour under range/overflow checking ({$R+}/{$Q+}) is not
// shown here - confirm the project's compiler settings.
// Inlined only when the CODEINLINING conditional symbol is defined.
function Sub16(A, B: Word): Word; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
Result := A - B;
end;
{ et cetera }
这些函数是数据处理的主力,会对每个输入样本(数以百万计)各调用一次。按设计,Result 的类型必须与参数(操作数)的大小相同。
当运算结果超出 Low(Result)..High(Result) 的定义范围时,问题就出现了:最高有效位被截断,结果实际上是错误的。例如:
向峰值加上一个较小的值时,Add8(240, 22) 会把峰值抹掉(262 被截断为 6),而我希望得到 255。
对两个接近基线水平的值相减时,例如 Sub16(32000, 33000),我希望得到 0。
我的问题是:如何以高性能的方式把这类运算的结果值钳制(clamp)到取值范围内?对于所有算术运算和所有基本类型(8 位、16 位、无符号、有符号),是否存在通用的解决方案?
答案 0 :(得分:4)
既然您要处理大量数据,我建议尝试使用汇编——MMX、SSE2 指令正是为此类任务设计的。例如,paddusb 指令可以一次对 16 对字节做饱和加法(把结果钳位到字节范围内)。(别忘了正确对齐数据块。)
下面是一个适用于 32 位编译器的示例(未经充分测试)。在处理 256M 的数组时,它比 Pascal 版本快约 9 倍(604 ms 对 5100 ms,重复 10 次)。请注意,对于合理的数据规模,Pascal 版本本身也相当快。
program Project1;
{$APPTYPE CONSOLE}
uses SysUtils;
procedure AddBytesSat(const A, B, Res: PByteArray; Len: Integer);
// Saturating byte-wise addition: Res[i] := Min(A[i] + B[i], 255).
//
// A, B - input arrays; Res - output array. All three must be aligned to a
//        16-byte boundary because movdqa (aligned move) is used.
// Len  - number of bytes to process. Only whole 16-byte groups are handled
//        (Len div 16 of them); any tail of Len mod 16 bytes is left untouched.
//        Len < 16 (including zero or negative values) processes nothing.
//
// 32-bit register calling convention: A in eax, B in edx, Res in ecx,
// Len on the stack. esi is saved and restored around the loop.
asm
  push    esi
  mov     esi, ecx          // keep Res pointer; ecx becomes the loop counter
  mov     ecx, Len
  cmp     ecx, 16
  jl      @@done            // signed compare: nothing to do when no full group fits
  shr     ecx, 4            // number of 16-byte groups (Len div 16), now >= 1
@@start:
  movdqa  xmm0, [eax]       // load 16 bytes of A (aligned)
  paddusb xmm0, [edx]       // add 16 unsigned bytes of B with saturation
  movdqa  [esi], xmm0       // store the 16 result bytes to Res
  add     eax, 16           // advance all three array pointers
  add     edx, 16
  add     esi, 16
  dec     ecx
  jnz     @@start           // next group while the counter is non-zero
@@done:
  pop     esi
end;
const
  BlockSize = 32; // two 16-byte groups
var
  A, B, C: PByteArray;
  i: integer;
begin
  // Ensure that the memory manager returns 16-byte aligned blocks,
  // as required by the movdqa instructions in AddBytesSat.
  SetMinimumBlockAlignment(System.mba16Byte);
  GetMem(A, BlockSize);
  GetMem(B, BlockSize);
  GetMem(C, BlockSize);
  try
    // A ramps 0, 8, 16, ... 248 while B is a constant 200, so the true sums
    // exceed 255 from index 7 onwards.
    for i := 0 to BlockSize - 1 do begin
      A[i] := 8 * i;
      B[i] := 200;
    end;
    AddBytesSat(A, B, C, BlockSize);
    // Clamping demonstration: print the first 16 results.
    for i := 0 to 15 do
      Writeln(C[i]);
    Readln;
  finally
    // Release the buffers even if an exception is raised above.
    FreeMem(C);
    FreeMem(B);
    FreeMem(A);
  end;
end.
答案 1 :(得分:0)
如果待钳位的值只是略大于 255 或略小于零,则可以使用查找表。只要表中常被访问的部分能放进 L1 缓存,这种查表方法就非常快。编译器会把 byte_clamp_lut + 256 转换为指向表中间位置的单次内存引用。我不知道 Pascal / Delphi 是否支持宏,但在 C / C++ 中可以这样实现:
/*
 * byte_clamp(v): clamp an integer v in the range [-256, 511] to [0, 255]
 * with a single table lookup.
 *
 * The table has 768 entries laid out as three 256-entry regions:
 *   indices   0..255  -> 0          (v in -256..-1, saturate low)
 *   indices 256..511  -> 0..255     (v in    0..255, identity)
 *   indices 512..767  -> 255        (v in  256..511, saturate high)
 * The macro indexes relative to the middle of the table (offset 256), so a
 * negative v reaches the low-saturation region. Values of v outside
 * [-256, 511] index out of bounds and must not be passed.
 */
#define byte_clamp(v) ((byte_clamp_lut + 256)[(v)])

/* Alias for code that refers to the table by its earlier name. */
#define byte_clamp768 byte_clamp_lut

static const uint8_t byte_clamp_lut[768] = {
    /* v = -256 .. -1: saturate to 0 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* v = 0 .. 255: identity */
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
    16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
    32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
    48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
    64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
    80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
    96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
    112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
    208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
    240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
    /* v = 256 .. 511: saturate to 255 (first entry was 256, which does not fit uint8_t) */
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};