如何在Delphi中快速将值“钳制”到范围内?

时间:2014-02-18 13:28:35

标签: delphi optimization signal-processing

我有许多样本处理原语,如:

// Adds two 8-bit samples and returns the sum as a Byte.
// The result type is only 8 bits wide and no clamping is performed here, so a
// sum above High(Byte) is not preserved (e.g. Add8(240, 22) does not yield 255).
// NOTE(review): whether such an overflow silently truncates or raises a
// range-check error depends on the {$R}/{$Q} compiler settings, which are not
// visible in this snippet - confirm.
// The routine is marked inline only when CODEINLINING is defined.
function Add8(A, B: Byte): Byte; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
  Result := A + B;
end;

// Subtracts B from A for two 16-bit samples and returns the difference as a Word.
// Word is unsigned and no clamping is performed here, so when B > A the
// mathematically negative difference is not preserved
// (e.g. Sub16(32000, 33000) does not yield 0).
// NOTE(review): whether such an underflow silently truncates or raises a
// range-check error depends on the {$R}/{$Q} compiler settings, which are not
// visible in this snippet - confirm.
// The routine is marked inline only when CODEINLINING is defined.
function Sub16(A, B: Word): Word; {$IFDEF CODEINLINING}inline;{$ENDIF}
begin
  Result := A - B;
end;

{ et cetera }

这些函数是数据处理的主力,会对每一个输入样本(数以百万计)调用。按设计,Result 的类型必须与参数(操作数)的大小相同。

当运算结果超出 Low(Result)..High(Result) 的定义范围时就会出现问题:最高有效位被截断,结果实际上是错误的。例如:在峰值上加一个较小的值 Add8(240, 22) 会把峰值抹掉(截断后为 6),我更希望得到 255;对两个接近基线水平的值做减法 Sub16(32000, 33000) 时,我更希望得到 0。

我的问题是:如何以高性能的方式把这类运算的结果钳制到范围内?对于所有算术运算和所有基本类型(8 位、16 位、无符号、有符号),是否有通用的解决方案?

2 个答案:

答案 0 :(得分:4)

由于您要处理大量数据,我建议尝试使用汇编——MMX、SSE2 指令就是专门为此类任务设计的。例如,paddusb 指令可以一次对 16 对字节做饱和加法(结果被钳制在字节范围内)。(不要忘记正确对齐数据块。)

下面是适用于 32 位编译器的示例(未经过全面测试)。处理 256M 的数组时,它比 Pascal 版本快约 9 倍(604 ms 对 5100 ms,重复 10 次)。请注意,对于合理的数据量,Pascal 版本也相当快。

program Project1;
{$APPTYPE CONSOLE}
uses SysUtils;

procedure AddBytesSat(const A, B, Res: PByteArray; Len: Integer);
// Adds byte arrays with unsigned saturation: Res[i] := min(A[i] + B[i], 255).
//
// Parameters (32-bit register calling convention):
//   A   (eax)   - first source array
//   B   (edx)   - second source array
//   Res (ecx)   - destination array
//   Len (stack) - number of bytes to process
//
// Requirements:
//   * all three arrays must be aligned to a 16-byte boundary, because the
//     aligned SSE2 forms (movdqa / memory operand of paddusb) are used;
//   * data is processed in whole 16-byte blocks (Len div 16); any trailing
//     Len mod 16 bytes are left untouched, so Len should be divisible by 16.
//
// If Len is less than 16 (including zero or negative) nothing is processed.
asm
  push esi
  mov esi, ecx        // Res arrives in ecx; move it away so ecx can be the counter
  mov ecx, Len
  cmp ecx, 16         // signed compare: fewer than one full block (or negative)?
  jl @@done           // then skip the loop instead of running it with a zero/garbage count
  shr ecx, 4          // block count = Len div 16 (now guaranteed >= 1)
@@start:
  movdqa xmm0, [eax]  // load 16 bytes (aligned) from A into an SSE register

  paddusb xmm0, [edx] // add 16 unsigned bytes from B with saturation

  movdqa [esi], xmm0  // store the 16 result bytes to Res
  add eax, 16         // advance the three array pointers to the next block
  add edx, 16
  add esi, 16
  dec ecx             // dec/jnz instead of the slow legacy LOOP instruction
  jnz @@start
@@done:
  pop esi
end;

var
  A, B, C: PByteArray;
  i: integer;
begin
  // Ask the memory manager for 16-byte aligned blocks: AddBytesSat uses
  // aligned SSE2 loads/stores (movdqa) on these buffers.
  SetMinimumBlockAlignment(System.mba16Byte);

  GetMem(A, 32);
  GetMem(B, 32);
  GetMem(C, 32);
  try
    // A holds a ramp 0, 8, 16, ... 248; B is a constant 200, so the sum
    // exceeds 255 from i = 7 onwards.
    for i := 0 to 31 do begin
      A[i] := 8 * i;
      B[i] := 200;
    end;

    AddBytesSat(A, B, C, 32);

    // Clamping demonstration: print the first 16 results.
    for i := 0 to 15 do
      Writeln(C[i]);

    Readln;
  finally
    // Release the demo buffers (the original never freed them).
    FreeMem(C);
    FreeMem(B);
    FreeMem(A);
  end;
end.

答案 1 :(得分:0)

如果待钳制的值只是略大于 255 或略小于 0,则可以使用查找表。只要表中经常被访问的部分能放进 L1 缓存,这种查找表方法就非常快。编译器会把 byte_clamp_lut + 256 转换为指向表中间位置的单个内存引用。(下表共 768 项,偏移 256,因此 v 的有效范围是 -256..511。)我不知道 Pascal/Delphi 是否支持宏,但在 C/C++ 中是这样实现的:

/*
 * byte_clamp(v): clamp a signed integer v to the range 0..255 using a lookup
 * table instead of comparisons.
 *
 * The table has 768 entries laid out as three 256-entry sections:
 *   [  0..255]  all 0      -> results for v in -256..-1
 *   [256..511]  0..255     -> identity for v in 0..255
 *   [512..767]  all 255    -> results for v in 256..511
 * The macro indexes from the middle section (table + 256), so v must be a
 * signed integer in -256..511; anything outside that range reads out of
 * bounds.
 */
#define byte_clamp(v) ((byte_clamp_lut + 256)[(v)])

static const uint8_t byte_clamp_lut[768] = {
  /* v = -256..-1: clamp to 0 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* v = 0..255: identity */
  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
  80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
  96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  /* v = 256..511: clamp to 255 (first entry was 256, which does not fit in uint8_t) */
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};