Question

我有一个以小端顺序交错的有符号24位整数（复数）数组，我想将其转换为复数浮点数或双精度数组。通过交错，我的意思是：

R1 R2 R3 I1 I2 I3 R4 R5 R6 I4 I5 I6 . . .

每个项目都是一个8位字节，每三个字节组成一个24位的int，其中R = real（实部），I = imaginary（虚部）。

在C＃中执行此操作的最有效方法是什么？这段代码需要运行很多次，所以我想榨干每一个时钟周期。我希望能有比蛮力移位或强制类型转换更高效的办法。

在这种情况下，我不介意使用不安全的代码，如果有帮助的话。

这里是基线，即暴力方法，其中第二个数字（虚部）被注释掉了，并且暂时忽略了符号处理，以简化IL：

// Baseline benchmark: decodes the real component of interleaved 24-bit
// complex samples with explicit byte shifts and prints the average time
// per sample.
class Program
{
    // Number of complex samples converted in one timed run.
    const int Size = 10000000;
    static void Main(string[] args)
    {
        //
        // Array of little-endian 24-bit complex ints
        // (least significant byte first)
        //
        // Six bytes per complex sample: three for the real part followed by
        // three for the imaginary part. The buffer is left zero-filled.
        byte[] buf = new byte[3 * 2 * Size];
        float[] real = new float[Size];
        // The imaginary output is disabled in this baseline.
        //float[] imag = new float[Size];

        //
        // The brute-force way
        //
        // j is the byte offset into buf; i is the sample index.
        int j = 0;
        Stopwatch timer = new Stopwatch();
        timer.Start();
        for (int i = 0; i < Size; i++)
        {
            // Assemble the three bytes, least significant first. The bytes are
            // zero-extended, so the result is unsigned (no sign extension yet).
            real[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
            j += 3;
            // Imaginary conversion is commented out, but its three bytes are
            // still skipped by the second increment below.
            // imag[i] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
            j += 3;
        }
        timer.Stop();
        // Elapsed milliseconds * 1000 / Size = average microseconds per sample.
        Console.WriteLine("result = " + 
            (float)(timer.ElapsedMilliseconds * 1000.0f / Size) + 
            " microseconds per complex number");
        // Wait for Enter before the program exits.
        Console.ReadLine();
    }
}

以及相应的IL：

  IL_0024:  ldc.i4.0
  IL_0025:  stloc.s    i
  IL_0027:  br.s       IL_0050
  IL_0029:  ldloc.1
  IL_002a:  ldloc.s    i
  IL_002c:  ldloc.0
  IL_002d:  ldloc.2
  IL_002e:  ldelem.u1
  IL_002f:  ldloc.0
  IL_0030:  ldloc.2
  IL_0031:  ldc.i4.1
  IL_0032:  add
  IL_0033:  ldelem.u1
  IL_0034:  ldc.i4.8
  IL_0035:  shl
  IL_0036:  or
  IL_0037:  ldloc.0
  IL_0038:  ldloc.2
  IL_0039:  ldc.i4.2
  IL_003a:  add
  IL_003b:  ldelem.u1
  IL_003c:  ldc.i4.s   16
  IL_003e:  shl
  IL_003f:  or
  IL_0040:  conv.r4
  IL_0041:  stelem.r4
  IL_0042:  ldloc.2
  IL_0043:  ldc.i4.3
  IL_0044:  add
  IL_0045:  stloc.2
  IL_0046:  ldloc.2
  IL_0047:  ldc.i4.3
  IL_0048:  add
  IL_0049:  stloc.2
  IL_004a:  ldloc.s    i
  IL_004c:  ldc.i4.1
  IL_004d:  add
  IL_004e:  stloc.s    i
  IL_0050:  ldloc.s    i
  IL_0052:  ldc.i4     0x989680
  IL_0057:  blt.s      IL_0029

Answer 1

聚会迟到了，但这看起来很有趣;-)

下面是几个实验（使用了不安全代码）。Method1() 是你的方法。在我的笔记本电脑上，使用 AnyCPU Release 版本，Method2() 稳定地快了 20% 以上，而 Method3() 没有显著的额外收益。（计时基于 100_000_000 次迭代。）

我想尝试的是借助指针、尽量避免（显式）移位的做法（掩码操作不可避免）。

一些典型的结果...

result = 0.0075 microseconds per complex number
result = 0.00542 microseconds per complex number
result = 0.00516 microseconds per complex number

result = 0.00753 microseconds per complex number
result = 0.0052 microseconds per complex number
result = 0.00528 microseconds per complex number

代码...

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace SO_20210326
{
  /// <summary>
  /// One interleaved complex sample: six bytes laid out as R1 R2 R3 I1 I2 I3,
  /// each component being a 24-bit integer stored least significant byte first.
  /// </summary>
  /// <remarks>
  /// The two fields are overlapping 32-bit views that both stay inside the
  /// six-byte record:
  ///   r covers bytes 0..3, so the real part is its LOW 24 bits;
  ///   i covers bytes 2..5, so the imaginary part is its HIGH 24 bits.
  /// A 4-byte field at offset 3 would need a seventh byte and therefore cannot
  /// belong to a six-byte record. The views assume a little-endian host.
  /// The project must be compiled with unsafe code enabled (Method3 uses it).
  /// </remarks>
  [StructLayout(LayoutKind.Explicit, Pack = 1, Size = 6)]
  struct NumPair
  {
    [FieldOffset(0)] public int r;
    [FieldOffset(2)] public int i;
  }

  /// <summary>
  /// Micro-benchmark comparing three ways of converting interleaved 24-bit
  /// complex integers into separate float arrays.
  /// </summary>
  class Program
  {
    // Number of complex samples converted per timed run.
    const int Size = 100000000;

    // Keeps the low 24 bits of a 32-bit load.
    const int Mask24 = 0x00ffffff;

    /// <summary>Times the byte-by-byte shift conversion.</summary>
    static void Method1()
    {
      //
      // Array of little-endian 24-bit complex ints
      // (least significant byte first)
      //
      byte[] buf = new byte[3 * 2 * Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = Stopwatch.StartNew();
      ConvertByShifting(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    /// <summary>Times the conversion through the overlapping struct fields.</summary>
    static void Method2()
    {
      NumPair[] buf = new NumPair[Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = Stopwatch.StartNew();
      ConvertByStruct(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    /// <summary>Times the conversion through a raw byte pointer.</summary>
    static void Method3()
    {
      NumPair[] buf = new NumPair[Size];
      float[] real = new float[Size];
      float[] imag = new float[Size];

      Stopwatch timer = Stopwatch.StartNew();
      ConvertByPointer(buf, real, imag);
      timer.Stop();
      Report(timer);
    }

    /// <summary>
    /// Brute-force conversion: assembles every 24-bit value from its three
    /// bytes. Values are treated as unsigned; no sign extension is applied.
    /// </summary>
    /// <param name="buf">Interleaved bytes, six per complex sample.</param>
    /// <param name="real">Receives the real parts; its length sets the sample count.</param>
    /// <param name="imag">Receives the imaginary parts.</param>
    internal static void ConvertByShifting(byte[] buf, float[] real, float[] imag)
    {
      int j = 0;
      for (int k = 0; k < real.Length; k++)
      {
        real[k] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
        j += 3;
        imag[k] = (float)(buf[j] | (buf[j + 1] << 8) | (buf[j + 2] << 16));
        j += 3;
      }
    }

    /// <summary>
    /// Conversion through <see cref="NumPair"/>: one 32-bit load per component.
    /// Produces the same unsigned values as <see cref="ConvertByShifting"/>.
    /// </summary>
    /// <param name="buf">One element per complex sample.</param>
    /// <param name="real">Receives the real parts; its length sets the sample count.</param>
    /// <param name="imag">Receives the imaginary parts.</param>
    internal static void ConvertByStruct(NumPair[] buf, float[] real, float[] imag)
    {
      for (int k = 0; k < real.Length; k++)
      {
        // Real part sits in the low 24 bits of r.
        real[k] = buf[k].r & Mask24;
        // Imaginary part sits in the high 24 bits of i; the mask removes the
        // sign bits that the arithmetic shift copies in.
        imag[k] = (buf[k].i >> 8) & Mask24;
      }
    }

    /// <summary>
    /// Conversion through a pinned byte pointer, using the same two 32-bit
    /// loads per sample as <see cref="ConvertByStruct"/>. Both loads stay
    /// within the sample's own six bytes, so the last sample never reads past
    /// the end of the buffer.
    /// </summary>
    /// <param name="buf">One element per complex sample.</param>
    /// <param name="real">Receives the real parts; its length sets the sample count.</param>
    /// <param name="imag">Receives the imaginary parts.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="buf"/> holds fewer samples than <paramref name="real"/>.
    /// </exception>
    internal static unsafe void ConvertByPointer(NumPair[] buf, float[] real, float[] imag)
    {
      int count = real.Length;
      // Pointer reads are not bounds-checked, so validate up front.
      if (buf.Length < count)
      {
        throw new ArgumentException("buf holds fewer samples than real", "buf");
      }

      // Pinning the array itself (rather than &buf[0]) also works when it is empty.
      fixed (NumPair* first = buf)
      {
        byte* p = (byte*)first;
        for (int k = 0; k < count; k++)
        {
          // These loads are not 4-byte aligned.
          // NOTE(review): assumes the target CPU tolerates unaligned loads - confirm.
          real[k] = *(int*)p & Mask24;
          imag[k] = (*(int*)(p + 2) >> 8) & Mask24;
          p += sizeof(NumPair);
        }
      }
    }

    /// <summary>Prints the average time per complex sample for a finished run.</summary>
    static void Report(Stopwatch timer)
    {
      Console.WriteLine("result = " +
                        (float)(timer.ElapsedMilliseconds * 1000.0f / Size) +
                        " microseconds per complex number");
    }

    static void Main(string[] args)
    {
      Method1();
      Method2();
      Method3();
      // Wait for Enter before the program exits.
      Console.ReadLine();
    }
  }
}

Answer 2

没有比移位更快的解决方案。使用memcpy会多出一次函数调用，并带来压栈/出栈、跳转等许多额外开销，因此会慢得多，而且也不能让你省去计算索引所需的乘法。当然，你可以每次把指针增加3，这样就不需要乘法；但如果采用@Alexei Levenkov所说的方案，每次循环后只需将指针增加12个字节，完全不需要乘法。

unsigned int *data;
unsigned int i = 0;
unsigned int real[SIZE], imag[SIZE];

for (data = dataIn; data != dataIn + size; data += 3)
{
    // D1--------- D2--------- D3---------
    // R1 R2 R3 I1 I2 I3 R4 R5 R6 I4 I5 I6
    real[i] = data[0] >> 8;
    imag[i] = ((data[0] & 0xff) << 16) | (data[1] >> 16);
    real[i + 1] = ((data[1] & 0xffff) << 8) | (data[2] >> 24);
    imag[i + 1] = data[2] & 0xffffff;
    i++;
}

如果大小不是12的倍数，则在循环外的末尾将提取剩余的字节。

在C＃中有效地将24位整数数组转换为浮点数或双精度数

2 个答案: