Question

在C ++中，我想编写一个类似于脚本语言的应用程序：
程序会在“设置阶段”根据某些输入定义一个大的全局数组，所有变量都存放在其中；同时在另一个数组中定义要依次调用的函数序列（“LogicElement”），包括它们的参数（例如要使用的变量）。

一种实现可能如下所示：

// Abstract interface shared by every logic element of the script.
// Concrete elements are created dynamically and handled through pointers to
// this base, so the destructor is virtual: destroying a derived element
// through a LogicElement_Generic* is then well defined.
class LogicElement_Generic
{
public:
  virtual ~LogicElement_Generic() {}

  // Executes the element's operation once.
  virtual void calc() const = 0;
};

// Logic element that multiplies two integer operands and writes the product
// into a target integer. All three integers are bound by reference when the
// element is constructed; the element does not own them.
class LogicElement_Mul : public LogicElement_Generic
{
private:
  int       &to;     // destination of the product
  const int &from1;  // first factor
  const int &from2;  // second factor

public:
  // Binds the destination and both factors for later calc() calls.
  LogicElement_Mul( int &_to, const int &_from1, const int &_from2 )
    : to( _to ),
      from1( _from1 ),
      from2( _from2 )
  {
  }

  // Reads both factors through their references and stores the product.
  void calc() const
  {
    const int product = from1 * from2;
    to = product;
  }
};

// Plain byte buffer; main() places the script's integer variables inside it.
char globalVariableBuffer[1000];

// Table of logic elements, stored as pointers to the generic interface.
LogicElement_Generic* le[10];

int main( void )
{
  // Demo wiring only - a real program would derive this layout from e.g. an
  // input file. The first three int-sized slots of the buffer hold the
  // product and the two factors.
  int *const slots = reinterpret_cast<int*>( globalVariableBuffer );
  int &product = slots[0];
  int &lhs     = slots[1];
  int &rhs     = slots[2];

  lhs = 2;
  rhs = 3;

  le[0] = new LogicElement_Mul( product, lhs, rhs );

  // Run the calculation. The finished application would loop over every
  // registered element again and again here, which is where the resources
  // would be spent.
  le[0]->calc();

  return product;
}

这段代码虽然能按预期工作，但请看生成的汇编代码：

  78                    .section    .text._ZNK16LogicElement_Mul4calcEv,"axG",@progbits,_ZNK16LogicElement_Mul4calcEv,comdat
  79                    .align 2
  80                    .weak   _ZNK16LogicElement_Mul4calcEv
  82                _ZNK16LogicElement_Mul4calcEv:
  83                .LFB6:
  17:.../src/test.cpp ****   void calc() const
  84                    .loc 1 17 0
  85                    .cfi_startproc
  86 0000 55            pushq   %rbp
  87                .LCFI6:
  88                    .cfi_def_cfa_offset 16
  89                    .cfi_offset 6, -16
  90 0001 4889E5        movq    %rsp, %rbp
  91                .LCFI7:
  92                    .cfi_def_cfa_register 6
  93 0004 48897DF8      movq    %rdi, -8(%rbp)
  18:.../src/test.cpp ****   {
  19:.../src/test.cpp ****     to = from1 * from2;
  94                    .loc 1 19 0
  95 0008 488B45F8      movq    -8(%rbp), %rax
  96 000c 488B4008      movq    8(%rax), %rax
  97 0010 488B55F8      movq    -8(%rbp), %rdx
  98 0014 488B5210      movq    16(%rdx), %rdx
  99 0018 8B0A          movl    (%rdx), %ecx
 100 001a 488B55F8      movq    -8(%rbp), %rdx
 101 001e 488B5218      movq    24(%rdx), %rdx
 102 0022 8B12          movl    (%rdx), %edx
 103 0024 0FAFD1        imull   %ecx, %edx
 104 0027 8910          movl    %edx, (%rax)
  20:.../src/test.cpp ****   }
 105                    .loc 1 20 0
 106 0029 5D            popq    %rbp
 107                .LCFI8:
 108                    .cfi_def_cfa 7, 8
 109 002a C3            ret
 110                    .cfi_endproc

查看汇编代码的第 95 至 104 行，可以看到访问每个变量都需要三次间接寻址。

由于这部分代码（calc() 方法）最终会被非常频繁地调用，我希望它占用尽可能少的 CPU 周期和内存带宽（在通用 C/C++ 所能做到的范围内）。

我还想实现（上面的代码中没有展示）两个布局完全相同的变量缓冲区，以便在多线程方案中进行双缓冲，从而减少必要的加锁（具体的实现细节超出了本问题的范围）。

所以最重要的问题是：

如何更改架构以减少calc（）中的内存间接量？
（我希望只有两次：一次用于取得变量数组中的偏移地址，另一次用于取得变量本身——但我尝试把上面的代码改为使用偏移量之后，情况反而变得更糟！）
有没有更好的方法来设置类，从而设置LogicElements数组，以便调用计算方法将使用最少量的资源？

Answer 1

感谢 @Ed S. 的提示，我不再使用引用（我原本以为编译器能更好地优化引用）。

但我做的更重要的一步，是比较开启优化后生成的汇编代码（只需一个简单的 -O2 就够了）。我一开始没有这样做，是因为想更清楚地看到生成的“纯”机器码，而不是让聪明的编译器去修补愚蠢程序员的代码——但看来不开优化时，编译器实在太“笨”了……

所以现在变量数组的结果非常好：

// Abstract interface shared by every logic element of the script.
// Concrete elements are created dynamically and handled through pointers to
// this base, so the destructor is virtual: destroying a derived element
// through a LogicElement_Generic* is then well defined.
class LogicElement_Generic
{
public:
  virtual ~LogicElement_Generic() {}

  // Executes the element's operation once against the variable buffer that
  // starts at `base`.
  virtual void calc(void * const base) const = 0;
};

// Logic element that multiplies two integers located inside a variable
// buffer and stores the product back into that buffer. The element keeps
// only byte offsets; the buffer itself is supplied on every calc() call, so
// the same element can be run against different buffers.
class LogicElement_Mul : public LogicElement_Generic
{
  int const to;     // byte offset of the destination int
  int const from1;  // byte offset of the first factor
  int const from2;  // byte offset of the second factor

public:
  // Stores the three byte offsets (relative to the buffer start).
  LogicElement_Mul( int const _to, int const _from1, int const _from2 ) : to(_to), from1(_from1), from2(_from2)
  {}

  // Computes buffer[to] = buffer[from1] * buffer[from2], with each offset
  // counted in bytes from `base`.
  // NOTE(review): assumes every offset lies inside the buffer and is suitably
  // aligned for int - nothing here checks it; confirm at the setup site.
  void calc(void * const base) const
  {
    // Arithmetic directly on void* is only a GNU extension; ISO C++ needs a
    // pointer to a complete type, so step through the buffer as bytes.
    char * const bytes = static_cast<char*>(base);
    *reinterpret_cast<int*>(bytes + to) =
        *reinterpret_cast<int*>(bytes + from1) * *reinterpret_cast<int*>(bytes + from2);
  }
};

// Plain byte buffer; main() addresses the script's integer variables inside
// it by byte offset.
char globalVariableBuffer[1000];

// Table of logic elements, stored as pointers to the generic interface.
LogicElement_Generic* le[10];

int main( void )
{
  // Byte offsets of the three int variables inside the global buffer.
  int const productOffset = 0;
  int const lhsOffset     = sizeof(int);
  int const rhsOffset     = 2*sizeof(int);

  // Seed the two factors directly in the buffer.
  int &lhs = *reinterpret_cast<int*>( globalVariableBuffer + lhsOffset );
  int &rhs = *reinterpret_cast<int*>( globalVariableBuffer + rhsOffset );
  lhs = 2;
  rhs = 3;

  // Register the multiplication element and run it once against the buffer.
  le[0] = new LogicElement_Mul( productOffset, lhsOffset, rhsOffset );
  le[0]->calc( globalVariableBuffer );

  // The product is the process exit status.
  int const &product = *reinterpret_cast<int*>( globalVariableBuffer + productOffset );
  return product;
}

相关的汇编代码如下：

  17:.../src/test.cpp ****   void calc(void * const base) const
  12                    .loc 1 17 0
  13                    .cfi_startproc
  14                .LVL0:
  18:.../src/test.cpp ****   {
  19:.../src/test.cpp ****     *((int*)(base+to)) = *((int*)(base+from1)) * *((int*)(base+from2));
  15                    .loc 1 19 0
  16 0000 4863470C      movslq  12(%rdi), %rax
  17 0004 48634F10      movslq  16(%rdi), %rcx
  18 0008 48635708      movslq  8(%rdi), %rdx
  19 000c 8B0406        movl    (%rsi,%rax), %eax
  20 000f 0FAF040E      imull   (%rsi,%rcx), %eax
  21 0013 890416        movl    %eax, (%rsi,%rdx)
  20:.../src/test.cpp ****   }
  22                    .loc 1 20 0
  23 0016 C3            ret
  24                    .cfi_endproc

所以我回答了第一个问题！：）

第二个问题仍未解决（现在更是如此，因为这种指针运算也许是合法的 C++，但非常难看……）

用于脚本应用程序的高效数据结构？

1 个答案: