C语言中空指针的计时行为

时间:2014-08-09 09:17:42

标签: c

以下内容主要在Windows 7上使用Microsoft CL版本17.00.50727.1进行了测试,但我在g++上也看到了类似的现象。我很确定程序的逻辑功能是正确的,问题只出在计时上。

基本上我有一个函数可以根据需要动态返回新的“数据块”。如果它的“页面”中的空间不足,则会生成一个新页面。

块的用途是匹配传入的数据键。如果找到了该键,就到此为止。如果没有找到,则将新的数据键添加到块中。如果块空间不足,则创建一个新块并链接到旧块之后。

无论块生成函数是否将新块中的“下一个”指针显式设置为NULL,代码都有效。理论上,calloc()已经将内容设置为0。

第一个奇怪的事情是,当“下一个”指针被显式设置为NULL时,块生成函数的运行时间大约要长5倍(!)。不过,这样做之后,整个示例的计时就符合预期了:键列表中的条目越多,匹配新键所需的时间就越长,呈线性增长。唯一的差别出现在添加某个键导致需要获取新块的时候,此时的开销与调用块生成函数所花费的时间相当。

唯一的问题是块生成函数慢得令人无法接受。

当指针未被显式设置为NULL时,块生成函数变得又好又快——耗时大约只有键匹配函数的一半到四分之一,而不是与之相当甚至更长。

但这样一来,键匹配函数开始表现出奇怪的计时行为。其耗时大体上仍随键的数量线性增加,并且在第16和第32个键处仍有跳变(因为列表长度为16)。但它在0号键上也出现了很大的跳变,在第17、33等键上同样有很大的跳变。

这些键号恰好对应程序第一次查看“下一个”指针的时刻。看起来,程序需要很长时间才能发现calloc置入的0值实际上是一个NULL指针?而一旦知道了这一点,之后就会更快。

第二个奇怪之处是,如果数据结构只包含键,这种效应就会消失。此时,无论“下一个”指针是否被显式设置为NULL,在键0、17、33等处的跳变都会消失。但是当struct中还包含“int unused[4]”时,这种效应又会重新出现。

也许编译器(使用选项/O2,或者g++使用-O3)在结构体仅由单个数字组成时会把结构体优化掉?但我仍然不明白这为什么会以这种方式影响计时行为。

我试图尽可能地从实际代码中简化示例,但我很抱歉它仍然很长。但这并不复杂。

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>

// Timing helpers.  timer_start(n) and timer_end(n) bracket a measured
// region for timer slot n; print_times() prints one summary row for every
// slot that was started at least once.
void timer_start(int n);
void timer_end(int n);
void print_times();

// There are pages of blocks, and data entries per block.
// We don't know ahead of time how many there will be.
// GetNextBlock() returns new blocks, and if necessary
// makes new pages.  MatchOrStore() goes through data in
// a block to try to match the key.  It won't ever match 
// in this example, so the function makes a new data entry.


// One stored entry.  Only `key` is compared and written by MatchOrStore();
// `unused` is not accessed anywhere in this example and only changes the
// size of the struct.
struct dataType
{
  // Surprise number 1:  If the line with "unused" is
  // commented out, things behave as expected, even if
  // Surprise number 2 is in effect.
  int           unused[4];
  int           key;
};

// Capacity of the entry list inside each block.
#define DATA_PER_BLOCK  16

// A fixed-capacity list of entries.  MatchOrStore() chains a new block
// through `next` once the current last block is full.
struct blockType
{
  char          nextEntryNo;              // entries in use; also the index of the next free slot
  struct dataType   list[DATA_PER_BLOCK];
  struct blockType  * next;               // following block in the chain, or null at the end
};

// A page points at an array of blocks that GetNextBlock() hands out one at
// a time.  Pages are linked in both directions as new ones are created.
struct pageType
{
  int           nextBlockNo;    // index in `list` of the next block to hand out
  struct blockType  * list;     // array of BLOCKS_PER_PAGE blocks
  struct pageType   * next;     // page created after this one, or null
  struct pageType   * prev;     // page created before this one, or null
};


// Hands out the next unused block, creating a new page when needed.
struct blockType * GetNextBlock();

// Searches the block chain starting at bp for dp->key and stores the key
// at the end of the chain if it is not found.
void MatchOrStore(
  struct dataType   * dp,
  struct blockType  * bp);

// Most recently created page; null until GetNextBlock() creates the first.
struct pageType     * pagep;


int main(int argc, char * argv[])
{
  struct dataType data;

  // No pages exist yet; GetNextBlock() creates the first one on demand.
  pagep = NULL;

  int round = 0;
  while (round < 50000)
  {
    // Every round starts a fresh chain from a brand-new block.
    struct blockType * head = GetNextBlock();

    // Feed keys 0..39 in ascending order.  The chain is new and the keys
    // are distinct, so each call ends up storing its key.
    data.key = 0;
    while (data.key < 40)
    {
      // The key doubles as the timer slot, giving per-key statistics.
      timer_start(data.key);
      MatchOrStore(&data, head);
      timer_end(data.key);

      data.key++;
    }

    round++;
  }

  print_times();
  exit(0);
}


// Number of blocks allocated together in one page.
#define BLOCKS_PER_PAGE 5000

// Returns a pointer to the next unused block.
//
// Blocks are handed out sequentially from the current page (global `pagep`).
// When there is no page yet, or the current page has handed out all of its
// BLOCKS_PER_PAGE blocks, a new page is allocated with calloc(), linked
// after the current one, and made current.  Pages are never freed.
//
// If either allocation fails, a message is written to stderr and the
// program exits with EXIT_FAILURE, so the return value is never null.
struct blockType * GetNextBlock()
{
  if (pagep == NULL || 
      pagep->nextBlockNo == BLOCKS_PER_PAGE)
  {
    // If this runs out of page space, it makes some more.
    // (The casts are kept so the file also builds as C++.)
    struct pageType * newpagep = (struct pageType *)
      calloc(1, sizeof(struct pageType));
    if (newpagep == NULL)
    {
      fprintf(stderr, "GetNextBlock: out of memory allocating a page\n");
      exit(EXIT_FAILURE);
    }

    newpagep->list = (struct blockType *)
      calloc(BLOCKS_PER_PAGE, sizeof(struct blockType));
    if (newpagep->list == NULL)
    {
      fprintf(stderr, "GetNextBlock: out of memory allocating blocks\n");
      free(newpagep);
      exit(EXIT_FAILURE);
    }

    // I never actually free this, but you get the idea.
    newpagep->nextBlockNo = 0;
    newpagep->next        = NULL;
    newpagep->prev        = pagep;

    if (pagep)
      pagep->next = newpagep;
    pagep = newpagep;
  }

  struct blockType * bp = &pagep->list[ pagep->nextBlockNo++ ];

  // Surprise number 2:  If this line is active, then the
  // timing behaves as expected.  If it is commented out,
  // then presumably calloc still sets next to NULL.
  // But the timing changes in an unexpected way.
  // bp->next = (struct blockType *) 0;

  return bp;
}


// Looks for dp->key in the chain of blocks that starts at blockp.
// Returns immediately if the key is already present.  Otherwise the key is
// stored in the last block of the chain; if that block is full, a new
// block is obtained from GetNextBlock() and linked on first.  Obtaining and
// linking the new block is measured with timer slot 99.
void MatchOrStore(
  struct dataType   * dp,
  struct blockType  * blockp)
{
  struct blockType * cur = blockp;

  // Walk the whole chain; on exit `cur` is the last block.
  for (;;)
  {
    int slot = 0;
    while (slot < cur->nextEntryNo)
    {
      if (cur->list[slot].key == dp->key)
        return;               // already stored, nothing to do
      slot++;
    }

    if (cur->next == NULL)
      break;
    cur = cur->next;
  }

  // Last block is full: extend the chain by one block.
  if (cur->nextEntryNo == DATA_PER_BLOCK)
  {
    timer_start(99);
    struct blockType * fresh = GetNextBlock();
    cur->next = fresh;
    cur       = fresh;
    timer_end(99);
  }

  // Record the key in the first free slot and advance the fill count.
  struct dataType * dest = &cur->list[cur->nextEntryNo];
  cur->nextEntryNo++;
  dest->key = dp->key;
}

// Number of timer slots; every array below has one element per slot.
#define NUM_TIMERS 100

// tu0[n] / tu1[n]: start and end time stamps of slot n, written by
// timer_start() and timer_end().  The element type follows the clock call
// used on each platform.
#ifdef _WIN32
  #include <time.h>
  LARGE_INTEGER 
    tu0[NUM_TIMERS], 
    tu1[NUM_TIMERS];
#else
  #include <sys/time.h>
  struct timeval
    tu0[NUM_TIMERS], 
    tu1[NUM_TIMERS];
#endif

// ctu[n]:    elapsed time accumulated by timer_end(n) (performance-counter
//            ticks on Windows, microseconds otherwise).
// number[n]: how many times timer_start(n) has been called.
int ctu[NUM_TIMERS],
    number[NUM_TIMERS];


// Begins a measurement on timer slot `no`: bumps the slot's call count and
// records the current time as its start stamp.
void timer_start(int no)
{
  number[no] += 1;
#ifdef _WIN32
  QueryPerformanceCounter(tu0 + no);
#else
  gettimeofday(tu0 + no, NULL);
#endif
}


// Ends a measurement on timer slot `no`: records the current time as the
// slot's end stamp and adds (end - start) to the slot's int accumulator.
// The unit is performance-counter ticks on Windows, microseconds otherwise.
void timer_end(int no)
{
#ifdef _WIN32
  QueryPerformanceCounter(tu1 + no);
  const LARGE_INTEGER * began = &tu0[no];
  const LARGE_INTEGER * ended = &tu1[no];
  ctu[no] += (ended->QuadPart - began->QuadPart);
#else
  gettimeofday(tu1 + no, NULL);
  const struct timeval * began = &tu0[no];
  const struct timeval * ended = &tu1[no];
  ctu[no] += 1000000 * (ended->tv_sec  - began->tv_sec )
              +        (ended->tv_usec - began->tv_usec);
#endif
}


// Prints a table with one row per timer slot that was started at least
// once: slot number, call count, accumulated time, and average per call.
void print_times()
{
  // Header row; column widths match the data rows below.
  printf("%5s  %10s  %10s  %8s\n",
    "n", "Number", "User ticks", "Avg");

  int slot = 0;
  while (slot < NUM_TIMERS)
  {
    const int calls = number[slot];

    // Slots that were never started are left out of the table.
    if (calls != 0)
    {
      const int total = ctu[slot];
      printf("%5d  %10d  %10d  %8.2f\n",
        slot,
        calls,
        total,
        total / (double) calls);
    }

    slot++;
  }
}

0 个答案:

没有答案