Userland中断定时器访问,例如通过KeQueryInterruptTime(或类似)

时间:2011-11-21 12:30:23

标签: windows winapi timer

是否存在“Nt”或类似(即非内核模式驱动程序)函数等效于KeQueryInterruptTime或类似的东西?似乎没有NtQueryInterruptTime这样的东西,至少我没有找到它。

我想要的是某种相当准确、可靠的单调定时器(因此不能用QPC),它要有不错的效率,不会出现32位计数器溢出之类的意外,也不需要多余的“聪明”,不涉及时区或复杂的结构。

理想情况下,我想要的是类似timeGetTime、但返回64位值的东西。它甚至不必是同一个计时器。从Vista开始有GetTickCount64,它本身是可以接受的,但我不想仅仅因为这样一个愚蠢的原因就放弃对XP的支持。

按照此处(here)所示,在0x7FFE0008处读取四字……嗯,确实可行……这证明了XP下实际的内部计数器确实是64位的(而且这也是最快的方式),但是……我们就不要谈论读取某个未知的、硬编码的内存位置是多么令人讨厌的黑客手法了。

在调用被人为弱化(把64位计数器缩减为32位)的高级API函数与直接读取原始内存地址之间,肯定存在某种折中的办法吧?

3 个答案:

答案 0 :(得分:3)

这是一个GetTickCount()的线程安全包装器的示例,它将tick计数值扩展到64位,并且相当于GetTickCount64()。

为避免漏掉计数器回绕,请务必在每49.7天内调用此函数若干次。你甚至可以创建一个专用线程,其唯一目的就是在无限循环中调用这个函数,然后休眠20天。

ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= GetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp == curCount1)
  {
    return curCount2;
  }
  else
  {
    return tmp;
  }
}

编辑:这是一个测试MyGetTickCount64()的完整应用程序。

// Compiled with Open Watcom C 1.9: wcl386.exe /we /wx /q gettick.c

#include <windows.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>

//
// The below code is an ugly implementation of InterlockedCompareExchange64()
// that is apparently missing in Open Watcom C 1.9.
// It must work with MSVC++ too, however.
//
//
// Raw 32-bit x86 machine code, stored as data, for a routine that takes three
// stack arguments at [ebp+0x8], [ebp+0xc] and [ebp+0x10]:
//   - [ebp+0x8]  : address of the 64-bit destination operand of cmpxchg8b
//   - [ebp+0xc]  : address of the 64-bit value loaded into ECX:EBX
//   - [ebp+0x10] : address of the 64-bit value loaded into EDX:EAX
// It then executes "lock cmpxchg8b" on the destination, restores the saved
// registers and returns, leaving the outcome in EDX:EAX.
//
// The per-byte comments below are the disassembly of each encoded instruction.
// The bytes live in an ordinary data array; main() calls VirtualProtect() on
// this array with PAGE_EXECUTE_READWRITE before the code is used.
//
UINT8 Cmpxchg8bData[] =
{
  0x55,             // push      ebp
  0x89, 0xE5,       // mov       ebp, esp
  0x57,             // push      edi
  0x51,             // push      ecx
  0x53,             // push      ebx
  0x8B, 0x7D, 0x10, // mov       edi, [ebp + 0x10]
  0x8B, 0x07,       // mov       eax, [edi]
  0x8B, 0x57, 0x04, // mov       edx, [edi + 0x4]
  0x8B, 0x7D, 0x0C, // mov       edi, [ebp + 0xc]
  0x8B, 0x1F,       // mov       ebx, [edi]
  0x8B, 0x4F, 0x04, // mov       ecx, [edi + 0x4]
  0x8B, 0x7D, 0x08, // mov       edi, [ebp + 0x8]
  0xF0,             // lock:
  0x0F, 0xC7, 0x0F, // cmpxchg8b [edi]
  0x5B,             // pop       ebx
  0x59,             // pop       ecx
  0x5F,             // pop       edi
  0x5D,             // pop       ebp
  0xC3              // ret
};

//
// Callable view of the Cmpxchg8bData byte array: the array's address is cast
// to a __cdecl function pointer taking (Dest, Exch, Comp) and returning a
// LONGLONG. Exch and Comp are passed by address, not by value.
//
LONGLONG (__cdecl *Cmpxchg8b)(LONGLONG volatile* Dest, LONGLONG* Exch, LONGLONG* Comp) =
  (LONGLONG (__cdecl *)(LONGLONG volatile*, LONGLONG*, LONGLONG*))Cmpxchg8bData;

//
// By-value front end for Cmpxchg8b(), which wants its 64-bit exchange and
// comparand operands by address.
//
// Destination - 64-bit variable handed to Cmpxchg8b() as its first argument.
// Exchange    - value forwarded (by address) as the second argument.
// Comparand   - value forwarded (by address) as the third argument.
//
// Returns whatever Cmpxchg8b() returns.
//
LONGLONG MyInterlockedCompareExchange64(LONGLONG volatile* Destination,
                                        LONGLONG Exchange,
                                        LONGLONG Comparand)
{
  // The by-value parameters are local copies, so their addresses can be
  // passed straight to the machine-code routine.
  LONGLONG* const exchangeAddr = &Exchange;
  LONGLONG* const comparandAddr = &Comparand;

  return Cmpxchg8b(Destination, exchangeAddr, comparandAddr);
}

//
// Drop any macro definition of InterlockedCompareExchange64 that is already
// in effect, then redirect every later use of that name in this file to
// MyInterlockedCompareExchange64().
//
#ifdef InterlockedCompareExchange64
#undef InterlockedCompareExchange64
#endif

#define InterlockedCompareExchange64(Destination, Exchange, Comparand) \
  MyInterlockedCompareExchange64(Destination, Exchange, Comparand)

//
// This stuff makes a thread-safe printf().
// We don't want characters output by one thread to be mixed
// with characters output by another. We want printf() to be
// "atomic".
// We use a critical section around vprintf() to achieve "atomicity".
//
// Guards the vprintf() call in ts_printf(); main() initializes it.
static CRITICAL_SECTION PrintfCriticalSection;

//
// printf() look-alike whose output is produced while holding
// PrintfCriticalSection, so text from concurrent callers is not interleaved.
//
// Format - printf-style format string; further arguments follow it.
//
// Returns the value returned by vprintf().
//
int ts_printf(const char* Format, ...)
{
  va_list args;
  int written;

  va_start(args, Format);

  // Only the actual output call needs to be inside the critical section.
  EnterCriticalSection(&PrintfCriticalSection);
  written = vprintf(Format, args);
  LeaveCriticalSection(&PrintfCriticalSection);

  va_end(args);

  return written;
}

// Amount added to the simulated counter by SimulatedTickThread() after each
// Sleep(10). At 0x800000 (2^23) per step the low 32 bits wrap every 512 steps.
#define TICK_COUNT_10MS_INCREMENT 0x800000

//
// This is the simulated tick counter.
// Its low 32 bits are going to be returned by
// our, simulated, GetTickCount().
//
// TICK_COUNT_10MS_INCREMENT is what the counter is
// incremented by every time. The value is so chosen
// that the counter quickly overflows in its
// low 32 bits.
//
// It is written by SimulatedTickThread() and read by SimulatedGetTickCount()
// and PrintStats().
//
static volatile LONGLONG SimulatedTickCount = 0;

//
// This is our simulated 32-bit GetTickCount()
// that returns a count that often overflows.
//
ULONG SimulatedGetTickCount(void)
{
  return (ULONG)SimulatedTickCount;
}

//
// This thread function will increment the simulated tick counter
// whose value's low 32 bits we'll be reading in SimulatedGetTickCount().
//
//
// Thread entry point that drives the simulated tick counter: forever, sleep
// 10 ms and then add TICK_COUNT_10MS_INCREMENT to SimulatedTickCount.
//
// lpParameter - unused.
//
// The loop never exits; the trailing return only satisfies the thread
// function signature.
//
DWORD WINAPI SimulatedTickThread(LPVOID lpParameter)
{
  UNREFERENCED_PARAMETER(lpParameter);

  for (;;)
  {
    LONGLONG c;

    Sleep(10);

    // Get the counter value, add TICK_COUNT_10MS_INCREMENT to it and
    // store the result back. The store succeeds only if the counter still
    // holds the value just read; otherwise read again and retry.
    do
    {
      c = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);
    } while (InterlockedCompareExchange64(&SimulatedTickCount, c + TICK_COUNT_10MS_INCREMENT, c) != c);
  }

  return 0;
}

// Statistics maintained by MyGetTickCount64() via InterlockedIncrement() and
// reported by PrintStats():
// - bumped each time the 32-bit tick value is seen to have wrapped
volatile LONG CountOfObserved32bitOverflows = 0;
// - bumped each time the final compare-exchange on the 64-bit count fails
volatile LONG CountOfObservedUpdateRaces = 0;

//
// This prints statistics that includes the true 64-bit value of
// SimulatedTickCount that we can't get from SimulatedGetTickCount() as it
// returns only its lower 32 bits.
//
// The stats also include:
// - the number of times that MyGetTickCount64() observes an overflow of
//   SimulatedGetTickCount()
// - the number of times MyGetTickCount64() fails to update its internal
//   counter because of a concurrent update in another thread.
//
void PrintStats(void)
{
  LONGLONG true64bitCounter = InterlockedCompareExchange64(&SimulatedTickCount, 0, 0);

  ts_printf("  0x%08X`%08X <- true 64-bit count; ovfs: ~%d; races: %d\n",
            (ULONG)(true64bitCounter >> 32),
            (ULONG)true64bitCounter,
            CountOfObserved32bitOverflows,
            CountOfObservedUpdateRaces);
}

//
// This is our poor man's implementation of GetTickCount64()
// on top of GetTickCount().
//
// It's thread safe.
//
// When used with actual GetTickCount() instead of SimulatedGetTickCount()
// it must be called at least a few times in 49.7 days to ensure that
// it doesn't miss any overflows in GetTickCount()'s return value.
//
ULONGLONG MyGetTickCount64(void)
{
  static volatile LONGLONG Count = 0;
  LONGLONG curCount1, curCount2;
  LONGLONG tmp;

  curCount1 = InterlockedCompareExchange64(&Count, 0, 0);

  curCount2 = curCount1 & 0xFFFFFFFF00000000;
  curCount2 |= SimulatedGetTickCount();

  if ((ULONG)curCount2 < (ULONG)curCount1)
  {
    curCount2 += 0x100000000;

    InterlockedIncrement(&CountOfObserved32bitOverflows);
  }

  tmp = InterlockedCompareExchange64(&Count, curCount2, curCount1);

  if (tmp != curCount1)
  {
    curCount2 = tmp;

    InterlockedIncrement(&CountOfObservedUpdateRaces);
  }

  return curCount2;
}

//
// This is an error counter. If a thread that uses MyGetTickCount64() notices
// any problem with what MyGetTickCount64() returns, it bumps up this error
// counter and stops. If one of threads sees a non-zero value in this
// counter due to an error in another thread, it stops as well.
//
volatile LONG Error = 0; // zero until a TickUserThread() instance detects a bad tick value and increments it

//
// This is a thread function that will be using MyGetTickCount64(),
// validating its return value and printing some stats once in a while.
//
// This function is meant to execute concurrently in multiple threads
// to create race conditions inside of MyGetTickCount64() and test it.
//
//
// Thread body that hammers MyGetTickCount64() and validates what it returns.
//
// lpParameter - the thread number, passed as an integer disguised as a
//               pointer; thread number 1 additionally prints the statistics.
//
// Each pass compares the newest tick count with the one from the previous
// pass and flags two conditions as errors: the count going backwards, and the
// count jumping ahead by 0x100000000 or more. On an error the thread prints a
// message and the statistics, increments Error and returns -1. The loop also
// ends, returning 0, once Error is seen to be non-zero.
//
DWORD WINAPI TickUserThread(LPVOID lpParameter)
{
  DWORD user = (DWORD)lpParameter; // thread number
  ULONGLONG previous;              // tick count from the previous pass
  ULONGLONG latest;                // tick count from the current pass
  ULONGLONG lastNap;               // tick count when the thread last slept
  ULONGLONG lastReport;            // tick count when the thread last printed

  lastReport = lastNap = latest = MyGetTickCount64();

  while (!Error)
  {
    previous = latest;
    latest = MyGetTickCount64();

    // Every ~100 ms sleep a little (slightly lowers CPU load, to about 90%)
    if (latest > lastNap + TICK_COUNT_10MS_INCREMENT * 10L)
    {
      lastNap = latest;
      Sleep(1 + rand() % 20);
    }

    // Every ~1000 ms print the last value from MyGetTickCount64().
    // Thread 1 also prints stats here.
    if (latest > lastReport + TICK_COUNT_10MS_INCREMENT * 100L)
    {
      lastReport = latest;

      // %u and %X expect unsigned int, so convert the DWORD/ULONG values
      // explicitly instead of passing unsigned long.
      ts_printf("%u:0x%08X`%08X\n",
                (unsigned)user,
                (unsigned)(latest >> 32),
                (unsigned)latest);

      if (user == 1)
      {
        PrintStats();
      }
    }

    if (previous > latest)
    {
      ts_printf("%u:Non-monotonic tick counts: 0x%016llX > 0x%016llX!\n",
                (unsigned)user,
                previous,
                latest);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }
    else if (previous + 0x100000000 <= latest)
    {
      ts_printf("%u:Too big tick count jump: 0x%016llX -> 0x%016llX!\n",
                (unsigned)user,
                previous,
                latest);
      PrintStats();
      InterlockedIncrement(&Error);
      return -1;
    }

    Sleep(0); // be nice, yield to other threads.
  }

  return 0;
}

//
// This prints stats upon Ctrl+C and terminates the program.
//
//
// Console control handler registered by main().
//
// Event - the control event code delivered by the console.
//
// Prints the statistics for CTRL_C_EVENT and does nothing for other events.
// Always returns FALSE, i.e. it never reports the event as handled.
//
BOOL WINAPI ConsoleEventHandler(DWORD Event)
{
  switch (Event)
  {
    case CTRL_C_EVENT:
      PrintStats();
      break;

    default:
      break;
  }

  return FALSE;
}

int main(void)
{
  HANDLE simulatedTickThreadHandle;
  HANDLE tickUserThreadHandle;
  DWORD dummy;

  // This is for the missing InterlockedCompareExchange64() workaround.
  VirtualProtect(Cmpxchg8bData, sizeof(Cmpxchg8bData), PAGE_EXECUTE_READWRITE, &dummy);

  InitializeCriticalSection(&PrintfCriticalSection);

  if (!SetConsoleCtrlHandler(&ConsoleEventHandler, TRUE))
  {
    ts_printf("SetConsoleCtrlHandler(&ConsoleEventHandler) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start the tick simulator thread.

  simulatedTickThreadHandle = CreateThread(NULL, 0, &SimulatedTickThread, NULL, 0, NULL);

  if (simulatedTickThreadHandle == NULL)
  {
    ts_printf("CreateThread(&SimulatedTickThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // Start one thread that'll be using MyGetTickCount64().

  tickUserThreadHandle = CreateThread(NULL, 0, &TickUserThread, (LPVOID)2, 0, NULL);
  if (tickUserThreadHandle == NULL)
  {
    ts_printf("CreateThread(&TickUserThread) failed with error 0x%X\n", GetLastError());
    return -1;
  }

  // The other thread using MyGetTickCount64() will be the main thread.

  TickUserThread((LPVOID)1);

  //
  // The app terminates upon any error condition detected in TickUserThread()
  // in any of the threads or by Ctrl+C.
  //

  return 0;
}

作为测试,我在一台装有2个CPU的空闲Windows XP机器上运行了这个测试应用程序5个多小时(保持空闲是为了避免潜在的长时间线程饥饿,从而避免漏掉大约每5秒发生一次的计数器溢出),它一直表现良好。

这是控制台的最新输出:

2:0x00000E1B`C8800000
1:0x00000E1B`FA800000
  0x00000E1B`FA800000 <- true 64-bit count; ovfs: ~3824; races: 110858

正如您所看到的,MyGetTickCount64()已经观察到3824次32位溢出,并且有110858次未能通过其第二个InterlockedCompareExchange64()更新Count的值。因此,溢出确实发生了,而后一个数字意味着该变量实际上是由两个线程并发更新的。

您还可以看到,两个线程在TickUserThread()中从MyGetTickCount64()得到的64位滴答计数,其高32位没有任何缺失,并且非常接近SimulatedTickCount中实际的64位滴答计数(其低32位由SimulatedGetTickCount()返回)。由于线程调度以及统计信息打印得不频繁,0x00000E1BC8800000看起来落后于0x00000E1BFA800000,差距正好是100 * TICK_COUNT_10MS_INCREMENT,即1秒。当然,在内部,实际差异要小得多。

现在,InterlockedCompareExchange64()的可用性......有点奇怪,它是officially available since Windows Vista and Windows Server 2003。事实上,Server 2003是使用与Windows XP相同的代码库构建的。

但最重要的是,这个函数是建立在Pentium的CMPXCHG8B指令之上的,该指令自1998年或更早就已经可用(参见原文的参考链接(1)、(2))。我可以在Windows XP(SP3)的二进制文件中看到这条指令:它出现在 ntkrnlpa.exe / ntoskrnl.exe(内核)和 ntdll.dll(导出内核 NtXxxx() 函数的DLL,一切都建立在它之上)中。查找字节序列 0xF0,0x0F,0xC7,并反汇编该处的代码,即可确认这些字节并非巧合。

您可以通过CPUID指令(CPUID函数0x00000001和函数0x80000001的EDX位8)检查该指令是否可用,如果指令不存在,就拒绝运行而不是崩溃;不过如今你不太可能找到不支持此指令的机器。即使找到了,那台机器对Windows XP和你的应用程序来说也不会是一台好机器。

答案 1 :(得分:1)

这是另一种方法,是Alex的包装器的一个变体,但只使用32位互锁操作。它实际上只返回一个60位的数字,但这仍然足够使用大约三千六百万年。:-)

确实需要更频繁地调用,至少每三天调用一次。这不应该是一个主要的缺点。

//
// Widens GetTickCount() using only 32-bit interlocked operations.
//
// A function-local static holds everything above the low 28 bits of the
// widened count (so its low four bits mirror the top four bits of the 32-bit
// tick). 0xFFFFFFFF in that static means "not written yet".
//
// Returns (stored upper part << 28) + (low 28 bits of the 32-bit tick), or
// 0xFFFFFFFF when the top four bits of the tick are neither equal to nor one
// step past the stored ones.
//
ULONGLONG MyTickCount64(void)
{
    static volatile DWORD count = 0xFFFFFFFF;
    DWORD stored, tick, storedZone, tickZone, upper;
    ULONGLONG widened;

    // Exchanging 0 for 0 only ever replaces 0 with 0, so this acts as a read.
    stored = InterlockedCompareExchange(&count, 0, 0);
    tick = GetTickCount();

    if (stored == 0xFFFFFFFF)
    {
        // First use: try to seed count with the tick's top four bits.
        const DWORD seed = tick >> 28;

        stored = InterlockedCompareExchange(&count, seed, 0xFFFFFFFF);

        if (stored == 0xFFFFFFFF)
        {
            // The exchange went through, so count now holds our seed.
            stored = seed;
        }
        else if (stored != seed)
        {
            // Someone else seeded count with a different value; take a
            // fresh tick reading to pair with it.
            tick = GetTickCount();
        }
    }

    storedZone = stored & 15;
    tickZone = tick >> 28;

    if (tickZone == storedZone)
    {
        // Top four bits unchanged since count was last written.
        upper = stored;
    }
    else if (tickZone == storedZone + 1 || (tickZone == 0 && storedZone == 15))
    {
        // Top four bits advanced by one step (including 15 -> 0); try to
        // record that. The outcome of the exchange is deliberately ignored.
        InterlockedCompareExchange(&count, stored + 1, stored);
        upper = stored + 1;
    }
    else
    {
        // Oops, we weren't called often enough, we're stuck
        return 0xFFFFFFFF;
    }

    widened = upper;
    widened <<= 28;
    widened += tick & 0x0FFFFFFF;
    return widened;
}

答案 2 :(得分:1)

感谢Google Books免费提供相关文献,我想出了一个简单快速的GetTickCount64实现,它在Vista之前的系统上运行得非常好(而且它仍然比从硬编码的内存地址读取值要体面一些)。

事实上,调用中断0x2A很容易,因为中断0x2A映射到KiGetTickCount。在GCC内联汇编中,这给出了:

/*
 * Issues software interrupt 0x2a and returns whatever it leaves in the
 * registers bound to the "=A" output operand, as an unsigned long long.
 *
 * According to the accompanying text, interrupt 0x2a is mapped to
 * KiGetTickCount. NOTE(review): "=A" is presumably meant as the EDX:EAX
 * pair, which is how GCC treats it for a 64-bit value on 32-bit x86 --
 * confirm before building this for any other target.
 */
static __inline__ __attribute__((always_inline)) unsigned long long get_tick_count64()
{
    unsigned long long ret;
    /* No inputs and no extra clobbers are declared; ret is the only output. */
    __asm__ __volatile__ ("int $0x2a" : "=A"(ret) : : );
    return ret;
}

由于KiGetTickCount的工作方式,这个函数更应该叫做GetTickCount46,因为它执行了右移18位的操作,返回的是46位而不是64位。不过,原版Vista中的实现也是如此。

请注意,KiGetTickCount会破坏(clobber)edx;如果您打算实现自己的、更快的32位版本,这一点很重要(在这种情况下必须把edx加入clobber列表)。