TSC MSR和序列化

时间:2015-08-01 15:54:07

标签: x86-64

关于MSR IA32_TIME_STAMP_COUNTER(10h): 它遵循哪种序列化规则? rdtsc或rdtscp?还是其他?

如果它不具备序列化语义,我是否应该在进行任何数学计算之前先加入一个 cpuid"屏障"?

- 修改 -

到目前为止,我已经实现了两种屏障:cpuid 和 fence。

使用 cpuid

/*
 * RDCOUNTER(_val, _cnt) - read the MSR numbered _cnt into _val, with a CPUID
 * executed immediately before and immediately after the RDMSR.
 *
 *   _val  lvalue that lives in memory ("=m" constraint); receives the 64-bit
 *         value (RDX << 32) | RAX.
 *   _cnt  MSR index; must be a compile-time constant ("i" constraint).
 *
 * Instruction sequence:
 *   1. Zero RAX and execute CPUID (leaf 0) -- this is the "barrier" placed
 *      ahead of the read.
 *   2. Load _cnt into RCX and execute RDMSR; the result is returned in
 *      EDX:EAX.
 *   3. Push RAX and RDX, because the second CPUID overwrites both.
 *   4. Zero RAX and execute CPUID again -- the "barrier" after the read.
 *   5. Pop RDX and RAX back (reverse order of the pushes), merge the two
 *      halves into RAX and store RAX to _val.
 *
 * RAX, RBX, RCX and RDX are declared clobbered (CPUID writes all four), and
 * "memory" keeps the compiler from moving memory accesses across the asm.
 *
 * NOTE(review): push/pop inside inline asm writes below the compiler's RSP;
 * that is only safe when no red zone is in use (presumably a kernel build
 * with -mno-red-zone) -- confirm the build flags.
 * NOTE(review): the expansion is a bare asm statement without a trailing
 * semicolon; the caller must supply it.
 */
#define RDCOUNTER(_val,  _cnt)                      \
asm volatile                                        \
(                                                   \
    "xorq   %%rax, %%rax    \n\t"                   \
    "cpuid                  \n\t"                   \
    "movq   %1, %%rcx       \n\t"                   \
    "rdmsr                  \n\t"                   \
    "push   %%rax           \n\t"                   \
    "push   %%rdx           \n\t"                   \
    "xorq   %%rax, %%rax    \n\t"                   \
    "cpuid                  \n\t"                   \
    "pop    %%rdx           \n\t"                   \
    "pop    %%rax           \n\t"                   \
    "shlq   $32, %%rdx      \n\t"                   \
    "orq    %%rdx, %%rax    \n\t"                   \
    "movq   %%rax, %0"                              \
    : "=m" (_val)                                   \
    : "i" (_cnt)                                    \
    : "%rax", "%rbx", "%rcx", "%rdx", "memory"      \
)

使用 fence

/*
 * RDCOUNTER(_val, _cnt) - read the MSR numbered _cnt into _val, with an
 * MFENCE executed immediately before and immediately after the RDMSR.
 *
 *   _val  lvalue that lives in memory ("=m" constraint); receives the 64-bit
 *         value (RDX << 32) | RAX.
 *   _cnt  MSR index; must be a compile-time constant ("i" constraint).
 *
 * Instruction sequence: load _cnt into RCX, MFENCE, RDMSR (result returned
 * in EDX:EAX), MFENCE, then merge the two halves into RAX and store RAX to
 * _val.
 *
 * RAX, RBX, RCX and RDX are declared clobbered and "memory" keeps the
 * compiler from moving memory accesses across the asm.
 *
 * NOTE(review): no instruction in this sequence writes RBX; the "%rbx"
 * clobber looks like a leftover from the CPUID variant -- confirm before
 * removing.
 * NOTE(review): MFENCE orders memory accesses; whether it also orders RDMSR
 * against surrounding instruction execution is not established here --
 * confirm against the Intel SDM (LFENCE is the fence documented for ordering
 * RDTSC).
 * NOTE(review): the expansion is a bare asm statement without a trailing
 * semicolon; the caller must supply it.
 */
#define RDCOUNTER(_val,  _cnt)                      \
asm volatile                                        \
(                                                   \
    "movq   %1, %%rcx       \n\t"                   \
    "mfence                 \n\t"                   \
    "rdmsr                  \n\t"                   \
    "mfence                 \n\t"                   \
    "shlq   $32, %%rdx      \n\t"                   \
    "orq    %%rdx, %%rax    \n\t"                   \
    "movq   %%rax, %0"                              \
    : "=m" (_val)                                   \
    : "i" (_cnt)                                    \
    : "%rax", "%rbx", "%rcx", "%rdx", "memory"      \
)

我项目的部分内容是尝试估算处理器的外部时钟频率(FSB或BCLK)。

  • 算法分配结构化内存的数组,以读取和测量时间戳计数器的增量。
  • 此内存块被分配为驻留在处理器缓存中。
  • 与BSP建立cpu亲和性,调度程序和中断在计算时暂停。
  • 先执行若干轮TSC读取以强制缓存驻留;随后在所有测量结果中,出现次数最多的那个值被认定为最佳频率。

我期望在几次运行后获得恒定的频率。

不幸的是,无论是否使用屏障指令,我仍然存在差异。

各次结果相当接近,至少在小数点后3位以内一致,但始终无法完全相同。

(这是在Core 2和Core i7上测试的)

/* Completion that Compute_Clock() signals through complete_and_exit() once
 * its measurement has finished. */
DECLARE_COMPLETION(bclk_job_complete);

/*
 * One counter measurement.
 *
 * V[0] and V[1] hold the two raw counter readings taken around the delay,
 * and D holds their difference once the read overhead has been subtracted.
 */
typedef struct {
    unsigned long long V[2];    /* [0] = first reading, [1] = second reading */
    unsigned long long D;       /* V[1] - V[0] - overhead */
} TSC_STRUCT;

#define OCCURENCES 32
signed int Compute_Clock(void *arg)
{
CLOCK *clock=(CLOCK *) arg;
unsigned int ratio=clock->Q;
unsigned long long overhead=0;
struct kmem_cache *hardwareCache=kmem_cache_create(
            "IntelClockCache",
            OCCURENCES * sizeof(TSC_STRUCT), 0,
            SLAB_HWCACHE_ALIGN, NULL);
TSC_STRUCT *TSC=kmem_cache_alloc(hardwareCache, GFP_KERNEL);
unsigned int loop=0, best=0, top=0;

// No preemption, no interrupt.
unsigned long flags;
preempt_disable();
raw_local_irq_save(flags);
// Warm-up
RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
// Overhead
RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
overhead=TSC[loop].V[1] - TSC[loop].V[0];
// Pick-up
for(loop=0; loop < OCCURENCES; loop++)
{
    RDCOUNTER(TSC[loop].V[0], MSR_IA32_TSC);
    udelay(100);
    RDCOUNTER(TSC[loop].V[1], MSR_IA32_TSC);
}
// Restore interrupt and preemption.
raw_local_irq_restore(flags);
preempt_enable();

for(loop=0; loop < OCCURENCES; loop++)
    TSC[loop].D=TSC[loop].V[1] - TSC[loop].V[0] - overhead;
for(loop=0; loop < OCCURENCES; loop++) {
    unsigned int inner=0, count=0;
    for(inner=loop; inner < OCCURENCES; inner++)
        if(TSC[loop].D == TSC[inner].D)
            count++;
    if((count > top)
    ||((count == top) && (TSC[loop].D < TSC[best].D))) {
        top=count;
        best=loop;
    }
/*  printk("%3u x D[%02u]=%llu\t%llu - %llu\n",
    count, loop, TSC[loop].D, TSC[loop].V[1], TSC[loop].V[0]); */
}
printk("Overhead=%llu\tBest=%llu\n", overhead, TSC[best].D);

clock->Q=TSC[best].D / (ratio * PRECISION);
clock->R=TSC[best].D % (ratio * PRECISION);

kmem_cache_free(hardwareCache, TSC);
kmem_cache_destroy(hardwareCache);

complete_and_exit(&bclk_job_complete, 0);
}

0 个答案:

没有答案