我的CPU支持constant_tsc和nonstop_tsc这两个标志:
$ grep -m 1 ^flags /proc/cpuinfo | sed 's/ /\n/g' | egrep "constant_tsc|nonstop_tsc"
constant_tsc
nonstop_tsc
第1步:计算tsc的滴答率:
我将_ticks_per_ns
计算为多次观察的中位数。我使用rdtscp
来确保按顺序执行。
static const int trials = 13;
std::array<double, trials> rates;
for (int i = 0; i < trials; ++i)
{
timespec beg_ts, end_ts;
uint64_t beg_tsc, end_tsc;
clock_gettime(CLOCK_MONOTONIC, &beg_ts);
beg_tsc = rdtscp();
uint64_t elapsed_ns;
do
{
clock_gettime(CLOCK_MONOTONIC, &end_ts);
end_tsc = rdtscp();
elapsed_ns = to_ns(end_ts - beg_ts); // calculates ns between two timespecs
}
while (elapsed_ns < 10 * 1e6); // busy spin for 10ms
rates[i] = (double)(end_tsc - beg_tsc) / (double)elapsed_ns;
}
std::nth_element(rates.begin(), rates.begin() + trials/2, rates.end());
_ticks_per_ns = rates[trials/2];
第2步:计算起始挂钟时间和tsc
uint64_t beg, end;
timespec ts;
// loop to ensure we aren't interrupted between the two tsc reads
while (1)
{
beg = rdtscp();
clock_gettime(CLOCK_REALTIME, &ts);
end = rdtscp();
if ((end - beg) <= 2000) // max ticks per clock call
break;
}
_start_tsc = end;
_start_clock_time = to_ns(ts); // converts timespec to ns since epoch
第3步:创建一个可以从tsc返回挂钟时间的函数
/// Converts a raw TSC reading into wall-clock nanoseconds since the epoch by
/// linear extrapolation from the reference point (_start_tsc, _start_clock_time)
/// using the calibrated rate _ticks_per_ns.
///
/// @param tsc  TSC value to convert; may be earlier than _start_tsc.
/// @return     Estimated nanoseconds since the epoch for that TSC value.
uint64_t tsc_to_ns(uint64_t tsc)
{
    // Signed, so a reading taken before the reference point gives a negative offset.
    const int64_t tick_diff = static_cast<int64_t>(tsc - _start_tsc);

    // Convert only the (small) offset through floating point. Adding the epoch
    // value itself as a double would round the result: a double has a 53-bit
    // mantissa, so values around 1.5e18 ns are only representable in steps of
    // 256 ns, which quantizes every returned timestamp.
    const int64_t ns_offset = static_cast<int64_t>(tick_diff / _ticks_per_ns);

    // Unsigned addition wraps modulo 2^64, so a negative offset subtracts correctly.
    return _start_clock_time + static_cast<uint64_t>(ns_offset);
}
第4步:在循环中运行,从clock_gettime
和rdtscp
打印挂钟时间
// lock the test to a single core
// Pin the test to a single core (CPU 6) so every TSC read comes from the same core.
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(6, &mask);
sched_setaffinity(0, sizeof(cpu_set_t), &mask);
// Every 10 seconds, print the wall-clock time from clock_gettime next to the
// time extrapolated from the TSC, together with their difference.
while (1)
{
    timespec utc_now;
    clock_gettime(CLOCK_REALTIME, &utc_now);
    uint64_t utc_ns = to_ns(utc_now);
    uint64_t tsc_ns = tsc_to_ns(rdtscp());
    // Signed difference: if the TSC estimate falls behind the wall clock the
    // result must print as a negative number, not as a wrapped ~1.8e19 value.
    int64_t ns_diff = static_cast<int64_t>(tsc_ns - utc_ns);
    std::cout << "clock_gettime " << ns_to_str(utc_ns) << '\n';
    std::cout << "tsc_time " << ns_to_str(tsc_ns) << " diff=" << ns_diff << "ns\n";
    sleep(10);
}
输出:
clock_gettime 11:55:34.824419837
tsc_time 11:55:34.824419840 diff=3ns
clock_gettime 11:55:44.826260245
tsc_time 11:55:44.826260736 diff=491ns
clock_gettime 11:55:54.826516358
tsc_time 11:55:54.826517248 diff=890ns
clock_gettime 11:56:04.826683578
tsc_time 11:56:04.826684672 diff=1094ns
clock_gettime 11:56:14.826853056
tsc_time 11:56:14.826854656 diff=1600ns
clock_gettime 11:56:24.827013478
tsc_time 11:56:24.827015424 diff=1946ns
问题:
很明显,用这两种方式计算出的时间会迅速产生偏差。
我原本以为constant_tsc和nonstop_tsc意味着TSC的频率是恒定的。
是板载时钟在漂移吗?它应该不会以这么快的速度漂移吧?
这种漂移的原因是什么?
我可以做些什么来让两者保持同步(除了频繁地按步骤2重新计算_start_tsc和_start_clock_time之外)?
答案 0 :(得分:3)
OP中出现漂移的原因,至少在我的机器上,是每ns的TSC滴答数偏离了它最初测得的值_ticks_per_ns。以下结果来自这台机器:
don@HAL:~/UNIX/OS/3EZPcs/Ch06$ uname -a
Linux HAL 4.4.0-81-generic #104-Ubuntu SMP Wed Jun 14 08:17:06 UTC 2017 x86_64 x86_64 x86_64 GNU/Linux
don@HAL:~/UNIX/OS/3EZPcs/Ch06$ cat /sys/devices/system/clocksource/clocksource0/current_clocksource
tsc
cat /proc/cpuinfo的输出中包含constant_tsc和nonstop_tsc这两个标志。
可以运行viewRates.cc来查看计算机上当前每ns的TSC滴答数:
rdtscp.h:
// Reads the time-stamp counter at the START of a measured region.
// Executes `cpuid` followed by `rdtsc` (note: despite the function name this is
// rdtsc, not rdtscp) and returns the 64-bit counter assembled from EDX:EAX.
// The cpuid-before-rdtsc ordering follows the Intel benchmarking paper
// referenced in this header, where cpuid acts as a barrier before the read.
// NOTE(review): EAX is not set before cpuid, so the queried leaf is whatever
// the register happens to hold - presumably irrelevant here; confirm.
static inline unsigned long rdtscp_start(void) {
unsigned long var;
unsigned int hi, lo;
// Outputs: lo <- EAX, hi <- EDX. RBX and RCX are declared clobbered (cpuid writes them).
__asm volatile ("cpuid\n\t"
"rdtsc\n\t" : "=a" (lo), "=d" (hi)
:: "%rbx", "%rcx");
// Combine the high and low 32-bit halves into one value.
var = ((unsigned long)hi << 32) | lo;
return (var);
}
// Reads the time-stamp counter at the END of a measured region.
// Executes `rdtscp`, copies EDX/EAX out into the output operands, and only then
// executes `cpuid`; returns the 64-bit counter assembled from the two halves.
// The rdtscp-then-cpuid ordering follows the Intel benchmarking paper
// referenced in this header.
static inline unsigned long rdtscp_end(void) {
unsigned long var;
unsigned int hi, lo;
// %0 is lo and %1 is hi; both are saved before cpuid overwrites EAX..EDX.
// RAX, RBX, RCX and RDX are all declared clobbered.
__asm volatile ("rdtscp\n\t"
"mov %%edx, %1\n\t"
"mov %%eax, %0\n\t"
"cpuid\n\t" : "=r" (lo), "=r" (hi)
:: "%rax", "%rbx", "%rcx", "%rdx");
// Combine the high and low 32-bit halves into one value.
var = ((unsigned long)hi << 32) | lo;
return (var);
}
/*see https://www.intel.com/content/www/us/en/embedded/training/ia-32-ia-64-benchmark-code-execution-paper.html
*/
viewRates.cc:
#include <time.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include "rdtscp.h"
using std::cout; using std::cerr; using std::endl;
#define CLOCK CLOCK_REALTIME
uint64_t to_ns(const timespec &ts); // Converts a struct timespec to ns (since epoch).
void view_ticks_per_ns(int runs =10, int sleep =10);
// Entry point of viewRates: accepts either no arguments (10 runs of 10 seconds)
// or exactly two, RUNS and SLEEP, and then prints the measured tick rates.
int main(int argc, char **argv) {
    const bool no_args = (argc == 1);
    const bool both_args = (argc == 3);

    // Any other argument count is a usage error.
    if (!no_args && !both_args) {
        cerr << "Usage: " << argv[0] << " [ RUNS SLEEP ] \n";
        exit(1);
    }

    const int run_count = both_args ? std::atoi(argv[1]) : 10;
    const int sleep_secs = both_args ? std::atoi(argv[2]) : 10;
    view_ticks_per_ns(run_count, sleep_secs);
}
// Prints the clock resolution, a header line, and then RUNS lines of
// "<elapsed ticks> <elapsed ns> <ticks per ns>".
//   RUNS  - number of samples to print.
//   SLEEP - length in seconds of the sleep that separates the start and end
//           readings of each sample.
void view_ticks_per_ns(int RUNS, int SLEEP) {
// Prints out stream of RUNS tsc ticks per ns, each calculated over a SLEEP secs interval.
timespec clock_start, clock_end;
unsigned long tsc1, tsc2, tsc_start, tsc_end;
unsigned long elapsed_ns, elapsed_ticks;
double rate; // ticks per ns from each run.
// clock_start is reused here as scratch storage for the resolution query.
clock_getres(CLOCK, &clock_start);
cout << "Clock resolution: " << to_ns(clock_start) << "ns\n\n";
cout << " tsc ticks " << "ns " << " tsc ticks per ns\n";
for (int i = 0; i < RUNS; ++i) {
// Bracket the clock read with two TSC reads and use their midpoint as the
// TSC value that corresponds to clock_start.
tsc1 = rdtscp_start();
clock_gettime(CLOCK, &clock_start);
tsc2 = rdtscp_end();
tsc_start = (tsc1 + tsc2) / 2;
sleep(SLEEP);
// Same bracketing for the end of the interval.
tsc1 = rdtscp_start();
clock_gettime(CLOCK, &clock_end);
tsc2 = rdtscp_end();
tsc_end = (tsc1 + tsc2) / 2;
elapsed_ticks = tsc_end - tsc_start;
elapsed_ns = to_ns(clock_end) - to_ns(clock_start);
rate = static_cast<double>(elapsed_ticks) / elapsed_ns;
cout << elapsed_ticks << " " << elapsed_ns << " " << std::setprecision(12) << rate << endl;
}
}
linearExtrapolator.cc可以运行以重新创建OP的实验:
linearExtrapolator.cc:
#include <time.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <array>
#include "rdtscp.h"
using std::cout; using std::endl; using std::array;
#define CLOCK CLOCK_REALTIME
uint64_t to_ns(const timespec &ts); // Converts a struct timespec to ns (since epoch).
void set_ticks_per_ns(bool set_rate); // Display or set tsc ticks per ns, _ticks_per_ns.
void get_start(); // Sets the 'start' time point: _start_tsc[in ticks] and _start_clock_time[in ns].
uint64_t tsc_to_ns(uint64_t tsc); // Convert tsc ticks since _start_tsc to ns (since epoch) linearly using
// _ticks_per_ns with origin(0) at the 'start' point set by get_start().
uint64_t _start_tsc, _start_clock_time; // The 'start' time point as both tsc tick number, start_tsc, and as
// clock_gettime ns since epoch as _start_clock_time.
double _ticks_per_ns; // Calibrated in set_ticks_per_ns()
// Entry point of linearExtrapolator: calibrates _ticks_per_ns once, records a
// start point, and then for 10 iterations prints how far the TSC-extrapolated
// time has moved away from the clock_gettime time (ns_diff). Each iteration
// ends by re-measuring and printing the current tick rate, which also
// provides the delay between iterations.
int main() {
set_ticks_per_ns(true); // Set _ticks_per_ns as the initial TSC ticks per ns.
uint64_t tsc1, tsc2, tsc_now, tsc_ns, utc_ns;
int64_t ns_diff;
bool first_pass{true};
for (int i = 0; i < 10; ++i) {
timespec utc_now;
if (first_pass) {
// First iteration: establish the start point; both times agree by construction.
get_start(); //Get start time in both ns since epoch (_start_clock_time), and tsc tick number(_start_tsc)
cout << "_start_clock_time: " << _start_clock_time << ", _start_tsc: " << _start_tsc << endl;
utc_ns = _start_clock_time;
tsc_ns = tsc_to_ns(_start_tsc); // == _start_clock_time by definition.
tsc_now = _start_tsc;
first_pass = false;
} else {
// Later iterations: bracket the clock read with two TSC reads and use the midpoint.
tsc1 = rdtscp_start();
clock_gettime(CLOCK, &utc_now);
tsc2 = rdtscp_end();
tsc_now = (tsc1 + tsc2) / 2;
tsc_ns = tsc_to_ns(tsc_now);
utc_ns = to_ns(utc_now);
}
// Signed: negative when the extrapolated time is behind the clock.
ns_diff = tsc_ns - (int64_t)utc_ns;
cout << "elapsed ns: " << utc_ns - _start_clock_time << ", elapsed ticks: " << tsc_now - _start_tsc
<< ", ns_diff: " << ns_diff << '\n' << endl;
set_ticks_per_ns(false); // Display current TSC ticks per ns (does not alter original _ticks_per_ns).
}
}
// Measures the TSC tick rate over RUNS intervals of SLEEP seconds each and
// prints the elapsed ticks, elapsed ns and ticks-per-ns for every run.
//   set_rate - when true, also prints the clock resolution first and stores
//              the last measured rate in the global _ticks_per_ns; when false
//              the measurement is only displayed and _ticks_per_ns is untouched.
// Note that the function sleeps for RUNS * SLEEP seconds in either mode.
void set_ticks_per_ns(bool set_rate) {
constexpr int RUNS {1}, SLEEP{10};
timespec clock_start, clock_end;
uint64_t tsc1, tsc2, tsc_start, tsc_end;
uint64_t elapsed_ns[RUNS], elapsed_ticks[RUNS];
array<double, RUNS> rates; // ticks per ns from each run.
if (set_rate) {
// clock_start is reused here as scratch storage for the resolution query.
clock_getres(CLOCK, &clock_start);
cout << "Clock resolution: " << to_ns(clock_start) << "ns\n";
}
for (int i = 0; i < RUNS; ++i) {
// Bracket each clock read with two TSC reads and use the midpoint as the
// TSC value that corresponds to the clock reading.
tsc1 = rdtscp_start();
clock_gettime(CLOCK, &clock_start);
tsc2 = rdtscp_end();
tsc_start = (tsc1 + tsc2) / 2;
sleep(SLEEP);
tsc1 = rdtscp_start();
clock_gettime(CLOCK, &clock_end);
tsc2 = rdtscp_end();
tsc_end = (tsc1 + tsc2) / 2;
elapsed_ticks[i] = tsc_end - tsc_start;
elapsed_ns[i] = to_ns(clock_end) - to_ns(clock_start);
rates[i] = static_cast<double>(elapsed_ticks[i]) / elapsed_ns[i];
}
cout << " tsc ticks " << "ns " << "tsc ticks per ns" << endl;
for (int i = 0; i < RUNS; ++i)
cout << elapsed_ticks[i] << " " << elapsed_ns[i] << " " << std::setprecision(12) << rates[i] << endl;
// Only the final run's rate is kept (with RUNS == 1 that is the only run).
if (set_rate)
_ticks_per_ns = rates[RUNS-1];
}
// Number of nanoseconds in one second.
constexpr uint64_t BILLION {1000000000};

// Flattens a timespec into a single nanosecond count
// (tv_sec scaled to ns, plus tv_nsec).
uint64_t to_ns(const timespec &ts) {
    const uint64_t whole_seconds_ns = ts.tv_sec * BILLION;
    return whole_seconds_ns + ts.tv_nsec;
}
// Records the 'start' time point in both units: _start_tsc (TSC ticks) and
// _start_clock_time (ns since epoch, read from CLOCK).
void get_start() {
    timespec now;
    uint64_t tsc_before, tsc_after;
    // Repeat the bracketed clock read until the two TSC reads are at most
    // 2000 ticks apart (max ticks per clock call), so we know the read was
    // not interrupted in between.
    do {
        tsc_before = rdtscp_start();
        clock_gettime(CLOCK, &now);
        tsc_after = rdtscp_end();
    } while ((tsc_after - tsc_before) > 2000);
    // Midpoint of the bracket is taken as the TSC value of the clock reading.
    _start_tsc = (tsc_after + tsc_before) / 2;
    _start_clock_time = to_ns(now); // timespec -> ns since epoch
}
// Maps a TSC tick count to absolute nanoseconds. The mapping is a straight
// line through the start point (_start_tsc ticks <-> _start_clock_time ns)
// with slope 1 / _ticks_per_ns.
uint64_t tsc_to_ns(uint64_t tsc) {
    const uint64_t ticks_since_start = tsc - _start_tsc;
    const double ns_since_start = ticks_since_start / _ticks_per_ns;
    return _start_clock_time + static_cast<uint64_t>(ns_since_start);
}
以下是先运行viewRates、紧接着运行linearExtrapolator得到的输出:
don@HAL:~/UNIX/OS/3EZPcs/Ch06$ ./viewRates
Clock resolution: 1ns
tsc ticks ns tsc ticks per ns
28070466526 10000176697 2.8069970538
28070500272 10000194599 2.80699540335
28070489661 10000196097 2.80699392179
28070404159 10000170879 2.80699245029
28070464811 10000197285 2.80699110338
28070445753 10000195177 2.80698978932
28070430538 10000194298 2.80698851457
28070427907 10000197673 2.80698730414
28070409903 10000195492 2.80698611597
28070398177 10000195328 2.80698498942
don@HAL:~/UNIX/OS/3EZPcs/Ch06$ ./linearExtrapolator
Clock resolution: 1ns
tsc ticks ns tsc ticks per ns
28070385587 10000197480 2.8069831264
_start_clock_time: 1497966724156422794, _start_tsc: 4758879747559
elapsed ns: 0, elapsed ticks: 0, ns_diff: 0
tsc ticks ns tsc ticks per ns
28070364084 10000193633 2.80698205596
elapsed ns: 10000247486, elapsed ticks: 28070516229, ns_diff: -3465
tsc ticks ns tsc ticks per ns
28070358445 10000195130 2.80698107188
elapsed ns: 20000496849, elapsed ticks: 56141027929, ns_diff: -10419
tsc ticks ns tsc ticks per ns
28070350693 10000195646 2.80698015186
elapsed ns: 30000747550, elapsed ticks: 84211534141, ns_diff: -20667
tsc ticks ns tsc ticks per ns
28070324772 10000189692 2.80697923105
elapsed ns: 40000982325, elapsed ticks: 112281986547, ns_diff: -34158
tsc ticks ns tsc ticks per ns
28070340494 10000198352 2.80697837242
elapsed ns: 50001225563, elapsed ticks: 140352454025, ns_diff: -50742
tsc ticks ns tsc ticks per ns
28070325598 10000196057 2.80697752704
elapsed ns: 60001465937, elapsed ticks: 168422905017, ns_diff: -70335
^C
viewRates的输出显示,每ns的TSC滴答数正随时间相当快地下降,这对应于上图中的一次急剧下降。与OP中一样,linearExtrapolator的输出显示了clock_gettime()报告的经过ns数,与使用开始时测得的_ticks_per_ns == 2.8069831264把经过的TSC滴答数换算成ns所得到的经过ns数之间的差值。我没有在每次打印elapsed ns、elapsed ticks、ns_diff之间调用sleep(10);,而是用一个10s的窗口重新计算每ns的TSC滴答数;这会打印出当前的tsc ticks per ns比率。可以看出,从viewRates输出中观察到的每ns的TSC滴答数下降的趋势,在linearExtrapolator的整个运行期间一直持续。
将elapsed ticks除以_ticks_per_ns,再减去相应的elapsed ns,就得到ns_diff,例如:(84211534141 / 2.8069831264) - 30000747550 = -20667。但这个值不为0,主要是因为每ns的TSC滴答数发生了漂移。如果我们改用从最近一个10s间隔测得的每ns 2.80698015186个滴答,结果将是:(84211534141 / 2.80698015186) - 30000747550 = 11125。在最近这个10s间隔内累积的额外误差为 -20667 - (-10419) = -10248;当对该间隔使用正确的每ns TSC滴答数时,这一误差几乎消失:(84211534141 - 56141027929) / 2.80698015186 - (30000747550 - 20000496849) = 349。
如果linearExtrapolator是在每ns的TSC滴答数保持恒定的时段运行的,那么其精度将取决于(恒定的)_ticks_per_ns测定得有多准,此时采用例如若干次估计的中位数是值得的。如果_ticks_per_ns存在固定的十亿分之40的偏差,那么预计每10秒会有大约400ns的恒定漂移,因此ns_diff
#include <time.h>
#include <unistd.h>
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <array>
#include "rdtscp.h"
using std::cout; using std::cerr; using std::endl; using std::array;
double get_ticks_per_ns(long &ticks, long &ns); // Get median tsc ticks per ns, ticks and ns.
long ts_to_ns(const timespec &ts);
#define CLOCK CLOCK_REALTIME // clock_gettime() clock to use.
#define TIMESTEP 10
#define NSTEPS 10000
#define RUNS 5 // Number of RUNS and SLEEP interval used for each sample in get_ticks_per_ns().
#define SLEEP 1
int main() {
timespec ts;
clock_getres(CLOCK, &ts);
cerr << "CLOCK resolution: " << ts_to_ns(ts) << "ns\n";
clock_gettime(CLOCK, &ts);
int start_time = ts.tv_sec;
double ticks_per_ns;
int running_elapsed_time = 0; //approx secs since start_time to center of the sampling done by get_ticks_per_ns()
long ticks, ns;
for (int timestep = 0; timestep < NSTEPS; ++timestep) {
clock_gettime(CLOCK, &ts);
ticks_per_ns = get_ticks_per_ns(ticks, ns);
running_elapsed_time = ts.tv_sec - start_time + RUNS * SLEEP / 2;
cout << running_elapsed_time << ' ' << ticks << ' ' << ns << ' '
<< std::setprecision(12) << ticks_per_ns << endl;
sleep(10);
}
}
// Samples the TSC tick rate RUNS times, each over a SLEEP-second interval.
//   ticks - out: median of the elapsed TSC tick counts.
//   ns    - out: median of the elapsed CLOCK nanosecond counts.
//   returns the median of the per-run ticks-per-ns ratios.
// The three medians are selected independently, so `ticks`, `ns` and the
// returned rate do not necessarily come from the same run.
double get_ticks_per_ns(long &ticks, long &ns) {
// get the median over RUNS runs of elapsed tsc ticks, CLOCK ns, and their ratio over a SLEEP secs time interval
timespec clock_start, clock_end;
long tsc_start, tsc_end;
array<long, RUNS> elapsed_ns, elapsed_ticks;
array<double, RUNS> rates; // arrays from each run from which to get medians.
for (int i = 0; i < RUNS; ++i) {
// Both ends read the clock first and the TSC immediately afterwards.
clock_gettime(CLOCK, &clock_start);
tsc_start = rdtscp_end(); // minimizes time between clock_start and tsc_start.
sleep(SLEEP);
clock_gettime(CLOCK, &clock_end);
tsc_end = rdtscp_end();
elapsed_ticks[i] = tsc_end - tsc_start;
elapsed_ns[i] = ts_to_ns(clock_end) - ts_to_ns(clock_start);
rates[i] = static_cast<double>(elapsed_ticks[i]) / elapsed_ns[i];
}
// get medians:
// nth_element leaves the median of each array at index RUNS/2.
std::nth_element(elapsed_ns.begin(), elapsed_ns.begin() + RUNS/2, elapsed_ns.end());
std::nth_element(elapsed_ticks.begin(), elapsed_ticks.begin() + RUNS/2, elapsed_ticks.end());
std::nth_element(rates.begin(), rates.begin() + RUNS/2, rates.end());
ticks = elapsed_ticks[RUNS/2];
ns = elapsed_ns[RUNS/2];
return rates[RUNS/2];
}
// Number of nanoseconds in one second.
constexpr long BILLION {1000000000};

// Flattens a timespec into a single nanosecond count
// (tv_sec scaled to ns, plus tv_nsec).
long ts_to_ns(const timespec &ts) {
    const long whole_seconds_ns = ts.tv_sec * BILLION;
    return whole_seconds_ns + ts.tv_nsec;
}
每10秒会增长/缩小约400ns。
genTimeSeriesofRates.cc可用于生成上述图表的数据: genTimeSeriesofRates.cc:
<pre>
#include <stdio.h>
#include <stdlib.h>
/*
 * Prints the current value of LD_LIBRARY_PATH and then appends pszSuffix to it.
 * Returns 0 on success, or -1 if the extended value does not fit in the local
 * buffer or setenv() fails.
 */
static int print_then_append(const char *pszSuffix) {
    char szBuffer[128];
    const char *psz = getenv("LD_LIBRARY_PATH");
    int nLen;

    if (psz == NULL)
        psz = "";
    /* Bounded formatting: the environment value can be arbitrarily long. */
    nLen = snprintf(szBuffer, sizeof szBuffer, "%s%s", psz, pszSuffix);
    if (nLen < 0 || (size_t)nLen >= sizeof szBuffer) {
        fprintf(stderr, "LD_LIBRARY_PATH is too long to extend with %s\n", pszSuffix);
        return -1;
    }
    printf("LD_LIBRARY_PATH=%s\n", psz);
    /* setenv() copies its arguments, so passing the stack buffer is fine. */
    return setenv("LD_LIBRARY_PATH", szBuffer, 1);
}

/*
 * Demonstrates replacing LD_LIBRARY_PATH with putenv() and then extending it
 * twice with setenv(), printing the value at every step.
 * Returns 0 on success and 1 if any environment update fails.
 */
int main(int argc, char** argv) {
    /* putenv() keeps the pointer it is given instead of copying the string,
       so the argument must be writable storage that outlives the call. */
    static char szInitial[] = "LD_LIBRARY_PATH=/tmp/somedir1";
    const char *psz = getenv("LD_LIBRARY_PATH");

    (void)argc;
    (void)argv;

    /* getenv() returns NULL when the variable is unset; never pass that to %s. */
    printf("Initial LD_LIBRARY_PATH=%s\n", psz ? psz : "(null)");
    if (putenv(szInitial) != 0)
        return 1;
    if (print_then_append(":tmp/somedir2") != 0)
        return 1;
    if (print_then_append(":tmp/somedir3") != 0)
        return 1;
    psz = getenv("LD_LIBRARY_PATH");
    printf("LD_LIBRARY_PATH=%s\n", psz ? psz : "(null)");
    return 0;
}
</pre>
output:
<pre>
Initial LD_LIBRARY_PATH=/tmp/dir1
LD_LIBRARY_PATH=/tmp/somedir1
LD_LIBRARY_PATH=/tmp/somedir1:tmp/somedir2
LD_LIBRARY_PATH=/tmp/somedir1:tmp/somedir2:tmp/somedir3
</pre>
答案 1 :(得分:0)
这是漂移的机载时钟吗?当然它不会以这个速度漂移吗? 不,他们不应该漂移
这种漂移的原因是什么?
NTP服务或运行您的操作系统的类似服务。它们会影响clock_gettime(CLOCK_REALTIME,...);
我能做些什么来让它们保持同步(除非经常在步骤2中重新计算_start_tsc和_start_clock_time)? 是的,你可以缓解这个问题。
1您可以尝试使用CLOCK_MONOTONIC而不是CLOCK_REALTIME。
2您可以根据时间将差异计算为线性函数,并应用它来补偿漂移。但它不会非常可靠,因为时间服务不会将时间调整为线性函数。但它会给你一些更准确的。您可以定期进行重新调整。
你看到的部分漂移可能是因为_ticks_per_ns计算得不够准确。你可以通过多次运行程序来检查:如果结果不可重现,就说明_ticks_per_ns的计算有误差。最好使用统计方法,而不是只取平均值。
另请注意,_ticks_per_ns是使用与TSC相关的CLOCK_MONOTONIC计算的。
接下来,您正在使用CLOCK_REALTIME。它提供系统时间。如果您的系统具有NTP或类似服务,则会调整时间。
你的差异大约是每分钟2微秒。每天0.002 * 24 * 60 = 2.9毫秒。它对CPU时钟来说非常准确。每天3毫秒,每年1秒。
答案 2 :(得分:0)
TSC与类似CLOCK_MONOTONIC
之类的关系不会完全不变。即使您根据CLOCK_MONOTONIC
“校准” TSC,校准也几乎会在完成后立即过期!
他们长期无法保持同步的原因:
CLOCK_MONOTONIC
受NTP时钟速率调整的影响。 NTP将不断检查网络时间,并巧妙地减慢或加快系统时钟的速度以匹配网络时间。这会导致在真实的CLOCK_MONOTONIC
频率下产生某种振荡模式,因此您的校准总是会稍微偏离一点,尤其是下次NTP应用速率调整时。您可以将其与CLOCK_MONOTONIC_RAW
进行比较以消除这种影响。
(2) CLOCK_MONOTONIC
和TSC几乎可以肯定是基于完全不同的底层振荡器。经常说现代操作系统使用TSC进行计时,但这只是对其他一些底层慢速时钟应用一个小的“本地”偏移量以提供非常精确的时间(例如,“慢速时间”可能在每个计时器滴答声中进行更新,然后使用TSC在计时器滴答声之间进行插值)。底层时钟(例如HPET或APIC时钟)决定了CLOCK_MONOTONIC
的长期行为。但是,TSC本身是一个独立的自由运行时钟,它的时钟频率来自另一个振荡器,位于芯片组/主板的不同位置,并且会产生不同的自然波动(特别是对温度变化的响应不同)。这是上面两个中的最基本的(2):这意味着即使没有任何NTP调整(或者如果使用不受其限制的时钟),您也会看到随时间推移的漂移如果基础时钟基于不同的物理振荡器。