1
/* Copyright (C) 2008, 2009 Sun Microsystems, Inc
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
17
rdtsc3 -- multi-platform timer code
18
pgulutzan@mysql.com, 2005-08-29
23
my_timer_cycles ulonglong cycles
24
my_timer_nanoseconds ulonglong nanoseconds
25
my_timer_microseconds ulonglong "microseconds"
26
my_timer_milliseconds ulonglong milliseconds
27
my_timer_ticks ulonglong ticks
28
my_timer_init initialization / test
30
We'll call the first 5 functions (the ones that return
31
a ulonglong) "my_timer_xxx" functions.
32
Each my_timer_xxx function returns a 64-bit timing value
33
since an arbitrary 'epoch' start. Since the only purpose
34
is to determine elapsed times, wall-clock time-of-day
35
is not known and not relevant.
37
The my_timer_init function is necessary for initializing.
38
It returns information (underlying routine name,
39
frequency, resolution, overhead) about all my_timer_xxx
40
functions. A program should call my_timer_init once,
41
use the information to decide what my_timer_xxx function
42
to use, and subsequently call that function by function
45
A typical use would be:
46
my_timer_init() ... once, at program start
48
time1= my_timer_xxx() ... time before start
50
time2= my_timer_xxx() ... time after end
51
elapsed_time= (time2 - time1) - overhead
64
#if TIME_WITH_SYS_TIME
66
#include <time.h> /* for clock_gettime */
68
#ifdef HAVE_SYS_TIME_H
70
#elif defined(HAVE_TIME_H)
76
#if defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
77
#include <asm/msr.h> /* for rdtscll */
80
#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
81
#include <sys/timeb.h> /* for ftime */
84
#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
85
#include <sys/times.h> /* for times */
88
#if defined(__NETWARE__)
89
#include <nks/time.h> /* for NXGetTime */
92
#if defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
93
#include <ia64intrin.h> /* for __GetReg */
96
#if defined(__APPLE__) && defined(__MACH__)
97
#include <mach/mach_time.h>
100
#if defined(__SUNPRO_CC) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
101
extern "C" ulonglong my_timer_cycles_il_sparc64();
102
#elif defined(__SUNPRO_CC) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
103
extern "C" ulonglong my_timer_cycles_il_sparc32();
104
#elif defined(__SUNPRO_CC) && defined(__i386) && defined(_ILP32)
105
extern "C" ulonglong my_timer_cycles_il_i386();
106
#elif defined(__SUNPRO_CC) && defined(__x86_64) && defined(_LP64)
107
extern "C" ulonglong my_timer_cycles_il_x86_64();
108
#elif defined(__SUNPRO_C) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
109
ulonglong my_timer_cycles_il_sparc64();
110
#elif defined(__SUNPRO_C) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
111
ulonglong my_timer_cycles_il_sparc32();
112
#elif defined(__SUNPRO_C) && defined(__i386) && defined(_ILP32)
113
ulonglong my_timer_cycles_il_i386();
114
#elif defined(__SUNPRO_C) && defined(__x86_64) && defined(_LP64)
115
ulonglong my_timer_cycles_il_x86_64();
118
#if defined(__INTEL_COMPILER)
120
icc warning #1011 is:
121
missing return statement at end of non-void function
123
#pragma warning (disable:1011)
127
For cycles, we depend on RDTSC for x86 platforms,
128
or on the time base (which is not really a cycle count
129
but a separate counter with less than nanosecond
130
resolution) for most PowerPC platforms, or on
131
gethrtime which is okay for hpux and solaris, or on
132
clock_gettime(CLOCK_SGI_CYCLE) for Irix platforms,
133
or on read_real_time for aix platforms. There is
134
nothing for Alpha platforms, they would be tricky.
137
ulonglong my_timer_cycles(void)
139
#if defined(__GNUC__) && defined(__i386__)
140
/* This works much better if compiled with "gcc -O3". */
142
__asm__ __volatile__ ("rdtsc" : "=A" (result));
144
#elif defined(__SUNPRO_C) && defined(__i386)
146
#elif defined(__GNUC__) && defined(__x86_64__)
148
__asm__ __volatile__ ("rdtsc\n\t" \
149
"shlq $32,%%rdx\n\t" \
151
: "=a" (result) :: "%edx");
153
#elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
159
#elif defined(_WIN32) && defined(_M_IX86)
161
#elif defined(_WIN64) && defined(_M_X64)
162
/* For 64-bit Windows: unsigned __int64 __rdtsc(); */
164
#elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
165
return (ulonglong) __getReg(_IA64_REG_AR_ITC); /* (3116) */
166
#elif defined(__GNUC__) && defined(__ia64__)
169
__asm __volatile__ ("mov %0=ar.itc" : "=r" (result));
172
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
175
__asm __volatile__ ("mftb %0" : "=r" (result));
178
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
181
mftbu means "move from time-base-upper to result".
182
The loop is saying: x1=upper, x2=lower, x3=upper,
183
if x1!=x3 there was an overflow so repeat.
185
unsigned int x1, x2, x3;
189
__asm __volatile__ ( "mftbu %0" : "=r"(x1) );
190
__asm __volatile__ ( "mftb %0" : "=r"(x2) );
191
__asm __volatile__ ( "mftbu %0" : "=r"(x3) );
195
return ( result << 32 ) | x2;
197
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
198
return (my_timer_cycles_il_sparc64());
199
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
200
return (my_timer_cycles_il_sparc32());
201
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
202
/* This is probably redundant for __SUNPRO_C. */
203
return (my_timer_cycles_il_i386());
204
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
205
return (my_timer_cycles_il_x86_64());
206
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
209
__asm __volatile__ ("rd %%tick,%0" : "=r" (result));
212
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
215
ulonglong wholeresult;
221
__asm __volatile__ ("rd %%tick,%1; srlx %1,32,%0" : "=r" (result.splitresult.high), "=r" (result.splitresult.low));
222
return result.wholeresult;
224
#elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
227
clock_gettime(CLOCK_SGI_CYCLE, &tp);
228
return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
230
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
231
/* gethrtime may appear as either cycle or nanosecond counter */
232
return (ulonglong) gethrtime();
238
#if defined(__INTEL_COMPILER)
239
/* re-enable warning#1011 which was only for my_timer_cycles() */
240
/* There may be an icc bug which means we must leave disabled. */
241
#pragma warning (default:1011)
245
For nanoseconds, most platforms have nothing available that
246
(a) doesn't require bringing in a 40-kb librt.so library
247
(b) really has nanosecond resolution.
250
ulonglong my_timer_nanoseconds(void)
252
#if defined(HAVE_READ_REAL_TIME)
255
read_real_time(&tr, TIMEBASE_SZ);
256
return (ulonglong) tr.tb_high * 1000000000 + (ulonglong) tr.tb_low;
258
#elif defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_REALTIME)
261
clock_gettime(CLOCK_REALTIME, &tp);
262
return (ulonglong) tp.tv_sec * 1000000000 + (ulonglong) tp.tv_nsec;
264
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
265
/* SunOS 5.10+, Solaris, HP-UX: hrtime_t gethrtime(void) */
266
return (ulonglong) gethrtime();
267
#elif defined(__NETWARE__)
270
NXGetTime(NX_SINCE_1970, NX_NSECONDS, &tm);
271
return (ulonglong) tm;
273
#elif defined(__APPLE__) && defined(__MACH__)
276
static mach_timebase_info_data_t timebase_info= {0,0};
277
if (timebase_info.denom == 0)
278
(void) mach_timebase_info(&timebase_info);
279
tm= mach_absolute_time();
280
return (tm * timebase_info.numer) / timebase_info.denom;
288
For microseconds, gettimeofday() is available on
289
almost all platforms. On Windows we use
290
QueryPerformanceCounter which will usually tick over
291
3.5 million times per second, and we don't throw
292
away the extra precision. (On Windows Server 2003
293
the frequency is same as the cycle frequency.)
296
ulonglong my_timer_microseconds(void)
298
#if defined(HAVE_GETTIMEOFDAY)
300
static ulonglong last_value= 0;
302
if (gettimeofday(&tv, NULL) == 0)
303
last_value= (ulonglong) tv.tv_sec * 1000000 + (ulonglong) tv.tv_usec;
307
There are reports that gettimeofday(2) can have intermittent failures
308
on some platforms, see for example Bug#36819.
309
We are not trying again or looping, just returning the best value possible
310
under the circumstances ...
316
#elif defined(_WIN32)
318
/* QueryPerformanceCounter usually works with about 1/3 microsecond. */
321
QueryPerformanceCounter(&t_cnt);
322
return (ulonglong) t_cnt.QuadPart;
324
#elif defined(__NETWARE__)
327
NXGetTime(NX_SINCE_1970, NX_USECONDS, &tm);
328
return (ulonglong) tm;
336
For milliseconds, we use ftime() if it's supported
337
or time()*1000 if it's not. With modern versions of
338
Windows and with HP Itanium, resolution is 10-15
342
ulonglong my_timer_milliseconds(void)
344
#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
345
/* ftime() is obsolete but maybe the platform is old */
348
return (ulonglong)ft.time * 1000 + (ulonglong)ft.millitm;
349
#elif defined(HAVE_TIME)
350
return (ulonglong) time(NULL) * 1000;
351
#elif defined(_WIN32)
353
GetSystemTimeAsFileTime( &ft );
354
return ((ulonglong)ft.dwLowDateTime +
355
(((ulonglong)ft.dwHighDateTime) << 32))/10000;
356
#elif defined(__NETWARE__)
359
NXGetTime(NX_SINCE_1970, NX_MSECONDS, &tm);
360
return (ulonglong)tm;
368
For ticks, which we handle with times(), the frequency
369
is usually 100/second and the overhead is surprisingly
370
bad, sometimes even worse than gettimeofday's overhead.
373
ulonglong my_timer_ticks(void)
375
#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
377
struct tms times_buf;
378
return (ulonglong) times(&times_buf);
380
#elif defined(__NETWARE__)
383
NXGetTime(NX_SINCE_BOOT, NX_TICKS, &tm);
384
return (ulonglong) tm;
386
#elif defined(_WIN32)
387
return (ulonglong) GetTickCount();
394
The my_timer_init() function and its sub-functions
395
have several loops which call timers. If there's
396
something wrong with a timer -- which has never
397
happened in tests -- we want the loop to end after
398
an arbitrary number of iterations, and my_timer_info
399
will show a discouraging result. The arbitrary
402
#define MY_TIMER_ITERATIONS 1000000
405
Calculate overhead. Called from my_timer_init().
406
Usually best_timer_overhead = cycles_overhead or
407
nanoseconds_overhead, so returned amount is in
408
cycles or nanoseconds. We repeat the calculation
409
twenty times, so that we can disregard effects of
410
caching or interrupts. Result is quite consistent
411
for cycles, at least. But remember it's a minimum.
414
static void my_timer_init_overhead(ulonglong *overhead,
415
ulonglong (*cycle_timer)(void),
416
ulonglong (*this_timer)(void),
417
ulonglong best_timer_overhead)
419
ulonglong time1, time2;
422
/* *overhead, least of 20 calculations - cycles_overhead */
423
for (i= 0, *overhead= 1000000000; i < 20; ++i)
425
time1= cycle_timer();
426
this_timer(); /* rather than 'time_tmp= timer();' */
427
time2= cycle_timer() - time1;
428
if (*overhead > time2)
431
*overhead-= best_timer_overhead;
435
Calculate Resolution. Called from my_timer_init().
436
If a timer goes up by jumps, e.g. 1050, 1075, 1100, ...
437
then the best resolution is the minimum jump, e.g. 25.
438
If it's always divisible by 1000 then it's just a
439
result of multiplication of a lower-precision timer
440
result, e.g. nanoseconds are often microseconds * 1000.
441
If the minimum jump is less than an arbitrary passed
442
figure (a guess based on maximum overhead * 2), ignore.
443
Usually we end up with nanoseconds = 1 because it's too
444
hard to detect anything <= 100 nanoseconds.
445
Often GetTickCount() has resolution = 15.
446
We don't check with ticks because they take too long.
448
static ulonglong my_timer_init_resolution(ulonglong (*this_timer)(void),
449
ulonglong overhead_times_2)
451
ulonglong time1, time2;
453
int i, jumps, divisible_by_1000, divisible_by_1000000;
455
divisible_by_1000= divisible_by_1000000= 0;
457
for (i= jumps= 0; jumps < 3 && i < MY_TIMER_ITERATIONS * 10; ++i)
468
if (!(time2 % 1000000))
469
++divisible_by_1000000;
471
if (best_jump > time2)
473
/* For milliseconds, one jump is enough. */
474
if (overhead_times_2 == 0)
480
if (jumps == divisible_by_1000000)
482
if (jumps == divisible_by_1000)
485
if (best_jump > overhead_times_2)
491
Calculate cycle frequency by seeing how many cycles pass
492
in a 200-microsecond period. I tried with 10-microsecond
493
periods originally, and the result was often very wrong.
496
static ulonglong my_timer_init_frequency(MY_TIMER_INFO *mti)
499
ulonglong time1, time2, time3, time4;
500
time1= my_timer_cycles();
501
time2= my_timer_microseconds();
502
time3= time2; /* Avoids a Microsoft/IBM compiler warning */
503
for (i= 0; i < MY_TIMER_ITERATIONS; ++i)
505
time3= my_timer_microseconds();
506
if (time3 - time2 > 200) break;
508
time4= my_timer_cycles() - mti->cycles_overhead;
509
time4-= mti->microseconds_overhead;
510
return (mti->microseconds_frequency * (time4 - time1)) / (time3 - time2);
514
Call my_timer_init before the first call to my_timer_xxx().
515
If something must be initialized, it happens here.
516
Set: what routine is being used e.g. "asm_x86"
517
Set: function, overhead, actual frequency, resolution.
520
void my_timer_init(MY_TIMER_INFO *mti)
522
ulonglong (*best_timer)(void);
523
ulonglong best_timer_overhead;
524
ulonglong time1, time2;
528
mti->cycles_frequency= 1000000000;
529
#if defined(__GNUC__) && defined(__i386__)
530
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_X86;
531
#elif defined(__SUNPRO_C) && defined(__i386)
532
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_X86;
533
#elif defined(__GNUC__) && defined(__x86_64__)
534
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_X86_64;
535
#elif defined(HAVE_ASM_MSR_H) && defined(HAVE_RDTSCLL)
536
mti->cycles_routine= MY_TIMER_ROUTINE_RDTSCLL;
537
#elif defined(_WIN32) && defined(_M_IX86)
538
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_X86_WIN;
539
#elif defined(_WIN64) && defined(_M_X64)
540
mti->cycles_routine= MY_TIMER_ROUTINE_RDTSC;
541
#elif defined(__INTEL_COMPILER) && defined(__ia64__) && defined(HAVE_IA64INTRIN_H)
542
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_IA64;
543
#elif defined(__GNUC__) && defined(__ia64__)
544
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_IA64;
545
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (defined(__64BIT__) || defined(_ARCH_PPC64))
546
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_PPC64;
547
#elif defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__) || (defined(_POWER) && defined(_AIX52))) && (!defined(__64BIT__) && !defined(_ARCH_PPC64))
548
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_PPC;
549
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv9) && defined(_LP64) && !defined(__SunOS_5_7)
550
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC64;
551
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__sparcv8plus) && defined(_ILP32) && !defined(__SunOS_5_7)
552
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_SUNPRO_SPARC32;
553
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__i386) && defined(_ILP32)
554
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_SUNPRO_I386;
555
#elif (defined(__SUNPRO_CC) || defined(__SUNPRO_C)) && defined(__x86_64) && defined(_LP64)
556
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_SUNPRO_X86_64;
557
#elif defined(__GNUC__) && defined(__sparcv9) && defined(_LP64) && (__GNUC__>2)
558
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC64;
559
#elif defined(__GNUC__) && defined(__sparc__) && !defined(_LP64) && (__GNUC__>2)
560
mti->cycles_routine= MY_TIMER_ROUTINE_ASM_GCC_SPARC32;
561
#elif defined(__sgi) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
562
mti->cycles_routine= MY_TIMER_ROUTINE_SGI_CYCLE;
563
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
564
mti->cycles_routine= MY_TIMER_ROUTINE_GETHRTIME;
566
mti->cycles_routine= 0;
569
if (!mti->cycles_routine || !my_timer_cycles())
571
mti->cycles_routine= 0;
572
mti->cycles_resolution= 0;
573
mti->cycles_frequency= 0;
574
mti->cycles_overhead= 0;
578
mti->nanoseconds_frequency= 1000000000; /* initial assumption */
579
#if defined(HAVE_READ_REAL_TIME)
580
mti->nanoseconds_routine= MY_TIMER_ROUTINE_READ_REAL_TIME;
581
#elif defined(HAVE_CLOCK_GETTIME)
582
mti->nanoseconds_routine= MY_TIMER_ROUTINE_CLOCK_GETTIME;
583
#elif defined(HAVE_SYS_TIMES_H) && defined(HAVE_GETHRTIME)
584
mti->nanoseconds_routine= MY_TIMER_ROUTINE_GETHRTIME;
585
#elif defined(__NETWARE__)
586
mti->nanoseconds_routine= MY_TIMER_ROUTINE_NXGETTIME;
587
#elif defined(__APPLE__) && defined(__MACH__)
588
mti->nanoseconds_routine= MY_TIMER_ROUTINE_MACH_ABSOLUTE_TIME;
590
mti->nanoseconds_routine= 0;
592
if (!mti->nanoseconds_routine || !my_timer_nanoseconds())
594
mti->nanoseconds_routine= 0;
595
mti->nanoseconds_resolution= 0;
596
mti->nanoseconds_frequency= 0;
597
mti->nanoseconds_overhead= 0;
601
mti->microseconds_frequency= 1000000; /* initial assumption */
602
#if defined(HAVE_GETTIMEOFDAY)
603
mti->microseconds_routine= MY_TIMER_ROUTINE_GETTIMEOFDAY;
604
#elif defined(_WIN32)
607
/* Windows: typical frequency = 3579545, actually 1/3 microsecond. */
608
if (!QueryPerformanceFrequency(&li))
609
mti->microseconds_routine= 0;
612
mti->microseconds_frequency= li.QuadPart;
613
mti->microseconds_routine= MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER;
616
#elif defined(__NETWARE__)
617
mti->microseconds_routine= MY_TIMER_ROUTINE_NXGETTIME;
619
mti->microseconds_routine= 0;
621
if (!mti->microseconds_routine || !my_timer_microseconds())
623
mti->microseconds_routine= 0;
624
mti->microseconds_resolution= 0;
625
mti->microseconds_frequency= 0;
626
mti->microseconds_overhead= 0;
630
mti->milliseconds_frequency= 1000; /* initial assumption */
631
#if defined(HAVE_SYS_TIMEB_H) && defined(HAVE_FTIME)
632
mti->milliseconds_routine= MY_TIMER_ROUTINE_FTIME;
633
#elif defined(_WIN32)
634
mti->milliseconds_routine= MY_TIMER_ROUTINE_GETSYSTEMTIMEASFILETIME;
635
#elif defined(__NETWARE__)
636
mti->milliseconds_routine= MY_TIMER_ROUTINE_NXGETTIME;
637
#elif defined(HAVE_TIME)
638
mti->milliseconds_routine= MY_TIMER_ROUTINE_TIME;
640
mti->milliseconds_routine= 0;
642
if (!mti->milliseconds_routine || !my_timer_milliseconds())
644
mti->milliseconds_routine= 0;
645
mti->milliseconds_resolution= 0;
646
mti->milliseconds_frequency= 0;
647
mti->milliseconds_overhead= 0;
651
mti->ticks_frequency= 100; /* permanent assumption */
652
#if defined(HAVE_SYS_TIMES_H) && defined(HAVE_TIMES)
653
mti->ticks_routine= MY_TIMER_ROUTINE_TIMES;
654
#elif defined(__NETWARE__)
655
mti->ticks_routine= MY_TIMER_ROUTINE_NXGETTIME;
656
#elif defined(_WIN32)
657
mti->ticks_routine= MY_TIMER_ROUTINE_GETTICKCOUNT;
659
mti->ticks_routine= 0;
661
if (!mti->ticks_routine || !my_timer_ticks())
663
mti->ticks_routine= 0;
664
mti->ticks_resolution= 0;
665
mti->ticks_frequency= 0;
666
mti->ticks_overhead= 0;
670
Calculate overhead in terms of the timer that
671
gives the best resolution: cycles or nanoseconds.
672
I doubt it ever will be as bad as microseconds.
674
if (mti->cycles_routine)
675
best_timer= &my_timer_cycles;
678
if (mti->nanoseconds_routine)
680
best_timer= &my_timer_nanoseconds;
683
best_timer= &my_timer_microseconds;
686
/* best_timer_overhead = least of 20 calculations */
687
for (i= 0, best_timer_overhead= 1000000000; i < 20; ++i)
690
time2= best_timer() - time1;
691
if (best_timer_overhead > time2)
692
best_timer_overhead= time2;
694
if (mti->cycles_routine)
695
my_timer_init_overhead(&mti->cycles_overhead,
698
best_timer_overhead);
699
if (mti->nanoseconds_routine)
700
my_timer_init_overhead(&mti->nanoseconds_overhead,
702
&my_timer_nanoseconds,
703
best_timer_overhead);
704
if (mti->microseconds_routine)
705
my_timer_init_overhead(&mti->microseconds_overhead,
707
&my_timer_microseconds,
708
best_timer_overhead);
709
if (mti->milliseconds_routine)
710
my_timer_init_overhead(&mti->milliseconds_overhead,
712
&my_timer_milliseconds,
713
best_timer_overhead);
714
if (mti->ticks_routine)
715
my_timer_init_overhead(&mti->ticks_overhead,
718
best_timer_overhead);
721
Calculate resolution for nanoseconds or microseconds
722
or milliseconds, by seeing if it's always divisible
723
by 1000, and by noticing how much jumping occurs.
724
For ticks, just assume the resolution is 1.
726
if (mti->cycles_routine)
727
mti->cycles_resolution= 1;
728
if (mti->nanoseconds_routine)
729
mti->nanoseconds_resolution=
730
my_timer_init_resolution(&my_timer_nanoseconds, 20000);
731
if (mti->microseconds_routine)
732
mti->microseconds_resolution=
733
my_timer_init_resolution(&my_timer_microseconds, 20);
734
if (mti->milliseconds_routine)
736
if (mti->milliseconds_routine == MY_TIMER_ROUTINE_TIME)
737
mti->milliseconds_resolution= 1000;
739
mti->milliseconds_resolution=
740
my_timer_init_resolution(&my_timer_milliseconds, 0);
742
if (mti->ticks_routine)
743
mti->ticks_resolution= 1;
746
Calculate cycles frequency,
747
if we have both a cycles routine and a microseconds routine.
748
In tests, this usually results in a figure within 2% of
749
what "cat /proc/cpuinfo" says.
750
If the microseconds routine is QueryPerformanceCounter
751
(i.e. it's Windows), and the microseconds frequency is >
752
500,000,000 (i.e. it's Windows Server so it uses RDTSC)
753
and the microseconds resolution is > 100 (i.e. dreadful),
754
then calculate cycles frequency = microseconds frequency.
756
if (mti->cycles_routine
757
&& mti->microseconds_routine)
759
if (mti->microseconds_routine ==
760
MY_TIMER_ROUTINE_QUERYPERFORMANCECOUNTER
761
&& mti->microseconds_frequency > 500000000
762
&& mti->microseconds_resolution > 100)
763
mti->cycles_frequency= mti->microseconds_frequency;
766
ulonglong time1, time2;
767
time1= my_timer_init_frequency(mti);
768
/* Repeat once in case there was an interruption. */
769
time2= my_timer_init_frequency(mti);
770
if (time1 < time2) mti->cycles_frequency= time1;
771
else mti->cycles_frequency= time2;
776
Calculate milliseconds frequency =
777
(cycles-frequency/#-of-cycles) * #-of-milliseconds,
778
if we have both a milliseconds routine and a cycles
780
This will be inaccurate if milliseconds resolution > 1.
781
This is probably only useful when testing new platforms.
783
if (mti->milliseconds_routine
784
&& mti->milliseconds_resolution < 1000
785
&& mti->microseconds_routine
786
&& mti->cycles_routine)
789
ulonglong time1, time2, time3, time4;
790
time1= my_timer_cycles();
791
time2= my_timer_milliseconds();
792
time3= time2; /* Avoids a Microsoft/IBM compiler warning */
793
for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
795
time3= my_timer_milliseconds();
796
if (time3 - time2 > 10) break;
798
time4= my_timer_cycles();
799
mti->milliseconds_frequency=
800
(mti->cycles_frequency * (time3 - time2)) / (time4 - time1);
804
Calculate ticks_frequency =
805
(cycles-frequency/#-of-cycles) * #-of-ticks,
806
if we have both a ticks routine and a cycles
808
This is probably only useful when testing new platforms.
810
if (mti->ticks_routine
811
&& mti->microseconds_routine
812
&& mti->cycles_routine)
815
ulonglong time1, time2, time3, time4;
816
time1= my_timer_cycles();
817
time2= my_timer_ticks();
818
time3= time2; /* Avoids a Microsoft/IBM compiler warning */
819
for (i= 0; i < MY_TIMER_ITERATIONS * 1000; ++i)
821
time3= my_timer_ticks();
822
if (time3 - time2 > 10) break;
824
time4= my_timer_cycles();
825
mti->ticks_frequency=
826
(mti->cycles_frequency * (time3 - time2)) / (time4 - time1);
834
This is for timing, i.e. finding out how long a piece of code
835
takes. If you want time of day matching a wall clock, the
836
my_timer_xxx functions won't help you.
838
The best timer is the one with highest frequency, lowest
839
overhead, and resolution=1. The my_timer_info() routine will tell
840
you at runtime which timer that is. Usually it will be
841
my_timer_cycles() but be aware that, although it's best,
842
it has possible flaws and dangers. Depending on platform:
843
- The frequency might change. We don't test for this. It
844
happens on laptops for power saving, and on blade servers
845
for avoiding overheating.
846
- The overhead that my_timer_init() returns is the minimum.
847
In fact it could be slightly greater because of caching or
848
because you call the routine by address, as recommended.
849
It could be hugely greater if there's an interrupt.
850
- The x86 cycle counter, RDTSC doesn't "serialize". That is,
851
if there is out-of-order execution, rdtsc might be processed
852
after an instruction that logically follows it.
853
(We could force serialization, but that would be slower.)
854
- It is possible to set a flag which renders RDTSC
855
inoperative. Somebody responsible for the kernel
856
of the operating system would have to make this
857
decision. For the platforms we've tested with, there's
859
- With a multi-processor arrangement, it's possible
860
to get the cycle count from one processor in
861
thread X, and the cycle count from another processor
862
in thread Y. They may not always be in synch.
863
- You can't depend on a cycle counter being available for
864
all platforms. On Alphas, the
865
cycle counter is only 32-bit, so it would overflow quickly,
866
so we don't bother with it. On platforms that we haven't
867
tested, there might be some if/endif combination that we
868
didn't expect, or some assembler routine that we didn't
871
The recommended way to use the timer routines is:
872
1. Somewhere near the beginning of the program, call
873
my_timer_init(). This should only be necessary once,
874
although you can call it again if you think that the
875
frequency has changed.
876
2. Determine the best timer based on frequency, resolution,
877
overhead -- all things that my_timer_init() returns.
878
Preserve the address of the timer and the my_timer_info
879
results in an easily-accessible place.
880
3. Instrument the code section that you're monitoring, thus:
881
time1= my_timer_xxx();
883
time2= my_timer_xxx();
884
elapsed_time= (time2 - time1) - overhead;
885
If the timer is always on, then overhead is always there,
886
so don't subtract it.
887
4. Save the elapsed time, or add it to a totaller.
888
5. When all timing processes are complete, transfer the
889
saved / totalled elapsed time to permanent storage.
890
Optionally you can convert cycles to microseconds at
891
this point. (Don't do so every time you calculate
892
elapsed_time! That would waste time and lose precision!)
893
For converting cycles to microseconds, use the frequency
894
that my_timer_init() returns. You'll also need to convert
895
if the my_timer_microseconds() function is the Windows
896
function QueryPerformanceCounter(), since that's sometimes
897
a counter with precision slightly better than microseconds.
899
Since we recommend calls by function pointer, we supply
902
Some comments on the many candidate routines for timing ...
904
clock() -- We don't use because it would overflow frequently.
906
clock_gettime() -- Often we don't use this even when it exists.
907
In configure.in, we use AC_CHECK_FUNCS(clock_gettime). Not
908
AC_CHECK_LIB(rt,clock_gettime)
909
AC_CHECK_FUNCS(clock_gettime)
910
If we had the above lines in configure.in, we'd have to use
911
/usr/lib/librt.so or /usr/lib64/librt.so when linking, and
912
the size of librt.so is 40KB. In tests, clock_gettime often
913
had resolution = 1000.
915
ftime() -- A "man ftime" says: "This function is obsolete.
916
Don't use it." On every platform that we tested, if ftime()
917
was available, then so was gettimeofday(), and gettimeofday()
918
overhead was always at least as good as ftime() overhead.
920
gettimeofday() -- available on most platforms, though not
921
on Windows. There is a hardware timer (sometimes a Programmable
922
Interrupt Timer or "PIT") (sometimes a "HPET") used for
923
interrupt generation. When it interrupts (a "tick" or "jiffy",
924
typically 1 centisecond) it sets xtime. For gettimeofday, a
925
Linux kernel routine usually gets xtime and then gets rdtsc
926
to get elapsed nanoseconds since the last tick. On Red Hat
927
Enterprise Linux 3, there was once a bug which caused the
928
resolution to be 1000, i.e. one centisecond. We never check
929
for time-zone change.
931
getnstimeofday() -- something to watch for in future Linux
933
do_gettimeofday() -- exists on Linux but not for "userland"
935
get_cycles() -- a multi-platform function, worth watching
936
in future Linux versions. But we found platform-specific
937
functions which were better documented in operating-system
938
manuals. And get_cycles() can fail or return a useless
939
32-bit number. It might be available on some platforms,
940
such as arm, which we didn't test. Using
941
"include <linux/timex.h>" or "include <asm/timex.h>"
942
can lead to autoconf or compile errors, depending on system.
944
rdtsc, __rdtsc, rdtscll: available for x86 with Linux, BSD,
945
Solaris, Windows. See "possible flaws and dangers" comments.
947
times(): what we use for ticks. Should just read the last
948
(xtime) tick count, therefore should be fast, but usually
951
GetTickCount(): we use this for my_timer_ticks() on
952
Windows. Actually it really is a tick counter, so resolution
953
>= 10 milliseconds unless you have a very old Windows version.
954
With Windows 95 or 98 or ME, timeGetTime() has better resolution than
955
GetTickCount (1ms rather than 55ms). But with Windows NT or XP or 2000,
956
they're both getting from a variable in the Process Environment Block
957
(PEB), and the variable is set by the programmable interrupt timer, so
958
the resolution is the same (usually 10-15 milliseconds). Also timeGetTime
959
is slower on old machines:
960
http://www.doumo.jp/aon-java/jsp/postgretips/tips.jsp?tips=74.
961
Also timeGetTime requires linking winmm.lib.
962
Therefore we use GetTickCount.
963
It will overflow every 49 days because the return is 32-bit.
964
There is also a GetTickCount64 but it requires Vista or Windows Server 2008.
965
(As for GetSystemTimeAsFileTime, its precision is spurious, it
966
just reads the tick variable like the other functions do.
967
However, we don't expect it to overflow every 49 days, so we
968
will prefer it for my_timer_milliseconds().)
970
QueryPerformanceCounter() we use this for my_timer_microseconds()
971
on Windows. 1-PIT-tick (often 1/3-microsecond). Usually reads
972
the PIT so it's slow. On some Windows variants, uses RDTSC.
974
GetLocalTime() this is available on Windows but we don't use it.
976
getclock(): documented for Alpha, but not found during tests.
978
mach_absolute_time() and UpTime() are recommended for Apple.
979
Initially they weren't tried, because asm_ppc seems to do the job.
980
But now we use mach_absolute_time for nanoseconds.
982
Any clock-based timer can be affected by NTP (ntpd program),
984
- full-second correction can occur for leap second
985
- tiny corrections can occur approximately every 11 minutes
986
(but I think they only affect the RTC which isn't the PIT).
988
We define "precision" as "frequency" and "high precision" is
989
"frequency better than 1 microsecond". We define "resolution"
990
as a synonym for "granularity". We define "accuracy" as
991
"closeness to the truth" as established by some authoritative
992
clock, but we can't measure accuracy.
994
Do not expect any of our timers to be monotonic; we
995
won't guarantee that they return constantly-increasing
998
We tested with AIX, Solaris (x86 + Sparc), Linux (x86 +
999
Itanium), Windows, 64-bit Windows, QNX, FreeBSD, HPUX,
1000
Irix, Mac. We didn't test with NetWare or SCO.