// @(#) $Revision: 4.14 $ $Source: /judy/src/apps/benchmark/timeit.h $
//
// Timing and timers header file with example program.
//
// You should compile with one of these defined:
//
//   JU_HPUX_PA     # control register available (via asm()).
//   JU_HPUX_IPF    # TBD, see below.
//   JU_LINUX_IA32  # control register available (via rdtscl()).
//   JU_LINUX_IPF   # control register available (via get_cycles()).
//   JU_WIN_IA32    # uses clock().
//
// Otherwise the default (low-res) timing code, based on gettimeofday(), is
// used.  That mode is only accurate to usecs, and is fuzzy due to syscall
// overhead.
//
// Public macros; the *_HRTm() forms are much faster than the others:
//
//   TIMER_vars(T)   - declare variables to use for timers
//   STARTTm(T)      - start the timer with variable T
//   ENDTm(D,T)      - compute usec from last STARTTm(T), save result in double D
//   START_HRTm(T)   - high-res for short intervals (< 2^32[64] clock ticks) only
//   END_HRTm(D,T)   - high-res for short intervals (< 2^32[64] clock ticks) only
//
// Internal building blocks used by the macros above:
//
//   __START_HRTm(T) - read high-res control register, save in __start_T
//   __END_HRTm(T)   - read high-res control register, save in __stop_T
//   __HRONLY(D,T)   - use high-res clock only: usec between the two readings
//
// Note: The __*_HRTm and __HRONLY macros are only available on platforms with
// control registers for high-res clocks.  On hpux_pa this is a 32-bit register
// and gettimeofday() must be used to handle rollover; on linux_* the register
// is apparently 64 bits wide when read via get_cycles(), so rollover is not an
// issue there.
#include <sys/time.h>   // Win32 uses a whole different paradigm.
#include <unistd.h>     // for getopt(), which Win32 lacks.
// Scale factor applied to a high-res tick count to obtain microseconds;
// defined in timeit.c.  A value of 0.0 means "not yet measured" (see __HRONLY
// below, which fills it in on first use).

extern double USecPerClock;

// __HRONLY(D,T): convert the high-res readings held in __start_T and __stop_T
// (captured by __START_HRTm(T) and __END_HRTm(T)) into elapsed microseconds,
// stored in D.
//
// __HRONLY is used for multiple platforms, but only in cases where there is a
// high-res clock and find_CPU_speed() is available from timeit.c.  The first
// use calls find_CPU_speed() to set USecPerClock; later uses reuse that value.
//
// The body is wrapped in braces so the macro acts as a single statement, for
// example as the body of an unbraced "if".  T is expanded twice.

#define __HRONLY(D,T)                                                   \
        {                                                               \
            if (USecPerClock == 0.0) USecPerClock = find_CPU_speed();   \
            (D) = ((double) (__stop_##T - __start_##T)) * USecPerClock; \
        }
// *********************  HPUX PA  ****************************
//
// Define __START_HRTm and __END_HRTm only, and let later common code add
// STARTTm and ENDTm.
//
// TBD: On hpux_pa or hpux_ipf 64-bit, is CR_IT a 64-bit register?  If so, it's
// unnecessary and wasteful to use gettimeofday() later to watch for rollover.
//
// TBD: JU_HPUX_IPF does not recognize CR_IT ("Undeclared variable 'CR_IT'"),
// so for now treat that platform as having no high-res clock and do not
// include it in this section.

#ifdef JU_HPUX_PA
#include <machine/reg.h>
#include <sys/pstat.h>

double find_CPU_speed(void);    // called by __HRONLY to set USecPerClock.

// Note: On hpux_*, at least older compilers, it is neither necessary nor even
// allowed to mark the __start_* and __stop_* variables as volatile; the
// compiler does not optimize out the code even without it:

// TIMER_vars(T): declare the two high-res readings plus the two
// gettimeofday() stamps for timer T.

#define TIMER_vars(T)                                           \
        register unsigned long __start_##T, __stop_##T;         \
        struct timeval __TVBeg_##T, __TVEnd_##T

// Read control register CR_IT (MFCTL instruction) into __start_T / __stop_T.

#define __START_HRTm(T) _asm("MFCTL", CR_IT, __start_##T)
#define __END_HRTm(T)   _asm("MFCTL", CR_IT, __stop_##T )

#endif // (JU_HPUX_PA)
// *********************  LINUX IA32  **************************
//
// This section defines only the raw __START_HRTm/__END_HRTm readers; it does
// not define STARTTm or ENDTm itself.

#ifdef JU_LINUX_IA32

// NOTE(review): rdtscl() is not declared by any header visible in this file;
// confirm which header this section is expected to include.

double find_CPU_speed(void);    // called by __HRONLY to set USecPerClock.

// TIMER_vars(T): declare the two high-res readings plus the two
// gettimeofday() stamps for timer T.

#define TIMER_vars(T)                                           \
        register unsigned long __start_##T, __stop_##T;         \
        struct timeval __TVBeg_##T, __TVEnd_##T

// Store the current rdtscl() reading in __start_T / __stop_T.

#define __START_HRTm(T) rdtscl(__start_##T)
#define __END_HRTm(T)   rdtscl(__stop_##T)

#endif // JU_LINUX_IA32
// *********************  LINUX_IPF  **************************
//
// Define __START_HRTm and __END_HRTm, and also STARTTm and ENDTm in terms of
// the former (no need for gettimeofday()).

#ifdef JU_LINUX_IPF
#include <asm/timex.h>

double find_CPU_speed(void);    // called by __HRONLY to set USecPerClock.

// Using cycles_t rather than unsigned long [long] should be more portable;
// and, it appears necessary to mark __start_* and __stop_* as volatile so the
// gcc compiler does not optimize out the register access:

#define TIMER_vars(T)                                           \
        register volatile cycles_t __start_##T, __stop_##T;     \
        struct timeval __TVBeg_##T, __TVEnd_##T

// This seems required for linux_ia32:
// NOTE(review): whatever originally followed the comment above is not present
// in this file; confirm against the upstream source.

// Older code (see 4.13) used rdtscl(), but this is not portable and does not
// result in a 64-bit value, unlike get_cycles(), which apparently takes
// advantage of a 64-bit control register on both IA32 and IPF => always
// high-res timing with no rollover issues.  Note, cycles_t is unsigned, so the
// math works even in case of a rollover.

#define __START_HRTm(T) __start_##T = get_cycles()
#define __END_HRTm(T)   __stop_##T  = get_cycles()

// The general-purpose timer is simply the high-res timer on this platform.

#define STARTTm(T)      __START_HRTm(T)
#define ENDTm(D,T)      { __END_HRTm(T); __HRONLY(D,T); }

#endif // JU_LINUX_IPF
// *********************  WIN IA32  *****************************
//
// WIN IA32 has no way to access the control register (?), so define STARTTm
// and ENDTm directly using clock():

#ifdef JU_WIN_IA32
#include <time.h>       // clock(), clock_t, CLOCKS_PER_SEC.

// clock() returns a clock_t, so the begin/end stamps are clock_t objects; a
// struct timeval can be neither assigned from clock() nor subtracted.

#define TIMER_vars(T)   clock_t __TVBeg_##T, __TVEnd_##T

#define STARTTm(T)      __TVBeg_##T = clock()

// ENDTm stores microseconds in D, as documented for ENDTm at the top of this
// file: the elapsed clock() ticks are scaled by 1E6 / CLOCKS_PER_SEC.

#define ENDTm(D,T)      { __TVEnd_##T = clock();                        \
                          (D) = ((double) (__TVEnd_##T - __TVBeg_##T))  \
                                * (1E6 / (double) CLOCKS_PER_SEC); }

#endif // JU_WIN_IA32
// *********************  OTHER  *****************************
//
// Default to using the low-res, slow-access clock only.  This section applies
// when no platform section above has defined TIMER_vars.

#ifndef TIMER_vars

// TIMER_vars(T): declare the begin/end gettimeofday() stamps for timer T.

#define TIMER_vars(T)   struct timeval __TVBeg_##T, __TVEnd_##T

// STARTTm(T): record the current time of day as the start of interval T.

#define STARTTm(T)      gettimeofday(&__TVBeg_##T, NULL)

// ENDTm(D,T): record the current time of day and store in D the microseconds
// elapsed since STARTTm(T).  The body is wrapped in braces so that both
// statements stay together when the macro is the body of an unbraced "if".

#define ENDTm(D,T)                                                      \
        {                                                               \
            gettimeofday(&__TVEnd_##T, NULL);                           \
            (D) = (((double) __TVEnd_##T.tv_sec * ((double) 1E6))       \
                   + (double) __TVEnd_##T.tv_usec)                      \
                - (((double) __TVBeg_##T.tv_sec * ((double) 1E6))       \
                   + (double) __TVBeg_##T.tv_usec);                     \
        }

#endif // ! TIMER_vars
// COMMON CODE FOR SYSTEMS WITH HIGH-RES CLOCKS (CONTROL REGISTERS):

#ifdef __START_HRTm

// Platforms that define __START_HRTm but not STARTTm (hpux_pa and linux_ia32
// in this file) use gettimeofday() for the low-res clock and
// __START_HRTm/__END_HRTm for the high-res clock.  If the low-res clock did
// not "roll over", use the high-res clock; see __HRONLY.
//
// Note: Rollover is defined conservatively as 1E5 usec (= 1/10 sec).  This
// would require a 40 GHz 32-bit system to be violated.

#ifndef STARTTm

// STARTTm(T): take the low-res stamp first, then the high-res reading, so the
// high-res interval is nested inside the low-res one.

#define STARTTm(T)                                                      \
        {                                                               \
            gettimeofday(&__TVBeg_##T, NULL); __START_HRTm(T);          \
        }

// ENDTm(D,T): take the high-res reading, then the low-res stamp; compute the
// low-res elapsed usec into D, and if that is under the rollover threshold
// replace it with the high-res result from __HRONLY.

#define ENDTm(D,T)                                                      \
        {                                                               \
            __END_HRTm(T); gettimeofday(&__TVEnd_##T, NULL);            \
                                                                        \
            (D) = (((double) __TVEnd_##T.tv_sec * ((double) 1E6))       \
                   + (double) __TVEnd_##T.tv_usec)                      \
                - (((double) __TVBeg_##T.tv_sec * ((double) 1E6))       \
                   + (double) __TVBeg_##T.tv_usec);                     \
                                                                        \
            if ((D) < 1E5) __HRONLY(D,T);                               \
        }

#endif // ! STARTTm

// Faster forms for heavy/frequent use in code loops where intervals are less
// than 2^32[64] clock ticks:

#define START_HRTm(T)   __START_HRTm(T)
#define END_HRTm(D,T)   { __END_HRTm(T); __HRONLY(D,T); }

#endif // __START_HRTm

#endif // #ifndef _TIMEIT_H