3
* Copyright (C) 2013-2015 Christian Hergert <christian@hergert.me>
5
* This file is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU Lesser General Public License as
7
* published by the Free Software Foundation; either version 3 of the
8
* License, or (at your option) any later version.
10
* This file is distributed in the hope that it will be useful, but
11
* WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Lesser General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program. If not, see <http://www.gnu.org/licenses/>.
28
* EggCounter is a performance counter based on ideas from previous work
29
* on high performance counters. They are not guaranteed to be 100%
30
* correct, but they approach that with no synchronization given new
31
* enough hardware. In particular, we use %ecx from rdtscp (the core id)
32
* to determine which cacheline to increment the counter within.
34
* Given a counter, the value will be split up into NCPU cachelines where
35
* NCPU is the number of cores returned from get_nprocs() (on Linux).
37
* Updating the counter is very cheap, reading back the counter requires
38
* a volatile read of each cacheline. Again, no correctness is guaranteed.
40
* In practice, very few values are lost even during tight competing loops.
41
* A loss can happen when the thread is pre-empted between the %rdtscp
42
* instruction and the addq increment (on x86_64).
48
* To define a counter, you must have support for constructor attributes.
50
* EGG_DEFINE_COUNTER (Symbol, "Category", "Name", "Description")
52
* To increment the counter in a function of your choice (but within the
53
* same module), use EGG_COUNTER_ADD, EGG_COUNTER_INC, EGG_COUNTER_DEC.
55
* EGG_COUNTER_ADD (Symbol, 1);
58
* Architecture Support
59
* ====================
61
* If you are not on x86_64, or are missing the rdtscp instruction, a 64-bit
62
* atomic will be performed using __sync_add_and_fetch(). Clearly, we can
63
* do some more work here to abstract which implementation is used, but we
64
* only support GCC and Clang today, which both have that intrinsic. Some
65
* architectures may not have it (such as 32-bit PPC), but I'm not too
66
* concerned about that at the moment.
68
* The counters are mapped into a shared memory zone using shm_open() and
69
* mmap(). An external program can then discover the available counters
70
* and print them without blocking the target program. It simply must
71
* perform the reads in a volatile manner just like the target process
72
* would need to do for readback.
74
* EggCounterArena provides a helper to walk through the counters in the
75
* shared memory zone: egg_counter_arena_foreach().
77
* You cannot remove a counter once it has been registered.
80
* Accessing Counters Remotely
81
* ===========================
83
* You can access the counters from out of process. By opening the SHM zone
84
* and reading the contents from each cacheline, you can get the approximate
85
* state of the target application without blocking it.
87
* EggCounterArena provides a helper for you to do this.
89
* EggCounterArena *arena;
91
* arena = egg_counter_arena_new_for_pid (other_process_pid);
92
* egg_counter_arena_foreach (arena, my_counter_callback, user_data);
98
* The layout of the shared memory zone is broken into "cells". Each cell
99
* is an approximate cacheline (64-bytes) on modern Intel hardware. Indexes
100
* to data locations are represented in cells to simplify the math and
101
* allow the compiler to know we are working with properly aligned structures.
103
* The base pointer in EggCounter.values is not 64-byte aligned! It is 8-byte
104
* aligned and points to the offset within the cacheline for that counter.
105
* We pack 8 64-bit counters into a single cacheline. This allows us to avoid
106
* an extra MOV instruction when incrementing since we only need to perform
107
* the offset from the base pointer.
109
* The first two cells are the header which contain information about the
110
* underlying shm file and how large the mmap() range should be.
112
* After that, begin the counters.
114
* The counters are laid out in groups of 8 counters.
116
* [8 CounterInfo Structs (128-bytes each)][N_CPU Data Zones (64-byte each)]
118
* See egg-counter.c for more information on the contents of these structures.
121
* Build System Requirements
122
* =========================
124
* We need to know if rdtscp is available at compile time. In an effort
125
* to keep the headers as portable as possible (if that matters here?) we
126
* require that you define HAVE_RDTSCP if the instruction is supported.
128
* An example for autoconf might be similar to the following:
130
* AC_MSG_CHECKING([for fast counters with rdtscp])
133
* #include <x86intrin.h>
134
* int main (int argc, char *argv[]) { int cpu; __builtin_ia32_rdtscp (&cpu); return 0; }]])],
137
* AC_MSG_RESULT([$have_rdtscp])
138
* AS_IF([test "$have_rdtscp" = "yes"],
139
* [CFLAGS="$CFLAGS -DHAVE_RDTSCP"])
145
# include <x86intrin.h>
147
egg_get_current_cpu (void)
150
__builtin_ia32_rdtscp (&cpu);
153
#elif defined(__linux__)
158
/* Branch selected by `#elif defined(__linux__)`: the CPU index used to pick a
 * counter slot is whatever sched_getcpu() reports. */
# define egg_get_current_cpu() sched_getcpu()
160
/* Fallback definition: always reports CPU 0, so every update targets the
 * first value slot. */
# define egg_get_current_cpu() 0
161
/* Tested with #ifdef by EGG_COUNTER_ADD: when defined, the add is performed
 * with __sync_add_and_fetch() on values[0] instead of a plain per-CPU `+=`. */
# define EGG_COUNTER_REQUIRES_ATOMIC 1
165
* EGG_DEFINE_COUNTER:
166
* @Identifier: The symbol name of the counter
167
* @Category: A string category for the counter.
168
* @Name: A string name for the counter.
169
* @Description: A string description for the counter.
171
* |[<!-- language="C" -->
172
* EGG_DEFINE_COUNTER (my_counter, "My", "Counter", "My Counter Description");
175
#define EGG_DEFINE_COUNTER(Identifier, Category, Name, Description) \
176
static EggCounter Identifier##_ctr = { NULL, Category, Name, Description }; \
177
static void Identifier##_ctr_init (void) __attribute__((constructor)); \
179
Identifier##_ctr_init (void) \
181
egg_counter_arena_register (egg_counter_arena_get_default(), &Identifier##_ctr); \
186
* @Identifier: The identifier of the counter.
188
* Increments the counter @Identifier by 1.
190
/* Convenience wrapper: forwards to EGG_COUNTER_ADD with a constant count of one. */
#define EGG_COUNTER_INC(Identifier) \
  EGG_COUNTER_ADD (Identifier, G_GINT64_CONSTANT (1))
194
* @Identifier: The identifier of the counter.
196
* Decrements the counter @Identifier by 1.
198
/* Convenience wrapper: forwards to EGG_COUNTER_SUB with a constant count of one. */
#define EGG_COUNTER_DEC(Identifier) \
  EGG_COUNTER_SUB (Identifier, G_GINT64_CONSTANT (1))
202
* @Identifier: The identifier of the counter.
203
* @Count: the amount to subtract.
205
* Subtracts @Count from the counter identified by @Identifier.
207
/*
 * Subtracts @Count from the counter by adding its negation through
 * EGG_COUNTER_ADD.
 *
 * @Count is widened to gint64 *before* it is negated. Negating first would
 * wrap an unsigned argument (a guint of 1 becomes 4294967295), and the later
 * gint64 cast inside EGG_COUNTER_ADD would then add a large positive number
 * instead of subtracting.
 */
#define EGG_COUNTER_SUB(Identifier, Count) EGG_COUNTER_ADD(Identifier, (-((gint64)(Count))))
211
* @Identifier: The identifier of the counter.
212
* @Count: the amount to add to the counter.
214
* Adds @Count to @Identifier.
216
* This operation is not guaranteed to have full correctness. It tries to find
217
* a happy medium between fast, and accurate. When possible, the %rdtscp
218
* instruction is used to get a cacheline owned by the executing CPU, to avoid
219
* collisions. However, this is not guaranteed as the thread could be swapped
220
* between the calls to %rdtscp and %addq (on 64-bit Intel).
222
* Other platforms have fallbacks which may give different guarantees, such as
223
* using atomic operations (and therefore, memory barriers).
225
* See #EggCounter for more information.
227
#ifdef EGG_COUNTER_REQUIRES_ATOMIC
228
# define EGG_COUNTER_ADD(Identifier, Count) \
230
__sync_add_and_fetch ((gint64 *)&Identifier##_ctr.values[0], ((gint64)(Count))); \
233
# define EGG_COUNTER_ADD(Identifier, Count) \
235
Identifier##_ctr.values[egg_get_current_cpu()].value += ((gint64)(Count)); \
239
/* A single named counter. EGG_DEFINE_COUNTER creates one static instance per
 * counter and initializes it as { NULL, Category, Name, Description }. */
typedef struct _EggCounter EggCounter;
240
/* Opaque in this header: the container counters are registered with
 * (egg_counter_arena_register) and walked through (egg_counter_arena_foreach). */
typedef struct _EggCounterArena EggCounterArena;
241
/* One value slot of a counter; EGG_COUNTER_ADD indexes EggCounter.values by
 * CPU and updates the slot's volatile gint64 `value` field. */
typedef struct _EggCounterValue EggCounterValue;
244
* EggCounterForeachFunc:
245
* @counter: the counter.
246
* @user_data: data supplied to egg_counter_arena_foreach().
248
* Function prototype for callbacks provided to egg_counter_arena_foreach().
250
typedef void (*EggCounterForeachFunc) (EggCounter *counter,
256
EggCounterValue *values;
257
const gchar *category;
259
const gchar *description;
260
} __attribute__ ((aligned(8)));
262
struct _EggCounterValue
264
volatile gint64 value;
266
} __attribute__ ((aligned(8)));
268
/* Returns the arena that EGG_DEFINE_COUNTER's constructor registers counters
 * with. NOTE(review): ownership of the returned pointer is not visible in this
 * header; confirm in egg-counter.c. */
EggCounterArena *egg_counter_arena_get_default (void);
269
/* Creates an arena for the process identified by @pid, for reading that
 * process's counters from outside it (pass the result to
 * egg_counter_arena_foreach()). NOTE(review): failure behaviour is not shown
 * in this header; confirm in egg-counter.c. */
EggCounterArena *egg_counter_arena_new_for_pid (GPid pid);
270
/* Takes @arena and returns an EggCounterArena pointer. NOTE(review): presumably
 * increments a reference count and returns @arena; confirm in egg-counter.c. */
EggCounterArena *egg_counter_arena_ref (EggCounterArena *arena);
271
/* Counterpart of egg_counter_arena_ref(). NOTE(review): presumably drops one
 * reference and releases @arena when none remain; confirm in egg-counter.c. */
void egg_counter_arena_unref (EggCounterArena *arena);
272
/* Registers @counter with @arena. This is the call EGG_DEFINE_COUNTER's
 * constructor makes, passing the default arena and the counter's static
 * instance. The header notes that a counter cannot be removed once it has
 * been registered. */
void egg_counter_arena_register (EggCounterArena *arena,
273
EggCounter *counter);
274
void egg_counter_arena_foreach (EggCounterArena *arena,
275
EggCounterForeachFunc func,
277
/* Resets @counter. NOTE(review): presumably zeroes every per-CPU value slot;
 * confirm in egg-counter.c. */
void egg_counter_reset (EggCounter *counter);
278
/* Reads back @counter as a gint64. NOTE(review): presumably sums a volatile
 * read of each per-CPU slot, so the result is approximate while other threads
 * are updating; confirm in egg-counter.c. */
gint64 egg_counter_get (EggCounter *counter);
282
#endif /* EGG_COUNTER_H */