1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
11
/* This variant of nsIPerfMeasurement uses the perf_event interface
 * added in Linux 2.6.31.  We key compilation of this file off the
 * existence of <linux/perf_event.h>.
 */
16
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
24
// As of July 2010, this system call has not been added to the
// C library, so we have to provide our own wrapper function.
// If this code runs on a kernel that does not implement the
// system call (2.6.30 or older) nothing unpredictable will
// happen - it will just always fail and return -1.
//
// All arguments are forwarded unchanged to the raw system call.
// Returns whatever the system call returns, narrowed to int: callers
// in this file treat -1 as failure and anything else as an open
// file descriptor.
static int
sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    // syscall() is declared as returning long; the value here is either
    // a descriptor or -1, so the narrowing is made explicit.
    return static_cast<int>(syscall(__NR_perf_event_open, attr, pid, cpu,
                                    group_fd, flags));
}
38
using JS::PerfMeasurement;
39
typedef PerfMeasurement::EventMask EventMask;
41
// Additional state required by this implementation.
44
// Each active counter corresponds to an open file descriptor.
47
int f_cache_references;
49
int f_branch_instructions;
53
int f_major_page_faults;
54
int f_context_switches;
57
// Counter group leader, for Start and Stop.
60
// Whether counters are running.
66
EventMask init(EventMask toMeasure);
68
void stop(PerfMeasurement* counters);
71
// Mapping from our event bitmask to codes passed into the kernel, and
72
// to fields in the PerfMeasurement and PerfMeasurement::impl structures.
78
uint64_t PerfMeasurement::* counter;
80
} kSlots[PerfMeasurement::NUM_MEASURABLE_EVENTS] = {
81
#define HW(mask, constant, fieldname) \
82
{ PerfMeasurement::mask, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##constant, \
83
&PerfMeasurement::fieldname, &Impl::f_##fieldname }
84
#define SW(mask, constant, fieldname) \
85
{ PerfMeasurement::mask, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##constant, \
86
&PerfMeasurement::fieldname, &Impl::f_##fieldname }
88
HW(CPU_CYCLES, CPU_CYCLES, cpu_cycles),
89
HW(INSTRUCTIONS, INSTRUCTIONS, instructions),
90
HW(CACHE_REFERENCES, CACHE_REFERENCES, cache_references),
91
HW(CACHE_MISSES, CACHE_MISSES, cache_misses),
92
HW(BRANCH_INSTRUCTIONS, BRANCH_INSTRUCTIONS, branch_instructions),
93
HW(BRANCH_MISSES, BRANCH_MISSES, branch_misses),
94
HW(BUS_CYCLES, BUS_CYCLES, bus_cycles),
95
SW(PAGE_FAULTS, PAGE_FAULTS, page_faults),
96
SW(MAJOR_PAGE_FAULTS, PAGE_FAULTS_MAJ, major_page_faults),
97
SW(CONTEXT_SWITCHES, CONTEXT_SWITCHES, context_switches),
98
SW(CPU_MIGRATIONS, CPU_MIGRATIONS, cpu_migrations),
107
f_cache_references(-1),
109
f_branch_instructions(-1),
113
f_major_page_faults(-1),
114
f_context_switches(-1),
115
f_cpu_migrations(-1),
123
// Close all active counter descriptors. Take care to do the group
124
// leader last (this may not be necessary, but it's unclear what
125
// happens if you close the group leader out from under a group).
126
for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
127
int fd = this->*(kSlots[i].fd);
128
if (fd != -1 && fd != group_leader)
132
if (group_leader != -1)
137
Impl::init(EventMask toMeasure)
139
JS_ASSERT(group_leader == -1);
143
EventMask measured = EventMask(0);
144
struct perf_event_attr attr;
145
for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
146
if (!(toMeasure & kSlots[i].bit))
149
memset(&attr, 0, sizeof(attr));
150
attr.size = sizeof(attr);
152
// Set the type and config fields to indicate the counter we
153
// want to enable. We want read format 0, and we're not using
154
// sampling, so leave those fields unset.
155
attr.type = kSlots[i].type;
156
attr.config = kSlots[i].config;
158
// If this will be the group leader it should start off
159
// disabled. Otherwise it should start off enabled (but blocked
160
// on the group leader).
161
if (group_leader == -1)
164
// The rest of the bit fields are really poorly documented.
165
// For instance, I have *no idea* whether we should be setting
166
// the inherit, inherit_stat, or task flags. I'm pretty sure
167
// we do want to set mmap and comm, and not any of the ones I
168
// haven't mentioned.
172
int fd = sys_perf_event_open(&attr,
176
0 /* no flags presently defined */);
180
measured = EventMask(measured | kSlots[i].bit);
181
this->*(kSlots[i].fd) = fd;
182
if (group_leader == -1)
191
if (running || group_leader == -1)
195
ioctl(group_leader, PERF_EVENT_IOC_ENABLE, 0);
199
Impl::stop(PerfMeasurement* counters)
201
// This scratch buffer is to ensure that we have read all the
202
// available data, even if that's more than we expect.
203
unsigned char buf[1024];
205
if (!running || group_leader == -1)
208
ioctl(group_leader, PERF_EVENT_IOC_DISABLE, 0);
211
// read out and reset all the counter values
212
for (int i = 0; i < PerfMeasurement::NUM_MEASURABLE_EVENTS; i++) {
213
int fd = this->*(kSlots[i].fd);
217
if (read(fd, buf, sizeof(buf)) == sizeof(uint64_t)) {
219
memcpy(&cur, buf, sizeof(uint64_t));
220
counters->*(kSlots[i].counter) += cur;
223
// Reset the counter regardless of whether the read did what
225
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
229
} // anonymous namespace
234
#define initCtr(flag) ((eventsMeasured & flag) ? 0 : -1)
236
PerfMeasurement::PerfMeasurement(PerfMeasurement::EventMask toMeasure)
237
: impl(OffTheBooks::new_<Impl>()),
238
eventsMeasured(impl ? static_cast<Impl*>(impl)->init(toMeasure)
240
cpu_cycles(initCtr(CPU_CYCLES)),
241
instructions(initCtr(INSTRUCTIONS)),
242
cache_references(initCtr(CACHE_REFERENCES)),
243
cache_misses(initCtr(CACHE_MISSES)),
244
branch_instructions(initCtr(BRANCH_INSTRUCTIONS)),
245
branch_misses(initCtr(BRANCH_MISSES)),
246
bus_cycles(initCtr(BUS_CYCLES)),
247
page_faults(initCtr(PAGE_FAULTS)),
248
major_page_faults(initCtr(MAJOR_PAGE_FAULTS)),
249
context_switches(initCtr(CONTEXT_SWITCHES)),
250
cpu_migrations(initCtr(CPU_MIGRATIONS))
256
// Destroy the implementation state allocated by the constructor.
PerfMeasurement::~PerfMeasurement()
{
    js::Foreground::delete_(static_cast<Impl*>(impl));
}
262
PerfMeasurement::start()
265
static_cast<Impl*>(impl)->start();
269
PerfMeasurement::stop()
272
static_cast<Impl*>(impl)->stop(this);
276
PerfMeasurement::reset()
278
for (int i = 0; i < NUM_MEASURABLE_EVENTS; i++) {
279
if (eventsMeasured & kSlots[i].bit)
280
this->*(kSlots[i].counter) = 0;
282
this->*(kSlots[i].counter) = -1;
287
PerfMeasurement::canMeasureSomething()
289
// Find out if the kernel implements the performance measurement
290
// API. If it doesn't, syscall(__NR_perf_event_open, ...) is
291
// guaranteed to return -1 and set errno to ENOSYS.
293
// We set up input parameters that should provoke an EINVAL error
294
// from a kernel that does implement perf_event_open, but we can't
295
// be sure it will (newer kernels might add more event types), so
296
// we have to take care to close any valid fd it might return.
298
struct perf_event_attr attr;
299
memset(&attr, 0, sizeof(attr));
300
attr.size = sizeof(attr);
301
attr.type = PERF_TYPE_MAX;
303
int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
308
return errno != ENOSYS;